From 776ac527f144701e0323398d4bcd73b728bf3fa3 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Tue, 7 Apr 2026 19:42:06 -0400
Subject: [PATCH] trim_entries: take ContextBudget instead of recomputing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

compact() already computes context_budget() — pass it to trim_entries
so it has access to all budget components without recomputing them.

Co-Authored-By: Proof of Concept
---
 src/agent/context.rs | 13 ++++---------
 src/agent/mod.rs     |  3 ++-
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/agent/context.rs b/src/agent/context.rs
index 9391ae0..35a178b 100644
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@@ -45,10 +45,11 @@ fn context_budget_tokens() -> usize {
 /// 2. Trim: drop oldest entries until the conversation fits, snapping
 ///    to user message boundaries.
 pub fn trim_entries(
-    context: &ContextState,
     entries: &[ConversationEntry],
     tokenizer: &CoreBPE,
+    budget: &ContextBudget,
 ) -> Vec<ConversationEntry> {
+    let fixed_tokens = budget.system + budget.identity + budget.journal;
     // --- Phase 1: dedup memory entries by key (keep last) ---
     let mut seen_keys: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
     let mut drop_indices: std::collections::HashSet<usize> = std::collections::HashSet::new();
@@ -67,19 +68,13 @@ pub fn trim_entries(
         .collect();
 
     // --- Phase 2: trim to fit context budget ---
-    // Everything in the context window is a message. Count them all,
-    // trim entries until the total fits.
     let max_tokens = context_budget_tokens();
     let count_msg = |m: &Message| msg_token_count(tokenizer, m);
 
-    let fixed_cost = count_msg(&Message::system(&context.system_prompt))
-        + count_msg(&Message::user(context.render_context_message()))
-        + count_msg(&Message::user(render_journal(&context.journal)));
-
     let msg_costs: Vec<usize> = deduped.iter()
         .map(|e| if e.is_log() { 0 } else { count_msg(e.api_message()) }).collect();
     let entry_total: usize = msg_costs.iter().sum();
-    let total: usize = fixed_cost + entry_total;
+    let total: usize = fixed_tokens + entry_total;
 
     let mem_tokens: usize = deduped.iter().zip(&msg_costs)
         .filter(|(e, _)| e.is_memory())
@@ -87,7 +82,7 @@ pub fn trim_entries(
     let conv_tokens: usize = entry_total - mem_tokens;
 
     dbglog!("[trim] max_tokens={} fixed={} mem={} conv={} total={} entries={}",
-        max_tokens, fixed_cost, mem_tokens, conv_tokens, total, deduped.len());
+        max_tokens, fixed_tokens, mem_tokens, conv_tokens, total, deduped.len());
 
     // Phase 2a: evict all DMN entries first — they're ephemeral
     let mut drop = vec![false; deduped.len()];
diff --git a/src/agent/mod.rs b/src/agent/mod.rs
index a6c6dde..1039156 100644
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@@ -987,11 +987,12 @@ impl Agent {
         self.load_startup_journal();
 
         // Dedup memory, trim to budget
+        let budget = self.context_budget();
         let entries = self.context.entries.clone();
         self.context.entries = crate::agent::context::trim_entries(
-            &self.context,
             &entries,
             &self.tokenizer,
+            &budget,
         );
 
         let after = self.context.entries.len();