Simplify trim_entries, kill ContextBudget

trim_entries is now a simple loop:

1. Drop duplicate memories and DMN entries
2. While over budget: if memories > 50% of entry tokens, drop
   lowest-scored memory; otherwise drop oldest conversation entry
3. Snap to user message boundary

ContextBudget is gone — sections already have cached token totals:

- total_tokens() on ContextState replaces budget.total()
- format_budget() on ContextState replaces budget.format()
- trim() takes fixed_tokens: usize (system + identity + journal)

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2026-04-07 20:55:35 -04:00 · 2026-04-07 20:55:35 -04:00 · b892cae2be
commit b892cae2be
parent 62996e27d7
4 changed files with 71 additions and 136 deletions
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@@ -690,21 +690,6 @@ impl Agent {
        self.push_message(Message::tool_result(&call.id, &output));
    }

-    /// Token budget by category — just reads cached section totals.
-    pub fn context_budget(&self) -> context::ContextBudget {
-        let memory: usize = self.context.conversation.entries().iter()
-            .filter(|e| e.entry.is_memory())
-            .map(|e| e.tokens)
-            .sum();
-        let conv_total = self.context.conversation.tokens();
-        context::ContextBudget {
-            system: self.context.system.tokens(),
-            identity: self.context.identity.tokens(),
-            journal: self.context.journal.tokens(),
-            memory,
-            conversation: conv_total - memory,
-        }
-    }

    /// Context state sections — just returns references to the live data.
    pub fn context_sections(&self) -> [&ContextSection; 4] {
@@ -907,8 +892,9 @@ impl Agent {
        self.load_startup_journal();

        // Dedup memory, trim to budget
-        let budget = self.context_budget();
-        self.context.conversation.trim(&budget, &self.tokenizer);
+        let fixed = self.context.system.tokens() + self.context.identity.tokens()
+            + self.context.journal.tokens();
+        self.context.conversation.trim(fixed);

        let after = self.context.conversation.len();
        let after_mem = self.context.conversation.entries().iter()
@@ -920,8 +906,7 @@ impl Agent {
        self.generation += 1;
        self.last_prompt_tokens = 0;

-        let budget = self.context_budget();
-        dbglog!("[compact] budget: {}", budget.format());
+        dbglog!("[compact] budget: {}", self.context.format_budget());
    }

    /// Restore from the conversation log. Builds the context window
@@ -960,7 +945,7 @@ impl Agent {
        self.context.conversation.set_entries(all);
        self.compact();
        // Estimate prompt tokens so status bar isn't 0 on startup
-        self.last_prompt_tokens = self.context_budget().total() as u32;
+        self.last_prompt_tokens = self.context.total_tokens() as u32;
        true
    }