increase context budget: 80% window, 15% journal, no double reserve

Context was being trimmed too aggressively: 80% was still free after compaction. The budget was 60% of the window, minus a reserve of 25% of that budget, which left only 45% of the window usable. Now the total budget is 80% of the window (the remaining 20% is the built-in output reserve), with no extra reserve subtracted. The journal budget goes from 5% to 15% of the window to carry more context across compactions.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-02 22:53:54 -04:00 · 2026-04-02 22:53:54 -04:00 · d921e76f82
commit d921e76f82
parent 78abf90461
2 changed files with 7 additions and 8 deletions
--- a/src/agent/runner.rs
+++ b/src/agent/runner.rs
@@ -825,11 +825,11 @@ impl Agent {
        };
        dbg_log!("[journal] cutoff_idx={}", cutoff_idx);

-        // Walk backwards from cutoff, accumulating entries within 5% of context
+        // Walk backwards from cutoff, accumulating entries within 15% of context
        let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
        let context_window = crate::thought::context::context_window();
-        let journal_budget = context_window * 5 / 100;
-        dbg_log!("[journal] budget={} tokens ({}*5%)", journal_budget, context_window);
+        let journal_budget = context_window * 15 / 100;
+        dbg_log!("[journal] budget={} tokens ({}*15%)", journal_budget, context_window);

        let mut entries = Vec::new();
        let mut total_tokens = 0;
--- a/src/thought/context.rs
+++ b/src/thought/context.rs
@@ -20,9 +20,10 @@ pub fn context_window() -> usize {
    crate::config::get().api_context_window
 }

-/// Context budget in tokens: 60% of the model's context window.
+/// Context budget in tokens: 80% of the model's context window.
+/// The remaining 20% is reserved for model output.
 fn context_budget_tokens() -> usize {
-    context_window() * 60 / 100
+    context_window() * 80 / 100
 }

 /// Dedup and trim conversation entries to fit within the context budget.
@@ -61,11 +62,9 @@ pub fn trim_entries(
    let identity_cost = count(&context.system_prompt)
        + context.personality.iter().map(|(_, c)| count(c)).sum::<usize>();
    let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum();
-    let reserve = max_tokens / 4;
    let available = max_tokens
        .saturating_sub(identity_cost)
-        .saturating_sub(journal_cost)
-        .saturating_sub(reserve);
+        .saturating_sub(journal_cost);

    let msg_costs: Vec<usize> = deduped.iter()
        .map(|e| msg_token_count(tokenizer, e.message())).collect();