increase context budget: 80% window, 15% journal, no double reserve

Context was trimmed too aggressively — 80% of it was still free after
compaction. The budget was 60% of the window, minus a reserve of 25% of
that budget, leaving only 45% of the window usable.

Now the total budget is 80% of the window (the remaining 20% is the
output reserve), with no extra reserve subtracted on top. The journal
budget goes from 5% to 15% of the window, to carry more context across
compactions.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-02 22:53:54 -04:00
parent 78abf90461
commit d921e76f82
2 changed files with 7 additions and 8 deletions

View file

@ -825,11 +825,11 @@ impl Agent {
}; };
dbg_log!("[journal] cutoff_idx={}", cutoff_idx); dbg_log!("[journal] cutoff_idx={}", cutoff_idx);
// Walk backwards from cutoff, accumulating entries within 5% of context // Walk backwards from cutoff, accumulating entries within 15% of context
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len(); let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
let context_window = crate::thought::context::context_window(); let context_window = crate::thought::context::context_window();
let journal_budget = context_window * 5 / 100; let journal_budget = context_window * 15 / 100;
dbg_log!("[journal] budget={} tokens ({}*5%)", journal_budget, context_window); dbg_log!("[journal] budget={} tokens ({}*15%)", journal_budget, context_window);
let mut entries = Vec::new(); let mut entries = Vec::new();
let mut total_tokens = 0; let mut total_tokens = 0;

View file

@ -20,9 +20,10 @@ pub fn context_window() -> usize {
crate::config::get().api_context_window crate::config::get().api_context_window
} }
/// Context budget in tokens: 60% of the model's context window. /// Context budget in tokens: 80% of the model's context window.
/// The remaining 20% is reserved for model output.
fn context_budget_tokens() -> usize { fn context_budget_tokens() -> usize {
context_window() * 60 / 100 context_window() * 80 / 100
} }
/// Dedup and trim conversation entries to fit within the context budget. /// Dedup and trim conversation entries to fit within the context budget.
@ -61,11 +62,9 @@ pub fn trim_entries(
let identity_cost = count(&context.system_prompt) let identity_cost = count(&context.system_prompt)
+ context.personality.iter().map(|(_, c)| count(c)).sum::<usize>(); + context.personality.iter().map(|(_, c)| count(c)).sum::<usize>();
let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum(); let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum();
let reserve = max_tokens / 4;
let available = max_tokens let available = max_tokens
.saturating_sub(identity_cost) .saturating_sub(identity_cost)
.saturating_sub(journal_cost) .saturating_sub(journal_cost);
.saturating_sub(reserve);
let msg_costs: Vec<usize> = deduped.iter() let msg_costs: Vec<usize> = deduped.iter()
.map(|e| msg_token_count(tokenizer, e.message())).collect(); .map(|e| msg_token_count(tokenizer, e.message())).collect();