diff --git a/src/agent/runner.rs b/src/agent/runner.rs
index 6259db3..ec69412 100644
--- a/src/agent/runner.rs
+++ b/src/agent/runner.rs
@@ -825,11 +825,11 @@ impl Agent {
         };
         dbg_log!("[journal] cutoff_idx={}", cutoff_idx);
 
-        // Walk backwards from cutoff, accumulating entries within 5% of context
+        // Walk backwards from cutoff, accumulating entries within 15% of context
         let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
         let context_window = crate::thought::context::context_window();
-        let journal_budget = context_window * 5 / 100;
-        dbg_log!("[journal] budget={} tokens ({}*5%)", journal_budget, context_window);
+        let journal_budget = context_window * 15 / 100;
+        dbg_log!("[journal] budget={} tokens ({}*15%)", journal_budget, context_window);
 
         let mut entries = Vec::new();
         let mut total_tokens = 0;
diff --git a/src/thought/context.rs b/src/thought/context.rs
index 98cdd4c..3146cd3 100644
--- a/src/thought/context.rs
+++ b/src/thought/context.rs
@@ -20,9 +20,10 @@ pub fn context_window() -> usize {
     crate::config::get().api_context_window
 }
 
-/// Context budget in tokens: 60% of the model's context window.
+/// Context budget in tokens: 80% of the model's context window.
+/// The remaining 20% is reserved for model output.
 fn context_budget_tokens() -> usize {
-    context_window() * 60 / 100
+    context_window() * 80 / 100
 }
 
 /// Dedup and trim conversation entries to fit within the context budget.
@@ -61,11 +62,9 @@ pub fn trim_entries(
     let identity_cost = count(&context.system_prompt)
         + context.personality.iter().map(|(_, c)| count(c)).sum::<usize>();
     let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum();
-    let reserve = max_tokens / 4;
     let available = max_tokens
         .saturating_sub(identity_cost)
-        .saturating_sub(journal_cost)
-        .saturating_sub(reserve);
+        .saturating_sub(journal_cost);
 
     let msg_costs: Vec<usize> = deduped.iter()
         .map(|e| msg_token_count(tokenizer, e.message())).collect();