diff --git a/src/agent/context.rs b/src/agent/context.rs
index a6b38d5..bee049d 100644
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@@ -47,55 +47,44 @@ pub struct ContextPlan {
 /// it's what's happening now. Journal fills the rest, newest first.
 ///
 /// Returns (messages, journal_text) — caller stores journal_text in ContextState.
-pub fn build_context_window(
+/// Trim conversation to fit within the context budget.
+/// Returns the trimmed conversation messages (oldest dropped first).
+pub fn trim_conversation(
     context: &ContextState,
     conversation: &[Message],
     model: &str,
     tokenizer: &CoreBPE,
-) -> (Vec<Message>, String) {
-    let journal_path = journal::default_journal_path();
-    let all_entries = journal::parse_journal(&journal_path);
-    dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
+) -> Vec<Message> {
     let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
-
-    let system_prompt = context.system_prompt.clone();
-    let context_message = context.render_context_message();
-
-    // Cap memory to 50% of the context budget so conversation always
-    // gets space. Truncate at the last complete section boundary.
     let max_tokens = context_budget_tokens(model);
-    let memory_cap = max_tokens / 2;
-    let memory_tokens = count(&context_message);
-    let context_message = if memory_tokens > memory_cap {
-        dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap);
-        truncate_at_section(&context_message, memory_cap, &count)
-    } else {
-        context_message
-    };
-    let recent_start = find_journal_cutoff(conversation, all_entries.last());
-    dbglog!("[ctx] journal cutoff: {} of {} conversation messages are 'recent'",
-        conversation.len() - recent_start, conversation.len());
-    let recent = &conversation[recent_start..];
+    let identity_cost = count(&context.system_prompt)
+        + context.personality.iter().map(|(_, c)| count(c)).sum::<usize>();
+    let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum();
+    let reserve = max_tokens / 4;
+    let available = max_tokens
+        .saturating_sub(identity_cost)
+        .saturating_sub(journal_cost)
+        .saturating_sub(reserve);
 
-    let plan = plan_context(
-        &system_prompt,
-        &context_message,
-        recent,
-        &all_entries,
-        model,
-        &count,
-    );
+    // Trim oldest messages until we fit
+    let msg_costs: Vec<usize> = conversation.iter()
+        .map(|m| msg_token_count(tokenizer, m)).collect();
+    let total: usize = msg_costs.iter().sum();
 
-    let journal_text = render_journal_text(&all_entries, &plan);
-    dbglog!("[ctx] plan: header_start={} full_start={} entry_count={} conv_trim={} journal_text={} chars",
-        plan.header_start, plan.full_start, plan.entry_count, plan.conv_trim, journal_text.len());
+    let mut skip = 0;
+    let mut trimmed = total;
+    while trimmed > available && skip < conversation.len() {
+        trimmed -= msg_costs[skip];
+        skip += 1;
+    }
 
-    let messages = assemble_context(
-        system_prompt, context_message, &journal_text,
-        recent, &plan,
-    );
-    (messages, journal_text)
+    // Walk forward to user message boundary
+    while skip < conversation.len() && conversation[skip].role != Role::User {
+        skip += 1;
+    }
+
+    conversation[skip..].to_vec()
 }
 
 pub fn plan_context(
diff --git a/src/agent/runner.rs b/src/agent/runner.rs
index eb4824c..c145bd2 100644
--- a/src/agent/runner.rs
+++ b/src/agent/runner.rs
@@ -967,7 +967,7 @@ impl Agent {
     fn do_compact(&mut self) {
         let conversation: Vec<Message> = self.context.entries.iter()
             .map(|e| e.api_message().clone()).collect();
-        let (messages, _) = crate::agent::context::build_context_window(
+        let messages = crate::agent::context::trim_conversation(
             &self.context,
             &conversation,
             &self.client.model,
@@ -1030,7 +1030,7 @@ impl Agent {
             .collect();
         dbglog!("[restore] {} messages after filtering system", conversation.len());
 
-        let (messages, _) = crate::agent::context::build_context_window(
+        let messages = crate::agent::context::trim_conversation(
            &self.context,
            &conversation,
            &self.client.model,