diff --git a/src/agent/runner.rs b/src/agent/runner.rs index df70cad..bd306ae 100644 --- a/src/agent/runner.rs +++ b/src/agent/runner.rs @@ -909,23 +909,24 @@ impl Agent { self.do_compact(); } - /// Internal compaction — rebuilds context window from current messages. - fn do_compact(&mut self) { - let conversation: Vec = self.context.entries.iter() - .map(|e| e.api_message().clone()).collect(); - let messages = crate::thought::context::trim_conversation( + /// Dedup memory entries, trim to fit, reload journal for new time range. + fn trim_and_reload(&mut self, entries: &[ConversationEntry]) { + self.context.entries = crate::thought::context::trim_entries( &self.context, - &conversation, - &self.client.model, + entries, &self.tokenizer, ); - self.context.entries = messages.into_iter() - .map(ConversationEntry::Message).collect(); + self.load_startup_journal(); self.last_prompt_tokens = 0; - self.publish_context_state(); } + /// Internal compaction — dedup memory entries and trim to fit. + fn do_compact(&mut self) { + let entries = self.context.entries.clone(); + self.trim_and_reload(&entries); + } + /// Emergency compaction using stored config — called on context overflow. fn emergency_compact(&mut self) { self.do_compact(); @@ -964,32 +965,14 @@ impl Agent { } }; - // Filter out system messages, keep everything else (including Memory entries) + // Filter out system messages, dedup memory, trim to fit let entries: Vec = entries .into_iter() .filter(|e| e.message().role != Role::System) .collect(); - - // Trim to fit context budget - let n = entries.len(); - let conversation: Vec = entries.iter() - .map(|e| e.api_message().clone()).collect(); - let trimmed = crate::thought::context::trim_conversation( - &self.context, - &conversation, - &self.client.model, - &self.tokenizer, - ); - // Keep only the entries that survived trimming (by count from the end) - let keep = trimmed.len(); - self.context.entries = entries.into_iter() - .skip(n.saturating_sub(keep)) - .collect(); - + self.trim_and_reload(&entries); dbglog!("[restore] {} entries, journal: {} entries", self.context.entries.len(), self.context.journal.len()); - self.last_prompt_tokens = 0; - self.publish_context_state(); true } diff --git a/src/bin/poc-agent.rs b/src/bin/poc-agent.rs index 6fb9061..d53eced 100644 --- a/src/bin/poc-agent.rs +++ b/src/bin/poc-agent.rs @@ -40,14 +40,14 @@ use poc_memory::agent::ui_channel::{ContextInfo, StatusInfo, StreamTarget, UiMes /// Hard compaction threshold — context is rebuilt immediately. /// Uses config percentage of model context window. -fn compaction_threshold(model: &str, app: &AppConfig) -> u32 { - (poc_memory::thought::context::model_context_window(model) as u32) * app.compaction.hard_threshold_pct / 100 +fn compaction_threshold(app: &AppConfig) -> u32 { + (poc_memory::thought::context::context_window() as u32) * app.compaction.hard_threshold_pct / 100 } /// Soft threshold — nudge the model to journal before compaction. /// Fires once; the hard threshold handles the actual rebuild. -fn pre_compaction_threshold(model: &str, app: &AppConfig) -> u32 { - (poc_memory::thought::context::model_context_window(model) as u32) * app.compaction.soft_threshold_pct / 100 +fn pre_compaction_threshold(app: &AppConfig) -> u32 { + (poc_memory::thought::context::context_window() as u32) * app.compaction.soft_threshold_pct / 100 } #[tokio::main] @@ -280,8 +280,8 @@ impl Session { async fn check_compaction(&mut self) { let mut agent_guard = self.agent.lock().await; let tokens = agent_guard.last_prompt_tokens(); - let hard = compaction_threshold(agent_guard.model(), &self.config.app); - let soft = pre_compaction_threshold(agent_guard.model(), &self.config.app); + let hard = compaction_threshold(&self.config.app); + let soft = pre_compaction_threshold(&self.config.app); if tokens > hard { let _ = self.ui_tx.send(UiMessage::Info(format!( @@ -452,7 +452,7 @@ impl Session { let total_chars: usize = msgs.iter().map(|e| e.message().content_text().len()).sum(); let prompt_tokens = agent.last_prompt_tokens(); - let threshold = compaction_threshold(agent.model(), &self.config.app); + let threshold = compaction_threshold(&self.config.app); let _ = self.ui_tx.send(UiMessage::Info(format!( " {} messages, ~{} chars", msgs.len(), diff --git a/src/thought/context.rs b/src/thought/context.rs index 8c82edf..98cdd4c 100644 --- a/src/thought/context.rs +++ b/src/thought/context.rs @@ -15,27 +15,49 @@ pub struct JournalEntry { pub content: String, } -/// Look up a model's context window size in tokens. -pub fn model_context_window(_model: &str) -> usize { +/// Context window size in tokens (from config). +pub fn context_window() -> usize { crate::config::get().api_context_window } /// Context budget in tokens: 60% of the model's context window. -fn context_budget_tokens(model: &str) -> usize { - model_context_window(model) * 60 / 100 +fn context_budget_tokens() -> usize { + context_window() * 60 / 100 } -/// Trim conversation to fit within the context budget. -/// Returns the trimmed conversation messages (oldest dropped first). -pub fn trim_conversation( +/// Dedup and trim conversation entries to fit within the context budget. +/// +/// 1. Dedup: if the same memory key appears multiple times, keep only +/// the latest render (drop the earlier Memory entry and its +/// corresponding assistant tool_call message). +/// 2. Trim: drop oldest entries until the conversation fits, snapping +/// to user message boundaries. +pub fn trim_entries( context: &ContextState, - conversation: &[Message], - model: &str, + entries: &[ConversationEntry], tokenizer: &CoreBPE, -) -> Vec { +) -> Vec { let count = |s: &str| tokenizer.encode_with_special_tokens(s).len(); - let max_tokens = context_budget_tokens(model); + // --- Phase 1: dedup memory entries by key (keep last) --- + let mut seen_keys: std::collections::HashMap<&str, usize> = std::collections::HashMap::new(); + let mut drop_indices: std::collections::HashSet = std::collections::HashSet::new(); + + for (i, entry) in entries.iter().enumerate() { + if let ConversationEntry::Memory { key, .. } = entry { + if let Some(prev) = seen_keys.insert(key.as_str(), i) { + drop_indices.insert(prev); + } + } + } + + let deduped: Vec = entries.iter().enumerate() + .filter(|(i, _)| !drop_indices.contains(i)) + .map(|(_, e)| e.clone()) + .collect(); + + // --- Phase 2: trim to fit context budget --- + let max_tokens = context_budget_tokens(); let identity_cost = count(&context.system_prompt) + context.personality.iter().map(|(_, c)| count(c)).sum::(); let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum(); @@ -45,23 +67,23 @@ pub fn trim_conversation( .saturating_sub(journal_cost) .saturating_sub(reserve); - let msg_costs: Vec = conversation.iter() - .map(|m| msg_token_count(tokenizer, m)).collect(); + let msg_costs: Vec = deduped.iter() + .map(|e| msg_token_count(tokenizer, e.message())).collect(); let total: usize = msg_costs.iter().sum(); let mut skip = 0; let mut trimmed = total; - while trimmed > available && skip < conversation.len() { + while trimmed > available && skip < deduped.len() { trimmed -= msg_costs[skip]; skip += 1; } // Walk forward to user message boundary - while skip < conversation.len() && conversation[skip].role != Role::User { + while skip < deduped.len() && deduped[skip].message().role != Role::User { skip += 1; } - conversation[skip..].to_vec() + deduped[skip..].to_vec() } /// Count the token footprint of a message using BPE tokenization.