// agent.rs — Core agent loop // // The simplest possible implementation of the agent pattern: // send messages + tool definitions to the model, if it responds // with tool calls then dispatch them and loop, if it responds // with text then display it and wait for the next prompt. // // Uses streaming by default so text tokens appear as they're // generated. Tool calls are accumulated from stream deltas and // dispatched after the stream completes. // // The DMN (dmn.rs) is the outer loop that decides what prompts // to send here. This module just handles single turns: prompt // in, response out, tool calls dispatched. use anyhow::Result; use tiktoken_rs::CoreBPE; use crate::agent::api::ApiClient; use crate::agent::journal; use crate::agent::log::ConversationLog; use crate::agent::api::StreamEvent; use crate::agent::tools; use crate::agent::tools::ProcessTracker; use crate::agent::types::*; use crate::agent::ui_channel::{ContextSection, SharedContextState, StatusInfo, StreamTarget, UiMessage, UiSender}; /// Result of a single agent turn. pub struct TurnResult { /// The text response (already sent through UI channel). #[allow(dead_code)] pub text: String, /// Whether the model called yield_to_user during this turn. pub yield_requested: bool, /// Whether any tools (other than yield_to_user) were called. pub had_tool_calls: bool, /// Number of tool calls that returned errors this turn. pub tool_errors: u32, /// Model name to switch to after this turn completes. pub model_switch: Option, /// Agent requested DMN pause (full stop on autonomous behavior). pub dmn_pause: bool, } /// Accumulated state across tool dispatches within a single turn. struct DispatchState { yield_requested: bool, had_tool_calls: bool, tool_errors: u32, model_switch: Option, dmn_pause: bool, } pub struct Agent { client: ApiClient, messages: Vec, tool_defs: Vec, /// Last known prompt token count from the API (tracks context size). 
last_prompt_tokens: u32, /// Shared process tracker for bash tool — lets TUI show/kill running commands. pub process_tracker: ProcessTracker, /// Current reasoning effort level ("none", "low", "high"). pub reasoning_effort: String, /// Persistent conversation log — append-only record of all messages. conversation_log: Option, /// Current context window budget breakdown. pub context_budget: ContextBudget, /// BPE tokenizer for token counting (cl100k_base — close enough /// for Claude and Qwen budget allocation, ~85-90% count accuracy). tokenizer: CoreBPE, /// Mutable context state — personality, working stack, etc. pub context: ContextState, /// Shared live context summary — TUI reads this directly for debug screen. pub shared_context: SharedContextState, /// Stable session ID for memory-search dedup across turns. session_id: String, } impl Agent { pub fn new( client: ApiClient, system_prompt: String, personality: Vec<(String, String)>, conversation_log: Option, shared_context: SharedContextState, ) -> Self { let tool_defs = tools::definitions(); let tokenizer = tiktoken_rs::cl100k_base() .expect("failed to load cl100k_base tokenizer"); let context = ContextState { system_prompt: system_prompt.clone(), personality, journal: String::new(), working_stack: Vec::new(), loaded_nodes: Vec::new(), }; let session_id = format!("poc-agent-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S")); let mut agent = Self { client, messages: Vec::new(), tool_defs, last_prompt_tokens: 0, process_tracker: ProcessTracker::new(), reasoning_effort: "none".to_string(), conversation_log, context_budget: ContextBudget::default(), tokenizer, context, shared_context, session_id, }; // Load recent journal entries at startup for orientation agent.load_startup_journal(); agent.load_working_stack(); agent.push_context(Message::system(system_prompt)); let rendered = agent.context.render_context_message(); if !rendered.is_empty() { agent.push_context(Message::user(rendered)); } if 
!agent.context.journal.is_empty() { agent.push_context(Message::user(agent.context.journal.clone())); } agent.measure_budget(); agent.publish_context_state(); agent } /// Run memory search for a given event, returning any output to inject. /// Direct library call — no subprocess needed since everything is one crate. fn run_hook(&self, event: &str, _prompt: &str) -> Option { let transcript_path = self.conversation_log.as_ref() .map(|l| l.path().to_string_lossy().to_string()) .unwrap_or_default(); let hook_input = serde_json::json!({ "hook_event_name": event, "session_id": self.session_id, "transcript_path": transcript_path, }); let text = crate::memory_search::run_hook(&hook_input.to_string()); if text.trim().is_empty() { None } else { Some(text) } } /// Push a conversation message — stamped and logged. fn push_message(&mut self, mut msg: Message) { msg.stamp(); if let Some(ref log) = self.conversation_log { if let Err(e) = log.append(&msg) { eprintln!("warning: failed to log message: {:#}", e); } } self.messages.push(msg); } /// Push a context-only message (system prompt, identity context, /// journal summaries). Not logged — these are reconstructed on /// every startup/compaction. fn push_context(&mut self, msg: Message) { self.messages.push(msg); } /// Measure context window usage by category. Uses the BPE tokenizer /// for direct token counting (no chars/4 approximation). 
fn measure_budget(&mut self) { let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len(); let mut id_tokens: usize = 0; let mut jnl_tokens: usize = 0; let mut conv_tokens: usize = 0; let mut in_conversation = false; for msg in &self.messages { let tokens = crate::agent::context::msg_token_count(&self.tokenizer, msg); if in_conversation { conv_tokens += tokens; continue; } match msg.role { Role::System => id_tokens += tokens, Role::User => { let text = msg.content_text(); if text.starts_with("[Earlier in this conversation") { jnl_tokens += tokens; } else if text.starts_with("Your context was just rebuilt") { jnl_tokens += tokens; } else if jnl_tokens == 0 && conv_tokens == 0 { // Personality context — part of identity id_tokens += tokens; } else { in_conversation = true; conv_tokens += tokens; } } _ => { in_conversation = true; conv_tokens += tokens; } } } // Memory = nodes loaded during the session via tool calls let mem_tokens: usize = self.context.loaded_nodes.iter() .map(|node| count(&node.render())) .sum(); self.context_budget = ContextBudget { identity_tokens: id_tokens, memory_tokens: mem_tokens, journal_tokens: jnl_tokens, conversation_tokens: conv_tokens, window_tokens: crate::agent::context::model_context_window(&self.client.model), }; } /// Send a user message and run the agent loop until the model /// produces a text response (no more tool calls). Streams text /// and tool activity through the UI channel. 
pub async fn turn( &mut self, user_input: &str, ui_tx: &UiSender, target: StreamTarget, ) -> Result { // Run poc-hook (memory search, notifications, context check) if let Some(hook_output) = self.run_hook("UserPromptSubmit", user_input) { let enriched = format!("{}\n\n\n{}\n", user_input, hook_output); self.push_message(Message::user(enriched)); } else { self.push_message(Message::user(user_input)); } let mut overflow_retries: u32 = 0; let mut empty_retries: u32 = 0; let mut ds = DispatchState { yield_requested: false, had_tool_calls: false, tool_errors: 0, model_switch: None, dmn_pause: false, }; loop { let _ = ui_tx.send(UiMessage::Activity("thinking...".into())); // Stream events from the API — we route each event to the // appropriate UI pane rather than letting the API layer do it. let mut rx = self.client.start_stream( &self.messages, Some(&self.tool_defs), ui_tx, &self.reasoning_effort, None, ); let mut content = String::new(); let mut tool_calls: Vec = Vec::new(); let mut usage = None; let mut finish_reason = None; let mut in_tool_call = false; let mut stream_error = None; let mut first_content = true; // Buffer for content not yet sent to UI — holds a tail // that might be a partial tag. let mut display_buf = String::new(); while let Some(event) = rx.recv().await { match event { StreamEvent::Content(text) => { if first_content { let _ = ui_tx.send(UiMessage::Activity("streaming...".into())); first_content = false; } content.push_str(&text); if in_tool_call { // Already inside a tool call — suppress display. } else { display_buf.push_str(&text); if let Some(pos) = display_buf.find("") { // Flush content before the tag, suppress the rest. let before = &display_buf[..pos]; if !before.is_empty() { let _ = ui_tx.send(UiMessage::TextDelta(before.to_string(), target)); } display_buf.clear(); in_tool_call = true; } else { // Flush display_buf except a tail that could be // a partial "" (10 chars). 
let safe = display_buf.len().saturating_sub(10); if safe > 0 { let flush = display_buf[..safe].to_string(); display_buf = display_buf[safe..].to_string(); let _ = ui_tx.send(UiMessage::TextDelta(flush, target)); } } } } StreamEvent::Reasoning(text) => { let _ = ui_tx.send(UiMessage::Reasoning(text)); } StreamEvent::ToolCallDelta { index, id, call_type, name, arguments } => { while tool_calls.len() <= index { tool_calls.push(ToolCall { id: String::new(), call_type: "function".to_string(), function: FunctionCall { name: String::new(), arguments: String::new() }, }); } if let Some(id) = id { tool_calls[index].id = id; } if let Some(ct) = call_type { tool_calls[index].call_type = ct; } if let Some(n) = name { tool_calls[index].function.name = n; } if let Some(a) = arguments { tool_calls[index].function.arguments.push_str(&a); } } StreamEvent::Usage(u) => usage = Some(u), StreamEvent::Finished { reason, .. } => { finish_reason = Some(reason); break; } StreamEvent::Error(e) => { stream_error = Some(e); break; } } } // Handle stream errors with retry logic if let Some(e) = stream_error { let err = anyhow::anyhow!("{}", e); if crate::agent::context::is_context_overflow(&err) && overflow_retries < 2 { overflow_retries += 1; let _ = ui_tx.send(UiMessage::Info(format!( "[context overflow — compacting and retrying ({}/2)]", overflow_retries, ))); self.emergency_compact(); continue; } if crate::agent::context::is_stream_error(&err) && empty_retries < 2 { empty_retries += 1; let _ = ui_tx.send(UiMessage::Info(format!( "[stream error: {} — retrying ({}/2)]", e, empty_retries, ))); tokio::time::sleep(std::time::Duration::from_secs(2)).await; continue; } return Err(err); } if finish_reason.as_deref() == Some("error") { let detail = if content.is_empty() { "no details".into() } else { content }; return Err(anyhow::anyhow!("model stream error: {}", detail)); } // Flush remaining display buffer (normal responses without tool calls). 
if !in_tool_call && !display_buf.is_empty() { let _ = ui_tx.send(UiMessage::TextDelta(display_buf, target)); } if !content.is_empty() && !in_tool_call { let _ = ui_tx.send(UiMessage::TextDelta("\n".to_string(), target)); } let msg = crate::agent::api::build_response_message(content, tool_calls); // Strip ephemeral tool calls (journal) that the API has // now processed. They're persisted to disk; no need to keep // them in the conversation history burning tokens. self.strip_ephemeral_tool_calls(); if let Some(usage) = &usage { self.last_prompt_tokens = usage.prompt_tokens; self.measure_budget(); self.publish_context_state(); let _ = ui_tx.send(UiMessage::StatusUpdate(StatusInfo { dmn_state: String::new(), // filled by main loop dmn_turns: 0, dmn_max_turns: 0, prompt_tokens: usage.prompt_tokens, completion_tokens: usage.completion_tokens, model: self.client.model.clone(), turn_tools: 0, // tracked by TUI from ToolCall messages context_budget: self.context_budget.status_string(), })); } // Empty response — model returned finish=stop with no content // or tool calls. Inject a nudge so the retry has different input. let has_content = msg.content.is_some(); let has_tools = msg.tool_calls.as_ref().map_or(false, |tc| !tc.is_empty()); if !has_content && !has_tools { if empty_retries < 2 { empty_retries += 1; let _ = ui_tx.send(UiMessage::Debug(format!( "empty response, injecting nudge and retrying ({}/2)", empty_retries, ))); self.push_message(Message::user( "[system] Your previous response was empty. \ Please respond with text or use a tool." )); continue; } // After max retries, fall through — return the empty response } else { empty_retries = 0; } // Tool calls (structured from API, or recovered from content // by build_response_message if the model leaked them as XML). 
if let Some(ref tool_calls) = msg.tool_calls { if !tool_calls.is_empty() { self.push_message(msg.clone()); for call in tool_calls { self.dispatch_tool_call(call, None, ui_tx, &mut ds) .await; } continue; } } // Genuinely text-only response let text = msg.content_text().to_string(); let _ = ui_tx.send(UiMessage::Activity(String::new())); self.push_message(msg); return Ok(TurnResult { text, yield_requested: ds.yield_requested, had_tool_calls: ds.had_tool_calls, tool_errors: ds.tool_errors, model_switch: ds.model_switch, dmn_pause: ds.dmn_pause, }); } } /// Dispatch a single tool call: send UI annotations, run the tool, /// push results into the conversation, handle images. async fn dispatch_tool_call( &mut self, call: &ToolCall, tag: Option<&str>, ui_tx: &UiSender, ds: &mut DispatchState, ) { let args: serde_json::Value = serde_json::from_str(&call.function.arguments).unwrap_or_default(); let args_summary = summarize_args(&call.function.name, &args); let label = match tag { Some(t) => format!("calling: {} ({})", call.function.name, t), None => format!("calling: {}", call.function.name), }; let _ = ui_tx.send(UiMessage::Activity(label)); let _ = ui_tx.send(UiMessage::ToolCall { name: call.function.name.clone(), args_summary: args_summary.clone(), }); let _ = ui_tx.send(UiMessage::ToolStarted { id: call.id.clone(), name: call.function.name.clone(), detail: args_summary, }); // Handle working_stack tool — needs &mut self for context state if call.function.name == "working_stack" { let result = tools::working_stack::handle(&args, &mut self.context.working_stack); let output = tools::ToolOutput { text: result.clone(), is_yield: false, images: Vec::new(), model_switch: None, dmn_pause: false, }; let _ = ui_tx.send(UiMessage::ToolResult { name: call.function.name.clone(), result: output.text.clone(), }); let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() }); self.push_message(Message::tool_result(&call.id, &output.text)); ds.had_tool_calls = true; // Re-render 
the context message so the model sees the updated stack if !result.starts_with("Error:") { self.refresh_context_message(); } return; } // Handle memory tools — needs &mut self for node tracking if call.function.name.starts_with("memory_") { let result = tools::memory::dispatch(&call.function.name, &args, None); let text = match &result { Ok(s) => s.clone(), Err(e) => format!("Error: {:#}", e), }; // Track loaded/updated nodes if result.is_ok() { match call.function.name.as_str() { "memory_render" | "memory_links" => { if let Some(key) = args.get("key").and_then(|v| v.as_str()) { if let Some(node) = crate::hippocampus::memory::MemoryNode::load(key) { // Replace if already tracked, otherwise add if let Some(existing) = self.context.loaded_nodes.iter_mut() .find(|n| n.key == node.key) { *existing = node; } else { self.context.loaded_nodes.push(node); } } } } "memory_write" => { if let Some(key) = args.get("key").and_then(|v| v.as_str()) { if let Some(node) = crate::hippocampus::memory::MemoryNode::load(key) { // Refresh if already tracked if let Some(existing) = self.context.loaded_nodes.iter_mut() .find(|n| n.key == node.key) { *existing = node; } // Don't auto-add writes — only renders register nodes } } } _ => {} } } let output = tools::ToolOutput { text, is_yield: false, images: Vec::new(), model_switch: None, dmn_pause: false, }; let _ = ui_tx.send(UiMessage::ToolResult { name: call.function.name.clone(), result: output.text.clone(), }); let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() }); self.push_message(Message::tool_result(&call.id, &output.text)); ds.had_tool_calls = true; if output.text.starts_with("Error:") { ds.tool_errors += 1; } self.measure_budget(); self.publish_context_state(); return; } let output = tools::dispatch(&call.function.name, &args, &self.process_tracker).await; if output.is_yield { ds.yield_requested = true; } else { ds.had_tool_calls = true; } if output.model_switch.is_some() { ds.model_switch = output.model_switch; } if 
output.dmn_pause { ds.dmn_pause = true; } if output.text.starts_with("Error:") { ds.tool_errors += 1; } let _ = ui_tx.send(UiMessage::ToolResult { name: call.function.name.clone(), result: output.text.clone(), }); let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() }); self.push_message(Message::tool_result(&call.id, &output.text)); if !output.images.is_empty() { // Only one live image in context at a time — age out any // previous ones to avoid accumulating ~90KB+ per image. self.age_out_images(); self.push_message(Message::user_with_images( "Here is the image you requested:", &output.images, )); } } /// Build context state summary for the debug screen. pub fn context_state_summary(&self) -> Vec { let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len(); let mut sections = Vec::new(); // System prompt sections.push(ContextSection { name: "System prompt".into(), tokens: count(&self.context.system_prompt), content: self.context.system_prompt.clone(), children: Vec::new(), }); // Personality — parent with file children let personality_children: Vec = self.context.personality.iter() .map(|(name, content)| ContextSection { name: name.clone(), tokens: count(content), content: content.clone(), children: Vec::new(), }) .collect(); let personality_tokens: usize = personality_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Personality ({} files)", personality_children.len()), tokens: personality_tokens, content: String::new(), children: personality_children, }); // Journal — split into per-entry children { let mut journal_children = Vec::new(); let mut current_header = String::new(); let mut current_body = String::new(); for line in self.context.journal.lines() { if line.starts_with("## ") { if !current_header.is_empty() { let body = std::mem::take(&mut current_body); let preview: String = body.lines().next().unwrap_or("").chars().take(60).collect(); journal_children.push(ContextSection { name: format!("{}: 
{}", current_header, preview), tokens: count(&body), content: body, children: Vec::new(), }); } current_header = line.trim_start_matches("## ").to_string(); current_body.clear(); } else { if !current_body.is_empty() || !line.is_empty() { current_body.push_str(line); current_body.push('\n'); } } } if !current_header.is_empty() { let preview: String = current_body.lines().next().unwrap_or("").chars().take(60).collect(); journal_children.push(ContextSection { name: format!("{}: {}", current_header, preview), tokens: count(¤t_body), content: current_body, children: Vec::new(), }); } let journal_tokens: usize = journal_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Journal ({} entries)", journal_children.len()), tokens: journal_tokens, content: String::new(), children: journal_children, }); } // Working stack — instructions + items as children let instructions = std::fs::read_to_string(WORKING_STACK_INSTRUCTIONS) .unwrap_or_default(); let mut stack_children = vec![ContextSection { name: "Instructions".into(), tokens: count(&instructions), content: instructions, children: Vec::new(), }]; for (i, item) in self.context.working_stack.iter().enumerate() { let marker = if i == self.context.working_stack.len() - 1 { "→" } else { " " }; stack_children.push(ContextSection { name: format!("{} [{}] {}", marker, i, item), tokens: count(item), content: String::new(), children: Vec::new(), }); } let stack_tokens: usize = stack_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Working stack ({} items)", self.context.working_stack.len()), tokens: stack_tokens, content: String::new(), children: stack_children, }); // Loaded memory nodes — tracked by memory tools if !self.context.loaded_nodes.is_empty() { let node_children: Vec = self.context.loaded_nodes.iter() .map(|node| { let rendered = node.render(); ContextSection { name: format!("{} (v{}, w={:.2}, {} links)", node.key, node.version, node.weight, 
node.links.len()), tokens: count(&rendered), content: String::new(), // don't duplicate in debug view children: Vec::new(), } }) .collect(); let node_tokens: usize = node_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Memory nodes ({} loaded)", self.context.loaded_nodes.len()), tokens: node_tokens, content: String::new(), children: node_children, }); } // Conversation — each message as a child let conv_start = self.messages.iter() .position(|m| m.role == Role::Assistant || m.role == Role::Tool) .unwrap_or(self.messages.len()); let conv_messages = &self.messages[conv_start..]; let conv_children: Vec = conv_messages.iter().enumerate() .map(|(i, msg)| { let text = msg.content.as_ref() .map(|c| c.as_text().to_string()) .unwrap_or_default(); let tool_info = msg.tool_calls.as_ref().map(|tc| { tc.iter() .map(|c| c.function.name.clone()) .collect::>() .join(", ") }); let label = match (&msg.role, &tool_info) { (_, Some(tools)) => format!("[tool_call: {}]", tools), _ => { let preview: String = text.chars().take(60).collect(); let preview = preview.replace('\n', " "); if text.len() > 60 { format!("{}...", preview) } else { preview } } }; let tokens = count(&text); let role_name = match msg.role { Role::Assistant => "PoC", Role::User => "Kent", Role::Tool => "tool", Role::System => "system", }; ContextSection { name: format!("[{}] {}: {}", conv_start + i, role_name, label), tokens, content: text, children: Vec::new(), } }) .collect(); let conv_tokens: usize = conv_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Conversation ({} messages)", conv_children.len()), tokens: conv_tokens, content: String::new(), children: conv_children, }); sections } /// Load recent journal entries at startup for orientation. /// Uses the same budget logic as compaction but with empty conversation. /// Only parses the tail of the journal file (last 64KB) for speed. 
fn load_startup_journal(&mut self) { let journal_path = journal::default_journal_path(); let entries = journal::parse_journal_tail(&journal_path, 64 * 1024); if entries.is_empty() { return; } let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len(); let context_message = self.context.render_context_message(); let plan = crate::agent::context::plan_context( &self.context.system_prompt, &context_message, &[], // no conversation yet &entries, &self.client.model, &count, ); self.context.journal = crate::agent::context::render_journal_text(&entries, &plan); } /// Re-render the context message in self.messages from live ContextState. /// Called after any change to context state (working stack, etc). fn refresh_context_message(&mut self) { let rendered = self.context.render_context_message(); // The context message is the first user message (index 1, after system prompt) if self.messages.len() >= 2 && self.messages[1].role == Role::User { self.messages[1] = Message::user(rendered); } self.publish_context_state(); self.save_working_stack(); } /// Persist working stack to disk. fn save_working_stack(&self) { if let Ok(json) = serde_json::to_string(&self.context.working_stack) { let _ = std::fs::write(WORKING_STACK_FILE, json); } } /// Load working stack from disk. fn load_working_stack(&mut self) { if let Ok(data) = std::fs::read_to_string(WORKING_STACK_FILE) { if let Ok(stack) = serde_json::from_str::>(&data) { self.context.working_stack = stack; } } } /// Push the current context summary to the shared state for the TUI to read. fn publish_context_state(&self) { if let Ok(mut state) = self.shared_context.write() { *state = self.context_state_summary(); } } /// Replace base64 image data in older messages with text placeholders. /// Only the most recent image stays live — each new image ages out /// all previous ones. The tool result message (right before each image /// message) already records what was loaded, so no info is lost. 
fn age_out_images(&mut self) { for msg in &mut self.messages { if let Some(MessageContent::Parts(parts)) = &msg.content { let has_images = parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. })); if !has_images { continue; } let mut replacement = String::new(); for part in parts { match part { ContentPart::Text { text } => { if !replacement.is_empty() { replacement.push('\n'); } replacement.push_str(text); } ContentPart::ImageUrl { .. } => { if !replacement.is_empty() { replacement.push('\n'); } replacement.push_str( "[image aged out — see tool result above for details]", ); } } } msg.content = Some(MessageContent::Text(replacement)); } } } /// Strip ephemeral tool calls from the conversation history. /// /// Ephemeral tools (like journal) persist their output to disk, /// so the tool call + result don't need to stay in the context /// window. We keep them for exactly one API round-trip (the model /// needs to see the result was acknowledged), then strip them. /// /// If an assistant message contains ONLY ephemeral tool calls, /// the entire message and its tool results are removed. If mixed /// with non-ephemeral calls, we leave it (rare case, small cost). 
fn strip_ephemeral_tool_calls(&mut self) { // Collect IDs of tool calls to strip let mut strip_ids: Vec = Vec::new(); let mut strip_msg_indices: Vec = Vec::new(); for (i, msg) in self.messages.iter().enumerate() { if msg.role != Role::Assistant { continue; } let calls = match &msg.tool_calls { Some(c) if !c.is_empty() => c, _ => continue, }; let all_ephemeral = calls.iter().all(|c| { c.function.name == tools::journal::TOOL_NAME }); if all_ephemeral { strip_msg_indices.push(i); for call in calls { strip_ids.push(call.id.clone()); } } } if strip_ids.is_empty() { return; } // Remove in reverse order to preserve indices self.messages.retain(|msg| { // Strip the assistant messages we identified if msg.role == Role::Assistant { if let Some(calls) = &msg.tool_calls { if calls.iter().all(|c| strip_ids.contains(&c.id)) { return false; } } } // Strip matching tool results if msg.role == Role::Tool { if let Some(ref id) = msg.tool_call_id { if strip_ids.contains(id) { return false; } } } true }); } /// Last prompt token count reported by the API. pub fn last_prompt_tokens(&self) -> u32 { self.last_prompt_tokens } /// Build context window from conversation messages + journal. /// Used by both compact() (in-memory messages) and restore_from_log() /// (conversation log). The context window is always: /// identity + journal summaries + raw recent messages pub fn compact(&mut self, new_system_prompt: String, new_personality: Vec<(String, String)>) { self.context.system_prompt = new_system_prompt; self.context.personality = new_personality; self.do_compact(); } /// Internal compaction — rebuilds context window from current messages. 
fn do_compact(&mut self) { // Find where actual conversation starts (after system + context) let conv_start = self .messages .iter() .position(|m| m.role == Role::Assistant || m.role == Role::Tool) .unwrap_or(self.messages.len()); let conversation: Vec = self.messages[conv_start..].to_vec(); let (messages, journal) = crate::agent::context::build_context_window( &self.context, &conversation, &self.client.model, &self.tokenizer, ); self.context.journal = journal; self.messages = messages; self.last_prompt_tokens = 0; self.measure_budget(); self.publish_context_state(); } /// Emergency compaction using stored config — called on context overflow. fn emergency_compact(&mut self) { self.do_compact(); } /// Restore from the conversation log. Builds the context window /// the same way compact() does — journal summaries for old messages, /// raw recent messages. This is the unified startup path. /// Returns true if the log had content to restore. pub fn restore_from_log( &mut self, system_prompt: String, personality: Vec<(String, String)>, ) -> bool { self.context.system_prompt = system_prompt; self.context.personality = personality; let all_messages = match &self.conversation_log { Some(log) => match log.read_tail(512 * 1024) { Ok(msgs) if !msgs.is_empty() => { dbglog!("[restore] read {} messages from log tail", msgs.len()); msgs } Ok(_) => { dbglog!("[restore] log exists but is empty"); return false; } Err(e) => { dbglog!("[restore] failed to read log: {}", e); return false; } }, None => { dbglog!("[restore] no conversation log configured"); return false; } }; // Filter out system/context messages — we only want the // actual conversation (user prompts, assistant responses, // tool calls/results) let conversation: Vec = all_messages .into_iter() .filter(|m| m.role != Role::System) .collect(); dbglog!("[restore] {} messages after filtering system", conversation.len()); let (messages, journal) = crate::agent::context::build_context_window( &self.context, &conversation, 
&self.client.model, &self.tokenizer, ); dbglog!("[restore] journal text: {} chars, {} lines", journal.len(), journal.lines().count()); self.context.journal = journal; self.messages = messages; dbglog!("[restore] built context window: {} messages", self.messages.len()); self.last_prompt_tokens = 0; self.measure_budget(); self.publish_context_state(); true } /// Replace the API client (for model switching). pub fn swap_client(&mut self, new_client: ApiClient) { self.client = new_client; } /// Get the model identifier. pub fn model(&self) -> &str { &self.client.model } /// Get the conversation history for persistence. pub fn messages(&self) -> &[Message] { &self.messages } /// Mutable access to conversation history (for /retry). pub fn messages_mut(&mut self) -> &mut Vec { &mut self.messages } /// Restore from a saved conversation. pub fn restore(&mut self, messages: Vec) { self.messages = messages; } } // Context window building, token counting, and error classification // live in context.rs /// Create a short summary of tool args for the tools pane header. 
/// Create a short summary of a tool call's arguments for display,
/// keyed by tool name. Unknown tools get an empty summary. Long
/// values are truncated to ~60 bytes at a char boundary.
fn summarize_args(tool_name: &str, args: &serde_json::Value) -> String {
    // Char-boundary-safe truncation: find the start of the last char
    // whose byte offset is <= max, so slicing never lands inside a
    // multibyte UTF-8 sequence (byte-indexed `&s[..60]` would panic).
    fn truncate_at(s: &str, max: usize) -> String {
        if s.len() > max {
            let end = s
                .char_indices()
                .map(|(i, _)| i)
                .take_while(|&i| i <= max)
                .last()
                .unwrap_or(0);
            format!("{}...", &s[..end])
        } else {
            s.to_string()
        }
    }

    match tool_name {
        "read_file" | "write_file" | "edit_file" => args["file_path"]
            .as_str()
            .unwrap_or("")
            .to_string(),
        "bash" => {
            let cmd = args["command"].as_str().unwrap_or("");
            truncate_at(cmd, 60)
        }
        "grep" => {
            let pattern = args["pattern"].as_str().unwrap_or("");
            let path = args["path"].as_str().unwrap_or(".");
            format!("{} in {}", pattern, path)
        }
        "glob" => args["pattern"]
            .as_str()
            .unwrap_or("")
            .to_string(),
        "view_image" => {
            if let Some(pane) = args["pane_id"].as_str() {
                format!("pane {}", pane)
            } else {
                args["file_path"].as_str().unwrap_or("").to_string()
            }
        }
        "journal" => {
            let entry = args["entry"].as_str().unwrap_or("");
            // Fixed: the original sliced `&entry[..60]` by raw bytes, which
            // panics when byte 60 is mid-codepoint; now char-boundary safe
            // like the bash branch.
            truncate_at(entry, 60)
        }
        "yield_to_user" => args["message"]
            .as_str()
            .unwrap_or("")
            .to_string(),
        "switch_model" => args["model"]
            .as_str()
            .unwrap_or("")
            .to_string(),
        "pause" => String::new(),
        _ => String::new(),
    }
}

// Parsing functions (parse_leaked_tool_calls, strip_leaked_artifacts)
// and their tests live in parsing.rs