// agent.rs — Core agent loop
//
// The simplest possible implementation of the agent pattern:
// send messages + tool definitions to the model, if it responds
// with tool calls then dispatch them and loop, if it responds
// with text then display it and wait for the next prompt.
//
// Uses streaming by default so text tokens appear as they're
// generated. Tool calls are accumulated from stream deltas and
// dispatched after the stream completes.
//
// The DMN (dmn.rs) is the outer loop that decides what prompts
// to send here. This module just handles single turns: prompt
// in, response out, tool calls dispatched.

pub mod api;
pub mod context;
pub mod oneshot;
pub mod tools;

use std::sync::Arc;

use anyhow::Result;
use tiktoken_rs::CoreBPE;

use api::{ApiClient, ToolCall};
use api::types::{ContentPart, Message, MessageContent, Role};
use context::{ConversationEntry, ContextState, ContextBudget};
use tools::{summarize_args, working_stack};

use crate::mind::log::ConversationLog;
use crate::user::ui_channel::{ContextSection, SharedContextState, StreamTarget, UiSender};
use crate::subconscious::learn;

// --- Activity tracking (RAII guards) ---

/// One tracked activity or notification in `Agent::activities`.
pub struct ActivityEntry {
    pub id: u64,
    pub label: String,
    pub started: std::time::Instant,
    /// Auto-expires this long after creation (or completion).
    pub expires_at: std::time::Instant,
}

/// RAII guard — marks the activity "(complete)" on drop, starts expiry timer.
///
/// NOTE(review): the mutex type was lost from the extracted source; it is
/// restored as `tokio::sync::Mutex` because the code awaits `lock()` and
/// matches `try_lock()` as a `Result` — confirm against the original.
pub struct ActivityGuard {
    agent: Arc<tokio::sync::Mutex<Agent>>,
    id: u64,
}

/// How long a finished activity (or a notification) stays in the list.
const ACTIVITY_LINGER: std::time::Duration = std::time::Duration::from_secs(5);

/// Upper bound on the lifetime of an activity that is never marked complete.
const ACTIVITY_MAX_AGE: std::time::Duration = std::time::Duration::from_secs(3600);

impl Drop for ActivityGuard {
    fn drop(&mut self) {
        // Drop cannot await, so this is best-effort: if the agent lock is
        // currently held, the entry is left untouched and only disappears
        // once its original `expires_at` passes.
        if let Ok(mut ag) = self.agent.try_lock() {
            if let Some(entry) = ag.activities.iter_mut().find(|a| a.id == self.id) {
                entry.label.push_str(" (complete)");
                entry.expires_at = std::time::Instant::now() + ACTIVITY_LINGER;
            }
        }
    }
}

impl Agent {
    /// Register an activity, returns its ID. Caller creates the guard.
    pub fn push_activity(&mut self, label: impl Into<String>) -> u64 {
        self.insert_activity(label.into(), ACTIVITY_MAX_AGE)
    }

    /// Push a notification — auto-expires after 5 seconds.
    pub fn notify(&mut self, label: impl Into<String>) {
        self.insert_activity(label.into(), ACTIVITY_LINGER);
    }

    /// Remove expired activities.
    pub fn expire_activities(&mut self) {
        let now = std::time::Instant::now();
        self.activities.retain(|a| a.expires_at > now);
    }

    /// Prune expired entries, then append a new one that lives for `ttl`.
    /// Returns the ID assigned to the new entry.
    fn insert_activity(&mut self, label: String, ttl: std::time::Duration) -> u64 {
        self.expire_activities();
        let id = self.next_activity_id;
        self.next_activity_id += 1;
        // One clock read so `started` and `expires_at` are exactly `ttl` apart.
        let now = std::time::Instant::now();
        self.activities.push(ActivityEntry {
            id,
            label,
            started: now,
            expires_at: now + ttl,
        });
        id
    }
}

/// Create an activity guard from outside the lock.
pub fn activity_guard(agent: &Arc<tokio::sync::Mutex<Agent>>, id: u64) -> ActivityGuard {
    ActivityGuard { agent: Arc::clone(agent), id }
}

/// Convenience: lock, push activity, unlock, return guard.
pub async fn start_activity(
    agent: &Arc<tokio::sync::Mutex<Agent>>,
    label: impl Into<String>,
) -> ActivityGuard {
    // The lock guard is a temporary and is released at the end of this statement.
    let id = agent.lock().await.push_activity(label);
    activity_guard(agent, id)
}

/// Result of a single agent turn.
pub struct TurnResult {
    /// The text response (already sent through UI channel).
    #[allow(dead_code)]
    pub text: String,
    /// Whether the model called yield_to_user during this turn.
    pub yield_requested: bool,
    /// Whether any tools (other than yield_to_user) were called.
    pub had_tool_calls: bool,
    /// Number of tool calls that returned errors this turn.
    pub tool_errors: u32,
    /// Model name to switch to after this turn completes.
    // NOTE(review): type argument restored as `String` (a model name) — confirm.
    pub model_switch: Option<String>,
    /// Agent requested DMN pause (full stop on autonomous behavior).
    pub dmn_pause: bool,
}

/// Accumulated state across tool dispatches within a single turn.
#[derive(Default)]
struct DispatchState {
    yield_requested: bool,
    had_tool_calls: bool,
    tool_errors: u32,
    // NOTE(review): type argument restored as `String` — confirm.
    model_switch: Option<String>,
    dmn_pause: bool,
}

impl DispatchState {
    /// Fresh state: no flags set, zero errors, no pending model switch.
    fn new() -> Self {
        Self::default()
    }
}

/// The agent: API client, tool set, sampling parameters, conversation
/// context and the bookkeeping shared with the UI.
pub struct Agent {
    client: ApiClient,
    // NOTE(review): the element type was lost from the extracted source;
    // `tools::Tool` is an assumption based on `tools::tools()` — confirm.
    tools: Vec<tools::Tool>,
    /// Last known prompt token count from the API (tracks context size).
    last_prompt_tokens: u32,
    /// Current reasoning effort level ("none", "low", "high").
    pub reasoning_effort: String,
    /// Sampling parameters — adjustable at runtime from the thalamus screen.
    pub temperature: f32,
    pub top_p: f32,
    pub top_k: u32,
    /// Active activities — RAII guards auto-remove on drop.
    pub activities: Vec<ActivityEntry>,
    next_activity_id: u64,
    /// Control tool flags — set by tool handlers, consumed by turn loop.
    pub pending_yield: bool,
    // NOTE(review): type argument restored as `String` — confirm.
    pub pending_model_switch: Option<String>,
    pub pending_dmn_pause: bool,
    /// Persistent conversation log — append-only record of all messages.
    conversation_log: Option<ConversationLog>,
    /// BPE tokenizer for token counting (cl100k_base — close enough
    /// for Claude and Qwen budget allocation, ~85-90% count accuracy).
    tokenizer: CoreBPE,
    /// Mutable context state — personality, working stack, etc.
    pub context: ContextState,
    /// Shared live context summary — TUI reads this directly for debug screen.
    pub shared_context: SharedContextState,
    /// App config — used to reload identity on compaction and model switching.
    pub app_config: crate::config::AppConfig,
    pub prompt_file: String,
    /// Stable session ID for memory-search dedup across turns.
    pub session_id: String,
    /// Incremented on compaction — UI uses this to detect resets.
    pub generation: u64,
    /// Agent orchestration state (surface-observe, journal, reflect).
    /// TODO: move to Session — it's session-level, not agent-level.
    pub agent_cycles: crate::subconscious::subconscious::AgentCycleState,
    /// Shared active tools — Agent writes, TUI reads.
    pub active_tools: crate::user::ui_channel::SharedActiveTools,
}

/// Render journal entries as one text block: a fixed header followed by a
/// `## <timestamp>` section per entry. Returns an empty string for no entries.
fn render_journal(entries: &[context::JournalEntry]) -> String {
    use std::fmt::Write;

    if entries.is_empty() {
        return String::new();
    }
    let mut text = String::from("[Earlier — from your journal]\n\n");
    for entry in entries {
        // Writing into a String cannot fail; the result is discarded.
        writeln!(
            text,
            "## {}\n{}\n",
            entry.timestamp.format("%Y-%m-%dT%H:%M"),
            entry.content,
        )
        .ok();
    }
    text
}

impl Agent {
    /// Build an agent with default sampling parameters, then load the
    /// startup journal and working stack and publish the initial context
    /// summary.
    ///
    /// # Panics
    /// Panics if the `cl100k_base` tokenizer cannot be loaded.
    pub fn new(
        client: ApiClient,
        system_prompt: String,
        personality: Vec<(String, String)>,
        app_config: crate::config::AppConfig,
        prompt_file: String,
        conversation_log: Option<ConversationLog>,
        shared_context: SharedContextState,
        active_tools: crate::user::ui_channel::SharedActiveTools,
    ) -> Self {
        let tokenizer = tiktoken_rs::cl100k_base()
            .expect("failed to load cl100k_base tokenizer");

        // `system_prompt` is not needed again, so it is moved rather than cloned.
        let context = ContextState {
            system_prompt,
            personality,
            journal: Vec::new(),
            working_stack: Vec::new(),
            entries: Vec::new(),
        };
        let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
        let agent_cycles = crate::subconscious::subconscious::AgentCycleState::new(&session_id);
        let mut agent = Self {
            client,
            tools: tools::tools(),
            last_prompt_tokens: 0,
            reasoning_effort: "none".to_string(),
            temperature: 0.6,
            top_p: 0.95,
            top_k: 20,
            activities: Vec::new(),
            next_activity_id: 0,
            pending_yield: false,
            pending_model_switch: None,
            pending_dmn_pause: false,
            conversation_log,
            tokenizer,
            context,
            shared_context,
            app_config,
            prompt_file,
            session_id,
            generation: 0,
            agent_cycles,
            active_tools,
        };

        agent.load_startup_journal();
        agent.load_working_stack();
        agent.publish_context_state();
        agent
    }

    /// Assemble the full message list for the API call from typed sources.
    /// System prompt + personality context + journal + conversation messages.
fn assemble_api_messages(&self) -> Vec<Message> {
    // System prompt, up to two synthetic user messages, then the conversation.
    let mut msgs = Vec::with_capacity(self.context.entries.len() + 3);
    msgs.push(Message::system(&self.context.system_prompt));

    let ctx = self.context.render_context_message();
    if !ctx.is_empty() {
        msgs.push(Message::user(ctx));
    }

    let jnl = render_journal(&self.context.journal);
    if !jnl.is_empty() {
        msgs.push(Message::user(jnl));
    }

    msgs.extend(self.context.entries.iter().map(|e| e.api_message().clone()));
    msgs
}

/// Run agent orchestration cycle, returning structured output.
///
/// Builds a hook session from the session ID and the conversation log
/// path (empty when there is no log), triggers the cycle, and takes
/// `last_output` out of the cycle state.
fn run_agent_cycle(&mut self) -> crate::subconscious::subconscious::AgentCycleOutput {
    let transcript_path = self.conversation_log.as_ref()
        .map(|l| l.path().to_string_lossy().into_owned())
        .unwrap_or_default();

    let session = crate::session::HookSession::from_fields(
        self.session_id.clone(),
        transcript_path,
        "UserPromptSubmit".into(),
    );

    self.agent_cycles.trigger(&session);
    std::mem::take(&mut self.agent_cycles.last_output)
}

/// Push a conversation message — stamped and logged.
fn push_message(&mut self, mut msg: Message) {
    msg.stamp();
    self.push_entry(ConversationEntry::Message(msg));
}

/// Append an entry to the conversation log (if any) and to the in-memory
/// context. A logging failure is reported on stderr and does not prevent
/// the entry from being added to the context.
fn push_entry(&mut self, entry: ConversationEntry) {
    if let Some(ref log) = self.conversation_log {
        if let Err(e) = log.append(&entry) {
            eprintln!("warning: failed to log entry: {:#}", e);
        }
    }
    self.context.entries.push(entry);
}

/// Append streaming text to the last entry (creating a partial
/// assistant entry if needed). Called by collect_stream per token batch.
pub fn append_streaming(&mut self, text: &str) { if let Some(entry) = self.context.entries.last_mut() { let msg = entry.message_mut(); if msg.role == Role::Assistant { msg.append_content(text); return; } } // No assistant entry yet — push a new partial one self.context.entries.push(ConversationEntry::Message( Message::assistant(text), )); } pub fn budget(&self) -> ContextBudget { let count_str = |s: &str| self.tokenizer.encode_with_special_tokens(s).len(); let count_msg = |m: &Message| crate::agent::context::msg_token_count(&self.tokenizer, m); let window = crate::agent::context::context_window(); self.context.budget(&count_str, &count_msg, window) } /// Send a user message and run the agent loop until the model /// produces a text response (no more tool calls). Streams text /// and tool activity through the UI channel. /// /// Takes Arc> and manages locking internally so the /// lock is never held across I/O (API streaming, tool dispatch). pub async fn turn( agent: Arc>, user_input: &str, ui_tx: &UiSender, target: StreamTarget, ) -> Result { // --- Pre-loop setup (lock 1): agent cycle, memories, user input --- let active_tools = { let mut finished = Vec::new(); let tools = { let mut me = agent.lock().await; let cycle = me.run_agent_cycle(); for key in &cycle.surfaced_keys { if let Some(rendered) = crate::cli::node::render_node( &crate::store::Store::load().unwrap_or_default(), key, ) { let mut msg = Message::user(format!( "\n--- {} (surfaced) ---\n{}\n", key, rendered, )); msg.stamp(); me.push_entry(ConversationEntry::Memory { key: key.clone(), message: msg }); } } if let Some(ref reflection) = cycle.reflection { me.push_message(Message::user(format!( "\n--- subconscious reflection ---\n{}\n", reflection.trim(), ))); } // Collect completed background tool handles — remove from active list // but don't await yet (MutexGuard isn't Send). 
let mut tools = me.active_tools.lock().unwrap(); let mut i = 0; while i < tools.len() { if tools[i].handle.is_finished() { finished.push(tools.remove(i)); } else { i += 1; } } me.active_tools.clone() }; // Await finished handles without holding the agent lock let mut bg_results = Vec::new(); for entry in finished { if let Ok((call, output)) = entry.handle.await { bg_results.push((call, output)); } } // Re-acquire to apply results and push user input { let mut me = agent.lock().await; let mut bg_ds = DispatchState::new(); for (call, output) in bg_results { me.apply_tool_result(&call, output, ui_tx, &mut bg_ds); } me.push_message(Message::user(user_input)); } tools }; let mut overflow_retries: u32 = 0; let mut empty_retries: u32 = 0; let mut ds = DispatchState::new(); loop { // --- Lock 2: assemble messages, start stream --- let _thinking = start_activity(&agent, "thinking...").await; let (mut rx, _stream_guard) = { let me = agent.lock().await; let api_messages = me.assemble_api_messages(); let sampling = api::SamplingParams { temperature: me.temperature, top_p: me.top_p, top_k: me.top_k, }; me.client.start_stream( &api_messages, &me.tools, ui_tx, &me.reasoning_effort, sampling, None, ) }; // --- Lock released --- // --- Stream loop (no lock) --- let sr = api::collect_stream( &mut rx, &agent, &active_tools, ).await; let api::StreamResult { content, tool_calls, usage, finish_reason, error: stream_error, display_buf, in_tool_call, } = sr; // --- Stream complete --- // --- Lock 3: process results --- let (msg, pending) = { let mut me = agent.lock().await; // Handle stream errors with retry logic if let Some(e) = stream_error { let err = anyhow::anyhow!("{}", e); if crate::agent::context::is_context_overflow(&err) && overflow_retries < 2 { overflow_retries += 1; me.notify(format!("context overflow — retrying ({}/2)", overflow_retries)); me.compact(); continue; } if crate::agent::context::is_stream_error(&err) && empty_retries < 2 { empty_retries += 1; 
me.notify(format!("stream error — retrying ({}/2)", empty_retries)); drop(me); tokio::time::sleep(std::time::Duration::from_secs(2)).await; continue; } return Err(err); } if finish_reason.as_deref() == Some("error") { let detail = if content.is_empty() { "no details".into() } else { content }; return Err(anyhow::anyhow!("model stream error: {}", detail)); } // Flush remaining display buffer to streaming entry if !in_tool_call && !display_buf.is_empty() { me.append_streaming(&display_buf); } // Pop the streaming entry — the proper entry gets pushed below // via build_response_message which handles tool calls, leaked // tool calls, etc. sync_from_agent handles the swap. if let Some(entry) = me.context.entries.last() { if entry.message().role == Role::Assistant && entry.message().timestamp.is_none() { me.context.entries.pop(); } } let msg = api::build_response_message(content, tool_calls); if let Some(usage) = &usage { me.last_prompt_tokens = usage.prompt_tokens; me.publish_context_state(); } // Empty response — nudge and retry let has_content = msg.content.is_some(); let has_tools = msg.tool_calls.as_ref().map_or(false, |tc| !tc.is_empty()); if !has_content && !has_tools { if empty_retries < 2 { empty_retries += 1; dbglog!( "empty response, injecting nudge and retrying ({}/2)", empty_retries, ); me.push_message(Message::user( "[system] Your previous response was empty. \ Please respond with text or use a tool." 
)); continue; } } else { empty_retries = 0; } // Collect non-background tool calls fired during streaming let mut tools_guard = active_tools.lock().unwrap(); let mut non_bg = Vec::new(); let mut i = 0; while i < tools_guard.len() { if !tools_guard[i].background { non_bg.push(tools_guard.remove(i)); } else { i += 1; } } (msg, non_bg) }; if !pending.is_empty() { agent.lock().await.push_message(msg.clone()); // Drop lock before awaiting tool handles let mut results = Vec::new(); for entry in pending { if let Ok(r) = entry.handle.await { results.push(r); } } // Reacquire to apply results let mut me = agent.lock().await; for (call, output) in results { me.apply_tool_result(&call, output, ui_tx, &mut ds); } me.publish_context_state(); continue; } // Tool calls (structured API path) if let Some(ref tool_calls) = msg.tool_calls { if !tool_calls.is_empty() { agent.lock().await.push_message(msg.clone()); let calls: Vec = tool_calls.clone(); // Drop lock before tool dispatch for call in &calls { Agent::dispatch_tool_call_unlocked( &agent, &active_tools, call, ui_tx, &mut ds, ).await; } continue; } } // Genuinely text-only response let text = msg.content_text().to_string(); let mut me = agent.lock().await; me.push_message(msg); // Drain pending control flags if me.pending_yield { ds.yield_requested = true; me.pending_yield = false; } if me.pending_model_switch.is_some() { ds.model_switch = me.pending_model_switch.take(); } if me.pending_dmn_pause { ds.dmn_pause = true; me.pending_dmn_pause = false; } return Ok(TurnResult { text, yield_requested: ds.yield_requested, had_tool_calls: ds.had_tool_calls, tool_errors: ds.tool_errors, model_switch: ds.model_switch, dmn_pause: ds.dmn_pause, }); } } /// Dispatch a tool call without holding the agent lock across I/O. /// Used by `turn()` which manages its own locking. 
async fn dispatch_tool_call_unlocked( agent: &Arc>, active_tools: &crate::user::ui_channel::SharedActiveTools, call: &ToolCall, ui_tx: &UiSender, ds: &mut DispatchState, ) { let args: serde_json::Value = match serde_json::from_str(&call.function.arguments) { Ok(v) => v, Err(e) => { let err = format!("Error: malformed tool call arguments: {e}"); let _act = start_activity(agent, format!("rejected: {} (bad args)", call.function.name)).await; let mut me = agent.lock().await; me.apply_tool_result(call, err, ui_tx, ds); return; } }; let args_summary = summarize_args(&call.function.name, &args); let _calling = start_activity(agent, format!("calling: {}", call.function.name)).await; // Spawn tool, track it let call_clone = call.clone(); let agent_handle = agent.clone(); let handle = tokio::spawn(async move { let output = tools::dispatch_with_agent(&call_clone.function.name, &args, Some(agent_handle)).await; (call_clone, output) }); active_tools.lock().unwrap().push( tools::ActiveToolCall { id: call.id.clone(), name: call.function.name.clone(), detail: args_summary, started: std::time::Instant::now(), background: false, handle, } ); // Pop it back and await — no agent lock held let entry = { let mut tools = active_tools.lock().unwrap(); tools.pop().unwrap() }; if let Ok((call, output)) = entry.handle.await { // Brief lock to apply result let mut me = agent.lock().await; me.apply_tool_result(&call, output, ui_tx, ds); } } /// Apply a completed tool result to conversation state. 
fn apply_tool_result( &mut self, call: &ToolCall, output: String, ui_tx: &UiSender, ds: &mut DispatchState, ) { let args: serde_json::Value = serde_json::from_str(&call.function.arguments).unwrap_or_default(); ds.had_tool_calls = true; if output.starts_with("Error:") { ds.tool_errors += 1; } self.active_tools.lock().unwrap().retain(|t| t.id != call.id); // Tag memory_render results for context deduplication if call.function.name == "memory_render" && !output.starts_with("Error:") { if let Some(key) = args.get("key").and_then(|v| v.as_str()) { let mut msg = Message::tool_result(&call.id, &output); msg.stamp(); self.push_entry(ConversationEntry::Memory { key: key.to_string(), message: msg }); self.publish_context_state(); return; } } self.push_message(Message::tool_result(&call.id, &output)); } /// Build context state summary for the debug screen. pub fn context_state_summary(&self, memory_scores: Option<&learn::MemoryScore>) -> Vec { let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len(); let mut sections = Vec::new(); // System prompt sections.push(ContextSection { name: "System prompt".into(), tokens: count(&self.context.system_prompt), content: self.context.system_prompt.clone(), children: Vec::new(), }); // Personality — parent with file children let personality_children: Vec = self.context.personality.iter() .map(|(name, content)| ContextSection { name: name.clone(), tokens: count(content), content: content.clone(), children: Vec::new(), }) .collect(); let personality_tokens: usize = personality_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Personality ({} files)", personality_children.len()), tokens: personality_tokens, content: String::new(), children: personality_children, }); // Journal { let journal_children: Vec = self.context.journal.iter() .map(|entry| { let preview: String = entry.content.lines() .find(|l| !l.trim().is_empty()) .unwrap_or("").chars().take(60).collect(); ContextSection { name: 
format!("{}: {}", entry.timestamp.format("%Y-%m-%dT%H:%M"), preview), tokens: count(&entry.content), content: entry.content.clone(), children: Vec::new(), } }) .collect(); let journal_tokens: usize = journal_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Journal ({} entries)", journal_children.len()), tokens: journal_tokens, content: String::new(), children: journal_children, }); } // Working stack — instructions + items as children let instructions = std::fs::read_to_string(working_stack::instructions_path()) .unwrap_or_default(); let mut stack_children = vec![ContextSection { name: "Instructions".into(), tokens: count(&instructions), content: instructions, children: Vec::new(), }]; for (i, item) in self.context.working_stack.iter().enumerate() { let marker = if i == self.context.working_stack.len() - 1 { "→" } else { " " }; stack_children.push(ContextSection { name: format!("{} [{}] {}", marker, i, item), tokens: count(item), content: String::new(), children: Vec::new(), }); } let stack_tokens: usize = stack_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Working stack ({} items)", self.context.working_stack.len()), tokens: stack_tokens, content: String::new(), children: stack_children, }); // Memory nodes — extracted from Memory entries in the conversation let memory_entries: Vec<&ConversationEntry> = self.context.entries.iter() .filter(|e| e.is_memory()) .collect(); if !memory_entries.is_empty() { let node_children: Vec = memory_entries.iter() .map(|entry| { let key = match entry { ConversationEntry::Memory { key, .. 
} => key.as_str(), _ => unreachable!(), }; let text = entry.message().content_text(); // Show node weight from graph (updated by incremental scorer) let graph_weight = crate::hippocampus::store::Store::load().ok() .and_then(|s| s.nodes.get(key).map(|n| n.weight)); // Show full matrix score if available let matrix_score = memory_scores .and_then(|s| s.memory_weights.iter() .find(|(k, _)| k == key) .map(|(_, v)| *v)); let label = match (graph_weight, matrix_score) { (Some(w), Some(s)) => format!("{} (w:{:.2} score:{:.1})", key, w, s), (Some(w), None) => format!("{} (w:{:.2})", key, w), (None, Some(s)) => format!("{} (score:{:.1})", key, s), (None, None) => key.to_string(), }; ContextSection { name: label, tokens: count(text), content: String::new(), children: Vec::new(), } }) .collect(); let node_tokens: usize = node_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Memory nodes ({} loaded)", memory_entries.len()), tokens: node_tokens, content: String::new(), children: node_children, }); } // Conversation — each message as a child let conv_messages = &self.context.entries; let conv_children: Vec = conv_messages.iter().enumerate() .map(|(i, entry)| { let m = entry.message(); let text = m.content.as_ref() .map(|c| c.as_text().to_string()) .unwrap_or_default(); let tool_info = m.tool_calls.as_ref().map(|tc| { tc.iter() .map(|c| c.function.name.clone()) .collect::>() .join(", ") }); let label = if entry.is_memory() { if let ConversationEntry::Memory { key, .. 
} = entry { format!("[memory: {}]", key) } else { unreachable!() } } else { match &tool_info { Some(tools) => format!("[tool_call: {}]", tools), None => { let preview: String = text.chars().take(60).collect(); let preview = preview.replace('\n', " "); if text.len() > 60 { format!("{}...", preview) } else { preview } } } }; let tokens = count(&text); let cfg = crate::config::get(); let role_name = if entry.is_memory() { "mem".to_string() } else { match m.role { Role::Assistant => cfg.assistant_name.clone(), Role::User => cfg.user_name.clone(), Role::Tool => "tool".to_string(), Role::System => "system".to_string(), } }; // Show which memories were important for this response let children = if m.role == Role::Assistant { memory_scores .map(|s| s.important_memories_for_entry(i)) .unwrap_or_default() .into_iter() .map(|(key, score)| ContextSection { name: format!("← {} ({:.1})", key, score), tokens: 0, content: String::new(), children: Vec::new(), }) .collect() } else { Vec::new() }; ContextSection { name: format!("[{}] {}: {}", i, role_name, label), tokens, content: text, children, } }) .collect(); let conv_tokens: usize = conv_children.iter().map(|c| c.tokens).sum(); sections.push(ContextSection { name: format!("Conversation ({} messages)", conv_children.len()), tokens: conv_tokens, content: String::new(), children: conv_children, }); sections } /// Load recent journal entries at startup for orientation. /// Uses the same budget logic as compaction but with empty conversation. /// Only parses the tail of the journal file (last 64KB) for speed. 
fn load_startup_journal(&mut self) { let store = match crate::store::Store::load() { Ok(s) => s, Err(_) => return, }; // Find oldest message timestamp in conversation log let oldest_msg_ts = self.conversation_log.as_ref() .and_then(|log| log.oldest_timestamp()); // Get journal entries from the memory graph let mut journal_nodes: Vec<_> = store.nodes.values() .filter(|n| n.node_type == crate::store::NodeType::EpisodicSession) .collect(); let mut dbg = std::fs::OpenOptions::new().create(true).append(true) .open("/tmp/poc-journal-debug.log").ok(); macro_rules! dbg_log { ($($arg:tt)*) => { if let Some(ref mut f) = dbg { use std::io::Write; let _ = writeln!(f, $($arg)*); } } } dbg_log!("[journal] {} nodes, oldest_msg={:?}", journal_nodes.len(), oldest_msg_ts); journal_nodes.sort_by_key(|n| n.created_at); if let Some(first) = journal_nodes.first() { dbg_log!("[journal] first created_at={}", first.created_at); } if let Some(last) = journal_nodes.last() { dbg_log!("[journal] last created_at={}", last.created_at); } // Find the cutoff index — entries older than conversation, plus one overlap let cutoff_idx = if let Some(cutoff) = oldest_msg_ts { let cutoff_ts = cutoff.timestamp(); dbg_log!("[journal] cutoff timestamp={}", cutoff_ts); let mut idx = journal_nodes.len(); for (i, node) in journal_nodes.iter().enumerate() { if node.created_at >= cutoff_ts { idx = i + 1; break; } } idx } else { journal_nodes.len() }; dbg_log!("[journal] cutoff_idx={}", cutoff_idx); // Walk backwards from cutoff, accumulating entries within 15% of context let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len(); let context_window = crate::agent::context::context_window(); let journal_budget = context_window * 15 / 100; dbg_log!("[journal] budget={} tokens ({}*15%)", journal_budget, context_window); let mut entries = Vec::new(); let mut total_tokens = 0; for node in journal_nodes[..cutoff_idx].iter().rev() { let tokens = count(&node.content); if total_tokens + tokens > 
journal_budget && !entries.is_empty() { break; } entries.push(context::JournalEntry { timestamp: chrono::DateTime::from_timestamp(node.created_at, 0) .unwrap_or_default(), content: node.content.clone(), }); total_tokens += tokens; } entries.reverse(); dbg_log!("[journal] loaded {} entries, {} tokens", entries.len(), total_tokens); if entries.is_empty() { dbg_log!("[journal] no entries!"); return; } self.context.journal = entries; dbg_log!("[journal] context.journal now has {} entries", self.context.journal.len()); } /// Called after any change to context state (working stack, etc). /// Load working stack from disk. fn load_working_stack(&mut self) { if let Ok(data) = std::fs::read_to_string(working_stack::file_path()) { if let Ok(stack) = serde_json::from_str::>(&data) { self.context.working_stack = stack; } } } /// Push the current context summary to the shared state for the TUI to read. pub fn publish_context_state(&self) { self.publish_context_state_with_scores(None); } pub fn publish_context_state_with_scores(&self, memory_scores: Option<&learn::MemoryScore>) { let summary = self.context_state_summary(memory_scores); if let Ok(mut dbg) = std::fs::OpenOptions::new().create(true).append(true) .open("/tmp/poc-journal-debug.log") { use std::io::Write; for s in &summary { let _ = writeln!(dbg, "[publish] {} ({} tokens, {} children)", s.name, s.tokens, s.children.len()); } } if let Ok(mut state) = self.shared_context.write() { *state = summary; } } /// Replace base64 image data in older messages with text placeholders. /// Keeps the 2 most recent images live (enough for motion/comparison). /// The tool result message before each image records what was loaded. 
pub fn age_out_images(&mut self) { // Find image entries newest-first, skip 1 (caller is about to add another) let to_age: Vec = self.context.entries.iter().enumerate() .rev() .filter(|(_, e)| { if let Some(MessageContent::Parts(parts)) = &e.message().content { parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. })) } else { false } }) .map(|(i, _)| i) .skip(1) // keep 1 existing + 1 about to be added = 2 live .collect(); for i in to_age { let msg = self.context.entries[i].message_mut(); if let Some(MessageContent::Parts(parts)) = &msg.content { let mut replacement = String::new(); for part in parts { match part { ContentPart::Text { text } => { if !replacement.is_empty() { replacement.push('\n'); } replacement.push_str(text); } ContentPart::ImageUrl { .. } => { if !replacement.is_empty() { replacement.push('\n'); } replacement.push_str("[image aged out]"); } } } msg.content = Some(MessageContent::Text(replacement)); } } self.generation += 1; } /// Strip ephemeral tool calls from the conversation history. /// /// Last prompt token count reported by the API. pub fn last_prompt_tokens(&self) -> u32 { self.last_prompt_tokens } /// Rebuild the context window: reload identity, dedup, trim, reload journal. 
pub fn compact(&mut self) { // Reload identity from config match crate::config::reload_for_model(&self.app_config, &self.prompt_file) { Ok((system_prompt, personality)) => { self.context.system_prompt = system_prompt; self.context.personality = personality; } Err(e) => { eprintln!("warning: failed to reload identity: {:#}", e); } } let before = self.context.entries.len(); let before_mem = self.context.entries.iter().filter(|e| e.is_memory()).count(); let before_conv = before - before_mem; // Dedup memory, trim to budget, reload journal let entries = self.context.entries.clone(); self.context.entries = crate::agent::context::trim_entries( &self.context, &entries, &self.tokenizer, ); let after = self.context.entries.len(); let after_mem = self.context.entries.iter().filter(|e| e.is_memory()).count(); let after_conv = after - after_mem; dbglog!("[compact] entries: {} → {} (mem: {} → {}, conv: {} → {})", before, after, before_mem, after_mem, before_conv, after_conv); let budget = self.budget(); dbglog!("[compact] budget: {}", budget.status_string()); self.load_startup_journal(); self.generation += 1; self.last_prompt_tokens = 0; self.publish_context_state(); } /// Restore from the conversation log. Builds the context window /// the same way compact() does — journal summaries for old messages, /// raw recent messages. This is the unified startup path. /// Returns true if the log had content to restore. 
pub fn restore_from_log(&mut self) -> bool { let entries = match &self.conversation_log { Some(log) => match log.read_tail(64 * 1024 * 1024) { Ok(entries) if !entries.is_empty() => entries, _ => return false, }, None => return false, }; // Load extra — compact() will dedup, trim, reload identity + journal let all: Vec<_> = entries.into_iter() .filter(|e| e.message().role != Role::System) .collect(); let mem_count = all.iter().filter(|e| e.is_memory()).count(); let conv_count = all.len() - mem_count; dbglog!("[restore] loaded {} entries from log (mem: {}, conv: {})", all.len(), mem_count, conv_count); self.context.entries = all; self.compact(); // Estimate prompt tokens from budget so status bar isn't 0 on startup let b = self.budget(); self.last_prompt_tokens = b.used() as u32; true } /// Replace the API client (for model switching). pub fn swap_client(&mut self, new_client: ApiClient) { self.client = new_client; } /// Get the model identifier. pub fn model(&self) -> &str { &self.client.model } /// Get the conversation entries for persistence. pub fn entries(&self) -> &[ConversationEntry] { &self.context.entries } /// Mutable access to conversation entries (for /retry). pub fn client_clone(&self) -> ApiClient { self.client.clone() } pub fn entries_mut(&mut self) -> &mut Vec { &mut self.context.entries } }