// consciousness/src/agent/runner.rs
// runner.rs — Core agent loop
//
// The simplest possible implementation of the agent pattern:
// send messages + tool definitions to the model, if it responds
// with tool calls then dispatch them and loop, if it responds
// with text then display it and wait for the next prompt.
//
// Uses streaming by default so text tokens appear as they're
// generated. Tool calls are accumulated from stream deltas and
// dispatched after the stream completes.
//
// The DMN (dmn.rs) is the outer loop that decides what prompts
// to send here. This module just handles single turns: prompt
// in, response out, tool calls dispatched.
use anyhow::Result;
use tiktoken_rs::CoreBPE;
use crate::agent::api::ApiClient;
use crate::agent::journal;
use crate::agent::log::ConversationLog;
use crate::agent::tools;
use crate::agent::tools::ProcessTracker;
use crate::agent::types::*;
use crate::agent::ui_channel::{ContextSection, SharedContextState, StatusInfo, StreamTarget, UiMessage, UiSender};
/// Result of a single agent turn, as returned by [`Agent::turn`].
pub struct TurnResult {
    /// The text response (already sent through UI channel, so callers
    /// rarely need to read it again — hence the dead_code allowance).
    #[allow(dead_code)]
    pub text: String,
    /// Whether the model called yield_to_user during this turn.
    pub yield_requested: bool,
    /// Whether any tools (other than yield_to_user) were called.
    pub had_tool_calls: bool,
    /// Number of tool calls that returned errors this turn.
    pub tool_errors: u32,
    /// Model name to switch to after this turn completes; `None`
    /// means keep the current model.
    pub model_switch: Option<String>,
    /// Agent requested DMN pause (full stop on autonomous behavior).
    pub dmn_pause: bool,
}
/// Accumulated state across tool dispatches within a single turn.
/// Mirrors the tool-settable fields of [`TurnResult`]; each dispatch
/// may flip flags or set values that are copied out when the turn ends.
struct DispatchState {
    /// Set when the yield_to_user tool fires.
    yield_requested: bool,
    /// Set by any tool call other than yield_to_user.
    had_tool_calls: bool,
    /// Count of tool results whose text started with "Error:".
    tool_errors: u32,
    /// Last model-switch request seen this turn, if any.
    model_switch: Option<String>,
    /// Set when a tool requests a DMN pause.
    dmn_pause: bool,
}
/// Single-turn agent: holds the API client, the in-memory message
/// list (context window), and the mutable context state the TUI
/// inspects. The DMN drives this from outside, one turn at a time.
pub struct Agent {
    client: ApiClient,
    /// The live context window sent to the API each turn.
    messages: Vec<Message>,
    tool_defs: Vec<ToolDef>,
    /// Last known prompt token count from the API (tracks context size).
    last_prompt_tokens: u32,
    /// Shared process tracker for bash tool — lets TUI show/kill running commands.
    pub process_tracker: ProcessTracker,
    /// Current reasoning effort level ("none", "low", "high").
    pub reasoning_effort: String,
    /// Persistent conversation log — append-only record of all messages.
    conversation_log: Option<ConversationLog>,
    /// Current context window budget breakdown.
    pub context_budget: ContextBudget,
    /// BPE tokenizer for token counting (cl100k_base — close enough
    /// for Claude and Qwen budget allocation, ~85-90% count accuracy).
    tokenizer: CoreBPE,
    /// Mutable context state — personality, working stack, etc.
    pub context: ContextState,
    /// Shared live context summary — TUI reads this directly for debug screen.
    pub shared_context: SharedContextState,
    /// Stable session ID for memory-search dedup across turns.
    session_id: String,
}
impl Agent {
    /// Construct an agent and build its initial context window:
    /// system prompt, then (if non-empty) the rendered context message,
    /// then (if non-empty) recent journal text. These are pushed via
    /// `push_context` so they are NOT written to the conversation log —
    /// they're reconstructed on every startup.
    ///
    /// # Panics
    /// Panics if the cl100k_base tokenizer data fails to load.
    pub fn new(
        client: ApiClient,
        system_prompt: String,
        personality: Vec<(String, String)>,
        conversation_log: Option<ConversationLog>,
        shared_context: SharedContextState,
    ) -> Self {
        let tool_defs = tools::definitions();
        let tokenizer = tiktoken_rs::cl100k_base()
            .expect("failed to load cl100k_base tokenizer");
        let context = ContextState {
            system_prompt: system_prompt.clone(),
            personality,
            journal: String::new(),
            working_stack: Vec::new(),
            loaded_nodes: Vec::new(),
        };
        // Timestamp-based ID — stable for the life of this process.
        let session_id = format!("poc-agent-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
        let mut agent = Self {
            client,
            messages: Vec::new(),
            tool_defs,
            last_prompt_tokens: 0,
            process_tracker: ProcessTracker::new(),
            reasoning_effort: "none".to_string(),
            conversation_log,
            context_budget: ContextBudget::default(),
            tokenizer,
            context,
            shared_context,
            session_id,
        };
        // Load recent journal entries at startup for orientation.
        // Order matters: journal + working stack must be loaded before
        // render_context_message() so the rendered message includes them.
        agent.load_startup_journal();
        agent.load_working_stack();
        agent.push_context(Message::system(system_prompt));
        let rendered = agent.context.render_context_message();
        if !rendered.is_empty() {
            agent.push_context(Message::user(rendered));
        }
        if !agent.context.journal.is_empty() {
            agent.push_context(Message::user(agent.context.journal.clone()));
        }
        agent.measure_budget();
        agent.publish_context_state();
        agent
    }
/// Run memory search for a given event, returning any output to inject.
/// Direct library call — no subprocess needed since everything is one crate.
fn run_hook(&self, event: &str, _prompt: &str) -> Option<String> {
let transcript_path = self.conversation_log.as_ref()
.map(|l| l.path().to_string_lossy().to_string())
.unwrap_or_default();
let hook_input = serde_json::json!({
"hook_event_name": event,
"session_id": self.session_id,
"transcript_path": transcript_path,
});
let text = crate::memory_search::run_hook(&hook_input.to_string());
if text.trim().is_empty() {
None
} else {
Some(text)
}
}
/// Push a conversation message — stamped and logged.
fn push_message(&mut self, mut msg: Message) {
msg.stamp();
if let Some(ref log) = self.conversation_log {
if let Err(e) = log.append(&msg) {
eprintln!("warning: failed to log message: {:#}", e);
}
}
self.messages.push(msg);
}
    /// Push a context-only message (system prompt, identity context,
    /// journal summaries). Not logged — these are reconstructed on
    /// every startup/compaction, unlike `push_message` which persists.
    fn push_context(&mut self, msg: Message) {
        self.messages.push(msg);
    }
    /// Measure context window usage by category. Uses the BPE tokenizer
    /// for direct token counting (no chars/4 approximation).
    ///
    /// Classification walks `self.messages` front to back: leading
    /// System messages are identity; leading User messages are journal
    /// (recognized by their fixed text prefixes) or, before any journal/
    /// conversation tokens, personality/identity context. The first
    /// Assistant/Tool message — or the first unrecognized User message
    /// after identity/journal — flips everything after it into the
    /// conversation bucket.
    fn measure_budget(&mut self) {
        let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
        let mut id_tokens: usize = 0;
        let mut jnl_tokens: usize = 0;
        let mut conv_tokens: usize = 0;
        // Once true, every remaining message counts as conversation
        // regardless of role.
        let mut in_conversation = false;
        for msg in &self.messages {
            let tokens = crate::agent::context::msg_token_count(&self.tokenizer, msg);
            if in_conversation {
                conv_tokens += tokens;
                continue;
            }
            match msg.role {
                Role::System => id_tokens += tokens,
                Role::User => {
                    let text = msg.content_text();
                    if text.starts_with("[Earlier in this conversation") {
                        jnl_tokens += tokens;
                    } else if text.starts_with("Your context was just rebuilt") {
                        jnl_tokens += tokens;
                    } else if jnl_tokens == 0 && conv_tokens == 0 {
                        // Personality context — part of identity
                        id_tokens += tokens;
                    } else {
                        in_conversation = true;
                        conv_tokens += tokens;
                    }
                }
                _ => {
                    in_conversation = true;
                    conv_tokens += tokens;
                }
            }
        }
        // Memory = nodes loaded during the session via tool calls
        let mem_tokens: usize = self.context.loaded_nodes.iter()
            .map(|node| count(&node.render()))
            .sum();
        self.context_budget = ContextBudget {
            identity_tokens: id_tokens,
            memory_tokens: mem_tokens,
            journal_tokens: jnl_tokens,
            conversation_tokens: conv_tokens,
            window_tokens: crate::agent::context::model_context_window(&self.client.model),
        };
    }
    /// Send a user message and run the agent loop until the model
    /// produces a text response (no more tool calls). Streams text
    /// and tool activity through the UI channel.
    ///
    /// In-loop retry policy (each capped at 2 attempts):
    /// - context overflow -> emergency compact, retry
    /// - stream error     -> 2s backoff, retry
    /// - empty response   -> inject a nudge message, retry; after the
    ///   cap, the empty response is returned as-is
    pub async fn turn(
        &mut self,
        user_input: &str,
        ui_tx: &UiSender,
        target: StreamTarget,
    ) -> Result<TurnResult> {
        // Run poc-hook (memory search, notifications, context check).
        // Hook output is wrapped in a <system-reminder> tag appended to
        // the user's text.
        if let Some(hook_output) = self.run_hook("UserPromptSubmit", user_input) {
            let enriched = format!("{}\n\n<system-reminder>\n{}\n</system-reminder>",
                user_input, hook_output);
            self.push_message(Message::user(enriched));
        } else {
            self.push_message(Message::user(user_input));
        }
        let mut overflow_retries: u32 = 0;
        // NOTE: empty_retries is shared between the stream-error and
        // empty-response retry paths, so the two together are capped at 2.
        let mut empty_retries: u32 = 0;
        let mut ds = DispatchState {
            yield_requested: false,
            had_tool_calls: false,
            tool_errors: 0,
            model_switch: None,
            dmn_pause: false,
        };
        loop {
            let _ = ui_tx.send(UiMessage::Activity("thinking...".into()));
            let api_result = self
                .client
                .chat_completion_stream(
                    &self.messages,
                    Some(&self.tool_defs),
                    ui_tx,
                    target,
                    &self.reasoning_effort,
                )
                .await;
            // Context overflow → compact and retry (max 2 attempts)
            // Stream error → retry with backoff (max 2 attempts)
            // Any other error propagates to the caller via `?`.
            let (msg, usage) = match api_result {
                Err(e) if crate::agent::context::is_context_overflow(&e) && overflow_retries < 2 => {
                    overflow_retries += 1;
                    let _ = ui_tx.send(UiMessage::Info(format!(
                        "[context overflow — compacting and retrying ({}/2)]",
                        overflow_retries,
                    )));
                    self.emergency_compact();
                    continue;
                }
                Err(e) if crate::agent::context::is_stream_error(&e) && empty_retries < 2 => {
                    empty_retries += 1;
                    let _ = ui_tx.send(UiMessage::Info(format!(
                        "[stream error: {} — retrying ({}/2)]",
                        e, empty_retries,
                    )));
                    tokio::time::sleep(std::time::Duration::from_secs(2)).await;
                    continue;
                }
                other => other?,
            };
            // Strip ephemeral tool calls (journal) that the API has
            // now processed. They're persisted to disk; no need to keep
            // them in the conversation history burning tokens.
            self.strip_ephemeral_tool_calls();
            if let Some(usage) = &usage {
                self.last_prompt_tokens = usage.prompt_tokens;
                self.measure_budget();
                self.publish_context_state();
                let _ = ui_tx.send(UiMessage::StatusUpdate(StatusInfo {
                    dmn_state: String::new(), // filled by main loop
                    dmn_turns: 0,
                    dmn_max_turns: 0,
                    prompt_tokens: usage.prompt_tokens,
                    completion_tokens: usage.completion_tokens,
                    model: self.client.model.clone(),
                    turn_tools: 0, // tracked by TUI from ToolCall messages
                    context_budget: self.context_budget.status_string(),
                }));
            }
            // Empty response — model returned finish=stop with no content
            // or tool calls. Inject a nudge so the retry has different input.
            let has_content = msg.content.is_some();
            let has_tools = msg.tool_calls.as_ref().map_or(false, |tc| !tc.is_empty());
            if !has_content && !has_tools {
                if empty_retries < 2 {
                    empty_retries += 1;
                    let _ = ui_tx.send(UiMessage::Debug(format!(
                        "empty response, injecting nudge and retrying ({}/2)",
                        empty_retries,
                    )));
                    self.push_message(Message::user(
                        "[system] Your previous response was empty. \
                        Please respond with text or use a tool."
                    ));
                    continue;
                }
                // After max retries, fall through — return the empty response
            } else {
                empty_retries = 0;
            }
            // Structured tool calls from the API: record the assistant
            // message, dispatch every call, then loop for the follow-up.
            if let Some(ref tool_calls) = msg.tool_calls {
                if !tool_calls.is_empty() {
                    self.push_message(msg.clone());
                    for call in tool_calls {
                        self.dispatch_tool_call(call, None, ui_tx, &mut ds)
                            .await;
                    }
                    continue;
                }
            }
            // No structured tool calls — check for leaked tool calls
            // (Qwen sometimes outputs <tool_call> XML as text).
            let text = msg.content_text().to_string();
            let leaked = crate::agent::parsing::parse_leaked_tool_calls(&text);
            if !leaked.is_empty() {
                let _ = ui_tx.send(UiMessage::Debug(format!(
                    "recovered {} leaked tool call(s) from text",
                    leaked.len()
                )));
                // Strip tool call XML and thinking tokens from the message
                // so they don't clutter the conversation history.
                let cleaned = crate::agent::parsing::strip_leaked_artifacts(&text);
                let mut clean_msg = msg.clone();
                clean_msg.content = if cleaned.trim().is_empty() {
                    None
                } else {
                    Some(MessageContent::Text(cleaned))
                };
                self.push_message(clean_msg);
                for call in &leaked {
                    self.dispatch_tool_call(call, Some("recovered"), ui_tx, &mut ds)
                        .await;
                }
                continue;
            }
            // Genuinely text-only response — clear the activity line,
            // record the message, and end the turn.
            let _ = ui_tx.send(UiMessage::Activity(String::new()));
            self.push_message(msg);
            return Ok(TurnResult {
                text,
                yield_requested: ds.yield_requested,
                had_tool_calls: ds.had_tool_calls,
                tool_errors: ds.tool_errors,
                model_switch: ds.model_switch,
                dmn_pause: ds.dmn_pause,
            });
        }
    }
    /// Dispatch a single tool call: send UI annotations, run the tool,
    /// push results into the conversation, handle images.
    ///
    /// Three dispatch paths, checked in order:
    /// 1. `working_stack` — handled inline (needs &mut context state);
    /// 2. `memory_*` — handled inline (tracks loaded nodes);
    /// 3. everything else — forwarded to `tools::dispatch`.
    /// All paths emit ToolResult/ToolFinished UI events and push a
    /// tool_result message; errors are counted via a leading "Error:"
    /// prefix on the result text.
    async fn dispatch_tool_call(
        &mut self,
        call: &ToolCall,
        tag: Option<&str>,
        ui_tx: &UiSender,
        ds: &mut DispatchState,
    ) {
        // Malformed argument JSON degrades to a default Value rather
        // than failing the dispatch.
        let args: serde_json::Value =
            serde_json::from_str(&call.function.arguments).unwrap_or_default();
        let args_summary = summarize_args(&call.function.name, &args);
        let label = match tag {
            Some(t) => format!("calling: {} ({})", call.function.name, t),
            None => format!("calling: {}", call.function.name),
        };
        let _ = ui_tx.send(UiMessage::Activity(label));
        let _ = ui_tx.send(UiMessage::ToolCall {
            name: call.function.name.clone(),
            args_summary: args_summary.clone(),
        });
        let _ = ui_tx.send(UiMessage::ToolStarted {
            id: call.id.clone(),
            name: call.function.name.clone(),
            detail: args_summary,
        });
        // Handle working_stack tool — needs &mut self for context state
        if call.function.name == "working_stack" {
            let result = tools::working_stack::handle(&args, &mut self.context.working_stack);
            let output = tools::ToolOutput {
                text: result.clone(),
                is_yield: false,
                images: Vec::new(),
                model_switch: None,
                dmn_pause: false,
            };
            let _ = ui_tx.send(UiMessage::ToolResult {
                name: call.function.name.clone(),
                result: output.text.clone(),
            });
            let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });
            self.push_message(Message::tool_result(&call.id, &output.text));
            ds.had_tool_calls = true;
            // Re-render the context message so the model sees the updated stack
            if !result.starts_with("Error:") {
                self.refresh_context_message();
            }
            return;
        }
        // Handle memory tools — needs &mut self for node tracking
        if call.function.name.starts_with("memory_") {
            let result = tools::memory::dispatch(&call.function.name, &args, None);
            let text = match &result {
                Ok(s) => s.clone(),
                Err(e) => format!("Error: {:#}", e),
            };
            // Track loaded/updated nodes so they show in the debug view
            // and count against the memory budget.
            if result.is_ok() {
                match call.function.name.as_str() {
                    "memory_render" | "memory_links" => {
                        if let Some(key) = args.get("key").and_then(|v| v.as_str()) {
                            if let Some(node) = crate::hippocampus::memory::MemoryNode::load(key) {
                                // Replace if already tracked, otherwise add
                                if let Some(existing) = self.context.loaded_nodes.iter_mut()
                                    .find(|n| n.key == node.key) {
                                    *existing = node;
                                } else {
                                    self.context.loaded_nodes.push(node);
                                }
                            }
                        }
                    }
                    "memory_write" => {
                        if let Some(key) = args.get("key").and_then(|v| v.as_str()) {
                            if let Some(node) = crate::hippocampus::memory::MemoryNode::load(key) {
                                // Refresh if already tracked
                                if let Some(existing) = self.context.loaded_nodes.iter_mut()
                                    .find(|n| n.key == node.key) {
                                    *existing = node;
                                }
                                // Don't auto-add writes — only renders register nodes
                            }
                        }
                    }
                    _ => {}
                }
            }
            let output = tools::ToolOutput {
                text,
                is_yield: false,
                images: Vec::new(),
                model_switch: None,
                dmn_pause: false,
            };
            let _ = ui_tx.send(UiMessage::ToolResult {
                name: call.function.name.clone(),
                result: output.text.clone(),
            });
            let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });
            self.push_message(Message::tool_result(&call.id, &output.text));
            ds.had_tool_calls = true;
            if output.text.starts_with("Error:") {
                ds.tool_errors += 1;
            }
            // Node set may have changed — refresh budget and debug view.
            self.measure_budget();
            self.publish_context_state();
            return;
        }
        // General path: everything else goes through the async dispatcher.
        let output =
            tools::dispatch(&call.function.name, &args, &self.process_tracker).await;
        if output.is_yield {
            ds.yield_requested = true;
        } else {
            ds.had_tool_calls = true;
        }
        if output.model_switch.is_some() {
            ds.model_switch = output.model_switch;
        }
        if output.dmn_pause {
            ds.dmn_pause = true;
        }
        if output.text.starts_with("Error:") {
            ds.tool_errors += 1;
        }
        let _ = ui_tx.send(UiMessage::ToolResult {
            name: call.function.name.clone(),
            result: output.text.clone(),
        });
        let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });
        self.push_message(Message::tool_result(&call.id, &output.text));
        if !output.images.is_empty() {
            // Only one live image in context at a time — age out any
            // previous ones to avoid accumulating ~90KB+ per image.
            self.age_out_images();
            self.push_message(Message::user_with_images(
                "Here is the image you requested:",
                &output.images,
            ));
        }
    }
    /// Build context state summary for the debug screen.
    ///
    /// Produces one top-level section per context category (system
    /// prompt, personality, journal, working stack, loaded memory
    /// nodes, conversation), each with per-item children and token
    /// counts from the BPE tokenizer.
    pub fn context_state_summary(&self) -> Vec<ContextSection> {
        let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
        let mut sections = Vec::new();
        // System prompt
        sections.push(ContextSection {
            name: "System prompt".into(),
            tokens: count(&self.context.system_prompt),
            content: self.context.system_prompt.clone(),
            children: Vec::new(),
        });
        // Personality — parent with file children
        let personality_children: Vec<ContextSection> = self.context.personality.iter()
            .map(|(name, content)| ContextSection {
                name: name.clone(),
                tokens: count(content),
                content: content.clone(),
                children: Vec::new(),
            })
            .collect();
        let personality_tokens: usize = personality_children.iter().map(|c| c.tokens).sum();
        sections.push(ContextSection {
            name: format!("Personality ({} files)", personality_children.len()),
            tokens: personality_tokens,
            content: String::new(),
            children: personality_children,
        });
        // Journal — split into per-entry children. Entries are delimited
        // by "## " headers; the body's first line (up to 60 chars) serves
        // as the preview in the child name.
        {
            let mut journal_children = Vec::new();
            let mut current_header = String::new();
            let mut current_body = String::new();
            for line in self.context.journal.lines() {
                if line.starts_with("## ") {
                    // Flush the previous entry before starting a new one.
                    if !current_header.is_empty() {
                        let body = std::mem::take(&mut current_body);
                        let preview: String = body.lines().next().unwrap_or("").chars().take(60).collect();
                        journal_children.push(ContextSection {
                            name: format!("{}: {}", current_header, preview),
                            tokens: count(&body),
                            content: body,
                            children: Vec::new(),
                        });
                    }
                    current_header = line.trim_start_matches("## ").to_string();
                    current_body.clear();
                } else {
                    // Skip leading blank lines in an entry body.
                    if !current_body.is_empty() || !line.is_empty() {
                        current_body.push_str(line);
                        current_body.push('\n');
                    }
                }
            }
            // Flush the trailing entry, if any.
            if !current_header.is_empty() {
                let preview: String = current_body.lines().next().unwrap_or("").chars().take(60).collect();
                journal_children.push(ContextSection {
                    name: format!("{}: {}", current_header, preview),
                    tokens: count(&current_body),
                    content: current_body,
                    children: Vec::new(),
                });
            }
            let journal_tokens: usize = journal_children.iter().map(|c| c.tokens).sum();
            sections.push(ContextSection {
                name: format!("Journal ({} entries)", journal_children.len()),
                tokens: journal_tokens,
                content: String::new(),
                children: journal_children,
            });
        }
        // Working stack — instructions + items as children
        let instructions = std::fs::read_to_string(WORKING_STACK_INSTRUCTIONS)
            .unwrap_or_default();
        let mut stack_children = vec![ContextSection {
            name: "Instructions".into(),
            tokens: count(&instructions),
            content: instructions,
            children: Vec::new(),
        }];
        for (i, item) in self.context.working_stack.iter().enumerate() {
            // Marks the last (top-of-stack) item differently from the rest.
            // NOTE(review): the two marker strings are "" and " " — if a
            // visible glyph was intended for the top item, it may have
            // been lost; confirm against the TUI rendering.
            let marker = if i == self.context.working_stack.len() - 1 { "" } else { " " };
            stack_children.push(ContextSection {
                name: format!("{} [{}] {}", marker, i, item),
                tokens: count(item),
                content: String::new(),
                children: Vec::new(),
            });
        }
        let stack_tokens: usize = stack_children.iter().map(|c| c.tokens).sum();
        sections.push(ContextSection {
            name: format!("Working stack ({} items)", self.context.working_stack.len()),
            tokens: stack_tokens,
            content: String::new(),
            children: stack_children,
        });
        // Loaded memory nodes — tracked by memory tools
        if !self.context.loaded_nodes.is_empty() {
            let node_children: Vec<ContextSection> = self.context.loaded_nodes.iter()
                .map(|node| {
                    let rendered = node.render();
                    ContextSection {
                        name: format!("{} (v{}, w={:.2}, {} links)",
                            node.key, node.version, node.weight, node.links.len()),
                        tokens: count(&rendered),
                        content: String::new(), // don't duplicate in debug view
                        children: Vec::new(),
                    }
                })
                .collect();
            let node_tokens: usize = node_children.iter().map(|c| c.tokens).sum();
            sections.push(ContextSection {
                name: format!("Memory nodes ({} loaded)", self.context.loaded_nodes.len()),
                tokens: node_tokens,
                content: String::new(),
                children: node_children,
            });
        }
        // Conversation — each message as a child. Conversation starts at
        // the first Assistant/Tool message (same rule as do_compact).
        let conv_start = self.messages.iter()
            .position(|m| m.role == Role::Assistant || m.role == Role::Tool)
            .unwrap_or(self.messages.len());
        let conv_messages = &self.messages[conv_start..];
        let conv_children: Vec<ContextSection> = conv_messages.iter().enumerate()
            .map(|(i, msg)| {
                let text = msg.content.as_ref()
                    .map(|c| c.as_text().to_string())
                    .unwrap_or_default();
                let tool_info = msg.tool_calls.as_ref().map(|tc| {
                    tc.iter()
                        .map(|c| c.function.name.clone())
                        .collect::<Vec<_>>()
                        .join(", ")
                });
                let label = match (&msg.role, &tool_info) {
                    (_, Some(tools)) => format!("[tool_call: {}]", tools),
                    _ => {
                        // NOTE(review): truncation check uses byte length
                        // (text.len()) but preview takes 60 chars — on
                        // multi-byte text "..." can be appended without
                        // actual truncation. Cosmetic only.
                        let preview: String = text.chars().take(60).collect();
                        let preview = preview.replace('\n', " ");
                        if text.len() > 60 { format!("{}...", preview) } else { preview }
                    }
                };
                let tokens = count(&text);
                let role_name = match msg.role {
                    Role::Assistant => "PoC",
                    Role::User => "Kent",
                    Role::Tool => "tool",
                    Role::System => "system",
                };
                ContextSection {
                    name: format!("[{}] {}: {}", conv_start + i, role_name, label),
                    tokens,
                    content: text,
                    children: Vec::new(),
                }
            })
            .collect();
        let conv_tokens: usize = conv_children.iter().map(|c| c.tokens).sum();
        sections.push(ContextSection {
            name: format!("Conversation ({} messages)", conv_children.len()),
            tokens: conv_tokens,
            content: String::new(),
            children: conv_children,
        });
        sections
    }
    /// Load recent journal entries at startup for orientation.
    /// Uses the same budget logic as compaction but with empty conversation.
    /// Only parses the tail of the journal file (last 64KB) for speed.
    /// Leaves `self.context.journal` untouched when no entries parse.
    fn load_startup_journal(&mut self) {
        let journal_path = journal::default_journal_path();
        let entries = journal::parse_journal_tail(&journal_path, 64 * 1024);
        if entries.is_empty() {
            return;
        }
        let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
        let context_message = self.context.render_context_message();
        // Plan how many entries fit the model's window given the fixed
        // identity text, then render just those into journal text.
        let plan = crate::agent::context::plan_context(
            &self.context.system_prompt,
            &context_message,
            &[], // no conversation yet
            &entries,
            &self.client.model,
            &count,
        );
        self.context.journal = crate::agent::context::render_journal_text(&entries, &plan);
    }
/// Re-render the context message in self.messages from live ContextState.
/// Called after any change to context state (working stack, etc).
fn refresh_context_message(&mut self) {
let rendered = self.context.render_context_message();
// The context message is the first user message (index 1, after system prompt)
if self.messages.len() >= 2 && self.messages[1].role == Role::User {
self.messages[1] = Message::user(rendered);
}
self.publish_context_state();
self.save_working_stack();
}
/// Persist working stack to disk.
fn save_working_stack(&self) {
if let Ok(json) = serde_json::to_string(&self.context.working_stack) {
let _ = std::fs::write(WORKING_STACK_FILE, json);
}
}
/// Load working stack from disk.
fn load_working_stack(&mut self) {
if let Ok(data) = std::fs::read_to_string(WORKING_STACK_FILE) {
if let Ok(stack) = serde_json::from_str::<Vec<String>>(&data) {
self.context.working_stack = stack;
}
}
}
/// Push the current context summary to the shared state for the TUI to read.
fn publish_context_state(&self) {
if let Ok(mut state) = self.shared_context.write() {
*state = self.context_state_summary();
}
}
/// Replace base64 image data in older messages with text placeholders.
/// Only the most recent image stays live — each new image ages out
/// all previous ones. The tool result message (right before each image
/// message) already records what was loaded, so no info is lost.
fn age_out_images(&mut self) {
for msg in &mut self.messages {
if let Some(MessageContent::Parts(parts)) = &msg.content {
let has_images = parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. }));
if !has_images {
continue;
}
let mut replacement = String::new();
for part in parts {
match part {
ContentPart::Text { text } => {
if !replacement.is_empty() {
replacement.push('\n');
}
replacement.push_str(text);
}
ContentPart::ImageUrl { .. } => {
if !replacement.is_empty() {
replacement.push('\n');
}
replacement.push_str(
"[image aged out — see tool result above for details]",
);
}
}
}
msg.content = Some(MessageContent::Text(replacement));
}
}
}
/// Strip ephemeral tool calls from the conversation history.
///
/// Ephemeral tools (like journal) persist their output to disk,
/// so the tool call + result don't need to stay in the context
/// window. We keep them for exactly one API round-trip (the model
/// needs to see the result was acknowledged), then strip them.
///
/// If an assistant message contains ONLY ephemeral tool calls,
/// the entire message and its tool results are removed. If mixed
/// with non-ephemeral calls, we leave it (rare case, small cost).
fn strip_ephemeral_tool_calls(&mut self) {
// Collect IDs of tool calls to strip
let mut strip_ids: Vec<String> = Vec::new();
let mut strip_msg_indices: Vec<usize> = Vec::new();
for (i, msg) in self.messages.iter().enumerate() {
if msg.role != Role::Assistant {
continue;
}
let calls = match &msg.tool_calls {
Some(c) if !c.is_empty() => c,
_ => continue,
};
let all_ephemeral = calls.iter().all(|c| {
c.function.name == tools::journal::TOOL_NAME
});
if all_ephemeral {
strip_msg_indices.push(i);
for call in calls {
strip_ids.push(call.id.clone());
}
}
}
if strip_ids.is_empty() {
return;
}
// Remove in reverse order to preserve indices
self.messages.retain(|msg| {
// Strip the assistant messages we identified
if msg.role == Role::Assistant {
if let Some(calls) = &msg.tool_calls {
if calls.iter().all(|c| strip_ids.contains(&c.id)) {
return false;
}
}
}
// Strip matching tool results
if msg.role == Role::Tool {
if let Some(ref id) = msg.tool_call_id {
if strip_ids.contains(id) {
return false;
}
}
}
true
});
}
    /// Last prompt token count reported by the API (0 until the first
    /// usage report, and reset to 0 after compaction/restore).
    pub fn last_prompt_tokens(&self) -> u32 {
        self.last_prompt_tokens
    }
    /// Build context window from conversation messages + journal.
    /// Used by both compact() (in-memory messages) and restore_from_log()
    /// (conversation log). The context window is always:
    /// identity + journal summaries + raw recent messages.
    /// Installs the new system prompt and personality first, then
    /// rebuilds via do_compact().
    pub fn compact(&mut self, new_system_prompt: String, new_personality: Vec<(String, String)>) {
        self.context.system_prompt = new_system_prompt;
        self.context.personality = new_personality;
        self.do_compact();
    }
    /// Internal compaction — rebuilds context window from current messages.
    /// Conversation is everything from the first Assistant/Tool message
    /// onward; older material is folded into journal text by
    /// build_context_window. Resets last_prompt_tokens since the old
    /// API-reported count no longer matches the rebuilt window.
    fn do_compact(&mut self) {
        // Find where actual conversation starts (after system + context)
        let conv_start = self
            .messages
            .iter()
            .position(|m| m.role == Role::Assistant || m.role == Role::Tool)
            .unwrap_or(self.messages.len());
        let conversation: Vec<Message> = self.messages[conv_start..].to_vec();
        let (messages, journal) = crate::agent::context::build_context_window(
            &self.context,
            &conversation,
            &self.client.model,
            &self.tokenizer,
        );
        self.context.journal = journal;
        self.messages = messages;
        self.last_prompt_tokens = 0;
        self.measure_budget();
        self.publish_context_state();
    }
    /// Emergency compaction using stored config — called on context
    /// overflow. Keeps the current system prompt/personality, unlike
    /// compact() which installs fresh ones.
    fn emergency_compact(&mut self) {
        self.do_compact();
    }
    /// Restore from the conversation log. Builds the context window
    /// the same way compact() does — journal summaries for old messages,
    /// raw recent messages. This is the unified startup path.
    /// Reads at most the last 512KB of the log.
    /// Returns true if the log had content to restore; on any failure
    /// (no log configured, empty log, read error) returns false and
    /// leaves self.messages untouched.
    pub fn restore_from_log(
        &mut self,
        system_prompt: String,
        personality: Vec<(String, String)>,
    ) -> bool {
        self.context.system_prompt = system_prompt;
        self.context.personality = personality;
        let all_messages = match &self.conversation_log {
            Some(log) => match log.read_tail(512 * 1024) {
                Ok(msgs) if !msgs.is_empty() => {
                    dbglog!("[restore] read {} messages from log tail", msgs.len());
                    msgs
                }
                Ok(_) => {
                    dbglog!("[restore] log exists but is empty");
                    return false;
                }
                Err(e) => {
                    dbglog!("[restore] failed to read log: {}", e);
                    return false;
                }
            },
            None => {
                dbglog!("[restore] no conversation log configured");
                return false;
            }
        };
        // Filter out system/context messages — we only want the
        // actual conversation (user prompts, assistant responses,
        // tool calls/results)
        let conversation: Vec<Message> = all_messages
            .into_iter()
            .filter(|m| m.role != Role::System)
            .collect();
        dbglog!("[restore] {} messages after filtering system", conversation.len());
        let (messages, journal) = crate::agent::context::build_context_window(
            &self.context,
            &conversation,
            &self.client.model,
            &self.tokenizer,
        );
        dbglog!("[restore] journal text: {} chars, {} lines",
            journal.len(), journal.lines().count());
        self.context.journal = journal;
        self.messages = messages;
        dbglog!("[restore] built context window: {} messages", self.messages.len());
        self.last_prompt_tokens = 0;
        self.measure_budget();
        self.publish_context_state();
        true
    }
    /// Replace the API client (for model switching).
    pub fn swap_client(&mut self, new_client: ApiClient) {
        self.client = new_client;
    }
    /// Get the model identifier of the current client.
    pub fn model(&self) -> &str {
        &self.client.model
    }
    /// Get the conversation history for persistence.
    pub fn messages(&self) -> &[Message] {
        &self.messages
    }
    /// Mutable access to conversation history (for /retry).
    pub fn messages_mut(&mut self) -> &mut Vec<Message> {
        &mut self.messages
    }
    /// Restore from a saved conversation. Replaces the message list
    /// wholesale; does not re-measure the budget.
    pub fn restore(&mut self, messages: Vec<Message>) {
        self.messages = messages;
    }
}
// Context window building, token counting, and error classification
// live in context.rs
/// Create a short summary of tool args for the tools pane header.
///
/// Long values (bash commands, journal entries) are truncated to 60
/// characters with a trailing "...". Truncation is char-boundary-safe:
/// the previous `&entry[..60]` byte slice would panic when byte 60 fell
/// inside a multi-byte UTF-8 character.
fn summarize_args(tool_name: &str, args: &serde_json::Value) -> String {
    // Truncate to at most `max_chars` characters, appending "..." when cut.
    // char_indices().nth(n) yields the byte offset of the (n+1)-th char,
    // which is Some only when the string is longer than n chars, and is
    // always a valid slice boundary.
    fn truncate(s: &str, max_chars: usize) -> String {
        match s.char_indices().nth(max_chars) {
            Some((cut, _)) => format!("{}...", &s[..cut]),
            None => s.to_string(),
        }
    }
    match tool_name {
        "read_file" | "write_file" | "edit_file" => args["file_path"]
            .as_str()
            .unwrap_or("")
            .to_string(),
        "bash" => truncate(args["command"].as_str().unwrap_or(""), 60),
        "grep" => {
            let pattern = args["pattern"].as_str().unwrap_or("");
            let path = args["path"].as_str().unwrap_or(".");
            format!("{} in {}", pattern, path)
        }
        "glob" => args["pattern"]
            .as_str()
            .unwrap_or("")
            .to_string(),
        "view_image" => {
            // Prefer the tmux pane id when present; else the file path.
            if let Some(pane) = args["pane_id"].as_str() {
                format!("pane {}", pane)
            } else {
                args["file_path"].as_str().unwrap_or("").to_string()
            }
        }
        "journal" => truncate(args["entry"].as_str().unwrap_or(""), 60),
        "yield_to_user" => args["message"]
            .as_str()
            .unwrap_or("")
            .to_string(),
        "switch_model" => args["model"]
            .as_str()
            .unwrap_or("")
            .to_string(),
        "pause" => String::new(),
        _ => String::new(),
    }
}
// Parsing functions (parse_leaked_tool_calls, strip_leaked_artifacts)
// and their tests live in parsing.rs