The working_stack tool was defined in tools/mod.rs but implemented in agent.rs as Agent::handle_working_stack(). This orphaned the tool from the rest of the tool infrastructure. Move the implementation to tools/working_stack.rs so it follows the same pattern as other tools. The tool still needs special handling in agent.rs because it requires mutable access to context state, but the implementation is now in the right place. Changes: - Created tools/working_stack.rs with handle() and format_stack() - Updated tools/mod.rs to use working_stack::definition() - Removed handle_working_stack() and format_stack() from Agent - Agent now calls tools::working_stack::handle() directly
1698 lines
62 KiB
Rust
// agent.rs — Core agent loop
|
|
//
|
|
// The simplest possible implementation of the agent pattern:
|
|
// send messages + tool definitions to the model, if it responds
|
|
// with tool calls then dispatch them and loop, if it responds
|
|
// with text then display it and wait for the next prompt.
|
|
//
|
|
// Uses streaming by default so text tokens appear as they're
|
|
// generated. Tool calls are accumulated from stream deltas and
|
|
// dispatched after the stream completes.
|
|
//
|
|
// The DMN (dmn.rs) is the outer loop that decides what prompts
|
|
// to send here. This module just handles single turns: prompt
|
|
// in, response out, tool calls dispatched.
|
|
|
|
use anyhow::Result;
|
|
use chrono::{DateTime, Utc};
|
|
use tiktoken_rs::CoreBPE;
|
|
|
|
use std::io::Write;
|
|
use std::process::{Command, Stdio};
|
|
|
|
use crate::api::ApiClient;
|
|
use crate::journal;
|
|
use crate::log::ConversationLog;
|
|
use crate::tools;
|
|
use crate::tools::ProcessTracker;
|
|
use crate::types::*;
|
|
use crate::ui_channel::{ContextSection, SharedContextState, StatusInfo, StreamTarget, UiMessage, UiSender};
|
|
|
|
/// Result of a single agent turn.
///
/// Produced by `Agent::turn` once the model settles on a final text
/// response (no further tool calls to dispatch).
pub struct TurnResult {
    /// The text response (already sent through UI channel).
    #[allow(dead_code)]
    pub text: String,
    /// Whether the model called yield_to_user during this turn.
    pub yield_requested: bool,
    /// Whether any tools (other than yield_to_user) were called.
    pub had_tool_calls: bool,
    /// Number of tool calls that returned errors this turn.
    pub tool_errors: u32,
    /// Model name to switch to after this turn completes.
    pub model_switch: Option<String>,
    /// Agent requested DMN pause (full stop on autonomous behavior).
    pub dmn_pause: bool,
}
|
|
|
|
/// Accumulated state across tool dispatches within a single turn.
///
/// `dispatch_tool_call` folds each tool's outcome into this struct;
/// `turn` copies the final values into the returned `TurnResult`.
struct DispatchState {
    /// Set when a tool reports `is_yield` (yield_to_user).
    yield_requested: bool,
    /// Set when any non-yield tool runs.
    had_tool_calls: bool,
    /// Count of tool results whose text starts with "Error:".
    tool_errors: u32,
    /// Most recent model-switch request from a tool, if any.
    model_switch: Option<String>,
    /// Set when a tool requests a DMN pause.
    dmn_pause: bool,
}
|
|
|
|
/// Mutable context state — the structured regions of the context window.
///
/// Each field is a different dimension of awareness. The struct renders
/// itself to text for inclusion in the context message sent to the model.
/// Tools can update individual fields mid-session.
#[derive(Debug, Clone)]
pub struct ContextState {
    /// System prompt (identity, instructions, loaded from prompt file).
    pub system_prompt: String,
    /// Identity files: (filename, contents). Transparent structure for
    /// debug inspection and per-file budget control.
    pub personality: Vec<(String, String)>,
    /// Journal entries rendered as text — bridges old conversation.
    pub journal: String,
    /// Working stack — what the agent is currently doing.
    /// Top of stack (last element) is the current focus.
    pub working_stack: Vec<String>,
}

/// Path to working stack instructions, included in context before the stack state.
const WORKING_STACK_INSTRUCTIONS: &str = "/home/kent/.config/poc-agent/working-stack.md";

/// Path to persisted working stack state.
const WORKING_STACK_FILE: &str = "/home/kent/.claude/memory/working-stack.json";

impl ContextState {
    /// Render the context message for the model. Personality + working stack.
    /// Journal is rendered separately as its own message in the conversation.
    pub fn render_context_message(&self) -> String {
        // Each identity file becomes its own "## <name>" section.
        let mut sections: Vec<String> = Vec::with_capacity(self.personality.len() + 1);
        for (name, content) in &self.personality {
            sections.push(format!("## {}\n\n{}", name, content));
        }

        // The working-stack section is always present: instructions
        // (best-effort file read — empty if missing) followed by the
        // current stack state.
        let mut stack = std::fs::read_to_string(WORKING_STACK_INSTRUCTIONS).unwrap_or_default();
        stack.push_str("\n## Current stack\n\n");
        if self.working_stack.is_empty() {
            stack.push_str("(empty)\n");
        } else {
            let top = self.working_stack.len() - 1;
            for (i, item) in self.working_stack.iter().enumerate() {
                let line = if i == top {
                    // Top of stack (last element) is the current focus.
                    format!("→ {}\n", item)
                } else {
                    format!("  [{}] {}\n", i, item)
                };
                stack.push_str(&line);
            }
        }
        sections.push(stack);

        sections.join("\n\n---\n\n")
    }
}
|
|
|
|
/// Breakdown of context window usage by category, in tokens.
///
/// Categories:
///   id   — static identity context (system prompt + CLAUDE.md + memory files)
///   mem  — dynamically recalled content from poc-memory (future)
///   jnl  — journal entries bridging old conversation
///   conv — raw recent conversation messages
///   free — unused context window (headroom before compaction)
///
/// Token estimates are derived from char proportions scaled by the
/// API-reported prompt_tokens count. Before the first API call, uses
/// chars/4 as a rough approximation.
#[derive(Debug, Clone, Default)]
pub struct ContextBudget {
    pub identity_tokens: usize,
    pub memory_tokens: usize,
    pub journal_tokens: usize,
    pub conversation_tokens: usize,
    /// Model's context window size in tokens.
    pub window_tokens: usize,
}

impl ContextBudget {
    /// Total tokens consumed across all tracked categories.
    pub fn used(&self) -> usize {
        [
            self.identity_tokens,
            self.memory_tokens,
            self.journal_tokens,
            self.conversation_tokens,
        ]
        .iter()
        .sum()
    }

    /// Remaining headroom before the window is full. Saturates at
    /// zero rather than underflowing when usage exceeds the window.
    pub fn free(&self) -> usize {
        self.window_tokens.saturating_sub(self.used())
    }

    /// Format as a compact status string with percentages of the token window.
    /// Non-zero values always show at least 1%.
    pub fn status_string(&self) -> String {
        if self.window_tokens == 0 {
            // No window configured yet — nothing meaningful to show.
            return String::new();
        }
        let pct = |tokens: usize| -> usize {
            match tokens {
                0 => 0,
                // Round down, but never hide a non-zero category as 0%.
                n => ((n * 100) / self.window_tokens).max(1),
            }
        };
        format!(
            "id:{}% mem:{}% jnl:{}% conv:{}% free:{}%",
            pct(self.identity_tokens),
            pct(self.memory_tokens),
            pct(self.journal_tokens),
            pct(self.conversation_tokens),
            pct(self.free()),
        )
    }
}
|
|
|
|
/// The agent: owns the API client, the in-context message history,
/// and all mutable context state for one long-running session.
pub struct Agent {
    /// API client for the current model endpoint.
    client: ApiClient,
    /// Full in-context history: system + context messages + conversation.
    messages: Vec<Message>,
    /// Tool definitions advertised to the model on every request.
    tool_defs: Vec<ToolDef>,
    /// Last known prompt token count from the API (tracks context size).
    last_prompt_tokens: u32,
    /// Shared process tracker for bash tool — lets TUI show/kill running commands.
    pub process_tracker: ProcessTracker,
    /// Current reasoning effort level ("none", "low", "high").
    pub reasoning_effort: String,
    /// Persistent conversation log — append-only record of all messages.
    conversation_log: Option<ConversationLog>,
    /// Current context window budget breakdown.
    pub context_budget: ContextBudget,
    /// BPE tokenizer for token counting (cl100k_base — close enough
    /// for Claude and Qwen budget allocation, ~85-90% count accuracy).
    tokenizer: CoreBPE,
    /// Mutable context state — personality, working stack, etc.
    pub context: ContextState,
    /// Shared live context summary — TUI reads this directly for debug screen.
    pub shared_context: SharedContextState,
    /// Stable session ID for memory-search dedup across turns.
    session_id: String,
}
|
|
|
|
impl Agent {
|
|
    /// Construct a new agent and assemble its initial context window.
    ///
    /// Order matters: journal and working stack are loaded from disk
    /// first so the rendered context message reflects them, then the
    /// window is built as system prompt → context message → journal.
    pub fn new(
        client: ApiClient,
        system_prompt: String,
        personality: Vec<(String, String)>,
        conversation_log: Option<ConversationLog>,
        shared_context: SharedContextState,
    ) -> Self {
        let tool_defs = tools::definitions();
        // Failure here is a build/packaging defect, not a runtime
        // condition — panicking is appropriate.
        let tokenizer = tiktoken_rs::cl100k_base()
            .expect("failed to load cl100k_base tokenizer");

        let context = ContextState {
            system_prompt: system_prompt.clone(),
            personality,
            journal: String::new(),
            working_stack: Vec::new(),
        };
        // Timestamp-based ID, stable for the life of this process.
        let session_id = format!("poc-agent-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
        let mut agent = Self {
            client,
            messages: Vec::new(),
            tool_defs,
            last_prompt_tokens: 0,
            process_tracker: ProcessTracker::new(),
            reasoning_effort: "none".to_string(),
            conversation_log,
            context_budget: ContextBudget::default(),
            tokenizer,
            context,
            shared_context,
            session_id,
        };

        // Load recent journal entries at startup for orientation
        agent.load_startup_journal();
        agent.load_working_stack();

        // Context-only messages — not logged; rebuilt on every
        // startup/compaction (see push_context).
        agent.push_context(Message::system(system_prompt));
        let rendered = agent.context.render_context_message();
        if !rendered.is_empty() {
            agent.push_context(Message::user(rendered));
        }
        if !agent.context.journal.is_empty() {
            agent.push_context(Message::user(agent.context.journal.clone()));
        }
        agent.measure_budget();
        agent.publish_context_state();
        agent
    }
|
|
|
|
/// Run poc-hook for a given event, returning any output to inject.
|
|
fn run_hook(&self, event: &str, prompt: &str) -> Option<String> {
|
|
let transcript_path = self.conversation_log.as_ref()
|
|
.map(|l| l.path().to_string_lossy().to_string())
|
|
.unwrap_or_default();
|
|
|
|
let hook_input = serde_json::json!({
|
|
"hook_event_name": event,
|
|
"session_id": self.session_id,
|
|
"transcript_path": transcript_path,
|
|
"prompt": prompt,
|
|
});
|
|
|
|
let mut child = Command::new("poc-hook")
|
|
.stdin(Stdio::piped())
|
|
.stdout(Stdio::piped())
|
|
.stderr(Stdio::null())
|
|
.spawn()
|
|
.ok()?;
|
|
|
|
if let Some(ref mut stdin) = child.stdin {
|
|
let _ = stdin.write_all(hook_input.to_string().as_bytes());
|
|
}
|
|
drop(child.stdin.take());
|
|
|
|
let output = child.wait_with_output().ok()?;
|
|
let text = String::from_utf8_lossy(&output.stdout).to_string();
|
|
if text.trim().is_empty() {
|
|
None
|
|
} else {
|
|
Some(text)
|
|
}
|
|
}
|
|
|
|
/// Push a conversation message — stamped and logged.
|
|
fn push_message(&mut self, mut msg: Message) {
|
|
msg.stamp();
|
|
if let Some(ref log) = self.conversation_log {
|
|
if let Err(e) = log.append(&msg) {
|
|
eprintln!("warning: failed to log message: {:#}", e);
|
|
}
|
|
}
|
|
self.messages.push(msg);
|
|
}
|
|
|
|
    /// Push a context-only message (system prompt, identity context,
    /// journal summaries). Not logged — these are reconstructed on
    /// every startup/compaction.
    fn push_context(&mut self, msg: Message) {
        self.messages.push(msg);
    }
|
|
|
|
    /// Measure context window usage by category. Uses the BPE tokenizer
    /// for direct token counting (no chars/4 approximation).
    ///
    /// Classification is positional: messages before the first
    /// assistant/tool (or non-identity user) message count as identity
    /// or journal; once the conversation starts, everything after
    /// counts as conversation.
    fn measure_budget(&mut self) {
        let mut id_tokens: usize = 0;
        // Dynamically recalled memory — not populated yet (future).
        let mem_tokens: usize = 0;
        let mut jnl_tokens: usize = 0;
        let mut conv_tokens: usize = 0;
        let mut in_conversation = false;

        for msg in &self.messages {
            let tokens = msg_token_count(&self.tokenizer, msg);

            // Once conversation has started, classification is settled.
            if in_conversation {
                conv_tokens += tokens;
                continue;
            }

            match msg.role {
                Role::System => id_tokens += tokens,
                Role::User => {
                    let text = msg.content_text();
                    // Journal bridge messages carry known prefixes.
                    if text.starts_with("[Earlier in this conversation") {
                        jnl_tokens += tokens;
                    } else if text.starts_with("Your context was just rebuilt") {
                        jnl_tokens += tokens;
                    } else if jnl_tokens == 0 && conv_tokens == 0 {
                        // Static identity context (before any journal/conversation)
                        id_tokens += tokens;
                    } else {
                        // First real user prompt — conversation begins.
                        in_conversation = true;
                        conv_tokens += tokens;
                    }
                }
                _ => {
                    // Assistant/tool messages always mark the start of
                    // the conversation region.
                    in_conversation = true;
                    conv_tokens += tokens;
                }
            }
        }

        self.context_budget = ContextBudget {
            identity_tokens: id_tokens,
            memory_tokens: mem_tokens,
            journal_tokens: jnl_tokens,
            conversation_tokens: conv_tokens,
            window_tokens: model_context_window(&self.client.model),
        };
    }
|
|
|
|
    /// Send a user message and run the agent loop until the model
    /// produces a text response (no more tool calls). Streams text
    /// and tool activity through the UI channel.
    ///
    /// Recovery behavior (each capped at 2 attempts): context overflow
    /// → compact and retry; stream error → backoff and retry; empty
    /// response → inject a nudge message and retry.
    pub async fn turn(
        &mut self,
        user_input: &str,
        ui_tx: &UiSender,
        target: StreamTarget,
    ) -> Result<TurnResult> {
        // Run poc-hook (memory search, notifications, context check)
        if let Some(hook_output) = self.run_hook("UserPromptSubmit", user_input) {
            // Hook output rides along inside a <system-reminder> block.
            let enriched = format!("{}\n\n<system-reminder>\n{}\n</system-reminder>",
                user_input, hook_output);
            self.push_message(Message::user(enriched));
        } else {
            self.push_message(Message::user(user_input));
        }

        let mut overflow_retries: u32 = 0;
        let mut empty_retries: u32 = 0;
        let mut ds = DispatchState {
            yield_requested: false,
            had_tool_calls: false,
            tool_errors: 0,
            model_switch: None,
            dmn_pause: false,
        };

        loop {
            let _ = ui_tx.send(UiMessage::Activity("thinking...".into()));
            let api_result = self
                .client
                .chat_completion_stream(
                    &self.messages,
                    Some(&self.tool_defs),
                    ui_tx,
                    target,
                    &self.reasoning_effort,
                )
                .await;

            // Context overflow → compact and retry (max 2 attempts)
            // Stream error → retry with backoff (max 2 attempts)
            let (msg, usage) = match api_result {
                Err(e) if is_context_overflow(&e) && overflow_retries < 2 => {
                    overflow_retries += 1;
                    let _ = ui_tx.send(UiMessage::Info(format!(
                        "[context overflow — compacting and retrying ({}/2)]",
                        overflow_retries,
                    )));
                    self.emergency_compact();
                    continue;
                }
                Err(e) if is_stream_error(&e) && empty_retries < 2 => {
                    empty_retries += 1;
                    let _ = ui_tx.send(UiMessage::Info(format!(
                        "[stream error: {} — retrying ({}/2)]",
                        e, empty_retries,
                    )));
                    tokio::time::sleep(std::time::Duration::from_secs(2)).await;
                    continue;
                }
                // Unrecoverable error or success — propagate/unpack.
                other => other?,
            };

            // Strip ephemeral tool calls (journal) that the API has
            // now processed. They're persisted to disk; no need to keep
            // them in the conversation history burning tokens.
            self.strip_ephemeral_tool_calls();

            // Refresh budget + status line from API-reported usage.
            if let Some(usage) = &usage {
                self.last_prompt_tokens = usage.prompt_tokens;
                self.measure_budget();
                self.publish_context_state();
                let _ = ui_tx.send(UiMessage::StatusUpdate(StatusInfo {
                    dmn_state: String::new(), // filled by main loop
                    dmn_turns: 0,
                    dmn_max_turns: 0,
                    prompt_tokens: usage.prompt_tokens,
                    completion_tokens: usage.completion_tokens,
                    model: self.client.model.clone(),
                    turn_tools: 0, // tracked by TUI from ToolCall messages
                    context_budget: self.context_budget.status_string(),
                }));
            }

            // Empty response — model returned finish=stop with no content
            // or tool calls. Inject a nudge so the retry has different input.
            let has_content = msg.content.is_some();
            let has_tools = msg.tool_calls.as_ref().map_or(false, |tc| !tc.is_empty());
            if !has_content && !has_tools {
                if empty_retries < 2 {
                    empty_retries += 1;
                    let _ = ui_tx.send(UiMessage::Debug(format!(
                        "empty response, injecting nudge and retrying ({}/2)",
                        empty_retries,
                    )));
                    self.push_message(Message::user(
                        "[system] Your previous response was empty. \
                         Please respond with text or use a tool."
                    ));
                    continue;
                }
                // After max retries, fall through — return the empty response
            } else {
                empty_retries = 0;
            }

            // Structured tool calls from the API
            if let Some(ref tool_calls) = msg.tool_calls {
                if !tool_calls.is_empty() {
                    self.push_message(msg.clone());
                    for call in tool_calls {
                        self.dispatch_tool_call(call, None, ui_tx, &mut ds)
                            .await;
                    }
                    continue;
                }
            }

            // No structured tool calls — check for leaked tool calls
            // (Qwen sometimes outputs <tool_call> XML as text).
            let text = msg.content_text().to_string();
            let leaked = parse_leaked_tool_calls(&text);

            if !leaked.is_empty() {
                let _ = ui_tx.send(UiMessage::Debug(format!(
                    "recovered {} leaked tool call(s) from text",
                    leaked.len()
                )));
                // Strip tool call XML and thinking tokens from the message
                // so they don't clutter the conversation history.
                let cleaned = strip_leaked_artifacts(&text);
                let mut clean_msg = msg.clone();
                clean_msg.content = if cleaned.trim().is_empty() {
                    None
                } else {
                    Some(MessageContent::Text(cleaned))
                };
                self.push_message(clean_msg);
                for call in &leaked {
                    self.dispatch_tool_call(call, Some("recovered"), ui_tx, &mut ds)
                        .await;
                }
                continue;
            }

            // Genuinely text-only response — clear the activity line,
            // record the message, and hand the result back.
            let _ = ui_tx.send(UiMessage::Activity(String::new()));
            self.push_message(msg);

            return Ok(TurnResult {
                text,
                yield_requested: ds.yield_requested,
                had_tool_calls: ds.had_tool_calls,
                tool_errors: ds.tool_errors,
                model_switch: ds.model_switch,
                dmn_pause: ds.dmn_pause,
            });
        }
    }
|
|
|
|
    /// Dispatch a single tool call: send UI annotations, run the tool,
    /// push results into the conversation, handle images.
    ///
    /// `tag` is an optional UI label (e.g. "recovered" for tool calls
    /// parsed out of leaked XML). Outcomes accumulate into `ds` for
    /// the enclosing turn.
    async fn dispatch_tool_call(
        &mut self,
        call: &ToolCall,
        tag: Option<&str>,
        ui_tx: &UiSender,
        ds: &mut DispatchState,
    ) {
        // Malformed JSON arguments degrade to an empty value rather
        // than aborting the dispatch.
        let args: serde_json::Value =
            serde_json::from_str(&call.function.arguments).unwrap_or_default();

        let args_summary = summarize_args(&call.function.name, &args);
        let label = match tag {
            Some(t) => format!("calling: {} ({})", call.function.name, t),
            None => format!("calling: {}", call.function.name),
        };
        let _ = ui_tx.send(UiMessage::Activity(label));
        let _ = ui_tx.send(UiMessage::ToolCall {
            name: call.function.name.clone(),
            args_summary: args_summary.clone(),
        });
        let _ = ui_tx.send(UiMessage::ToolStarted {
            id: call.id.clone(),
            name: call.function.name.clone(),
            detail: args_summary,
        });

        // Handle working_stack tool — needs &mut self for context state
        if call.function.name == "working_stack" {
            let result = tools::working_stack::handle(&args, &mut self.context.working_stack);
            // Synthesize a ToolOutput so the UI/result path below
            // mirrors the normal dispatch path.
            let output = tools::ToolOutput {
                text: result.clone(),
                is_yield: false,
                images: Vec::new(),
                model_switch: None,
                dmn_pause: false,
            };
            let _ = ui_tx.send(UiMessage::ToolResult {
                name: call.function.name.clone(),
                result: output.text.clone(),
            });
            let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });
            self.push_message(Message::tool_result(&call.id, &output.text));
            ds.had_tool_calls = true;

            // Re-render the context message so the model sees the updated stack
            if !result.starts_with("Error:") {
                self.refresh_context_message();
            }
            return;
        }

        // All other tools go through the shared dispatcher.
        let output =
            tools::dispatch(&call.function.name, &args, &self.process_tracker).await;

        // Fold the tool's outcome into this turn's dispatch state.
        if output.is_yield {
            ds.yield_requested = true;
        } else {
            ds.had_tool_calls = true;
        }
        if output.model_switch.is_some() {
            ds.model_switch = output.model_switch;
        }
        if output.dmn_pause {
            ds.dmn_pause = true;
        }
        if output.text.starts_with("Error:") {
            ds.tool_errors += 1;
        }

        let _ = ui_tx.send(UiMessage::ToolResult {
            name: call.function.name.clone(),
            result: output.text.clone(),
        });
        let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });

        self.push_message(Message::tool_result(&call.id, &output.text));

        if !output.images.is_empty() {
            // Only one live image in context at a time — age out any
            // previous ones to avoid accumulating ~90KB+ per image.
            self.age_out_images();
            self.push_message(Message::user_with_images(
                "Here is the image you requested:",
                &output.images,
            ));
        }
    }
|
|
|
|
/// Build context state summary for the debug screen.
|
|
pub fn context_state_summary(&self) -> Vec<ContextSection> {
|
|
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
|
|
|
let mut sections = Vec::new();
|
|
|
|
// System prompt
|
|
sections.push(ContextSection {
|
|
name: "System prompt".into(),
|
|
tokens: count(&self.context.system_prompt),
|
|
content: self.context.system_prompt.clone(),
|
|
children: Vec::new(),
|
|
});
|
|
|
|
// Personality — parent with file children
|
|
let personality_children: Vec<ContextSection> = self.context.personality.iter()
|
|
.map(|(name, content)| ContextSection {
|
|
name: name.clone(),
|
|
tokens: count(content),
|
|
content: content.clone(),
|
|
children: Vec::new(),
|
|
})
|
|
.collect();
|
|
let personality_tokens: usize = personality_children.iter().map(|c| c.tokens).sum();
|
|
sections.push(ContextSection {
|
|
name: format!("Personality ({} files)", personality_children.len()),
|
|
tokens: personality_tokens,
|
|
content: String::new(),
|
|
children: personality_children,
|
|
});
|
|
|
|
// Journal — split into per-entry children
|
|
{
|
|
let mut journal_children = Vec::new();
|
|
let mut current_header = String::new();
|
|
let mut current_body = String::new();
|
|
for line in self.context.journal.lines() {
|
|
if line.starts_with("## ") {
|
|
if !current_header.is_empty() {
|
|
let body = std::mem::take(&mut current_body);
|
|
let preview: String = body.lines().next().unwrap_or("").chars().take(60).collect();
|
|
journal_children.push(ContextSection {
|
|
name: format!("{}: {}", current_header, preview),
|
|
tokens: count(&body),
|
|
content: body,
|
|
children: Vec::new(),
|
|
});
|
|
}
|
|
current_header = line.trim_start_matches("## ").to_string();
|
|
current_body.clear();
|
|
} else {
|
|
if !current_body.is_empty() || !line.is_empty() {
|
|
current_body.push_str(line);
|
|
current_body.push('\n');
|
|
}
|
|
}
|
|
}
|
|
if !current_header.is_empty() {
|
|
let preview: String = current_body.lines().next().unwrap_or("").chars().take(60).collect();
|
|
journal_children.push(ContextSection {
|
|
name: format!("{}: {}", current_header, preview),
|
|
tokens: count(¤t_body),
|
|
content: current_body,
|
|
children: Vec::new(),
|
|
});
|
|
}
|
|
let journal_tokens: usize = journal_children.iter().map(|c| c.tokens).sum();
|
|
sections.push(ContextSection {
|
|
name: format!("Journal ({} entries)", journal_children.len()),
|
|
tokens: journal_tokens,
|
|
content: String::new(),
|
|
children: journal_children,
|
|
});
|
|
}
|
|
|
|
// Working stack — instructions + items as children
|
|
let instructions = std::fs::read_to_string(WORKING_STACK_INSTRUCTIONS)
|
|
.unwrap_or_default();
|
|
let mut stack_children = vec![ContextSection {
|
|
name: "Instructions".into(),
|
|
tokens: count(&instructions),
|
|
content: instructions,
|
|
children: Vec::new(),
|
|
}];
|
|
for (i, item) in self.context.working_stack.iter().enumerate() {
|
|
let marker = if i == self.context.working_stack.len() - 1 { "→" } else { " " };
|
|
stack_children.push(ContextSection {
|
|
name: format!("{} [{}] {}", marker, i, item),
|
|
tokens: count(item),
|
|
content: String::new(),
|
|
children: Vec::new(),
|
|
});
|
|
}
|
|
let stack_tokens: usize = stack_children.iter().map(|c| c.tokens).sum();
|
|
sections.push(ContextSection {
|
|
name: format!("Working stack ({} items)", self.context.working_stack.len()),
|
|
tokens: stack_tokens,
|
|
content: String::new(),
|
|
children: stack_children,
|
|
});
|
|
|
|
// Conversation — each message as a child
|
|
let conv_start = self.messages.iter()
|
|
.position(|m| m.role == Role::Assistant || m.role == Role::Tool)
|
|
.unwrap_or(self.messages.len());
|
|
let conv_messages = &self.messages[conv_start..];
|
|
let conv_children: Vec<ContextSection> = conv_messages.iter().enumerate()
|
|
.map(|(i, msg)| {
|
|
let text = msg.content.as_ref()
|
|
.map(|c| c.as_text().to_string())
|
|
.unwrap_or_default();
|
|
let tool_info = msg.tool_calls.as_ref().map(|tc| {
|
|
tc.iter()
|
|
.map(|c| c.function.name.clone())
|
|
.collect::<Vec<_>>()
|
|
.join(", ")
|
|
});
|
|
let label = match (&msg.role, &tool_info) {
|
|
(_, Some(tools)) => format!("[tool_call: {}]", tools),
|
|
_ => {
|
|
let preview: String = text.chars().take(60).collect();
|
|
let preview = preview.replace('\n', " ");
|
|
if text.len() > 60 { format!("{}...", preview) } else { preview }
|
|
}
|
|
};
|
|
let tokens = count(&text);
|
|
let role_name = match msg.role {
|
|
Role::Assistant => "PoC",
|
|
Role::User => "Kent",
|
|
Role::Tool => "tool",
|
|
Role::System => "system",
|
|
};
|
|
ContextSection {
|
|
name: format!("[{}] {}: {}", conv_start + i, role_name, label),
|
|
tokens,
|
|
content: text,
|
|
children: Vec::new(),
|
|
}
|
|
})
|
|
.collect();
|
|
let conv_tokens: usize = conv_children.iter().map(|c| c.tokens).sum();
|
|
sections.push(ContextSection {
|
|
name: format!("Conversation ({} messages)", conv_children.len()),
|
|
tokens: conv_tokens,
|
|
content: String::new(),
|
|
children: conv_children,
|
|
});
|
|
|
|
sections
|
|
}
|
|
|
|
    /// Load recent journal entries at startup for orientation.
    /// Uses the same budget logic as compaction but with empty conversation.
    /// Only parses the tail of the journal file (last 64KB) for speed.
    fn load_startup_journal(&mut self) {
        let journal_path = journal::default_journal_path();
        let entries = journal::parse_journal_tail(&journal_path, 64 * 1024);
        if entries.is_empty() {
            // No journal yet — leave self.context.journal empty.
            return;
        }

        // Token counter for the budget planner.
        let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
        let context_message = self.context.render_context_message();

        // Plan how many entries fit alongside the static context.
        let plan = plan_context(
            &self.context.system_prompt,
            &context_message,
            &[], // no conversation yet
            &entries,
            &self.client.model,
            &count,
        );

        self.context.journal = render_journal_text(&entries, &plan);
    }
|
|
|
|
/// Re-render the context message in self.messages from live ContextState.
|
|
/// Called after any change to context state (working stack, etc).
|
|
fn refresh_context_message(&mut self) {
|
|
let rendered = self.context.render_context_message();
|
|
// The context message is the first user message (index 1, after system prompt)
|
|
if self.messages.len() >= 2 && self.messages[1].role == Role::User {
|
|
self.messages[1] = Message::user(rendered);
|
|
}
|
|
self.publish_context_state();
|
|
self.save_working_stack();
|
|
}
|
|
|
|
/// Persist working stack to disk.
|
|
fn save_working_stack(&self) {
|
|
if let Ok(json) = serde_json::to_string(&self.context.working_stack) {
|
|
let _ = std::fs::write(WORKING_STACK_FILE, json);
|
|
}
|
|
}
|
|
|
|
/// Load working stack from disk.
|
|
fn load_working_stack(&mut self) {
|
|
if let Ok(data) = std::fs::read_to_string(WORKING_STACK_FILE) {
|
|
if let Ok(stack) = serde_json::from_str::<Vec<String>>(&data) {
|
|
self.context.working_stack = stack;
|
|
}
|
|
}
|
|
}
|
|
|
|
    /// Push the current context summary to the shared state for the TUI to read.
    ///
    /// A poisoned lock is silently skipped — the TUI just keeps showing
    /// the previous snapshot.
    fn publish_context_state(&self) {
        if let Ok(mut state) = self.shared_context.write() {
            *state = self.context_state_summary();
        }
    }
|
|
|
|
    /// Replace base64 image data in older messages with text placeholders.
    /// Only the most recent image stays live — each new image ages out
    /// all previous ones. The tool result message (right before each image
    /// message) already records what was loaded, so no info is lost.
    fn age_out_images(&mut self) {
        for msg in &mut self.messages {
            if let Some(MessageContent::Parts(parts)) = &msg.content {
                let has_images = parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. }));
                if !has_images {
                    // Text-only multipart message — nothing to age out.
                    continue;
                }
                // Rebuild the message as plain text: keep text parts,
                // replace each image with a placeholder. Newlines are
                // inserted only between pieces (not before the first),
                // preserving the original concatenation exactly.
                let mut replacement = String::new();
                for part in parts {
                    match part {
                        ContentPart::Text { text } => {
                            if !replacement.is_empty() {
                                replacement.push('\n');
                            }
                            replacement.push_str(text);
                        }
                        ContentPart::ImageUrl { .. } => {
                            if !replacement.is_empty() {
                                replacement.push('\n');
                            }
                            replacement.push_str(
                                "[image aged out — see tool result above for details]",
                            );
                        }
                    }
                }
                msg.content = Some(MessageContent::Text(replacement));
            }
        }
    }
|
|
|
|
/// Strip ephemeral tool calls from the conversation history.
|
|
///
|
|
/// Ephemeral tools (like journal) persist their output to disk,
|
|
/// so the tool call + result don't need to stay in the context
|
|
/// window. We keep them for exactly one API round-trip (the model
|
|
/// needs to see the result was acknowledged), then strip them.
|
|
///
|
|
/// If an assistant message contains ONLY ephemeral tool calls,
|
|
/// the entire message and its tool results are removed. If mixed
|
|
/// with non-ephemeral calls, we leave it (rare case, small cost).
|
|
fn strip_ephemeral_tool_calls(&mut self) {
|
|
// Collect IDs of tool calls to strip
|
|
let mut strip_ids: Vec<String> = Vec::new();
|
|
let mut strip_msg_indices: Vec<usize> = Vec::new();
|
|
|
|
for (i, msg) in self.messages.iter().enumerate() {
|
|
if msg.role != Role::Assistant {
|
|
continue;
|
|
}
|
|
let calls = match &msg.tool_calls {
|
|
Some(c) if !c.is_empty() => c,
|
|
_ => continue,
|
|
};
|
|
|
|
let all_ephemeral = calls.iter().all(|c| {
|
|
c.function.name == tools::journal::TOOL_NAME
|
|
});
|
|
|
|
if all_ephemeral {
|
|
strip_msg_indices.push(i);
|
|
for call in calls {
|
|
strip_ids.push(call.id.clone());
|
|
}
|
|
}
|
|
}
|
|
|
|
if strip_ids.is_empty() {
|
|
return;
|
|
}
|
|
|
|
// Remove in reverse order to preserve indices
|
|
self.messages.retain(|msg| {
|
|
// Strip the assistant messages we identified
|
|
if msg.role == Role::Assistant {
|
|
if let Some(calls) = &msg.tool_calls {
|
|
if calls.iter().all(|c| strip_ids.contains(&c.id)) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
// Strip matching tool results
|
|
if msg.role == Role::Tool {
|
|
if let Some(ref id) = msg.tool_call_id {
|
|
if strip_ids.contains(id) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
true
|
|
});
|
|
}
|
|
|
|
    /// Last prompt token count reported by the API.
    ///
    /// Zero until the first API call completes, and reset to zero
    /// after compaction/restore until fresh usage arrives.
    pub fn last_prompt_tokens(&self) -> u32 {
        self.last_prompt_tokens
    }
|
|
|
|
    /// Build context window from conversation messages + journal.
    /// Used by both compact() (in-memory messages) and restore_from_log()
    /// (conversation log). The context window is always:
    ///   identity + journal summaries + raw recent messages
    ///
    /// Takes refreshed identity inputs so a compaction can also pick up
    /// edits to the prompt/personality files, then delegates to
    /// do_compact() for the rebuild.
    pub fn compact(&mut self, new_system_prompt: String, new_personality: Vec<(String, String)>) {
        self.context.system_prompt = new_system_prompt;
        self.context.personality = new_personality;
        self.do_compact();
    }
|
|
|
|
/// Internal compaction — rebuilds context window from current messages.
|
|
fn do_compact(&mut self) {
|
|
// Find where actual conversation starts (after system + context)
|
|
let conv_start = self
|
|
.messages
|
|
.iter()
|
|
.position(|m| m.role == Role::Assistant || m.role == Role::Tool)
|
|
.unwrap_or(self.messages.len());
|
|
|
|
let conversation: Vec<Message> = self.messages[conv_start..].to_vec();
|
|
let (messages, journal) = build_context_window(
|
|
&self.context,
|
|
&conversation,
|
|
&self.client.model,
|
|
&self.tokenizer,
|
|
);
|
|
self.context.journal = journal;
|
|
self.messages = messages;
|
|
self.last_prompt_tokens = 0;
|
|
self.measure_budget();
|
|
self.publish_context_state();
|
|
}
|
|
|
|
    /// Emergency compaction using stored config — called on context overflow.
    ///
    /// Unlike compact(), this reuses the identity already held in
    /// self.context rather than re-reading prompt/personality inputs.
    fn emergency_compact(&mut self) {
        self.do_compact();
    }
|
|
|
|
    /// Restore from the conversation log. Builds the context window
    /// the same way compact() does — journal summaries for old messages,
    /// raw recent messages. This is the unified startup path.
    /// Returns true if the log had content to restore.
    pub fn restore_from_log(
        &mut self,
        system_prompt: String,
        personality: Vec<(String, String)>,
    ) -> bool {
        self.context.system_prompt = system_prompt;
        self.context.personality = personality;

        // Read the tail of the log (last 512KB). Any failure or an
        // empty log means there is nothing to restore.
        let all_messages = match &self.conversation_log {
            Some(log) => match log.read_tail(512 * 1024) {
                Ok(msgs) if !msgs.is_empty() => {
                    dbglog!("[restore] read {} messages from log tail", msgs.len());
                    msgs
                }
                Ok(_) => {
                    dbglog!("[restore] log exists but is empty");
                    return false;
                }
                Err(e) => {
                    dbglog!("[restore] failed to read log: {}", e);
                    return false;
                }
            },
            None => {
                dbglog!("[restore] no conversation log configured");
                return false;
            }
        };

        // Filter out system/context messages — we only want the
        // actual conversation (user prompts, assistant responses,
        // tool calls/results)
        let conversation: Vec<Message> = all_messages
            .into_iter()
            .filter(|m| m.role != Role::System)
            .collect();
        dbglog!("[restore] {} messages after filtering system", conversation.len());

        // Rebuild the window exactly as do_compact() does.
        let (messages, journal) = build_context_window(
            &self.context,
            &conversation,
            &self.client.model,
            &self.tokenizer,
        );
        dbglog!("[restore] journal text: {} chars, {} lines",
            journal.len(), journal.lines().count());
        self.context.journal = journal;
        self.messages = messages;
        dbglog!("[restore] built context window: {} messages", self.messages.len());
        // Token count is stale until the next API call reports usage.
        self.last_prompt_tokens = 0;
        self.measure_budget();
        self.publish_context_state();
        true
    }
|
|
|
|
    /// Replace the API client (for model switching).
    /// Conversation history and context state are left untouched —
    /// only the client (and therefore the model) changes.
    pub fn swap_client(&mut self, new_client: ApiClient) {
        self.client = new_client;
    }
|
|
|
|
    /// Get the model identifier.
    /// Borrowed from the current API client, so this reflects any
    /// prior `swap_client` call.
    pub fn model(&self) -> &str {
        &self.client.model
    }
|
|
|
|
    /// Get the conversation history for persistence.
    /// Read-only view; use `messages_mut` for in-place edits.
    pub fn messages(&self) -> &[Message] {
        &self.messages
    }
|
|
|
|
    /// Mutable access to conversation history (for /retry).
    /// Callers may truncate or rewrite entries directly; no budget
    /// recomputation is triggered here.
    pub fn messages_mut(&mut self) -> &mut Vec<Message> {
        &mut self.messages
    }
|
|
|
|
    /// Restore from a saved conversation.
    /// Replaces the in-memory history wholesale; unlike
    /// `restore_from_log`, no journal/budget rebuild happens here.
    pub fn restore(&mut self, messages: Vec<Message>) {
        self.messages = messages;
    }
|
|
}
|
|
|
|
/// Look up a model's context window size in tokens.
///
/// Matching is case-insensitive and substring-based: anything that
/// mentions "opus" or "sonnet" is treated as a 200k Claude window,
/// "qwen" as 131k, and everything else falls back to 128k.
pub fn model_context_window(model: &str) -> usize {
    let name = model.to_lowercase();
    if ["opus", "sonnet"].iter().any(|kw| name.contains(kw)) {
        200_000
    } else if name.contains("qwen") {
        131_072
    } else {
        128_000
    }
}
|
|
|
|
/// Context budget in tokens: 60% of the model's context window.
/// Leaves headroom for conversation to grow before compaction triggers.
///
/// Future direction: make this dynamic based on what the agent is
/// doing — deep coding work might allocate more to conversation,
/// consolidation might allocate more to journal/memory, idle might
/// shrink everything to save cost.
fn context_budget_tokens(model: &str) -> usize {
    // Multiply before dividing: integer math, and window sizes are far
    // below usize::MAX so the product cannot overflow in practice.
    model_context_window(model) * 60 / 100
}
|
|
|
|
/// Allocation plan for the context window. Separates the budget math
/// (which entries and messages to include) from the message assembly
/// (building the actual Vec<Message>). This makes the core algorithm
/// testable and inspectable — log the plan on compaction to see exactly
/// what allocation decisions were made.
///
/// Invariant (by construction in `plan_context`):
/// `header_start <= full_start <= entry_count`, all indexing the same
/// journal-entries slice.
struct ContextPlan {
    /// Index into all_entries: header-only entries start here
    header_start: usize,
    /// Index into all_entries: full entries start here (headers end here)
    full_start: usize,
    /// Total journal entries (header-only + full go up to this)
    entry_count: usize,
    /// Index into recent conversation: skip messages before this
    conv_trim: usize,
    /// Total recent conversation messages
    conv_count: usize,
    /// Tokens used by full journal entries
    full_tokens: usize,
    /// Tokens used by header-only journal entries
    header_tokens: usize,
    /// Tokens used by conversation (after trimming)
    conv_tokens: usize,
    /// Total budget available (after identity, memory, reserve)
    available: usize,
}
|
|
|
|
/// Build a context window from conversation messages + journal entries.
/// This is the core algorithm shared by compact() and restore_from_log().
///
/// Allocation strategy: identity and memory are fixed costs. The
/// remaining budget (minus 25% reserve for model output) is split
/// between journal and conversation. Conversation gets priority —
/// it's what's happening now. Journal fills the rest, newest first.
///
/// When the budget is tight, journal entries are dropped first
/// (oldest entries go first). If conversation alone exceeds the
/// budget, oldest messages are trimmed to fit.
/// Returns (messages, journal_text) — caller stores journal_text in ContextState.
fn build_context_window(
    context: &ContextState,
    conversation: &[Message],
    model: &str,
    tokenizer: &CoreBPE,
) -> (Vec<Message>, String) {
    // Journal entries are re-read from disk on every rebuild, so the
    // window always reflects the latest journal state.
    let journal_path = journal::default_journal_path();
    let all_entries = journal::parse_journal(&journal_path);
    dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
    // Single token-counting closure shared by every measurement below,
    // so all costs are computed with the same tokenizer settings.
    let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();

    let system_prompt = context.system_prompt.clone();
    let context_message = context.render_context_message();

    // Cap memory to 50% of the context budget so conversation always
    // gets space. Truncate at the last complete section boundary.
    // NOTE: this truncation must happen BEFORE plan_context, which
    // measures the (possibly truncated) memory as a fixed cost.
    let max_tokens = context_budget_tokens(model);
    let memory_cap = max_tokens / 2;
    let memory_tokens = count(&context_message);
    let context_message = if memory_tokens > memory_cap {
        dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap);
        truncate_at_section(&context_message, memory_cap, &count)
    } else {
        context_message
    };

    // Messages older than the newest journal entry are represented by
    // journal summaries; only the rest stay as raw conversation.
    let recent_start = find_journal_cutoff(conversation, all_entries.last());
    dbglog!("[ctx] journal cutoff: {} of {} conversation messages are 'recent'",
        conversation.len() - recent_start, conversation.len());
    let recent = &conversation[recent_start..];

    let plan = plan_context(
        &system_prompt,
        &context_message,
        recent,
        &all_entries,
        model,
        &count,
    );

    // Render journal text from the plan
    let journal_text = render_journal_text(&all_entries, &plan);
    dbglog!("[ctx] plan: header_start={} full_start={} entry_count={} conv_trim={} journal_text={} chars",
        plan.header_start, plan.full_start, plan.entry_count, plan.conv_trim, journal_text.len());

    let messages = assemble_context(
        system_prompt, context_message, &journal_text,
        recent, &plan,
    );
    (messages, journal_text)
}
|
|
|
|
/// Compute the allocation plan: how much budget goes to journal vs
/// conversation, which entries and messages to include.
///
/// Pure planning — nothing is rendered or mutated here; the result is
/// consumed by `render_journal_text` and `assemble_context`.
fn plan_context(
    system_prompt: &str,
    context_message: &str,
    recent: &[Message],
    entries: &[journal::JournalEntry],
    model: &str,
    count: &dyn Fn(&str) -> usize,
) -> ContextPlan {
    let max_tokens = context_budget_tokens(model);

    // Fixed costs — always included
    let identity_cost = count(system_prompt);
    let memory_cost = count(context_message);
    // 25% of the budget is reserved for model output.
    let reserve = max_tokens / 4;
    let available = max_tokens
        .saturating_sub(identity_cost)
        .saturating_sub(memory_cost)
        .saturating_sub(reserve);

    // Measure conversation
    let conv_costs: Vec<usize> = recent.iter().map(|m| msg_token_count_fn(m, count)).collect();
    let total_conv: usize = conv_costs.iter().sum();

    // Journal always gets at least 15% of available budget so it doesn't
    // get squeezed out by large conversations.
    let journal_min = available * 15 / 100;
    let journal_budget = available.saturating_sub(total_conv).max(journal_min);

    // Fill journal entries newest-first within budget.
    // Tiered: recent entries get full content, older entries get just
    // a header line (timestamp + first line) for timeline awareness.
    let full_budget = journal_budget * 70 / 100;
    let header_budget = journal_budget.saturating_sub(full_budget);

    // Phase 1: Full entries (newest first)
    let mut full_used = 0;
    let mut n_full = 0;
    for entry in entries.iter().rev() {
        // +10: flat per-entry overhead allowance — presumably for the
        // rendered "## <timestamp>" header; TODO confirm against
        // render_journal_text.
        let cost = count(&entry.content) + 10;
        if full_used + cost > full_budget {
            break;
        }
        full_used += cost;
        n_full += 1;
    }
    let full_start = entries.len().saturating_sub(n_full);

    // Phase 2: Header-only entries (continuing backward from where full stopped)
    let mut header_used = 0;
    let mut n_headers = 0;
    for entry in entries[..full_start].iter().rev() {
        // A header entry is summarized by its first non-blank line.
        let first_line = entry
            .content
            .lines()
            .find(|l| !l.trim().is_empty())
            .unwrap_or("(empty)");
        let cost = count(first_line) + 10;
        if header_used + cost > header_budget {
            break;
        }
        header_used += cost;
        n_headers += 1;
    }
    let header_start = full_start.saturating_sub(n_headers);

    // If conversation exceeds available budget, trim oldest messages
    let journal_used = full_used + header_used;
    let mut conv_trim = 0;
    let mut trimmed_conv = total_conv;
    while trimmed_conv + journal_used > available && conv_trim < recent.len() {
        trimmed_conv -= conv_costs[conv_trim];
        conv_trim += 1;
    }
    // Walk forward to user message boundary
    // (avoids starting the window mid tool-call/result sequence; note
    // the trimmed-token figure logged below is not adjusted for this
    // extra walk).
    while conv_trim < recent.len() && recent[conv_trim].role != Role::User {
        conv_trim += 1;
    }

    dbglog!("[plan] model={} max_tokens={} available={} (identity={} memory={} reserve={})",
        model, max_tokens, available, identity_cost, memory_cost, reserve);
    dbglog!("[plan] conv: {} msgs, {} tokens total, trimming {} msgs → {} tokens",
        recent.len(), total_conv, conv_trim, trimmed_conv);
    dbglog!("[plan] journal: {} full entries ({}t) + {} headers ({}t)",
        n_full, full_used, n_headers, header_used);

    ContextPlan {
        header_start,
        full_start,
        entry_count: entries.len(),
        conv_trim,
        conv_count: recent.len(),
        full_tokens: full_used,
        header_tokens: header_used,
        conv_tokens: trimmed_conv,
        available,
    }
}
|
|
|
|
/// Render journal entries into text from a context plan.
|
|
fn render_journal_text(
|
|
entries: &[journal::JournalEntry],
|
|
plan: &ContextPlan,
|
|
) -> String {
|
|
let has_journal = plan.header_start < plan.entry_count;
|
|
if !has_journal {
|
|
return String::new();
|
|
}
|
|
|
|
let mut text = String::from("[Earlier in this conversation — from your journal]\n\n");
|
|
|
|
// Header-only entries (older) — just timestamp + first line
|
|
for entry in &entries[plan.header_start..plan.full_start] {
|
|
let first_line = entry
|
|
.content
|
|
.lines()
|
|
.find(|l| !l.trim().is_empty())
|
|
.unwrap_or("(empty)");
|
|
text.push_str(&format!(
|
|
"## {} — {}\n",
|
|
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
|
first_line,
|
|
));
|
|
}
|
|
|
|
// Separator between headers and full entries
|
|
let n_headers = plan.full_start - plan.header_start;
|
|
let n_full = plan.entry_count - plan.full_start;
|
|
if n_headers > 0 && n_full > 0 {
|
|
text.push_str("\n---\n\n");
|
|
}
|
|
|
|
// Full entries (recent)
|
|
for entry in &entries[plan.full_start..] {
|
|
text.push_str(&format!(
|
|
"## {}\n\n{}\n\n",
|
|
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
|
entry.content
|
|
));
|
|
}
|
|
|
|
text
|
|
}
|
|
|
|
/// Assemble the context window from a plan. No allocation decisions
|
|
/// happen here — just follow the plan to build messages.
|
|
fn assemble_context(
|
|
system_prompt: String,
|
|
context_message: String,
|
|
journal_text: &str,
|
|
recent: &[Message],
|
|
plan: &ContextPlan,
|
|
) -> Vec<Message> {
|
|
let mut messages = vec![Message::system(system_prompt)];
|
|
if !context_message.is_empty() {
|
|
messages.push(Message::user(context_message));
|
|
}
|
|
|
|
let final_recent = &recent[plan.conv_trim..];
|
|
|
|
if !journal_text.is_empty() {
|
|
messages.push(Message::user(journal_text.to_string()));
|
|
} else if !final_recent.is_empty() {
|
|
messages.push(Message::user(
|
|
"Your context was just rebuilt. Memory files have been \
|
|
reloaded. Your recent conversation continues below. \
|
|
Earlier context is in your journal and memory files."
|
|
.to_string(),
|
|
));
|
|
}
|
|
|
|
messages.extend(final_recent.iter().cloned());
|
|
messages
|
|
}
|
|
|
|
/// Find the conversation index where messages are no longer covered
|
|
/// Truncate a context message to fit within a token budget. Cuts at
|
|
/// section boundaries (lines starting with `---` or `## `) to avoid
|
|
/// splitting mid-section. Drops sections from the end first since
|
|
/// earlier sections (identity, instructions) matter more.
|
|
fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> usize) -> String {
|
|
// Find section boundaries (--- separators between assembled parts)
|
|
let mut boundaries = vec![0usize];
|
|
for (i, line) in text.lines().enumerate() {
|
|
if line.trim() == "---" || line.starts_with("## ") {
|
|
// Find byte offset of this line
|
|
let offset = text.lines().take(i).map(|l| l.len() + 1).sum::<usize>();
|
|
boundaries.push(offset);
|
|
}
|
|
}
|
|
boundaries.push(text.len());
|
|
|
|
// Binary search: find the largest prefix of sections that fits
|
|
let mut best = 0;
|
|
for &end in &boundaries[1..] {
|
|
let slice = &text[..end];
|
|
if count(slice) <= max_tokens {
|
|
best = end;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if best == 0 {
|
|
// Even the first section doesn't fit — hard truncate
|
|
best = text.len().min(max_tokens * 3); // ~3 chars/token rough estimate
|
|
}
|
|
|
|
let truncated = &text[..best];
|
|
dbglog!("[ctx] truncated memory from {} to {} chars ({} tokens)",
|
|
text.len(), truncated.len(), count(truncated));
|
|
truncated.to_string()
|
|
}
|
|
|
|
/// Find the conversation index where messages are no longer covered
/// by journal entries. Messages before this index are summarized by
/// the journal; messages from this index onward stay as raw conversation.
/// Walks back to a user message boundary to avoid splitting tool
/// call/result sequences.
fn find_journal_cutoff(
    conversation: &[Message],
    newest_entry: Option<&journal::JournalEntry>,
) -> usize {
    // No journal at all — nothing is summarized; keep everything raw.
    let cutoff = match newest_entry {
        Some(entry) => entry.timestamp,
        None => return 0,
    };

    // First message strictly newer than the newest journal entry.
    // Messages without a parseable timestamp are skipped here.
    let mut split = conversation.len();
    for (i, msg) in conversation.iter().enumerate() {
        if let Some(ts) = parse_msg_timestamp(msg) {
            if ts > cutoff {
                split = i;
                break;
            }
        }
    }
    // Walk back to user message boundary
    while split > 0 && split < conversation.len() && conversation[split].role != Role::User {
        split -= 1;
    }
    split
}
|
|
|
|
/// Count the token footprint of a message using a token counting function.
|
|
fn msg_token_count_fn(msg: &Message, count: &dyn Fn(&str) -> usize) -> usize {
|
|
let content = msg.content.as_ref().map_or(0, |c| match c {
|
|
MessageContent::Text(s) => count(s),
|
|
MessageContent::Parts(parts) => parts
|
|
.iter()
|
|
.map(|p| match p {
|
|
ContentPart::Text { text } => count(text),
|
|
ContentPart::ImageUrl { .. } => 85,
|
|
})
|
|
.sum(),
|
|
});
|
|
let tools = msg.tool_calls.as_ref().map_or(0, |calls| {
|
|
calls
|
|
.iter()
|
|
.map(|c| count(&c.function.arguments) + count(&c.function.name))
|
|
.sum()
|
|
});
|
|
content + tools
|
|
}
|
|
|
|
/// Count the token footprint of a message using BPE tokenization.
/// Thin wrapper over `msg_token_count_fn` that plugs in the
/// tokenizer's `encode_with_special_tokens` as the counting function.
fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize {
    msg_token_count_fn(msg, &|s| tokenizer.encode_with_special_tokens(s).len())
}
|
|
|
|
/// Detect context window overflow errors from the API.
|
|
/// Different providers phrase this differently; we check for common patterns.
|
|
/// OpenRouter wraps upstream errors, so we check both the wrapper and the raw message.
|
|
fn is_context_overflow(err: &anyhow::Error) -> bool {
|
|
let msg = err.to_string().to_lowercase();
|
|
msg.contains("context length")
|
|
|| msg.contains("token limit")
|
|
|| msg.contains("too many tokens")
|
|
|| msg.contains("maximum context")
|
|
|| msg.contains("prompt is too long")
|
|
|| msg.contains("request too large")
|
|
|| msg.contains("input validation error")
|
|
|| msg.contains("content length limit")
|
|
|| (msg.contains("400") && msg.contains("tokens"))
|
|
}
|
|
|
|
/// Detect model/provider errors delivered inside the SSE stream.
/// OpenRouter returns HTTP 200 but finish_reason="error" with
/// partial content (e.g. "system") — we surface this as an error
/// so the turn loop can retry.
///
/// Matches the literal "model stream error" marker in the error's
/// Display output — NOTE(review): keep in sync with wherever that
/// error string is constructed.
fn is_stream_error(err: &anyhow::Error) -> bool {
    err.to_string().contains("model stream error")
}
|
|
|
|
/// Parse a message's timestamp field into a DateTime.
|
|
fn parse_msg_timestamp(msg: &Message) -> Option<DateTime<Utc>> {
|
|
msg.timestamp
|
|
.as_ref()
|
|
.and_then(|ts| DateTime::parse_from_rfc3339(ts).ok())
|
|
.map(|dt| dt.with_timezone(&Utc))
|
|
}
|
|
|
|
/// Create a short summary of tool args for the tools pane header.
|
|
fn summarize_args(tool_name: &str, args: &serde_json::Value) -> String {
|
|
match tool_name {
|
|
"read_file" | "write_file" | "edit_file" => args["file_path"]
|
|
.as_str()
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
"bash" => {
|
|
let cmd = args["command"].as_str().unwrap_or("");
|
|
if cmd.len() > 60 {
|
|
let end = cmd.char_indices()
|
|
.map(|(i, _)| i)
|
|
.take_while(|&i| i <= 60)
|
|
.last()
|
|
.unwrap_or(0);
|
|
format!("{}...", &cmd[..end])
|
|
} else {
|
|
cmd.to_string()
|
|
}
|
|
}
|
|
"grep" => {
|
|
let pattern = args["pattern"].as_str().unwrap_or("");
|
|
let path = args["path"].as_str().unwrap_or(".");
|
|
format!("{} in {}", pattern, path)
|
|
}
|
|
"glob" => args["pattern"]
|
|
.as_str()
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
"view_image" => {
|
|
if let Some(pane) = args["pane_id"].as_str() {
|
|
format!("pane {}", pane)
|
|
} else {
|
|
args["file_path"].as_str().unwrap_or("").to_string()
|
|
}
|
|
}
|
|
"journal" => {
|
|
let entry = args["entry"].as_str().unwrap_or("");
|
|
if entry.len() > 60 {
|
|
format!("{}...", &entry[..60])
|
|
} else {
|
|
entry.to_string()
|
|
}
|
|
}
|
|
"yield_to_user" => args["message"]
|
|
.as_str()
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
"switch_model" => args["model"]
|
|
.as_str()
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
"pause" => String::new(),
|
|
_ => String::new(),
|
|
}
|
|
}
|
|
|
|
/// Parse tool calls leaked as text by models that don't always use the
|
|
/// structured function calling API (notably Qwen).
|
|
///
|
|
/// Handles the XML format:
|
|
/// <tool_call>
|
|
/// <function=bash>
|
|
/// <parameter=command>echo hello</parameter>
|
|
/// </function>
|
|
/// </tool_call>
|
|
///
|
|
/// Also handles JSON-in-text format:
|
|
/// <tool_call>
|
|
/// {"name": "bash", "arguments": {"command": "echo hello"}}
|
|
/// </tool_call>
|
|
fn parse_leaked_tool_calls(text: &str) -> Vec<ToolCall> {
|
|
// Normalize whitespace inside XML tags: "<\nfunction\n=\nbash\n>" → "<function=bash>"
|
|
// This handles streaming tokenizers that split tags across tokens.
|
|
let normalized = normalize_xml_tags(text);
|
|
let text = &normalized;
|
|
|
|
let mut calls = Vec::new();
|
|
let mut search_from = 0;
|
|
let mut call_counter: u32 = 0;
|
|
|
|
while let Some(start) = text[search_from..].find("<tool_call>") {
|
|
let abs_start = search_from + start;
|
|
let after_tag = abs_start + "<tool_call>".len();
|
|
|
|
let end = match text[after_tag..].find("</tool_call>") {
|
|
Some(pos) => after_tag + pos,
|
|
None => break,
|
|
};
|
|
|
|
let body = text[after_tag..end].trim();
|
|
search_from = end + "</tool_call>".len();
|
|
|
|
// Try XML format first, then JSON
|
|
if let Some(call) = parse_xml_tool_call(body, &mut call_counter) {
|
|
calls.push(call);
|
|
} else if let Some(call) = parse_json_tool_call(body, &mut call_counter) {
|
|
calls.push(call);
|
|
}
|
|
}
|
|
|
|
calls
|
|
}
|
|
|
|
/// Normalize whitespace inside XML-like tags for streaming tokenizers.
/// Collapses whitespace between `<` and `>` so that `<\nfunction\n=\nbash\n>`
/// becomes `<function=bash>`, and `</\nparameter\n>` becomes `</parameter>`.
/// Leaves content between tags untouched. A `<` with no matching `>`
/// keeps its (whitespace-stripped) tail.
fn normalize_xml_tags(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut stream = text.chars();

    while let Some(ch) = stream.next() {
        if ch != '<' {
            out.push(ch);
            continue;
        }
        // Inside a tag: copy non-whitespace straight through until the
        // closing '>' (or end of input).
        out.push('<');
        for inner in stream.by_ref() {
            if inner == '>' {
                out.push('>');
                break;
            }
            if !inner.is_whitespace() {
                out.push(inner);
            }
        }
    }

    out
}
|
|
|
|
/// Parse a Qwen-style `<tag=value>body</tag>` pseudo-XML element.
/// Returns `(value, body, rest)` on success, with value and body
/// trimmed and `rest` being everything after the closing tag.
fn parse_qwen_tag<'a>(s: &'a str, tag: &str) -> Option<(&'a str, &'a str, &'a str)> {
    let open_marker = format!("<{}=", tag);
    let close_marker = format!("</{}>", tag);

    let value_start = s.find(&open_marker)? + open_marker.len();
    let value_end = value_start + s[value_start..].find('>')?;
    let body_start = value_end + 1;
    let body_end = body_start + s[body_start..].find(&close_marker)?;

    let value = s[value_start..value_end].trim();
    let body = s[body_start..body_end].trim();
    let rest = &s[body_end + close_marker.len()..];
    Some((value, body, rest))
}
|
|
|
|
/// Parse Qwen's XML tool call format.
|
|
fn parse_xml_tool_call(body: &str, counter: &mut u32) -> Option<ToolCall> {
|
|
let (func_name, func_body, _) = parse_qwen_tag(body, "function")?;
|
|
let func_name = func_name.to_string();
|
|
|
|
let mut args = serde_json::Map::new();
|
|
let mut rest = func_body;
|
|
while let Some((key, val, remainder)) = parse_qwen_tag(rest, "parameter") {
|
|
args.insert(key.to_string(), serde_json::Value::String(val.to_string()));
|
|
rest = remainder;
|
|
}
|
|
|
|
*counter += 1;
|
|
Some(ToolCall {
|
|
id: format!("leaked_{}", counter),
|
|
call_type: "function".to_string(),
|
|
function: FunctionCall {
|
|
name: func_name,
|
|
arguments: serde_json::to_string(&args).unwrap_or_default(),
|
|
},
|
|
})
|
|
}
|
|
|
|
/// Parse JSON tool call format (some models emit this).
|
|
fn parse_json_tool_call(body: &str, counter: &mut u32) -> Option<ToolCall> {
|
|
let v: serde_json::Value = serde_json::from_str(body).ok()?;
|
|
let name = v["name"].as_str()?;
|
|
let arguments = &v["arguments"];
|
|
|
|
*counter += 1;
|
|
Some(ToolCall {
|
|
id: format!("leaked_{}", counter),
|
|
call_type: "function".to_string(),
|
|
function: FunctionCall {
|
|
name: name.to_string(),
|
|
arguments: serde_json::to_string(arguments).unwrap_or_default(),
|
|
},
|
|
})
|
|
}
|
|
|
|
/// Strip tool call XML and thinking tokens from text so the conversation
|
|
/// history stays clean. Removes `<tool_call>...</tool_call>` blocks and
|
|
/// `</think>` tags (thinking content before them is kept — it's useful context).
|
|
fn strip_leaked_artifacts(text: &str) -> String {
|
|
let normalized = normalize_xml_tags(text);
|
|
let mut result = normalized.clone();
|
|
|
|
// Remove <tool_call>...</tool_call> blocks
|
|
while let Some(start) = result.find("<tool_call>") {
|
|
if let Some(end_pos) = result[start..].find("</tool_call>") {
|
|
let end = start + end_pos + "</tool_call>".len();
|
|
result = format!("{}{}", &result[..start], &result[end..]);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Remove </think> tags (but keep the thinking text before them)
|
|
result = result.replace("</think>", "");
|
|
|
|
result.trim().to_string()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // A clean (already well-formed) leaked call with a leading think
    // block: the bash call is extracted and the parameter text survives
    // verbatim.
    #[test]
    fn test_leaked_tool_call_clean() {
        let text = "thinking\n</think>\n<tool_call>\n<function=bash>\n<parameter=command>poc-memory used core-personality</parameter>\n</function>\n</tool_call>";
        let calls = parse_leaked_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "bash");
        let args: serde_json::Value = serde_json::from_str(&calls[0].function.arguments).unwrap();
        assert_eq!(args["command"], "poc-memory used core-personality");
    }

    #[test]
    fn test_leaked_tool_call_streamed_whitespace() {
        // Streaming tokenizer splits XML tags across tokens with newlines
        let text = "<tool_call>\n<\nfunction\n=\nbash\n>\n<\nparameter\n=\ncommand\n>pwd</\nparameter\n>\n</\nfunction\n>\n</tool_call>";
        let calls = parse_leaked_tool_calls(text);
        assert_eq!(calls.len(), 1, "should parse streamed format");
        assert_eq!(calls[0].function.name, "bash");
        let args: serde_json::Value = serde_json::from_str(&calls[0].function.arguments).unwrap();
        assert_eq!(args["command"], "pwd");
    }

    // normalize_xml_tags must only touch whitespace INSIDE tags; text
    // between tags is passed through untouched.
    #[test]
    fn test_normalize_preserves_content() {
        let text = "<function=bash>\n<parameter=command>echo hello world</parameter>\n</function>";
        let normalized = normalize_xml_tags(text);
        // Newlines between tags are not inside tags, so preserved
        assert_eq!(normalized, "<function=bash>\n<parameter=command>echo hello world</parameter>\n</function>");
    }

    #[test]
    fn test_normalize_strips_tag_internal_whitespace() {
        let text = "<\nfunction\n=\nbash\n>";
        let normalized = normalize_xml_tags(text);
        assert_eq!(normalized, "<function=bash>");
    }
}
|