2026-03-18 22:44:52 -04:00
|
|
|
// agent.rs — Core agent loop
|
|
|
|
|
//
|
|
|
|
|
// The simplest possible implementation of the agent pattern:
|
|
|
|
|
// send messages + tool definitions to the model, if it responds
|
|
|
|
|
// with tool calls then dispatch them and loop, if it responds
|
|
|
|
|
// with text then display it and wait for the next prompt.
|
|
|
|
|
//
|
|
|
|
|
// Uses streaming by default so text tokens appear as they're
|
|
|
|
|
// generated. Tool calls are accumulated from stream deltas and
|
|
|
|
|
// dispatched after the stream completes.
|
|
|
|
|
//
|
|
|
|
|
// The DMN (dmn.rs) is the outer loop that decides what prompts
|
|
|
|
|
// to send here. This module just handles single turns: prompt
|
|
|
|
|
// in, response out, tool calls dispatched.
|
|
|
|
|
|
|
|
|
|
use anyhow::Result;
|
|
|
|
|
use chrono::{DateTime, Utc};
|
|
|
|
|
use tiktoken_rs::CoreBPE;
|
|
|
|
|
|
|
|
|
|
use std::io::Write;
|
|
|
|
|
use std::process::{Command, Stdio};
|
|
|
|
|
|
|
|
|
|
use crate::api::ApiClient;
|
|
|
|
|
use crate::journal;
|
|
|
|
|
use crate::log::ConversationLog;
|
|
|
|
|
use crate::tools;
|
|
|
|
|
use crate::tools::ProcessTracker;
|
|
|
|
|
use crate::types::*;
|
|
|
|
|
use crate::ui_channel::{ContextSection, SharedContextState, StatusInfo, StreamTarget, UiMessage, UiSender};
|
|
|
|
|
|
|
|
|
|
/// Result of a single agent turn, returned to the outer (DMN) loop so it
/// can decide what to do next.
pub struct TurnResult {
    /// The text response (already sent through UI channel).
    #[allow(dead_code)]
    pub text: String,
    /// Whether the model called yield_to_user during this turn.
    pub yield_requested: bool,
    /// Whether any tools (other than yield_to_user) were called.
    pub had_tool_calls: bool,
    /// Number of tool calls that returned errors this turn.
    pub tool_errors: u32,
    /// Model name to switch to after this turn completes.
    pub model_switch: Option<String>,
    /// Agent requested DMN pause (full stop on autonomous behavior).
    pub dmn_pause: bool,
}
|
|
|
|
|
|
|
|
|
|
/// Accumulated state across tool dispatches within a single turn.
/// Flattened into a `TurnResult` when the turn completes.
struct DispatchState {
    /// Set when the yield_to_user tool is called.
    yield_requested: bool,
    /// Set when any tool other than yield_to_user runs.
    had_tool_calls: bool,
    /// Count of tool results whose text began with "Error:".
    tool_errors: u32,
    /// Model name requested by a tool, applied after the turn.
    model_switch: Option<String>,
    /// A tool requested a full DMN pause.
    dmn_pause: bool,
}
|
|
|
|
|
|
|
|
|
|
/// The core agent: owns the API client, the in-memory message list that
/// forms the context window, tool definitions, and the bookkeeping used
/// to measure context usage and publish it to the TUI.
pub struct Agent {
    client: ApiClient,
    // Full context window: system prompt + identity/journal context
    // messages, followed by the live conversation.
    messages: Vec<Message>,
    tool_defs: Vec<ToolDef>,
    /// Last known prompt token count from the API (tracks context size).
    last_prompt_tokens: u32,
    /// Shared process tracker for bash tool — lets TUI show/kill running commands.
    pub process_tracker: ProcessTracker,
    /// Current reasoning effort level ("none", "low", "high").
    pub reasoning_effort: String,
    /// Persistent conversation log — append-only record of all messages.
    conversation_log: Option<ConversationLog>,
    /// Current context window budget breakdown.
    pub context_budget: ContextBudget,
    /// BPE tokenizer for token counting (cl100k_base — close enough
    /// for Claude and Qwen budget allocation, ~85-90% count accuracy).
    tokenizer: CoreBPE,
    /// Mutable context state — personality, working stack, etc.
    pub context: ContextState,
    /// Shared live context summary — TUI reads this directly for debug screen.
    pub shared_context: SharedContextState,
    /// Stable session ID for memory-search dedup across turns.
    session_id: String,
}
|
|
|
|
|
|
|
|
|
|
impl Agent {
|
|
|
|
|
/// Build an agent: load the tokenizer, restore persisted state (journal
/// tail, working stack), then lay down the initial context window in a
/// fixed order: system prompt, rendered context message, journal.
///
/// NOTE(review): `refresh_context_message` assumes the rendered context
/// message lands at index 1 — the push order here establishes that.
pub fn new(
    client: ApiClient,
    system_prompt: String,
    personality: Vec<(String, String)>,
    conversation_log: Option<ConversationLog>,
    shared_context: SharedContextState,
) -> Self {
    let tool_defs = tools::definitions();
    // Tokenizer load failure is unrecoverable — the whole budget system
    // depends on it.
    let tokenizer = tiktoken_rs::cl100k_base()
        .expect("failed to load cl100k_base tokenizer");

    let context = ContextState {
        system_prompt: system_prompt.clone(),
        personality,
        journal: String::new(),
        working_stack: Vec::new(),
    };
    // Timestamped session ID — stable for the lifetime of this process.
    let session_id = format!("poc-agent-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
    let mut agent = Self {
        client,
        messages: Vec::new(),
        tool_defs,
        last_prompt_tokens: 0,
        process_tracker: ProcessTracker::new(),
        reasoning_effort: "none".to_string(),
        conversation_log,
        context_budget: ContextBudget::default(),
        tokenizer,
        context,
        shared_context,
        session_id,
    };

    // Load recent journal entries at startup for orientation
    agent.load_startup_journal();
    agent.load_working_stack();

    // Context-only messages (not logged; rebuilt on every startup/compaction).
    agent.push_context(Message::system(system_prompt));
    let rendered = agent.context.render_context_message();
    if !rendered.is_empty() {
        agent.push_context(Message::user(rendered));
    }
    if !agent.context.journal.is_empty() {
        agent.push_context(Message::user(agent.context.journal.clone()));
    }
    agent.measure_budget();
    agent.publish_context_state();
    agent
}
|
|
|
|
|
|
|
|
|
|
/// Run poc-hook for a given event, returning any output to inject.
|
|
|
|
|
fn run_hook(&self, event: &str, prompt: &str) -> Option<String> {
|
|
|
|
|
let transcript_path = self.conversation_log.as_ref()
|
|
|
|
|
.map(|l| l.path().to_string_lossy().to_string())
|
|
|
|
|
.unwrap_or_default();
|
|
|
|
|
|
|
|
|
|
let hook_input = serde_json::json!({
|
|
|
|
|
"hook_event_name": event,
|
|
|
|
|
"session_id": self.session_id,
|
|
|
|
|
"transcript_path": transcript_path,
|
|
|
|
|
"prompt": prompt,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
let mut child = Command::new("poc-hook")
|
|
|
|
|
.stdin(Stdio::piped())
|
|
|
|
|
.stdout(Stdio::piped())
|
|
|
|
|
.stderr(Stdio::null())
|
|
|
|
|
.spawn()
|
|
|
|
|
.ok()?;
|
|
|
|
|
|
|
|
|
|
if let Some(ref mut stdin) = child.stdin {
|
|
|
|
|
let _ = stdin.write_all(hook_input.to_string().as_bytes());
|
|
|
|
|
}
|
|
|
|
|
drop(child.stdin.take());
|
|
|
|
|
|
|
|
|
|
let output = child.wait_with_output().ok()?;
|
|
|
|
|
let text = String::from_utf8_lossy(&output.stdout).to_string();
|
|
|
|
|
if text.trim().is_empty() {
|
|
|
|
|
None
|
|
|
|
|
} else {
|
|
|
|
|
Some(text)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Push a conversation message — stamped and logged.
|
|
|
|
|
fn push_message(&mut self, mut msg: Message) {
|
|
|
|
|
msg.stamp();
|
|
|
|
|
if let Some(ref log) = self.conversation_log {
|
|
|
|
|
if let Err(e) = log.append(&msg) {
|
|
|
|
|
eprintln!("warning: failed to log message: {:#}", e);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
self.messages.push(msg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Push a context-only message (system prompt, identity context,
/// journal summaries). Not logged — these are reconstructed on
/// every startup/compaction, so persisting them would duplicate state.
fn push_context(&mut self, msg: Message) {
    self.messages.push(msg);
}
|
|
|
|
|
|
|
|
|
|
/// Measure context window usage by category. Uses the BPE tokenizer
/// for direct token counting (no chars/4 approximation).
///
/// Classification is positional: leading system/user messages count as
/// identity, user messages with known journal prefixes count as journal,
/// and everything from the first "other" message onward is conversation.
fn measure_budget(&mut self) {
    let mut id_tokens: usize = 0;
    // No live memory region is measured yet; kept so the budget struct
    // stays complete.
    let mem_tokens: usize = 0;
    let mut jnl_tokens: usize = 0;
    let mut conv_tokens: usize = 0;
    // Once a message is classified as conversation, every later message is too.
    let mut in_conversation = false;

    for msg in &self.messages {
        let tokens = msg_token_count(&self.tokenizer, msg);

        if in_conversation {
            conv_tokens += tokens;
            continue;
        }

        match msg.role {
            Role::System => id_tokens += tokens,
            Role::User => {
                let text = msg.content_text();
                // Journal summaries are injected as user messages with
                // these fixed prefixes (see compaction/startup paths).
                if text.starts_with("[Earlier in this conversation") {
                    jnl_tokens += tokens;
                } else if text.starts_with("Your context was just rebuilt") {
                    jnl_tokens += tokens;
                } else if jnl_tokens == 0 && conv_tokens == 0 {
                    // Static identity context (before any journal/conversation)
                    id_tokens += tokens;
                } else {
                    in_conversation = true;
                    conv_tokens += tokens;
                }
            }
            _ => {
                in_conversation = true;
                conv_tokens += tokens;
            }
        }
    }

    self.context_budget = ContextBudget {
        identity_tokens: id_tokens,
        memory_tokens: mem_tokens,
        journal_tokens: jnl_tokens,
        conversation_tokens: conv_tokens,
        window_tokens: model_context_window(&self.client.model),
    };
}
|
|
|
|
|
|
|
|
|
|
/// Send a user message and run the agent loop until the model
/// produces a text response (no more tool calls). Streams text
/// and tool activity through the UI channel.
///
/// Retry policy inside the loop:
/// - context overflow → emergency compact, retry (max 2)
/// - stream error → 2s backoff, retry (max 2, shares `empty_retries`)
/// - empty response → inject a nudge message, retry (max 2)
pub async fn turn(
    &mut self,
    user_input: &str,
    ui_tx: &UiSender,
    target: StreamTarget,
) -> Result<TurnResult> {
    // Run poc-hook (memory search, notifications, context check)
    if let Some(hook_output) = self.run_hook("UserPromptSubmit", user_input) {
        // Hook output rides along inside a <system-reminder> wrapper.
        let enriched = format!("{}\n\n<system-reminder>\n{}\n</system-reminder>",
            user_input, hook_output);
        self.push_message(Message::user(enriched));
    } else {
        self.push_message(Message::user(user_input));
    }

    let mut overflow_retries: u32 = 0;
    let mut empty_retries: u32 = 0;
    let mut ds = DispatchState {
        yield_requested: false,
        had_tool_calls: false,
        tool_errors: 0,
        model_switch: None,
        dmn_pause: false,
    };

    loop {
        let _ = ui_tx.send(UiMessage::Activity("thinking...".into()));
        let api_result = self
            .client
            .chat_completion_stream(
                &self.messages,
                Some(&self.tool_defs),
                ui_tx,
                target,
                &self.reasoning_effort,
            )
            .await;

        // Context overflow → compact and retry (max 2 attempts)
        // Stream error → retry with backoff (max 2 attempts)
        let (msg, usage) = match api_result {
            Err(e) if is_context_overflow(&e) && overflow_retries < 2 => {
                overflow_retries += 1;
                let _ = ui_tx.send(UiMessage::Info(format!(
                    "[context overflow — compacting and retrying ({}/2)]",
                    overflow_retries,
                )));
                self.emergency_compact();
                continue;
            }
            Err(e) if is_stream_error(&e) && empty_retries < 2 => {
                empty_retries += 1;
                let _ = ui_tx.send(UiMessage::Info(format!(
                    "[stream error: {} — retrying ({}/2)]",
                    e, empty_retries,
                )));
                tokio::time::sleep(std::time::Duration::from_secs(2)).await;
                continue;
            }
            other => other?,
        };

        // Strip ephemeral tool calls (journal) that the API has
        // now processed. They're persisted to disk; no need to keep
        // them in the conversation history burning tokens.
        self.strip_ephemeral_tool_calls();

        if let Some(usage) = &usage {
            self.last_prompt_tokens = usage.prompt_tokens;
            self.measure_budget();
            self.publish_context_state();
            let _ = ui_tx.send(UiMessage::StatusUpdate(StatusInfo {
                dmn_state: String::new(), // filled by main loop
                dmn_turns: 0,
                dmn_max_turns: 0,
                prompt_tokens: usage.prompt_tokens,
                completion_tokens: usage.completion_tokens,
                model: self.client.model.clone(),
                turn_tools: 0, // tracked by TUI from ToolCall messages
                context_budget: self.context_budget.status_string(),
            }));
        }

        // Empty response — model returned finish=stop with no content
        // or tool calls. Inject a nudge so the retry has different input.
        let has_content = msg.content.is_some();
        let has_tools = msg.tool_calls.as_ref().map_or(false, |tc| !tc.is_empty());
        if !has_content && !has_tools {
            if empty_retries < 2 {
                empty_retries += 1;
                let _ = ui_tx.send(UiMessage::Debug(format!(
                    "empty response, injecting nudge and retrying ({}/2)",
                    empty_retries,
                )));
                self.push_message(Message::user(
                    "[system] Your previous response was empty. \
                    Please respond with text or use a tool."
                ));
                continue;
            }
            // After max retries, fall through — return the empty response
        } else {
            empty_retries = 0;
        }

        // Structured tool calls from the API
        if let Some(ref tool_calls) = msg.tool_calls {
            if !tool_calls.is_empty() {
                self.push_message(msg.clone());
                for call in tool_calls {
                    self.dispatch_tool_call(call, None, ui_tx, &mut ds)
                        .await;
                }
                continue;
            }
        }

        // No structured tool calls — check for leaked tool calls
        // (Qwen sometimes outputs <tool_call> XML as text).
        let text = msg.content_text().to_string();
        let leaked = crate::parsing::parse_leaked_tool_calls(&text);

        if !leaked.is_empty() {
            let _ = ui_tx.send(UiMessage::Debug(format!(
                "recovered {} leaked tool call(s) from text",
                leaked.len()
            )));
            // Strip tool call XML and thinking tokens from the message
            // so they don't clutter the conversation history.
            let cleaned = crate::parsing::strip_leaked_artifacts(&text);
            let mut clean_msg = msg.clone();
            clean_msg.content = if cleaned.trim().is_empty() {
                None
            } else {
                Some(MessageContent::Text(cleaned))
            };
            self.push_message(clean_msg);
            for call in &leaked {
                self.dispatch_tool_call(call, Some("recovered"), ui_tx, &mut ds)
                    .await;
            }
            continue;
        }

        // Genuinely text-only response
        let _ = ui_tx.send(UiMessage::Activity(String::new()));
        self.push_message(msg);

        return Ok(TurnResult {
            text,
            yield_requested: ds.yield_requested,
            had_tool_calls: ds.had_tool_calls,
            tool_errors: ds.tool_errors,
            model_switch: ds.model_switch,
            dmn_pause: ds.dmn_pause,
        });
    }
}
|
|
|
|
|
|
|
|
|
|
/// Dispatch a single tool call: send UI annotations, run the tool,
/// push results into the conversation, handle images.
///
/// `tag` is an optional UI label (e.g. "recovered" for calls re-parsed
/// out of leaked XML). Outcome flags accumulate into `ds`.
async fn dispatch_tool_call(
    &mut self,
    call: &ToolCall,
    tag: Option<&str>,
    ui_tx: &UiSender,
    ds: &mut DispatchState,
) {
    // Malformed argument JSON falls back to a default value rather than
    // failing the dispatch.
    let args: serde_json::Value =
        serde_json::from_str(&call.function.arguments).unwrap_or_default();

    let args_summary = summarize_args(&call.function.name, &args);
    let label = match tag {
        Some(t) => format!("calling: {} ({})", call.function.name, t),
        None => format!("calling: {}", call.function.name),
    };
    let _ = ui_tx.send(UiMessage::Activity(label));
    let _ = ui_tx.send(UiMessage::ToolCall {
        name: call.function.name.clone(),
        args_summary: args_summary.clone(),
    });
    let _ = ui_tx.send(UiMessage::ToolStarted {
        id: call.id.clone(),
        name: call.function.name.clone(),
        detail: args_summary,
    });

    // Handle working_stack tool — needs &mut self for context state
    if call.function.name == "working_stack" {
        let result = tools::working_stack::handle(&args, &mut self.context.working_stack);
        let output = tools::ToolOutput {
            text: result.clone(),
            is_yield: false,
            images: Vec::new(),
            model_switch: None,
            dmn_pause: false,
        };
        let _ = ui_tx.send(UiMessage::ToolResult {
            name: call.function.name.clone(),
            result: output.text.clone(),
        });
        let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });
        self.push_message(Message::tool_result(&call.id, &output.text));
        ds.had_tool_calls = true;

        // Re-render the context message so the model sees the updated stack
        if !result.starts_with("Error:") {
            self.refresh_context_message();
        }
        return;
    }

    let output =
        tools::dispatch(&call.function.name, &args, &self.process_tracker).await;

    // yield_to_user is tracked separately from "real" tool work.
    if output.is_yield {
        ds.yield_requested = true;
    } else {
        ds.had_tool_calls = true;
    }
    if output.model_switch.is_some() {
        ds.model_switch = output.model_switch;
    }
    if output.dmn_pause {
        ds.dmn_pause = true;
    }
    // Tools signal failure via an "Error:" prefix on their text output.
    if output.text.starts_with("Error:") {
        ds.tool_errors += 1;
    }

    let _ = ui_tx.send(UiMessage::ToolResult {
        name: call.function.name.clone(),
        result: output.text.clone(),
    });
    let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });

    self.push_message(Message::tool_result(&call.id, &output.text));

    if !output.images.is_empty() {
        // Only one live image in context at a time — age out any
        // previous ones to avoid accumulating ~90KB+ per image.
        self.age_out_images();
        self.push_message(Message::user_with_images(
            "Here is the image you requested:",
            &output.images,
        ));
    }
}
|
|
|
|
|
|
|
|
|
|
/// Build context state summary for the debug screen.
|
|
|
|
|
pub fn context_state_summary(&self) -> Vec<ContextSection> {
|
|
|
|
|
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
|
|
|
|
|
|
|
|
|
let mut sections = Vec::new();
|
|
|
|
|
|
|
|
|
|
// System prompt
|
|
|
|
|
sections.push(ContextSection {
|
|
|
|
|
name: "System prompt".into(),
|
|
|
|
|
tokens: count(&self.context.system_prompt),
|
|
|
|
|
content: self.context.system_prompt.clone(),
|
|
|
|
|
children: Vec::new(),
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Personality — parent with file children
|
|
|
|
|
let personality_children: Vec<ContextSection> = self.context.personality.iter()
|
|
|
|
|
.map(|(name, content)| ContextSection {
|
|
|
|
|
name: name.clone(),
|
|
|
|
|
tokens: count(content),
|
|
|
|
|
content: content.clone(),
|
|
|
|
|
children: Vec::new(),
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
let personality_tokens: usize = personality_children.iter().map(|c| c.tokens).sum();
|
|
|
|
|
sections.push(ContextSection {
|
|
|
|
|
name: format!("Personality ({} files)", personality_children.len()),
|
|
|
|
|
tokens: personality_tokens,
|
|
|
|
|
content: String::new(),
|
|
|
|
|
children: personality_children,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Journal — split into per-entry children
|
|
|
|
|
{
|
|
|
|
|
let mut journal_children = Vec::new();
|
|
|
|
|
let mut current_header = String::new();
|
|
|
|
|
let mut current_body = String::new();
|
|
|
|
|
for line in self.context.journal.lines() {
|
|
|
|
|
if line.starts_with("## ") {
|
|
|
|
|
if !current_header.is_empty() {
|
|
|
|
|
let body = std::mem::take(&mut current_body);
|
|
|
|
|
let preview: String = body.lines().next().unwrap_or("").chars().take(60).collect();
|
|
|
|
|
journal_children.push(ContextSection {
|
|
|
|
|
name: format!("{}: {}", current_header, preview),
|
|
|
|
|
tokens: count(&body),
|
|
|
|
|
content: body,
|
|
|
|
|
children: Vec::new(),
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
current_header = line.trim_start_matches("## ").to_string();
|
|
|
|
|
current_body.clear();
|
|
|
|
|
} else {
|
|
|
|
|
if !current_body.is_empty() || !line.is_empty() {
|
|
|
|
|
current_body.push_str(line);
|
|
|
|
|
current_body.push('\n');
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if !current_header.is_empty() {
|
|
|
|
|
let preview: String = current_body.lines().next().unwrap_or("").chars().take(60).collect();
|
|
|
|
|
journal_children.push(ContextSection {
|
|
|
|
|
name: format!("{}: {}", current_header, preview),
|
|
|
|
|
tokens: count(¤t_body),
|
|
|
|
|
content: current_body,
|
|
|
|
|
children: Vec::new(),
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
let journal_tokens: usize = journal_children.iter().map(|c| c.tokens).sum();
|
|
|
|
|
sections.push(ContextSection {
|
|
|
|
|
name: format!("Journal ({} entries)", journal_children.len()),
|
|
|
|
|
tokens: journal_tokens,
|
|
|
|
|
content: String::new(),
|
|
|
|
|
children: journal_children,
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Working stack — instructions + items as children
|
|
|
|
|
let instructions = std::fs::read_to_string(WORKING_STACK_INSTRUCTIONS)
|
|
|
|
|
.unwrap_or_default();
|
|
|
|
|
let mut stack_children = vec![ContextSection {
|
|
|
|
|
name: "Instructions".into(),
|
|
|
|
|
tokens: count(&instructions),
|
|
|
|
|
content: instructions,
|
|
|
|
|
children: Vec::new(),
|
|
|
|
|
}];
|
|
|
|
|
for (i, item) in self.context.working_stack.iter().enumerate() {
|
|
|
|
|
let marker = if i == self.context.working_stack.len() - 1 { "→" } else { " " };
|
|
|
|
|
stack_children.push(ContextSection {
|
|
|
|
|
name: format!("{} [{}] {}", marker, i, item),
|
|
|
|
|
tokens: count(item),
|
|
|
|
|
content: String::new(),
|
|
|
|
|
children: Vec::new(),
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
let stack_tokens: usize = stack_children.iter().map(|c| c.tokens).sum();
|
|
|
|
|
sections.push(ContextSection {
|
|
|
|
|
name: format!("Working stack ({} items)", self.context.working_stack.len()),
|
|
|
|
|
tokens: stack_tokens,
|
|
|
|
|
content: String::new(),
|
|
|
|
|
children: stack_children,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Conversation — each message as a child
|
|
|
|
|
let conv_start = self.messages.iter()
|
|
|
|
|
.position(|m| m.role == Role::Assistant || m.role == Role::Tool)
|
|
|
|
|
.unwrap_or(self.messages.len());
|
|
|
|
|
let conv_messages = &self.messages[conv_start..];
|
|
|
|
|
let conv_children: Vec<ContextSection> = conv_messages.iter().enumerate()
|
|
|
|
|
.map(|(i, msg)| {
|
|
|
|
|
let text = msg.content.as_ref()
|
|
|
|
|
.map(|c| c.as_text().to_string())
|
|
|
|
|
.unwrap_or_default();
|
|
|
|
|
let tool_info = msg.tool_calls.as_ref().map(|tc| {
|
|
|
|
|
tc.iter()
|
|
|
|
|
.map(|c| c.function.name.clone())
|
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
|
.join(", ")
|
|
|
|
|
});
|
|
|
|
|
let label = match (&msg.role, &tool_info) {
|
|
|
|
|
(_, Some(tools)) => format!("[tool_call: {}]", tools),
|
|
|
|
|
_ => {
|
|
|
|
|
let preview: String = text.chars().take(60).collect();
|
|
|
|
|
let preview = preview.replace('\n', " ");
|
|
|
|
|
if text.len() > 60 { format!("{}...", preview) } else { preview }
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
let tokens = count(&text);
|
|
|
|
|
let role_name = match msg.role {
|
|
|
|
|
Role::Assistant => "PoC",
|
|
|
|
|
Role::User => "Kent",
|
|
|
|
|
Role::Tool => "tool",
|
|
|
|
|
Role::System => "system",
|
|
|
|
|
};
|
|
|
|
|
ContextSection {
|
|
|
|
|
name: format!("[{}] {}: {}", conv_start + i, role_name, label),
|
|
|
|
|
tokens,
|
|
|
|
|
content: text,
|
|
|
|
|
children: Vec::new(),
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
let conv_tokens: usize = conv_children.iter().map(|c| c.tokens).sum();
|
|
|
|
|
sections.push(ContextSection {
|
|
|
|
|
name: format!("Conversation ({} messages)", conv_children.len()),
|
|
|
|
|
tokens: conv_tokens,
|
|
|
|
|
content: String::new(),
|
|
|
|
|
children: conv_children,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
sections
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Load recent journal entries at startup for orientation.
/// Uses the same budget logic as compaction but with empty conversation.
/// Only parses the tail of the journal file (last 64KB) for speed.
fn load_startup_journal(&mut self) {
    let journal_path = journal::default_journal_path();
    let entries = journal::parse_journal_tail(&journal_path, 64 * 1024);
    if entries.is_empty() {
        return;
    }

    let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
    let context_message = self.context.render_context_message();

    // Budget the journal against the full context plan so startup and
    // compaction make consistent sizing decisions.
    let plan = plan_context(
        &self.context.system_prompt,
        &context_message,
        &[], // no conversation yet
        &entries,
        &self.client.model,
        &count,
    );

    self.context.journal = render_journal_text(&entries, &plan);
}
|
|
|
|
|
|
|
|
|
|
/// Re-render the context message in self.messages from live ContextState.
|
|
|
|
|
/// Called after any change to context state (working stack, etc).
|
|
|
|
|
fn refresh_context_message(&mut self) {
|
|
|
|
|
let rendered = self.context.render_context_message();
|
|
|
|
|
// The context message is the first user message (index 1, after system prompt)
|
|
|
|
|
if self.messages.len() >= 2 && self.messages[1].role == Role::User {
|
|
|
|
|
self.messages[1] = Message::user(rendered);
|
|
|
|
|
}
|
|
|
|
|
self.publish_context_state();
|
|
|
|
|
self.save_working_stack();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Persist working stack to disk.
|
|
|
|
|
fn save_working_stack(&self) {
|
|
|
|
|
if let Ok(json) = serde_json::to_string(&self.context.working_stack) {
|
|
|
|
|
let _ = std::fs::write(WORKING_STACK_FILE, json);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Load working stack from disk.
|
|
|
|
|
fn load_working_stack(&mut self) {
|
|
|
|
|
if let Ok(data) = std::fs::read_to_string(WORKING_STACK_FILE) {
|
|
|
|
|
if let Ok(stack) = serde_json::from_str::<Vec<String>>(&data) {
|
|
|
|
|
self.context.working_stack = stack;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Push the current context summary to the shared state for the TUI to read.
/// A poisoned lock is silently skipped — the TUI just keeps its previous
/// snapshot.
fn publish_context_state(&self) {
    if let Ok(mut state) = self.shared_context.write() {
        *state = self.context_state_summary();
    }
}
|
|
|
|
|
|
|
|
|
|
/// Replace base64 image data in older messages with text placeholders.
|
|
|
|
|
/// Only the most recent image stays live — each new image ages out
|
|
|
|
|
/// all previous ones. The tool result message (right before each image
|
|
|
|
|
/// message) already records what was loaded, so no info is lost.
|
|
|
|
|
fn age_out_images(&mut self) {
|
|
|
|
|
for msg in &mut self.messages {
|
|
|
|
|
if let Some(MessageContent::Parts(parts)) = &msg.content {
|
|
|
|
|
let has_images = parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. }));
|
|
|
|
|
if !has_images {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
let mut replacement = String::new();
|
|
|
|
|
for part in parts {
|
|
|
|
|
match part {
|
|
|
|
|
ContentPart::Text { text } => {
|
|
|
|
|
if !replacement.is_empty() {
|
|
|
|
|
replacement.push('\n');
|
|
|
|
|
}
|
|
|
|
|
replacement.push_str(text);
|
|
|
|
|
}
|
|
|
|
|
ContentPart::ImageUrl { .. } => {
|
|
|
|
|
if !replacement.is_empty() {
|
|
|
|
|
replacement.push('\n');
|
|
|
|
|
}
|
|
|
|
|
replacement.push_str(
|
|
|
|
|
"[image aged out — see tool result above for details]",
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
msg.content = Some(MessageContent::Text(replacement));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Strip ephemeral tool calls from the conversation history.
|
|
|
|
|
///
|
|
|
|
|
/// Ephemeral tools (like journal) persist their output to disk,
|
|
|
|
|
/// so the tool call + result don't need to stay in the context
|
|
|
|
|
/// window. We keep them for exactly one API round-trip (the model
|
|
|
|
|
/// needs to see the result was acknowledged), then strip them.
|
|
|
|
|
///
|
|
|
|
|
/// If an assistant message contains ONLY ephemeral tool calls,
|
|
|
|
|
/// the entire message and its tool results are removed. If mixed
|
|
|
|
|
/// with non-ephemeral calls, we leave it (rare case, small cost).
|
|
|
|
|
fn strip_ephemeral_tool_calls(&mut self) {
|
|
|
|
|
// Collect IDs of tool calls to strip
|
|
|
|
|
let mut strip_ids: Vec<String> = Vec::new();
|
|
|
|
|
let mut strip_msg_indices: Vec<usize> = Vec::new();
|
|
|
|
|
|
|
|
|
|
for (i, msg) in self.messages.iter().enumerate() {
|
|
|
|
|
if msg.role != Role::Assistant {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
let calls = match &msg.tool_calls {
|
|
|
|
|
Some(c) if !c.is_empty() => c,
|
|
|
|
|
_ => continue,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let all_ephemeral = calls.iter().all(|c| {
|
|
|
|
|
c.function.name == tools::journal::TOOL_NAME
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if all_ephemeral {
|
|
|
|
|
strip_msg_indices.push(i);
|
|
|
|
|
for call in calls {
|
|
|
|
|
strip_ids.push(call.id.clone());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if strip_ids.is_empty() {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Remove in reverse order to preserve indices
|
|
|
|
|
self.messages.retain(|msg| {
|
|
|
|
|
// Strip the assistant messages we identified
|
|
|
|
|
if msg.role == Role::Assistant {
|
|
|
|
|
if let Some(calls) = &msg.tool_calls {
|
|
|
|
|
if calls.iter().all(|c| strip_ids.contains(&c.id)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Strip matching tool results
|
|
|
|
|
if msg.role == Role::Tool {
|
|
|
|
|
if let Some(ref id) = msg.tool_call_id {
|
|
|
|
|
if strip_ids.contains(id) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
true
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Last prompt token count reported by the API. Zero until the first
/// response arrives (and reset to zero after a compaction).
pub fn last_prompt_tokens(&self) -> u32 {
    self.last_prompt_tokens
}
|
|
|
|
|
|
|
|
|
|
/// Rebuild the context window with a fresh system prompt and personality,
/// delegating to do_compact(): old messages become journal summaries,
/// recent ones are kept raw. The resulting window is always:
///   identity + journal summaries + raw recent messages
pub fn compact(&mut self, new_system_prompt: String, new_personality: Vec<(String, String)>) {
    self.context.system_prompt = new_system_prompt;
    self.context.personality = new_personality;
    self.do_compact();
}
|
|
|
|
|
|
|
|
|
|
/// Internal compaction — rebuilds context window from current messages.
fn do_compact(&mut self) {
    // Find where actual conversation starts (after system + context):
    // the first assistant or tool message.
    let conv_start = self
        .messages
        .iter()
        .position(|m| m.role == Role::Assistant || m.role == Role::Tool)
        .unwrap_or(self.messages.len());

    let conversation: Vec<Message> = self.messages[conv_start..].to_vec();
    let (messages, journal) = build_context_window(
        &self.context,
        &conversation,
        &self.client.model,
        &self.tokenizer,
    );
    self.context.journal = journal;
    self.messages = messages;
    // API-reported count is stale after a rebuild; the next response
    // refreshes it.
    self.last_prompt_tokens = 0;
    self.measure_budget();
    self.publish_context_state();
}
|
|
|
|
|
|
|
|
|
|
    /// Emergency compaction using stored config — called on context overflow.
    ///
    /// Reuses the system prompt / personality already in self.context rather
    /// than taking fresh ones; otherwise identical to compact().
    fn emergency_compact(&mut self) {
        self.do_compact();
    }
|
|
|
|
|
|
|
|
|
|
/// Restore from the conversation log. Builds the context window
|
|
|
|
|
/// the same way compact() does — journal summaries for old messages,
|
|
|
|
|
/// raw recent messages. This is the unified startup path.
|
|
|
|
|
/// Returns true if the log had content to restore.
|
|
|
|
|
pub fn restore_from_log(
|
|
|
|
|
&mut self,
|
|
|
|
|
system_prompt: String,
|
|
|
|
|
personality: Vec<(String, String)>,
|
|
|
|
|
) -> bool {
|
|
|
|
|
self.context.system_prompt = system_prompt;
|
|
|
|
|
self.context.personality = personality;
|
|
|
|
|
|
|
|
|
|
let all_messages = match &self.conversation_log {
|
|
|
|
|
Some(log) => match log.read_tail(512 * 1024) {
|
|
|
|
|
Ok(msgs) if !msgs.is_empty() => {
|
|
|
|
|
dbglog!("[restore] read {} messages from log tail", msgs.len());
|
|
|
|
|
msgs
|
|
|
|
|
}
|
|
|
|
|
Ok(_) => {
|
|
|
|
|
dbglog!("[restore] log exists but is empty");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
Err(e) => {
|
|
|
|
|
dbglog!("[restore] failed to read log: {}", e);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
None => {
|
|
|
|
|
dbglog!("[restore] no conversation log configured");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Filter out system/context messages — we only want the
|
|
|
|
|
// actual conversation (user prompts, assistant responses,
|
|
|
|
|
// tool calls/results)
|
|
|
|
|
let conversation: Vec<Message> = all_messages
|
|
|
|
|
.into_iter()
|
|
|
|
|
.filter(|m| m.role != Role::System)
|
|
|
|
|
.collect();
|
|
|
|
|
dbglog!("[restore] {} messages after filtering system", conversation.len());
|
|
|
|
|
|
|
|
|
|
let (messages, journal) = build_context_window(
|
|
|
|
|
&self.context,
|
|
|
|
|
&conversation,
|
|
|
|
|
&self.client.model,
|
|
|
|
|
&self.tokenizer,
|
|
|
|
|
);
|
|
|
|
|
dbglog!("[restore] journal text: {} chars, {} lines",
|
|
|
|
|
journal.len(), journal.lines().count());
|
|
|
|
|
self.context.journal = journal;
|
|
|
|
|
self.messages = messages;
|
|
|
|
|
dbglog!("[restore] built context window: {} messages", self.messages.len());
|
|
|
|
|
self.last_prompt_tokens = 0;
|
|
|
|
|
self.measure_budget();
|
|
|
|
|
self.publish_context_state();
|
|
|
|
|
true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
    /// Replace the API client (for model switching).
    ///
    /// Only the client is swapped; messages and context state are left
    /// untouched, so the conversation continues on the new model.
    pub fn swap_client(&mut self, new_client: ApiClient) {
        self.client = new_client;
    }
|
|
|
|
|
|
|
|
|
|
    /// Get the model identifier (borrowed from the current API client).
    pub fn model(&self) -> &str {
        &self.client.model
    }
|
|
|
|
|
|
|
|
|
|
    /// Get the conversation history for persistence (read-only view).
    pub fn messages(&self) -> &[Message] {
        &self.messages
    }
|
|
|
|
|
|
|
|
|
|
    /// Mutable access to conversation history (for /retry).
    ///
    /// Caller is responsible for keeping the message sequence coherent
    /// (e.g. not orphaning tool results from their tool calls).
    pub fn messages_mut(&mut self) -> &mut Vec<Message> {
        &mut self.messages
    }
|
|
|
|
|
|
|
|
|
|
    /// Restore from a saved conversation.
    ///
    /// Replaces the in-memory history wholesale; no context rebuild or
    /// budget re-measure happens here (contrast with restore_from_log).
    pub fn restore(&mut self, messages: Vec<Message>) {
        self.messages = messages;
    }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Look up a model's context window size in tokens.
///
/// Matching is case-insensitive substring search on the model name;
/// unrecognized models fall back to a conservative 128k window.
pub fn model_context_window(model: &str) -> usize {
    let name = model.to_lowercase();
    let is_claude = name.contains("opus") || name.contains("sonnet");
    if is_claude {
        return 200_000;
    }
    if name.contains("qwen") {
        return 131_072;
    }
    128_000
}
|
|
|
|
|
|
|
|
|
|
/// Context budget in tokens: 60% of the model's context window.
/// Leaves headroom for conversation to grow before compaction triggers.
///
/// Future direction: make this dynamic based on what the agent is
/// doing — deep coding work might allocate more to conversation,
/// consolidation might allocate more to journal/memory, idle might
/// shrink everything to save cost.
fn context_budget_tokens(model: &str) -> usize {
    // Integer math: multiply before dividing to avoid losing precision.
    model_context_window(model) * 60 / 100
}
|
|
|
|
|
|
|
|
|
|
/// Allocation plan for the context window. Separates the budget math
/// (which entries and messages to include) from the message assembly
/// (building the actual Vec<Message>). This makes the core algorithm
/// testable and inspectable — log the plan on compaction to see exactly
/// what allocation decisions were made.
///
/// Fields prefixed with `_` are diagnostic-only: they are populated by
/// plan_context() for logging/inspection but not read during assembly.
struct ContextPlan {
    /// Index into all_entries: header-only entries start here
    header_start: usize,
    /// Index into all_entries: full entries start here (headers end here)
    full_start: usize,
    /// Total journal entries (header-only + full go up to this)
    entry_count: usize,
    /// Index into recent conversation: skip messages before this
    conv_trim: usize,
    /// Total recent conversation messages
    _conv_count: usize,
    /// Tokens used by full journal entries
    _full_tokens: usize,
    /// Tokens used by header-only journal entries
    _header_tokens: usize,
    /// Tokens used by conversation (after trimming)
    _conv_tokens: usize,
    /// Total budget available (after identity, memory, reserve)
    _available: usize,
}
|
|
|
|
|
|
|
|
|
|
/// Build a context window from conversation messages + journal entries.
|
|
|
|
|
/// This is the core algorithm shared by compact() and restore_from_log().
|
|
|
|
|
///
|
|
|
|
|
/// Allocation strategy: identity and memory are fixed costs. The
|
|
|
|
|
/// remaining budget (minus 25% reserve for model output) is split
|
|
|
|
|
/// between journal and conversation. Conversation gets priority —
|
|
|
|
|
/// it's what's happening now. Journal fills the rest, newest first.
|
|
|
|
|
///
|
|
|
|
|
/// When the budget is tight, journal entries are dropped first
|
|
|
|
|
/// (oldest entries go first). If conversation alone exceeds the
|
|
|
|
|
/// budget, oldest messages are trimmed to fit.
|
|
|
|
|
/// Returns (messages, journal_text) — caller stores journal_text in ContextState.
|
|
|
|
|
fn build_context_window(
|
|
|
|
|
context: &ContextState,
|
|
|
|
|
conversation: &[Message],
|
|
|
|
|
model: &str,
|
|
|
|
|
tokenizer: &CoreBPE,
|
|
|
|
|
) -> (Vec<Message>, String) {
|
|
|
|
|
let journal_path = journal::default_journal_path();
|
|
|
|
|
let all_entries = journal::parse_journal(&journal_path);
|
|
|
|
|
dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
|
|
|
|
|
let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
|
|
|
|
|
|
|
|
|
|
let system_prompt = context.system_prompt.clone();
|
|
|
|
|
let context_message = context.render_context_message();
|
|
|
|
|
|
|
|
|
|
// Cap memory to 50% of the context budget so conversation always
|
|
|
|
|
// gets space. Truncate at the last complete section boundary.
|
|
|
|
|
let max_tokens = context_budget_tokens(model);
|
|
|
|
|
let memory_cap = max_tokens / 2;
|
|
|
|
|
let memory_tokens = count(&context_message);
|
|
|
|
|
let context_message = if memory_tokens > memory_cap {
|
|
|
|
|
dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap);
|
|
|
|
|
truncate_at_section(&context_message, memory_cap, &count)
|
|
|
|
|
} else {
|
|
|
|
|
context_message
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let recent_start = find_journal_cutoff(conversation, all_entries.last());
|
|
|
|
|
dbglog!("[ctx] journal cutoff: {} of {} conversation messages are 'recent'",
|
|
|
|
|
conversation.len() - recent_start, conversation.len());
|
|
|
|
|
let recent = &conversation[recent_start..];
|
|
|
|
|
|
|
|
|
|
let plan = plan_context(
|
|
|
|
|
&system_prompt,
|
|
|
|
|
&context_message,
|
|
|
|
|
recent,
|
|
|
|
|
&all_entries,
|
|
|
|
|
model,
|
|
|
|
|
&count,
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Render journal text from the plan
|
|
|
|
|
let journal_text = render_journal_text(&all_entries, &plan);
|
|
|
|
|
dbglog!("[ctx] plan: header_start={} full_start={} entry_count={} conv_trim={} journal_text={} chars",
|
|
|
|
|
plan.header_start, plan.full_start, plan.entry_count, plan.conv_trim, journal_text.len());
|
|
|
|
|
|
|
|
|
|
let messages = assemble_context(
|
|
|
|
|
system_prompt, context_message, &journal_text,
|
|
|
|
|
recent, &plan,
|
|
|
|
|
);
|
|
|
|
|
(messages, journal_text)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Compute the allocation plan: how much budget goes to journal vs
|
|
|
|
|
/// conversation, which entries and messages to include.
|
|
|
|
|
fn plan_context(
|
|
|
|
|
system_prompt: &str,
|
|
|
|
|
context_message: &str,
|
|
|
|
|
recent: &[Message],
|
|
|
|
|
entries: &[journal::JournalEntry],
|
|
|
|
|
model: &str,
|
|
|
|
|
count: &dyn Fn(&str) -> usize,
|
|
|
|
|
) -> ContextPlan {
|
|
|
|
|
let max_tokens = context_budget_tokens(model);
|
|
|
|
|
|
|
|
|
|
// Fixed costs — always included
|
|
|
|
|
let identity_cost = count(system_prompt);
|
|
|
|
|
let memory_cost = count(context_message);
|
|
|
|
|
let reserve = max_tokens / 4;
|
|
|
|
|
let available = max_tokens
|
|
|
|
|
.saturating_sub(identity_cost)
|
|
|
|
|
.saturating_sub(memory_cost)
|
|
|
|
|
.saturating_sub(reserve);
|
|
|
|
|
|
|
|
|
|
// Measure conversation
|
|
|
|
|
let conv_costs: Vec<usize> = recent.iter().map(|m| msg_token_count_fn(m, count)).collect();
|
|
|
|
|
let total_conv: usize = conv_costs.iter().sum();
|
|
|
|
|
|
|
|
|
|
// Journal always gets at least 15% of available budget so it doesn't
|
|
|
|
|
// get squeezed out by large conversations.
|
|
|
|
|
let journal_min = available * 15 / 100;
|
|
|
|
|
let journal_budget = available.saturating_sub(total_conv).max(journal_min);
|
|
|
|
|
|
|
|
|
|
// Fill journal entries newest-first within budget.
|
|
|
|
|
// Tiered: recent entries get full content, older entries get just
|
|
|
|
|
// a header line (timestamp + first line) for timeline awareness.
|
|
|
|
|
let full_budget = journal_budget * 70 / 100;
|
|
|
|
|
let header_budget = journal_budget.saturating_sub(full_budget);
|
|
|
|
|
|
|
|
|
|
// Phase 1: Full entries (newest first)
|
|
|
|
|
let mut full_used = 0;
|
|
|
|
|
let mut n_full = 0;
|
|
|
|
|
for entry in entries.iter().rev() {
|
|
|
|
|
let cost = count(&entry.content) + 10;
|
|
|
|
|
if full_used + cost > full_budget {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
full_used += cost;
|
|
|
|
|
n_full += 1;
|
|
|
|
|
}
|
|
|
|
|
let full_start = entries.len().saturating_sub(n_full);
|
|
|
|
|
|
|
|
|
|
// Phase 2: Header-only entries (continuing backward from where full stopped)
|
|
|
|
|
let mut header_used = 0;
|
|
|
|
|
let mut n_headers = 0;
|
|
|
|
|
for entry in entries[..full_start].iter().rev() {
|
|
|
|
|
let first_line = entry
|
|
|
|
|
.content
|
|
|
|
|
.lines()
|
|
|
|
|
.find(|l| !l.trim().is_empty())
|
|
|
|
|
.unwrap_or("(empty)");
|
|
|
|
|
let cost = count(first_line) + 10;
|
|
|
|
|
if header_used + cost > header_budget {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
header_used += cost;
|
|
|
|
|
n_headers += 1;
|
|
|
|
|
}
|
|
|
|
|
let header_start = full_start.saturating_sub(n_headers);
|
|
|
|
|
|
|
|
|
|
// If conversation exceeds available budget, trim oldest messages
|
|
|
|
|
let journal_used = full_used + header_used;
|
|
|
|
|
let mut conv_trim = 0;
|
|
|
|
|
let mut trimmed_conv = total_conv;
|
|
|
|
|
while trimmed_conv + journal_used > available && conv_trim < recent.len() {
|
|
|
|
|
trimmed_conv -= conv_costs[conv_trim];
|
|
|
|
|
conv_trim += 1;
|
|
|
|
|
}
|
|
|
|
|
// Walk forward to user message boundary
|
|
|
|
|
while conv_trim < recent.len() && recent[conv_trim].role != Role::User {
|
|
|
|
|
conv_trim += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dbglog!("[plan] model={} max_tokens={} available={} (identity={} memory={} reserve={})",
|
|
|
|
|
model, max_tokens, available, identity_cost, memory_cost, reserve);
|
|
|
|
|
dbglog!("[plan] conv: {} msgs, {} tokens total, trimming {} msgs → {} tokens",
|
|
|
|
|
recent.len(), total_conv, conv_trim, trimmed_conv);
|
|
|
|
|
dbglog!("[plan] journal: {} full entries ({}t) + {} headers ({}t)",
|
|
|
|
|
n_full, full_used, n_headers, header_used);
|
|
|
|
|
|
|
|
|
|
ContextPlan {
|
|
|
|
|
header_start,
|
|
|
|
|
full_start,
|
|
|
|
|
entry_count: entries.len(),
|
|
|
|
|
conv_trim,
|
cleanup: fix all build warnings, delete dead DMN context code
- Delete poc-daemon/src/context.rs dead code (git_context, work_state,
irc_digest, recent_commits, uncommitted_files) — replaced by
where-am-i.md and memory graph
- Remove unused imports (BufWriter, Context, similarity)
- Prefix unused variables (_store, _avg_cc, _episodic_ratio, _message)
- #[allow(dead_code)] on public API surface that's not yet wired
(Message::assistant, ConversationLog::message_count/read_all,
Config::context_message, ContextInfo fields)
- Fix to_capnp macro dead_code warning
- Rename _rewrite_store_DISABLED to snake_case
Only remaining warnings are in generated capnp code (can't fix).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 14:20:34 -04:00
|
|
|
_conv_count: recent.len(),
|
|
|
|
|
_full_tokens: full_used,
|
|
|
|
|
_header_tokens: header_used,
|
|
|
|
|
_conv_tokens: trimmed_conv,
|
|
|
|
|
_available: available,
|
2026-03-18 22:44:52 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Render journal entries into text from a context plan.
|
|
|
|
|
fn render_journal_text(
|
|
|
|
|
entries: &[journal::JournalEntry],
|
|
|
|
|
plan: &ContextPlan,
|
|
|
|
|
) -> String {
|
|
|
|
|
let has_journal = plan.header_start < plan.entry_count;
|
|
|
|
|
if !has_journal {
|
|
|
|
|
return String::new();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut text = String::from("[Earlier in this conversation — from your journal]\n\n");
|
|
|
|
|
|
|
|
|
|
// Header-only entries (older) — just timestamp + first line
|
|
|
|
|
for entry in &entries[plan.header_start..plan.full_start] {
|
|
|
|
|
let first_line = entry
|
|
|
|
|
.content
|
|
|
|
|
.lines()
|
|
|
|
|
.find(|l| !l.trim().is_empty())
|
|
|
|
|
.unwrap_or("(empty)");
|
|
|
|
|
text.push_str(&format!(
|
|
|
|
|
"## {} — {}\n",
|
|
|
|
|
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
|
|
|
|
first_line,
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Separator between headers and full entries
|
|
|
|
|
let n_headers = plan.full_start - plan.header_start;
|
|
|
|
|
let n_full = plan.entry_count - plan.full_start;
|
|
|
|
|
if n_headers > 0 && n_full > 0 {
|
|
|
|
|
text.push_str("\n---\n\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Full entries (recent)
|
|
|
|
|
for entry in &entries[plan.full_start..] {
|
|
|
|
|
text.push_str(&format!(
|
|
|
|
|
"## {}\n\n{}\n\n",
|
|
|
|
|
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
|
|
|
|
entry.content
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
text
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Assemble the context window from a plan. No allocation decisions
|
|
|
|
|
/// happen here — just follow the plan to build messages.
|
|
|
|
|
fn assemble_context(
|
|
|
|
|
system_prompt: String,
|
|
|
|
|
context_message: String,
|
|
|
|
|
journal_text: &str,
|
|
|
|
|
recent: &[Message],
|
|
|
|
|
plan: &ContextPlan,
|
|
|
|
|
) -> Vec<Message> {
|
|
|
|
|
let mut messages = vec![Message::system(system_prompt)];
|
|
|
|
|
if !context_message.is_empty() {
|
|
|
|
|
messages.push(Message::user(context_message));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let final_recent = &recent[plan.conv_trim..];
|
|
|
|
|
|
|
|
|
|
if !journal_text.is_empty() {
|
|
|
|
|
messages.push(Message::user(journal_text.to_string()));
|
|
|
|
|
} else if !final_recent.is_empty() {
|
|
|
|
|
messages.push(Message::user(
|
|
|
|
|
"Your context was just rebuilt. Memory files have been \
|
|
|
|
|
reloaded. Your recent conversation continues below. \
|
|
|
|
|
Earlier context is in your journal and memory files."
|
|
|
|
|
.to_string(),
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
messages.extend(final_recent.iter().cloned());
|
|
|
|
|
messages
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Find the conversation index where messages are no longer covered
|
|
|
|
|
/// Truncate a context message to fit within a token budget. Cuts at
|
|
|
|
|
/// section boundaries (lines starting with `---` or `## `) to avoid
|
|
|
|
|
/// splitting mid-section. Drops sections from the end first since
|
|
|
|
|
/// earlier sections (identity, instructions) matter more.
|
|
|
|
|
fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> usize) -> String {
|
|
|
|
|
// Find section boundaries (--- separators between assembled parts)
|
|
|
|
|
let mut boundaries = vec![0usize];
|
|
|
|
|
for (i, line) in text.lines().enumerate() {
|
|
|
|
|
if line.trim() == "---" || line.starts_with("## ") {
|
|
|
|
|
// Find byte offset of this line
|
|
|
|
|
let offset = text.lines().take(i).map(|l| l.len() + 1).sum::<usize>();
|
|
|
|
|
boundaries.push(offset);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
boundaries.push(text.len());
|
|
|
|
|
|
|
|
|
|
// Binary search: find the largest prefix of sections that fits
|
|
|
|
|
let mut best = 0;
|
|
|
|
|
for &end in &boundaries[1..] {
|
|
|
|
|
let slice = &text[..end];
|
|
|
|
|
if count(slice) <= max_tokens {
|
|
|
|
|
best = end;
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if best == 0 {
|
|
|
|
|
// Even the first section doesn't fit — hard truncate
|
|
|
|
|
best = text.len().min(max_tokens * 3); // ~3 chars/token rough estimate
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let truncated = &text[..best];
|
|
|
|
|
dbglog!("[ctx] truncated memory from {} to {} chars ({} tokens)",
|
|
|
|
|
text.len(), truncated.len(), count(truncated));
|
|
|
|
|
truncated.to_string()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Find the conversation index where messages are no longer covered
/// by journal entries. Messages before this index are summarized by
/// the journal; messages from this index onward stay as raw conversation.
/// Walks back to a user message boundary to avoid splitting tool
/// call/result sequences.
fn find_journal_cutoff(
    conversation: &[Message],
    newest_entry: Option<&journal::JournalEntry>,
) -> usize {
    // No journal at all → everything is "recent".
    let cutoff = match newest_entry {
        Some(entry) => entry.timestamp,
        None => return 0,
    };

    // First message strictly newer than the newest journal entry starts
    // the recent region; messages without parseable timestamps are skipped.
    let mut split = conversation.len();
    for (i, msg) in conversation.iter().enumerate() {
        if let Some(ts) = parse_msg_timestamp(msg) {
            if ts > cutoff {
                split = i;
                break;
            }
        }
    }
    // Walk back to user message boundary
    while split > 0 && split < conversation.len() && conversation[split].role != Role::User {
        split -= 1;
    }
    split
}
|
|
|
|
|
|
|
|
|
|
/// Count the token footprint of a message using a token counting function.
|
|
|
|
|
fn msg_token_count_fn(msg: &Message, count: &dyn Fn(&str) -> usize) -> usize {
|
|
|
|
|
let content = msg.content.as_ref().map_or(0, |c| match c {
|
|
|
|
|
MessageContent::Text(s) => count(s),
|
|
|
|
|
MessageContent::Parts(parts) => parts
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|p| match p {
|
|
|
|
|
ContentPart::Text { text } => count(text),
|
|
|
|
|
ContentPart::ImageUrl { .. } => 85,
|
|
|
|
|
})
|
|
|
|
|
.sum(),
|
|
|
|
|
});
|
|
|
|
|
let tools = msg.tool_calls.as_ref().map_or(0, |calls| {
|
|
|
|
|
calls
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|c| count(&c.function.arguments) + count(&c.function.name))
|
|
|
|
|
.sum()
|
|
|
|
|
});
|
|
|
|
|
content + tools
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Count the token footprint of a message using BPE tokenization.
///
/// Thin wrapper over msg_token_count_fn with the tokenizer's
/// encode_with_special_tokens as the counting function.
fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize {
    msg_token_count_fn(msg, &|s| tokenizer.encode_with_special_tokens(s).len())
}
|
|
|
|
|
|
|
|
|
|
/// Detect context window overflow errors from the API.
|
|
|
|
|
/// Different providers phrase this differently; we check for common patterns.
|
|
|
|
|
/// OpenRouter wraps upstream errors, so we check both the wrapper and the raw message.
|
|
|
|
|
fn is_context_overflow(err: &anyhow::Error) -> bool {
|
|
|
|
|
let msg = err.to_string().to_lowercase();
|
|
|
|
|
msg.contains("context length")
|
|
|
|
|
|| msg.contains("token limit")
|
|
|
|
|
|| msg.contains("too many tokens")
|
|
|
|
|
|| msg.contains("maximum context")
|
|
|
|
|
|| msg.contains("prompt is too long")
|
|
|
|
|
|| msg.contains("request too large")
|
|
|
|
|
|| msg.contains("input validation error")
|
|
|
|
|
|| msg.contains("content length limit")
|
|
|
|
|
|| (msg.contains("400") && msg.contains("tokens"))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Detect model/provider errors delivered inside the SSE stream.
/// OpenRouter returns HTTP 200 but finish_reason="error" with
/// partial content (e.g. "system") — we surface this as an error
/// so the turn loop can retry.
///
/// Matches on the marker string our stream handler embeds in the error.
fn is_stream_error(err: &anyhow::Error) -> bool {
    err.to_string().contains("model stream error")
}
|
|
|
|
|
|
|
|
|
|
/// Parse a message's timestamp field into a DateTime.
|
|
|
|
|
fn parse_msg_timestamp(msg: &Message) -> Option<DateTime<Utc>> {
|
|
|
|
|
msg.timestamp
|
|
|
|
|
.as_ref()
|
|
|
|
|
.and_then(|ts| DateTime::parse_from_rfc3339(ts).ok())
|
|
|
|
|
.map(|dt| dt.with_timezone(&Utc))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Create a short summary of tool args for the tools pane header.
|
|
|
|
|
fn summarize_args(tool_name: &str, args: &serde_json::Value) -> String {
|
|
|
|
|
match tool_name {
|
|
|
|
|
"read_file" | "write_file" | "edit_file" => args["file_path"]
|
|
|
|
|
.as_str()
|
|
|
|
|
.unwrap_or("")
|
|
|
|
|
.to_string(),
|
|
|
|
|
"bash" => {
|
|
|
|
|
let cmd = args["command"].as_str().unwrap_or("");
|
|
|
|
|
if cmd.len() > 60 {
|
|
|
|
|
let end = cmd.char_indices()
|
|
|
|
|
.map(|(i, _)| i)
|
|
|
|
|
.take_while(|&i| i <= 60)
|
|
|
|
|
.last()
|
|
|
|
|
.unwrap_or(0);
|
|
|
|
|
format!("{}...", &cmd[..end])
|
|
|
|
|
} else {
|
|
|
|
|
cmd.to_string()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
"grep" => {
|
|
|
|
|
let pattern = args["pattern"].as_str().unwrap_or("");
|
|
|
|
|
let path = args["path"].as_str().unwrap_or(".");
|
|
|
|
|
format!("{} in {}", pattern, path)
|
|
|
|
|
}
|
|
|
|
|
"glob" => args["pattern"]
|
|
|
|
|
.as_str()
|
|
|
|
|
.unwrap_or("")
|
|
|
|
|
.to_string(),
|
|
|
|
|
"view_image" => {
|
|
|
|
|
if let Some(pane) = args["pane_id"].as_str() {
|
|
|
|
|
format!("pane {}", pane)
|
|
|
|
|
} else {
|
|
|
|
|
args["file_path"].as_str().unwrap_or("").to_string()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
"journal" => {
|
|
|
|
|
let entry = args["entry"].as_str().unwrap_or("");
|
|
|
|
|
if entry.len() > 60 {
|
|
|
|
|
format!("{}...", &entry[..60])
|
|
|
|
|
} else {
|
|
|
|
|
entry.to_string()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
"yield_to_user" => args["message"]
|
|
|
|
|
.as_str()
|
|
|
|
|
.unwrap_or("")
|
|
|
|
|
.to_string(),
|
|
|
|
|
"switch_model" => args["model"]
|
|
|
|
|
.as_str()
|
|
|
|
|
.unwrap_or("")
|
|
|
|
|
.to_string(),
|
|
|
|
|
"pause" => String::new(),
|
|
|
|
|
_ => String::new(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
// Parsing functions (parse_leaked_tool_calls, strip_leaked_artifacts)
|
|
|
|
|
// and their tests live in parsing.rs
|