Replace flat-file journal parser with direct store query for EpisodicSession nodes. Filter journal entries to only those older than the oldest conversation message (plus one overlap entry to avoid gaps). Falls back to 20 recent entries when no conversation exists yet. Fixes: poc-agent context window showing 0 journal entries. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
1178 lines
46 KiB
Rust
1178 lines
46 KiB
Rust
// agent.rs — Core agent loop
|
|
//
|
|
// The simplest possible implementation of the agent pattern:
|
|
// send messages + tool definitions to the model, if it responds
|
|
// with tool calls then dispatch them and loop, if it responds
|
|
// with text then display it and wait for the next prompt.
|
|
//
|
|
// Uses streaming by default so text tokens appear as they're
|
|
// generated. Tool calls are accumulated from stream deltas and
|
|
// dispatched after the stream completes.
|
|
//
|
|
// The DMN (dmn.rs) is the outer loop that decides what prompts
|
|
// to send here. This module just handles single turns: prompt
|
|
// in, response out, tool calls dispatched.
|
|
|
|
use anyhow::Result;
|
|
use tiktoken_rs::CoreBPE;
|
|
|
|
use crate::agent::api::ApiClient;
|
|
use crate::agent::journal;
|
|
use crate::agent::log::ConversationLog;
|
|
use crate::agent::api::StreamEvent;
|
|
use crate::agent::tools;
|
|
use crate::agent::tools::ProcessTracker;
|
|
use crate::agent::types::*;
|
|
use crate::agent::ui_channel::{ContextSection, SharedContextState, StatusInfo, StreamTarget, UiMessage, UiSender};
|
|
|
|
/// Result of a single agent turn.
///
/// Returned by [`Agent::turn`] once the model produces a text-only
/// response; summarizes everything that happened on the way there.
/// Derives `Debug` so callers (DMN loop, logging) can dump it.
#[derive(Debug)]
pub struct TurnResult {
    /// The text response (already sent through UI channel).
    #[allow(dead_code)]
    pub text: String,
    /// Whether the model called yield_to_user during this turn.
    pub yield_requested: bool,
    /// Whether any tools (other than yield_to_user) were called.
    pub had_tool_calls: bool,
    /// Number of tool calls that returned errors this turn.
    pub tool_errors: u32,
    /// Model name to switch to after this turn completes.
    pub model_switch: Option<String>,
    /// Agent requested DMN pause (full stop on autonomous behavior).
    pub dmn_pause: bool,
}
|
|
|
|
/// Accumulated state across tool dispatches within a single turn.
///
/// `Default` produces the all-clear starting state (no yield, no tool
/// calls, zero errors, no model switch, no pause) — exactly the state
/// a turn begins with.
#[derive(Default)]
struct DispatchState {
    /// Set when yield_to_user is dispatched.
    yield_requested: bool,
    /// Set when any non-yield tool is dispatched.
    had_tool_calls: bool,
    /// Count of tool results that started with "Error:".
    tool_errors: u32,
    /// Model name requested by a tool, if any.
    model_switch: Option<String>,
    /// Set when a tool requested a full DMN pause.
    dmn_pause: bool,
}
|
|
|
|
pub struct Agent {
    /// API client — owns the model name and streaming entry point.
    client: ApiClient,
    /// Full message list sent to the model each request: system prompt
    /// and context messages first, then the live conversation.
    messages: Vec<Message>,
    /// Tool definitions advertised to the model on every request.
    tool_defs: Vec<ToolDef>,
    /// Last known prompt token count from the API (tracks context size).
    last_prompt_tokens: u32,
    /// Shared process tracker for bash tool — lets TUI show/kill running commands.
    pub process_tracker: ProcessTracker,
    /// Current reasoning effort level ("none", "low", "high").
    pub reasoning_effort: String,
    /// Persistent conversation log — append-only record of all messages.
    conversation_log: Option<ConversationLog>,
    /// Current context window budget breakdown.
    pub context_budget: ContextBudget,
    /// BPE tokenizer for token counting (cl100k_base — close enough
    /// for Claude and Qwen budget allocation, ~85-90% count accuracy).
    tokenizer: CoreBPE,
    /// Mutable context state — personality, working stack, etc.
    pub context: ContextState,
    /// Shared live context summary — TUI reads this directly for debug screen.
    pub shared_context: SharedContextState,
    /// Stable session ID for memory-search dedup across turns.
    session_id: String,
    /// Agent orchestration state (surface-observe, journal, reflect).
    pub agent_cycles: crate::subconscious::subconscious::AgentCycleState,
}
|
|
|
|
impl Agent {
|
|
/// Construct an agent: build tool definitions and the tokenizer, seed
/// context state, load the startup journal and persisted working stack,
/// then push the initial context messages (system prompt, rendered
/// context, journal text) and publish the first budget snapshot.
pub fn new(
    client: ApiClient,
    system_prompt: String,
    personality: Vec<(String, String)>,
    conversation_log: Option<ConversationLog>,
    shared_context: SharedContextState,
) -> Self {
    let tool_defs = tools::definitions();
    // Tokenizer load is infallible in practice; failure here is a
    // broken build, so expect() is appropriate.
    let tokenizer = tiktoken_rs::cl100k_base()
        .expect("failed to load cl100k_base tokenizer");

    let context = ContextState {
        system_prompt: system_prompt.clone(),
        personality,
        journal: String::new(),
        working_stack: Vec::new(),
        loaded_nodes: Vec::new(),
    };
    // Session ID is timestamp-based so every process start gets a
    // fresh, sortable identifier.
    let session_id = format!("poc-agent-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
    let agent_cycles = crate::subconscious::subconscious::AgentCycleState::new(&session_id);
    let mut agent = Self {
        client,
        messages: Vec::new(),
        tool_defs,
        last_prompt_tokens: 0,
        process_tracker: ProcessTracker::new(),
        reasoning_effort: "none".to_string(),
        conversation_log,
        context_budget: ContextBudget::default(),
        tokenizer,
        context,
        shared_context,
        session_id,
        agent_cycles,
    };

    // Load recent journal entries at startup for orientation.
    // Must happen before the context messages are pushed below,
    // since context.journal is pushed as a message.
    agent.load_startup_journal();
    agent.load_working_stack();

    agent.push_context(Message::system(system_prompt));
    let rendered = agent.context.render_context_message();
    if !rendered.is_empty() {
        agent.push_context(Message::user(rendered));
    }
    if !agent.context.journal.is_empty() {
        agent.push_context(Message::user(agent.context.journal.clone()));
    }
    agent.measure_budget();
    agent.publish_context_state();
    agent
}
|
|
|
|
/// Run agent orchestration cycle and return formatted output to inject.
|
|
fn run_agent_cycle(&mut self) -> Option<String> {
|
|
let transcript_path = self.conversation_log.as_ref()
|
|
.map(|l| l.path().to_string_lossy().to_string())
|
|
.unwrap_or_default();
|
|
|
|
let session = crate::session::HookSession::from_fields(
|
|
self.session_id.clone(),
|
|
transcript_path,
|
|
"UserPromptSubmit".into(),
|
|
);
|
|
|
|
self.agent_cycles.trigger(&session);
|
|
let text = crate::subconscious::subconscious::format_agent_output(&self.agent_cycles.last_output);
|
|
if text.trim().is_empty() {
|
|
None
|
|
} else {
|
|
Some(text)
|
|
}
|
|
}
|
|
|
|
/// Push a conversation message — stamped and logged.
|
|
fn push_message(&mut self, mut msg: Message) {
|
|
msg.stamp();
|
|
if let Some(ref log) = self.conversation_log {
|
|
if let Err(e) = log.append(&msg) {
|
|
eprintln!("warning: failed to log message: {:#}", e);
|
|
}
|
|
}
|
|
self.messages.push(msg);
|
|
}
|
|
|
|
/// Push a context-only message (system prompt, identity context,
/// journal summaries). Not logged — these are reconstructed on
/// every startup/compaction.
fn push_context(&mut self, msg: Message) {
    // Unlike push_message: no timestamp stamping, no log append.
    self.messages.push(msg);
}
|
|
|
|
/// Measure context window usage by category. Uses the BPE tokenizer
/// for direct token counting (no chars/4 approximation).
///
/// Walks self.messages in order, classifying each into identity,
/// journal, or conversation buckets. Classification is positional
/// and prefix-based: once a message is classified as conversation,
/// everything after it is conversation too.
fn measure_budget(&mut self) {
    let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();

    let mut id_tokens: usize = 0;
    let mut jnl_tokens: usize = 0;
    let mut conv_tokens: usize = 0;
    // Latches true at the first assistant/tool message or the first
    // user message that isn't identity/journal context.
    let mut in_conversation = false;

    for msg in &self.messages {
        let tokens = crate::agent::context::msg_token_count(&self.tokenizer, msg);

        if in_conversation {
            conv_tokens += tokens;
            continue;
        }

        match msg.role {
            Role::System => id_tokens += tokens,
            Role::User => {
                // Journal/compaction messages are recognized by the
                // fixed prefixes the context builder writes.
                let text = msg.content_text();
                if text.starts_with("[Earlier in this conversation") {
                    jnl_tokens += tokens;
                } else if text.starts_with("Your context was just rebuilt") {
                    jnl_tokens += tokens;
                } else if jnl_tokens == 0 && conv_tokens == 0 {
                    // Personality context — part of identity
                    id_tokens += tokens;
                } else {
                    in_conversation = true;
                    conv_tokens += tokens;
                }
            }
            _ => {
                in_conversation = true;
                conv_tokens += tokens;
            }
        }
    }

    // Memory = nodes loaded during the session via tool calls
    let mem_tokens: usize = self.context.loaded_nodes.iter()
        .map(|node| count(&node.render()))
        .sum();

    self.context_budget = ContextBudget {
        identity_tokens: id_tokens,
        memory_tokens: mem_tokens,
        journal_tokens: jnl_tokens,
        conversation_tokens: conv_tokens,
        window_tokens: crate::agent::context::model_context_window(&self.client.model),
    };
}
|
|
|
|
/// Send a user message and run the agent loop until the model
/// produces a text response (no more tool calls). Streams text
/// and tool activity through the UI channel.
///
/// Three failure modes are retried in-place, each capped at two
/// attempts per turn: context overflow (emergency compact + retry),
/// transient stream errors (2s sleep + retry), and empty responses
/// (nudge message + retry).
pub async fn turn(
    &mut self,
    user_input: &str,
    ui_tx: &UiSender,
    target: StreamTarget,
) -> Result<TurnResult> {
    // Run agent orchestration cycle (surface-observe, reflect, journal)
    // and, if it produced output, attach it to the user message inside
    // a <system-reminder> block.
    if let Some(hook_output) = self.run_agent_cycle() {
        let enriched = format!("{}\n\n<system-reminder>\n{}\n</system-reminder>",
            user_input, hook_output);
        self.push_message(Message::user(enriched));
    } else {
        self.push_message(Message::user(user_input));
    }
    let _ = ui_tx.send(UiMessage::AgentUpdate(self.agent_cycles.snapshots()));

    // Per-turn retry budgets.
    let mut overflow_retries: u32 = 0;
    let mut empty_retries: u32 = 0;
    let mut ds = DispatchState {
        yield_requested: false,
        had_tool_calls: false,
        tool_errors: 0,
        model_switch: None,
        dmn_pause: false,
    };

    loop {
        let _ = ui_tx.send(UiMessage::Activity("thinking...".into()));

        // Stream events from the API — we route each event to the
        // appropriate UI pane rather than letting the API layer do it.
        let mut rx = self.client.start_stream(
            &self.messages,
            Some(&self.tool_defs),
            ui_tx,
            &self.reasoning_effort,
            None,
            None, // priority: interactive
        );

        let mut content = String::new();
        let mut tool_calls: Vec<ToolCall> = Vec::new();
        let mut usage = None;
        let mut finish_reason = None;
        let mut in_tool_call = false;
        let mut stream_error = None;
        let mut first_content = true;
        // Buffer for content not yet sent to UI — holds a tail
        // that might be a partial <tool_call> tag.
        let mut display_buf = String::new();

        while let Some(event) = rx.recv().await {
            match event {
                StreamEvent::Content(text) => {
                    if first_content {
                        let _ = ui_tx.send(UiMessage::Activity("streaming...".into()));
                        first_content = false;
                    }
                    // `content` accumulates everything; `display_buf`
                    // only what may still be shown to the user.
                    content.push_str(&text);

                    if in_tool_call {
                        // Already inside a tool call — suppress display.
                    } else {
                        display_buf.push_str(&text);

                        if let Some(pos) = display_buf.find("<tool_call>") {
                            // Flush content before the tag, suppress the rest.
                            let before = &display_buf[..pos];
                            if !before.is_empty() {
                                let _ = ui_tx.send(UiMessage::TextDelta(before.to_string(), target));
                            }
                            display_buf.clear();
                            in_tool_call = true;
                        } else {
                            // Flush display_buf except a tail that could be
                            // a partial "<tool_call>" (10 chars).
                            let safe = display_buf.len().saturating_sub(10);
                            // Find a char boundary at or before safe
                            // NOTE(review): floor_char_boundary is an unstable
                            // std API — presumably this builds on nightly; confirm.
                            let safe = display_buf.floor_char_boundary(safe);
                            if safe > 0 {
                                let flush = display_buf[..safe].to_string();
                                display_buf = display_buf[safe..].to_string();
                                let _ = ui_tx.send(UiMessage::TextDelta(flush, target));
                            }
                        }
                    }
                }
                StreamEvent::Reasoning(text) => {
                    let _ = ui_tx.send(UiMessage::Reasoning(text));
                }
                StreamEvent::ToolCallDelta { index, id, call_type, name, arguments } => {
                    // Grow the accumulator so `index` is addressable,
                    // then merge whichever fields this delta carries.
                    while tool_calls.len() <= index {
                        tool_calls.push(ToolCall {
                            id: String::new(),
                            call_type: "function".to_string(),
                            function: FunctionCall { name: String::new(), arguments: String::new() },
                        });
                    }
                    if let Some(id) = id { tool_calls[index].id = id; }
                    if let Some(ct) = call_type { tool_calls[index].call_type = ct; }
                    if let Some(n) = name { tool_calls[index].function.name = n; }
                    if let Some(a) = arguments { tool_calls[index].function.arguments.push_str(&a); }
                }
                StreamEvent::Usage(u) => usage = Some(u),
                StreamEvent::Finished { reason, .. } => {
                    finish_reason = Some(reason);
                    break;
                }
                StreamEvent::Error(e) => {
                    stream_error = Some(e);
                    break;
                }
            }
        }

        // Handle stream errors with retry logic
        if let Some(e) = stream_error {
            let err = anyhow::anyhow!("{}", e);
            if crate::agent::context::is_context_overflow(&err) && overflow_retries < 2 {
                overflow_retries += 1;
                let _ = ui_tx.send(UiMessage::Info(format!(
                    "[context overflow — compacting and retrying ({}/2)]",
                    overflow_retries,
                )));
                self.emergency_compact();
                continue;
            }
            if crate::agent::context::is_stream_error(&err) && empty_retries < 2 {
                empty_retries += 1;
                let _ = ui_tx.send(UiMessage::Info(format!(
                    "[stream error: {} — retrying ({}/2)]",
                    e, empty_retries,
                )));
                tokio::time::sleep(std::time::Duration::from_secs(2)).await;
                continue;
            }
            return Err(err);
        }

        if finish_reason.as_deref() == Some("error") {
            let detail = if content.is_empty() { "no details".into() } else { content };
            return Err(anyhow::anyhow!("model stream error: {}", detail));
        }

        // Flush remaining display buffer (normal responses without tool calls).
        if !in_tool_call && !display_buf.is_empty() {
            let _ = ui_tx.send(UiMessage::TextDelta(display_buf, target));
        }
        if !content.is_empty() && !in_tool_call {
            let _ = ui_tx.send(UiMessage::TextDelta("\n".to_string(), target));
        }

        let msg = crate::agent::api::build_response_message(content, tool_calls);

        // Strip ephemeral tool calls (journal) that the API has
        // now processed. They're persisted to disk; no need to keep
        // them in the conversation history burning tokens.
        self.strip_ephemeral_tool_calls();

        if let Some(usage) = &usage {
            self.last_prompt_tokens = usage.prompt_tokens;
            self.measure_budget();
            self.publish_context_state();
            let _ = ui_tx.send(UiMessage::StatusUpdate(StatusInfo {
                dmn_state: String::new(), // filled by main loop
                dmn_turns: 0,
                dmn_max_turns: 0,
                prompt_tokens: usage.prompt_tokens,
                completion_tokens: usage.completion_tokens,
                model: self.client.model.clone(),
                turn_tools: 0, // tracked by TUI from ToolCall messages
                context_budget: self.context_budget.status_string(),
            }));
        }

        // Empty response — model returned finish=stop with no content
        // or tool calls. Inject a nudge so the retry has different input.
        let has_content = msg.content.is_some();
        let has_tools = msg.tool_calls.as_ref().map_or(false, |tc| !tc.is_empty());
        if !has_content && !has_tools {
            if empty_retries < 2 {
                empty_retries += 1;
                let _ = ui_tx.send(UiMessage::Debug(format!(
                    "empty response, injecting nudge and retrying ({}/2)",
                    empty_retries,
                )));
                self.push_message(Message::user(
                    "[system] Your previous response was empty. \
                    Please respond with text or use a tool."
                ));
                continue;
            }
            // After max retries, fall through — return the empty response
        } else {
            empty_retries = 0;
        }

        // Tool calls (structured from API, or recovered from content
        // by build_response_message if the model leaked them as XML).
        if let Some(ref tool_calls) = msg.tool_calls {
            if !tool_calls.is_empty() {
                self.push_message(msg.clone());
                for call in tool_calls {
                    self.dispatch_tool_call(call, None, ui_tx, &mut ds)
                        .await;
                }
                continue;
            }
        }

        // Genuinely text-only response
        let text = msg.content_text().to_string();
        let _ = ui_tx.send(UiMessage::Activity(String::new()));
        self.push_message(msg);

        return Ok(TurnResult {
            text,
            yield_requested: ds.yield_requested,
            had_tool_calls: ds.had_tool_calls,
            tool_errors: ds.tool_errors,
            model_switch: ds.model_switch,
            dmn_pause: ds.dmn_pause,
        });
    }
}
|
|
|
|
/// Dispatch a single tool call: send UI annotations, run the tool,
/// push results into the conversation, handle images.
///
/// `tag` is an optional suffix shown in the activity label. There
/// are three dispatch paths: working_stack and memory_* tools need
/// `&mut self` for context state and return early; everything else
/// goes through the generic async tools::dispatch.
async fn dispatch_tool_call(
    &mut self,
    call: &ToolCall,
    tag: Option<&str>,
    ui_tx: &UiSender,
    ds: &mut DispatchState,
) {
    // Malformed JSON arguments degrade to a default value rather
    // than aborting the dispatch.
    let args: serde_json::Value =
        serde_json::from_str(&call.function.arguments).unwrap_or_default();

    let args_summary = summarize_args(&call.function.name, &args);
    let label = match tag {
        Some(t) => format!("calling: {} ({})", call.function.name, t),
        None => format!("calling: {}", call.function.name),
    };
    let _ = ui_tx.send(UiMessage::Activity(label));
    let _ = ui_tx.send(UiMessage::ToolCall {
        name: call.function.name.clone(),
        args_summary: args_summary.clone(),
    });
    let _ = ui_tx.send(UiMessage::ToolStarted {
        id: call.id.clone(),
        name: call.function.name.clone(),
        detail: args_summary,
    });

    // Handle working_stack tool — needs &mut self for context state
    if call.function.name == "working_stack" {
        let result = tools::working_stack::handle(&args, &mut self.context.working_stack);
        let output = tools::ToolOutput {
            text: result.clone(),
            is_yield: false,
            images: Vec::new(),
            model_switch: None,
            dmn_pause: false,
        };
        let _ = ui_tx.send(UiMessage::ToolResult {
            name: call.function.name.clone(),
            result: output.text.clone(),
        });
        let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });
        self.push_message(Message::tool_result(&call.id, &output.text));
        ds.had_tool_calls = true;

        // Re-render the context message so the model sees the updated stack
        if !result.starts_with("Error:") {
            self.refresh_context_message();
        }
        return;
    }

    // Handle memory tools — needs &mut self for node tracking
    if call.function.name.starts_with("memory_") {
        let result = tools::memory::dispatch(&call.function.name, &args, None);
        let text = match &result {
            Ok(s) => s.clone(),
            Err(e) => format!("Error: {:#}", e),
        };

        // Track loaded/updated nodes
        if result.is_ok() {
            match call.function.name.as_str() {
                "memory_render" | "memory_links" => {
                    if let Some(key) = args.get("key").and_then(|v| v.as_str()) {
                        if let Some(node) = crate::hippocampus::memory::MemoryNode::load(key) {
                            // Replace if already tracked, otherwise add
                            if let Some(existing) = self.context.loaded_nodes.iter_mut()
                                .find(|n| n.key == node.key) {
                                *existing = node;
                            } else {
                                self.context.loaded_nodes.push(node);
                            }
                        }
                    }
                }
                "memory_write" => {
                    if let Some(key) = args.get("key").and_then(|v| v.as_str()) {
                        if let Some(node) = crate::hippocampus::memory::MemoryNode::load(key) {
                            // Refresh if already tracked
                            if let Some(existing) = self.context.loaded_nodes.iter_mut()
                                .find(|n| n.key == node.key) {
                                *existing = node;
                            }
                            // Don't auto-add writes — only renders register nodes
                        }
                    }
                }
                _ => {}
            }
        }

        let output = tools::ToolOutput {
            text,
            is_yield: false,
            images: Vec::new(),
            model_switch: None,
            dmn_pause: false,
        };
        let _ = ui_tx.send(UiMessage::ToolResult {
            name: call.function.name.clone(),
            result: output.text.clone(),
        });
        let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });
        self.push_message(Message::tool_result(&call.id, &output.text));
        ds.had_tool_calls = true;
        if output.text.starts_with("Error:") {
            ds.tool_errors += 1;
        }
        // Loaded nodes changed — refresh budget numbers immediately.
        self.measure_budget();
        self.publish_context_state();
        return;
    }

    // Generic path: stateless tools.
    let output =
        tools::dispatch(&call.function.name, &args, &self.process_tracker).await;

    if output.is_yield {
        ds.yield_requested = true;
    } else {
        ds.had_tool_calls = true;
    }
    if output.model_switch.is_some() {
        ds.model_switch = output.model_switch;
    }
    if output.dmn_pause {
        ds.dmn_pause = true;
    }
    if output.text.starts_with("Error:") {
        ds.tool_errors += 1;
    }

    let _ = ui_tx.send(UiMessage::ToolResult {
        name: call.function.name.clone(),
        result: output.text.clone(),
    });
    let _ = ui_tx.send(UiMessage::ToolFinished { id: call.id.clone() });

    self.push_message(Message::tool_result(&call.id, &output.text));

    if !output.images.is_empty() {
        // Only one live image in context at a time — age out any
        // previous ones to avoid accumulating ~90KB+ per image.
        self.age_out_images();
        self.push_message(Message::user_with_images(
            "Here is the image you requested:",
            &output.images,
        ));
    }
}
|
|
|
|
/// Build context state summary for the debug screen.
|
|
pub fn context_state_summary(&self) -> Vec<ContextSection> {
|
|
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
|
|
|
let mut sections = Vec::new();
|
|
|
|
// System prompt
|
|
sections.push(ContextSection {
|
|
name: "System prompt".into(),
|
|
tokens: count(&self.context.system_prompt),
|
|
content: self.context.system_prompt.clone(),
|
|
children: Vec::new(),
|
|
});
|
|
|
|
// Personality — parent with file children
|
|
let personality_children: Vec<ContextSection> = self.context.personality.iter()
|
|
.map(|(name, content)| ContextSection {
|
|
name: name.clone(),
|
|
tokens: count(content),
|
|
content: content.clone(),
|
|
children: Vec::new(),
|
|
})
|
|
.collect();
|
|
let personality_tokens: usize = personality_children.iter().map(|c| c.tokens).sum();
|
|
sections.push(ContextSection {
|
|
name: format!("Personality ({} files)", personality_children.len()),
|
|
tokens: personality_tokens,
|
|
content: String::new(),
|
|
children: personality_children,
|
|
});
|
|
|
|
// Journal — split into per-entry children
|
|
{
|
|
let mut journal_children = Vec::new();
|
|
let mut current_header = String::new();
|
|
let mut current_body = String::new();
|
|
for line in self.context.journal.lines() {
|
|
if line.starts_with("## ") {
|
|
if !current_header.is_empty() {
|
|
let body = std::mem::take(&mut current_body);
|
|
let preview: String = body.lines().next().unwrap_or("").chars().take(60).collect();
|
|
journal_children.push(ContextSection {
|
|
name: format!("{}: {}", current_header, preview),
|
|
tokens: count(&body),
|
|
content: body,
|
|
children: Vec::new(),
|
|
});
|
|
}
|
|
current_header = line.trim_start_matches("## ").to_string();
|
|
current_body.clear();
|
|
} else {
|
|
if !current_body.is_empty() || !line.is_empty() {
|
|
current_body.push_str(line);
|
|
current_body.push('\n');
|
|
}
|
|
}
|
|
}
|
|
if !current_header.is_empty() {
|
|
let preview: String = current_body.lines().next().unwrap_or("").chars().take(60).collect();
|
|
journal_children.push(ContextSection {
|
|
name: format!("{}: {}", current_header, preview),
|
|
tokens: count(¤t_body),
|
|
content: current_body,
|
|
children: Vec::new(),
|
|
});
|
|
}
|
|
let journal_tokens: usize = journal_children.iter().map(|c| c.tokens).sum();
|
|
sections.push(ContextSection {
|
|
name: format!("Journal ({} entries)", journal_children.len()),
|
|
tokens: journal_tokens,
|
|
content: String::new(),
|
|
children: journal_children,
|
|
});
|
|
}
|
|
|
|
// Working stack — instructions + items as children
|
|
let instructions = std::fs::read_to_string(WORKING_STACK_INSTRUCTIONS)
|
|
.unwrap_or_default();
|
|
let mut stack_children = vec![ContextSection {
|
|
name: "Instructions".into(),
|
|
tokens: count(&instructions),
|
|
content: instructions,
|
|
children: Vec::new(),
|
|
}];
|
|
for (i, item) in self.context.working_stack.iter().enumerate() {
|
|
let marker = if i == self.context.working_stack.len() - 1 { "→" } else { " " };
|
|
stack_children.push(ContextSection {
|
|
name: format!("{} [{}] {}", marker, i, item),
|
|
tokens: count(item),
|
|
content: String::new(),
|
|
children: Vec::new(),
|
|
});
|
|
}
|
|
let stack_tokens: usize = stack_children.iter().map(|c| c.tokens).sum();
|
|
sections.push(ContextSection {
|
|
name: format!("Working stack ({} items)", self.context.working_stack.len()),
|
|
tokens: stack_tokens,
|
|
content: String::new(),
|
|
children: stack_children,
|
|
});
|
|
|
|
// Loaded memory nodes — tracked by memory tools
|
|
if !self.context.loaded_nodes.is_empty() {
|
|
let node_children: Vec<ContextSection> = self.context.loaded_nodes.iter()
|
|
.map(|node| {
|
|
let rendered = node.render();
|
|
ContextSection {
|
|
name: format!("{} (v{}, w={:.2}, {} links)",
|
|
node.key, node.version, node.weight, node.links.len()),
|
|
tokens: count(&rendered),
|
|
content: String::new(), // don't duplicate in debug view
|
|
children: Vec::new(),
|
|
}
|
|
})
|
|
.collect();
|
|
let node_tokens: usize = node_children.iter().map(|c| c.tokens).sum();
|
|
sections.push(ContextSection {
|
|
name: format!("Memory nodes ({} loaded)", self.context.loaded_nodes.len()),
|
|
tokens: node_tokens,
|
|
content: String::new(),
|
|
children: node_children,
|
|
});
|
|
}
|
|
|
|
// Conversation — each message as a child
|
|
let conv_start = self.messages.iter()
|
|
.position(|m| m.role == Role::Assistant || m.role == Role::Tool)
|
|
.unwrap_or(self.messages.len());
|
|
let conv_messages = &self.messages[conv_start..];
|
|
let conv_children: Vec<ContextSection> = conv_messages.iter().enumerate()
|
|
.map(|(i, msg)| {
|
|
let text = msg.content.as_ref()
|
|
.map(|c| c.as_text().to_string())
|
|
.unwrap_or_default();
|
|
let tool_info = msg.tool_calls.as_ref().map(|tc| {
|
|
tc.iter()
|
|
.map(|c| c.function.name.clone())
|
|
.collect::<Vec<_>>()
|
|
.join(", ")
|
|
});
|
|
let label = match (&msg.role, &tool_info) {
|
|
(_, Some(tools)) => format!("[tool_call: {}]", tools),
|
|
_ => {
|
|
let preview: String = text.chars().take(60).collect();
|
|
let preview = preview.replace('\n', " ");
|
|
if text.len() > 60 { format!("{}...", preview) } else { preview }
|
|
}
|
|
};
|
|
let tokens = count(&text);
|
|
let role_name = match msg.role {
|
|
Role::Assistant => "PoC",
|
|
Role::User => "Kent",
|
|
Role::Tool => "tool",
|
|
Role::System => "system",
|
|
};
|
|
ContextSection {
|
|
name: format!("[{}] {}: {}", conv_start + i, role_name, label),
|
|
tokens,
|
|
content: text,
|
|
children: Vec::new(),
|
|
}
|
|
})
|
|
.collect();
|
|
let conv_tokens: usize = conv_children.iter().map(|c| c.tokens).sum();
|
|
sections.push(ContextSection {
|
|
name: format!("Conversation ({} messages)", conv_children.len()),
|
|
tokens: conv_tokens,
|
|
content: String::new(),
|
|
children: conv_children,
|
|
});
|
|
|
|
sections
|
|
}
|
|
|
|
/// Load recent journal entries at startup for orientation.
/// Uses the same budget logic as compaction but with empty conversation.
///
/// Queries the store directly for EpisodicSession nodes (no flat-file
/// parsing). Entries are filtered to those older than the oldest logged
/// conversation message, plus one overlapping entry so there is no gap;
/// with no conversation yet, the 20 most recent entries are used.
fn load_startup_journal(&mut self) {
    // No store available — leave the journal empty.
    let store = match crate::store::Store::load() {
        Ok(s) => s,
        Err(_) => return,
    };

    // Find oldest message timestamp in conversation log
    let oldest_msg_ts = self.conversation_log.as_ref()
        .and_then(|log| log.oldest_timestamp());

    // Get journal entries from the memory graph
    let mut journal_nodes: Vec<_> = store.nodes.values()
        .filter(|n| n.node_type == crate::store::NodeType::EpisodicSession)
        .collect();
    // Ascending by creation time — the cutoff loop below relies on this.
    journal_nodes.sort_by_key(|n| n.created_at);

    // Filter: entries older than oldest conversation message,
    // plus the first one that overlaps (no gap)
    let entries: Vec<journal::JournalEntry> = if let Some(cutoff) = oldest_msg_ts {
        let mut result = Vec::new();
        let mut included_overlap = false;
        for node in &journal_nodes {
            let ts = chrono::DateTime::from_timestamp(node.created_at, 0)
                .unwrap_or_default();
            // Include everything before the cutoff, and exactly one
            // entry at/after it (the overlap), then stop.
            if ts < cutoff || !included_overlap {
                result.push(journal::JournalEntry {
                    timestamp: ts,
                    content: node.content.clone(),
                });
                if ts >= cutoff {
                    included_overlap = true;
                }
            } else {
                break;
            }
        }
        result
    } else {
        // No conversation yet — include recent entries for orientation
        let n = 20;
        let skip = journal_nodes.len().saturating_sub(n);
        journal_nodes.iter().skip(skip).map(|node| {
            journal::JournalEntry {
                timestamp: chrono::DateTime::from_timestamp(node.created_at, 0)
                    .unwrap_or_default(),
                content: node.content.clone(),
            }
        }).collect()
    };

    if entries.is_empty() {
        return;
    }

    let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
    let context_message = self.context.render_context_message();

    // Budget the journal against the window as if the conversation
    // were empty — the same planner that compaction uses.
    let plan = crate::agent::context::plan_context(
        &self.context.system_prompt,
        &context_message,
        &[], // no conversation yet
        &entries,
        &self.client.model,
        &count,
    );

    self.context.journal = crate::agent::context::render_journal_text(&entries, &plan);
}
|
|
|
|
/// Re-render the context message in self.messages from live ContextState.
|
|
/// Called after any change to context state (working stack, etc).
|
|
fn refresh_context_message(&mut self) {
|
|
let rendered = self.context.render_context_message();
|
|
// The context message is the first user message (index 1, after system prompt)
|
|
if self.messages.len() >= 2 && self.messages[1].role == Role::User {
|
|
self.messages[1] = Message::user(rendered);
|
|
}
|
|
self.publish_context_state();
|
|
self.save_working_stack();
|
|
}
|
|
|
|
/// Persist working stack to disk.
|
|
fn save_working_stack(&self) {
|
|
if let Ok(json) = serde_json::to_string(&self.context.working_stack) {
|
|
let _ = std::fs::write(WORKING_STACK_FILE, json);
|
|
}
|
|
}
|
|
|
|
/// Load working stack from disk.
|
|
fn load_working_stack(&mut self) {
|
|
if let Ok(data) = std::fs::read_to_string(WORKING_STACK_FILE) {
|
|
if let Ok(stack) = serde_json::from_str::<Vec<String>>(&data) {
|
|
self.context.working_stack = stack;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Push the current context summary to the shared state for the TUI to read.
fn publish_context_state(&self) {
    // If the lock is unavailable (e.g. poisoned), skip silently —
    // the TUI just keeps showing the previous snapshot.
    if let Ok(mut state) = self.shared_context.write() {
        *state = self.context_state_summary();
    }
}
|
|
|
|
/// Replace base64 image data in older messages with text placeholders.
/// Only the most recent image stays live — each new image ages out
/// all previous ones. The tool result message (right before each image
/// message) already records what was loaded, so no info is lost.
///
/// Called BEFORE the new image message is pushed, so every currently
/// stored multi-part message containing an image gets flattened to text.
fn age_out_images(&mut self) {
    for msg in &mut self.messages {
        if let Some(MessageContent::Parts(parts)) = &msg.content {
            let has_images = parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. }));
            if !has_images {
                continue;
            }
            // Rebuild the message as plain text: keep text parts,
            // substitute a placeholder for each image part, joined
            // by newlines.
            let mut replacement = String::new();
            for part in parts {
                match part {
                    ContentPart::Text { text } => {
                        if !replacement.is_empty() {
                            replacement.push('\n');
                        }
                        replacement.push_str(text);
                    }
                    ContentPart::ImageUrl { .. } => {
                        if !replacement.is_empty() {
                            replacement.push('\n');
                        }
                        replacement.push_str(
                            "[image aged out — see tool result above for details]",
                        );
                    }
                }
            }
            msg.content = Some(MessageContent::Text(replacement));
        }
    }
}
|
|
|
|
/// Strip ephemeral tool calls from the conversation history.
|
|
///
|
|
/// Ephemeral tools (like journal) persist their output to disk,
|
|
/// so the tool call + result don't need to stay in the context
|
|
/// window. We keep them for exactly one API round-trip (the model
|
|
/// needs to see the result was acknowledged), then strip them.
|
|
///
|
|
/// If an assistant message contains ONLY ephemeral tool calls,
|
|
/// the entire message and its tool results are removed. If mixed
|
|
/// with non-ephemeral calls, we leave it (rare case, small cost).
|
|
fn strip_ephemeral_tool_calls(&mut self) {
|
|
// Collect IDs of tool calls to strip
|
|
let mut strip_ids: Vec<String> = Vec::new();
|
|
let mut strip_msg_indices: Vec<usize> = Vec::new();
|
|
|
|
for (i, msg) in self.messages.iter().enumerate() {
|
|
if msg.role != Role::Assistant {
|
|
continue;
|
|
}
|
|
let calls = match &msg.tool_calls {
|
|
Some(c) if !c.is_empty() => c,
|
|
_ => continue,
|
|
};
|
|
|
|
let all_ephemeral = calls.iter().all(|c| {
|
|
c.function.name == tools::journal::TOOL_NAME
|
|
});
|
|
|
|
if all_ephemeral {
|
|
strip_msg_indices.push(i);
|
|
for call in calls {
|
|
strip_ids.push(call.id.clone());
|
|
}
|
|
}
|
|
}
|
|
|
|
if strip_ids.is_empty() {
|
|
return;
|
|
}
|
|
|
|
// Remove in reverse order to preserve indices
|
|
self.messages.retain(|msg| {
|
|
// Strip the assistant messages we identified
|
|
if msg.role == Role::Assistant {
|
|
if let Some(calls) = &msg.tool_calls {
|
|
if calls.iter().all(|c| strip_ids.contains(&c.id)) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
// Strip matching tool results
|
|
if msg.role == Role::Tool {
|
|
if let Some(ref id) = msg.tool_call_id {
|
|
if strip_ids.contains(id) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
true
|
|
});
|
|
}
|
|
|
|
/// Last prompt token count reported by the API.
///
/// Returns the cached value; it is reset to 0 by compaction and
/// restore paths and refreshed when the next API response arrives.
pub fn last_prompt_tokens(&self) -> u32 {
    self.last_prompt_tokens
}
|
|
|
|
/// Rebuild the context window from conversation messages + journal.
///
/// Stores the (possibly updated) system prompt and personality into
/// the context, then delegates to `do_compact()`. The resulting
/// window has the same shape `restore_from_log()` produces:
/// identity + journal summaries + raw recent messages.
pub fn compact(&mut self, new_system_prompt: String, new_personality: Vec<(String, String)>) {
    self.context.system_prompt = new_system_prompt;
    self.context.personality = new_personality;
    self.do_compact();
}
|
|
|
|
/// Internal compaction — rebuilds context window from current messages.
|
|
fn do_compact(&mut self) {
|
|
// Find where actual conversation starts (after system + context)
|
|
let conv_start = self
|
|
.messages
|
|
.iter()
|
|
.position(|m| m.role == Role::Assistant || m.role == Role::Tool)
|
|
.unwrap_or(self.messages.len());
|
|
|
|
let conversation: Vec<Message> = self.messages[conv_start..].to_vec();
|
|
let (messages, journal) = crate::agent::context::build_context_window(
|
|
&self.context,
|
|
&conversation,
|
|
&self.client.model,
|
|
&self.tokenizer,
|
|
);
|
|
self.context.journal = journal;
|
|
self.messages = messages;
|
|
self.last_prompt_tokens = 0;
|
|
self.measure_budget();
|
|
self.publish_context_state();
|
|
}
|
|
|
|
/// Emergency compaction using stored config — called on context overflow.
///
/// Identical to `do_compact()`; kept as a named entry point so the
/// overflow-handling call site reads clearly.
fn emergency_compact(&mut self) {
    self.do_compact();
}
|
|
|
|
/// Restore from the conversation log. Builds the context window
|
|
/// the same way compact() does — journal summaries for old messages,
|
|
/// raw recent messages. This is the unified startup path.
|
|
/// Returns true if the log had content to restore.
|
|
pub fn restore_from_log(
|
|
&mut self,
|
|
system_prompt: String,
|
|
personality: Vec<(String, String)>,
|
|
) -> bool {
|
|
self.context.system_prompt = system_prompt;
|
|
self.context.personality = personality;
|
|
|
|
let all_messages = match &self.conversation_log {
|
|
Some(log) => match log.read_tail(512 * 1024) {
|
|
Ok(msgs) if !msgs.is_empty() => {
|
|
dbglog!("[restore] read {} messages from log tail", msgs.len());
|
|
msgs
|
|
}
|
|
Ok(_) => {
|
|
dbglog!("[restore] log exists but is empty");
|
|
return false;
|
|
}
|
|
Err(e) => {
|
|
dbglog!("[restore] failed to read log: {}", e);
|
|
return false;
|
|
}
|
|
},
|
|
None => {
|
|
dbglog!("[restore] no conversation log configured");
|
|
return false;
|
|
}
|
|
};
|
|
|
|
// Filter out system/context messages — we only want the
|
|
// actual conversation (user prompts, assistant responses,
|
|
// tool calls/results)
|
|
let conversation: Vec<Message> = all_messages
|
|
.into_iter()
|
|
.filter(|m| m.role != Role::System)
|
|
.collect();
|
|
dbglog!("[restore] {} messages after filtering system", conversation.len());
|
|
|
|
let (messages, journal) = crate::agent::context::build_context_window(
|
|
&self.context,
|
|
&conversation,
|
|
&self.client.model,
|
|
&self.tokenizer,
|
|
);
|
|
dbglog!("[restore] journal text: {} chars, {} lines",
|
|
journal.len(), journal.lines().count());
|
|
self.context.journal = journal;
|
|
self.messages = messages;
|
|
dbglog!("[restore] built context window: {} messages", self.messages.len());
|
|
self.last_prompt_tokens = 0;
|
|
self.measure_budget();
|
|
self.publish_context_state();
|
|
true
|
|
}
|
|
|
|
/// Replace the API client (for model switching).
///
/// Only the client is swapped; the conversation history and context
/// window are untouched.
pub fn swap_client(&mut self, new_client: ApiClient) {
    self.client = new_client;
}
|
|
|
|
/// Get the model identifier of the current API client.
pub fn model(&self) -> &str {
    &self.client.model
}
|
|
|
|
/// Get the conversation history for persistence.
///
/// Read-only view of the live message buffer.
pub fn messages(&self) -> &[Message] {
    &self.messages
}
|
|
|
|
/// Mutable access to conversation history (for /retry).
///
/// NOTE(review): edits made through this bypass compaction and the
/// cached token count — callers should re-measure if sizes matter;
/// confirm against the /retry call site.
pub fn messages_mut(&mut self) -> &mut Vec<Message> {
    &mut self.messages
}
|
|
|
|
/// Restore from a saved conversation.
///
/// Wholesale-replaces the message buffer. Unlike `restore_from_log`,
/// this does not rebuild the context window or re-measure the budget.
pub fn restore(&mut self, messages: Vec<Message>) {
    self.messages = messages;
}
|
|
}
|
|
|
|
// Context window building, token counting, and error classification
|
|
// live in context.rs
|
|
|
|
|
|
/// Create a short summary of tool args for the tools pane header.
|
|
fn summarize_args(tool_name: &str, args: &serde_json::Value) -> String {
|
|
match tool_name {
|
|
"read_file" | "write_file" | "edit_file" => args["file_path"]
|
|
.as_str()
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
"bash" => {
|
|
let cmd = args["command"].as_str().unwrap_or("");
|
|
if cmd.len() > 60 {
|
|
let end = cmd.char_indices()
|
|
.map(|(i, _)| i)
|
|
.take_while(|&i| i <= 60)
|
|
.last()
|
|
.unwrap_or(0);
|
|
format!("{}...", &cmd[..end])
|
|
} else {
|
|
cmd.to_string()
|
|
}
|
|
}
|
|
"grep" => {
|
|
let pattern = args["pattern"].as_str().unwrap_or("");
|
|
let path = args["path"].as_str().unwrap_or(".");
|
|
format!("{} in {}", pattern, path)
|
|
}
|
|
"glob" => args["pattern"]
|
|
.as_str()
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
"view_image" => {
|
|
if let Some(pane) = args["pane_id"].as_str() {
|
|
format!("pane {}", pane)
|
|
} else {
|
|
args["file_path"].as_str().unwrap_or("").to_string()
|
|
}
|
|
}
|
|
"journal" => {
|
|
let entry = args["entry"].as_str().unwrap_or("");
|
|
if entry.len() > 60 {
|
|
format!("{}...", &entry[..60])
|
|
} else {
|
|
entry.to_string()
|
|
}
|
|
}
|
|
"yield_to_user" => args["message"]
|
|
.as_str()
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
"switch_model" => args["model"]
|
|
.as_str()
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
"pause" => String::new(),
|
|
_ => String::new(),
|
|
}
|
|
}
|
|
|
|
// Parsing functions (parse_leaked_tool_calls, strip_leaked_artifacts)
|
|
// and their tests live in parsing.rs
|