// 2026-03-18 22:44:52 -04:00
// agent.rs — Core agent loop
//
// The simplest possible implementation of the agent pattern:
// send messages + tool definitions to the model, if it responds
// with tool calls then dispatch them and loop, if it responds
// with text then display it and wait for the next prompt.
//
// Uses streaming by default so text tokens appear as they're
// generated. Tool calls are accumulated from stream deltas and
// dispatched after the stream completes.
//
// The DMN (dmn.rs) is the outer loop that decides what prompts
// to send here. This module just handles single turns: prompt
// in, response out, tool calls dispatched.
use anyhow ::Result ;
use chrono ::{ DateTime , Utc } ;
use tiktoken_rs ::CoreBPE ;
use std ::io ::Write ;
use std ::process ::{ Command , Stdio } ;
use crate ::api ::ApiClient ;
use crate ::journal ;
use crate ::log ::ConversationLog ;
use crate ::tools ;
use crate ::tools ::ProcessTracker ;
use crate ::types ::* ;
use crate ::ui_channel ::{ ContextSection , SharedContextState , StatusInfo , StreamTarget , UiMessage , UiSender } ;
/// Result of a single agent turn.
pub struct TurnResult {
    /// The text response (already sent through UI channel).
    #[allow(dead_code)]
    pub text: String,
    /// Whether the model called yield_to_user during this turn.
    pub yield_requested: bool,
    /// Whether any tools (other than yield_to_user) were called.
    pub had_tool_calls: bool,
    /// Number of tool calls that returned errors this turn.
    pub tool_errors: u32,
    /// Model name to switch to after this turn completes.
    pub model_switch: Option<String>,
    /// Agent requested DMN pause (full stop on autonomous behavior).
    pub dmn_pause: bool,
}
/// Accumulated state across tool dispatches within a single turn.
struct DispatchState {
    yield_requested: bool,
    had_tool_calls: bool,
    tool_errors: u32,
    model_switch: Option<String>,
    dmn_pause: bool,
}
/// Mutable context state — the structured regions of the context window.
///
/// Each field is a different dimension of awareness. The struct renders
/// itself to text for inclusion in the context message sent to the model.
/// Tools can update individual fields mid-session.
#[derive(Debug, Clone)]
pub struct ContextState {
    /// System prompt (identity, instructions, loaded from prompt file).
    pub system_prompt: String,
    /// Identity files: (filename, contents). Transparent structure for
    /// debug inspection and per-file budget control.
    pub personality: Vec<(String, String)>,
    /// Journal entries rendered as text — bridges old conversation.
    pub journal: String,
    /// Working stack — what the agent is currently doing.
    /// Top of stack (last element) is the current focus.
    pub working_stack: Vec<String>,
}

/// Path to working stack instructions, included in context before the stack state.
const WORKING_STACK_INSTRUCTIONS: &str = "/home/kent/.config/poc-agent/working-stack.md";

/// Path to persisted working stack state.
const WORKING_STACK_FILE: &str = "/home/kent/.claude/memory/working-stack.json";

impl ContextState {
    /// Render the context message for the model. Personality + working stack.
    /// Journal is rendered separately as its own message in the conversation.
    pub fn render_context_message(&self) -> String {
        // One "## <file>" section per personality file.
        let mut parts: Vec<String> = self
            .personality
            .iter()
            .map(|(name, content)| format!("## {}\n\n{}", name, content))
            .collect();
        // Always include working stack section — instructions + current state.
        // Missing instructions file degrades gracefully to an empty string.
        let instructions =
            std::fs::read_to_string(WORKING_STACK_INSTRUCTIONS).unwrap_or_default();
        let mut stack_section = instructions;
        if self.working_stack.is_empty() {
            stack_section.push_str("\n## Current stack\n\n(empty)\n");
        } else {
            stack_section.push_str("\n## Current stack\n\n");
            for (i, item) in self.working_stack.iter().enumerate() {
                if i == self.working_stack.len() - 1 {
                    // Top of stack (current focus) gets the arrow marker.
                    stack_section.push_str(&format!("→ {}\n", item));
                } else {
                    stack_section.push_str(&format!("[{}] {}\n", i, item));
                }
            }
        }
        parts.push(stack_section);
        parts.join("\n\n---\n\n")
    }
}
/// Breakdown of context window usage by category, in tokens.
///
/// Categories:
///   id   — static identity context (system prompt + CLAUDE.md + memory files)
///   mem  — dynamically recalled content from poc-memory (future)
///   jnl  — journal entries bridging old conversation
///   conv — raw recent conversation messages
///   free — unused context window (headroom before compaction)
///
/// Token estimates are derived from char proportions scaled by the
/// API-reported prompt_tokens count. Before the first API call, uses
/// chars/4 as a rough approximation.
#[derive(Debug, Clone, Default)]
pub struct ContextBudget {
    pub identity_tokens: usize,
    pub memory_tokens: usize,
    pub journal_tokens: usize,
    pub conversation_tokens: usize,
    /// Model's context window size in tokens.
    pub window_tokens: usize,
}

impl ContextBudget {
    /// Total tokens consumed across all tracked categories.
    pub fn used(&self) -> usize {
        self.identity_tokens + self.memory_tokens + self.journal_tokens + self.conversation_tokens
    }

    /// Remaining headroom; saturates at zero if usage exceeds the window.
    pub fn free(&self) -> usize {
        self.window_tokens.saturating_sub(self.used())
    }

    /// Format as a compact status string with percentages of the token window.
    /// Non-zero values always show at least 1%.
    pub fn status_string(&self) -> String {
        let total = self.window_tokens;
        if total == 0 {
            // No window configured yet — nothing meaningful to report.
            return String::new();
        }
        let pct = |n: usize| {
            if n == 0 {
                return 0;
            }
            // Round down, but floor non-zero categories at 1% so they're visible.
            ((n * 100) / total).max(1)
        };
        format!(
            "id:{}% mem:{}% jnl:{}% conv:{}% free:{}%",
            pct(self.identity_tokens),
            pct(self.memory_tokens),
            pct(self.journal_tokens),
            pct(self.conversation_tokens),
            pct(self.free()),
        )
    }
}
/// The core agent: owns the API client, conversation history, tool
/// definitions, and the structured context state.
pub struct Agent {
    client: ApiClient,
    messages: Vec<Message>,
    tool_defs: Vec<ToolDef>,
    /// Last known prompt token count from the API (tracks context size).
    last_prompt_tokens: u32,
    /// Shared process tracker for bash tool — lets TUI show/kill running commands.
    pub process_tracker: ProcessTracker,
    /// Current reasoning effort level ("none", "low", "high").
    pub reasoning_effort: String,
    /// Persistent conversation log — append-only record of all messages.
    conversation_log: Option<ConversationLog>,
    /// Current context window budget breakdown.
    pub context_budget: ContextBudget,
    /// BPE tokenizer for token counting (cl100k_base — close enough
    /// for Claude and Qwen budget allocation, ~85-90% count accuracy).
    tokenizer: CoreBPE,
    /// Mutable context state — personality, working stack, etc.
    pub context: ContextState,
    /// Shared live context summary — TUI reads this directly for debug screen.
    pub shared_context: SharedContextState,
    /// Stable session ID for memory-search dedup across turns.
    session_id: String,
}
impl Agent {
/// Construct an agent: build tool definitions, load the tokenizer,
/// seed the context window (system prompt + identity + journal), and
/// publish the initial budget/state to the UI.
pub fn new(
    client: ApiClient,
    system_prompt: String,
    personality: Vec<(String, String)>,
    conversation_log: Option<ConversationLog>,
    shared_context: SharedContextState,
) -> Self {
    let tool_defs = tools::definitions();
    // Infallible in practice — the BPE tables ship with the crate.
    let tokenizer = tiktoken_rs::cl100k_base()
        .expect("failed to load cl100k_base tokenizer");
    let context = ContextState {
        system_prompt: system_prompt.clone(),
        personality,
        journal: String::new(),
        working_stack: Vec::new(),
    };
    // Stable per-process ID for memory-search dedup across turns.
    let session_id = format!("poc-agent-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
    let mut agent = Self {
        client,
        messages: Vec::new(),
        tool_defs,
        last_prompt_tokens: 0,
        process_tracker: ProcessTracker::new(),
        reasoning_effort: "none".to_string(),
        conversation_log,
        context_budget: ContextBudget::default(),
        tokenizer,
        context,
        shared_context,
        session_id,
    };
    // Load recent journal entries at startup for orientation
    agent.load_startup_journal();
    agent.load_working_stack();
    agent.push_context(Message::system(system_prompt));
    let rendered = agent.context.render_context_message();
    if !rendered.is_empty() {
        agent.push_context(Message::user(rendered));
    }
    if !agent.context.journal.is_empty() {
        agent.push_context(Message::user(agent.context.journal.clone()));
    }
    agent.measure_budget();
    agent.publish_context_state();
    agent
}
/// Run poc-hook for a given event, returning any output to inject.
///
/// Spawns the `poc-hook` binary, feeds it a JSON payload on stdin,
/// and returns its stdout. Returns `None` when the hook can't be
/// spawned, fails, or produces only whitespace — callers treat that
/// as "no enrichment".
fn run_hook(&self, event: &str, prompt: &str) -> Option<String> {
    let transcript_path = self
        .conversation_log
        .as_ref()
        .map(|l| l.path().to_string_lossy().to_string())
        .unwrap_or_default();
    let hook_input = serde_json::json!({
        "hook_event_name": event,
        "session_id": self.session_id,
        "transcript_path": transcript_path,
        "prompt": prompt,
    });
    let mut child = Command::new("poc-hook")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .spawn()
        .ok()?;
    if let Some(ref mut stdin) = child.stdin {
        // Best-effort write — a hook that ignores stdin shouldn't kill us.
        let _ = stdin.write_all(hook_input.to_string().as_bytes());
    }
    // Close stdin explicitly so the hook sees EOF and can finish.
    drop(child.stdin.take());
    let output = child.wait_with_output().ok()?;
    let text = String::from_utf8_lossy(&output.stdout).to_string();
    if text.trim().is_empty() {
        None
    } else {
        Some(text)
    }
}
/// Push a conversation message — stamped and logged.
///
/// Logging failures are reported to stderr but never abort the turn;
/// losing one log line is better than losing the conversation.
fn push_message(&mut self, mut msg: Message) {
    msg.stamp();
    if let Some(ref log) = self.conversation_log {
        if let Err(e) = log.append(&msg) {
            eprintln!("warning: failed to log message: {:#}", e);
        }
    }
    self.messages.push(msg);
}
/// Push a context-only message (system prompt, identity context,
/// journal summaries). Not logged — these are reconstructed on
/// every startup/compaction.
fn push_context(&mut self, msg: Message) {
    self.messages.push(msg);
}
/// Measure context window usage by category. Uses the BPE tokenizer
/// for direct token counting (no chars/4 approximation).
fn measure_budget ( & mut self ) {
let mut id_tokens : usize = 0 ;
let mem_tokens : usize = 0 ;
let mut jnl_tokens : usize = 0 ;
let mut conv_tokens : usize = 0 ;
let mut in_conversation = false ;
for msg in & self . messages {
let tokens = msg_token_count ( & self . tokenizer , msg ) ;
if in_conversation {
conv_tokens + = tokens ;
continue ;
}
match msg . role {
Role ::System = > id_tokens + = tokens ,
Role ::User = > {
let text = msg . content_text ( ) ;
if text . starts_with ( " [Earlier in this conversation " ) {
jnl_tokens + = tokens ;
} else if text . starts_with ( " Your context was just rebuilt " ) {
jnl_tokens + = tokens ;
} else if jnl_tokens = = 0 & & conv_tokens = = 0 {
// Static identity context (before any journal/conversation)
id_tokens + = tokens ;
} else {
in_conversation = true ;
conv_tokens + = tokens ;
}
}
_ = > {
in_conversation = true ;
conv_tokens + = tokens ;
}
}
}
self . context_budget = ContextBudget {
identity_tokens : id_tokens ,
memory_tokens : mem_tokens ,
journal_tokens : jnl_tokens ,
conversation_tokens : conv_tokens ,
window_tokens : model_context_window ( & self . client . model ) ,
} ;
}
/// Send a user message and run the agent loop until the model
/// produces a text response (no more tool calls). Streams text
/// and tool activity through the UI channel.
pub async fn turn (
& mut self ,
user_input : & str ,
ui_tx : & UiSender ,
target : StreamTarget ,
) -> Result < TurnResult > {
// Run poc-hook (memory search, notifications, context check)
if let Some ( hook_output ) = self . run_hook ( " UserPromptSubmit " , user_input ) {
let enriched = format! ( " {} \n \n <system-reminder> \n {} \n </system-reminder> " ,
user_input , hook_output ) ;
self . push_message ( Message ::user ( enriched ) ) ;
} else {
self . push_message ( Message ::user ( user_input ) ) ;
}
let mut overflow_retries : u32 = 0 ;
let mut empty_retries : u32 = 0 ;
let mut ds = DispatchState {
yield_requested : false ,
had_tool_calls : false ,
tool_errors : 0 ,
model_switch : None ,
dmn_pause : false ,
} ;
loop {
let _ = ui_tx . send ( UiMessage ::Activity ( " thinking... " . into ( ) ) ) ;
let api_result = self
. client
. chat_completion_stream (
& self . messages ,
Some ( & self . tool_defs ) ,
ui_tx ,
target ,
& self . reasoning_effort ,
)
. await ;
// Context overflow → compact and retry (max 2 attempts)
// Stream error → retry with backoff (max 2 attempts)
let ( msg , usage ) = match api_result {
Err ( e ) if is_context_overflow ( & e ) & & overflow_retries < 2 = > {
overflow_retries + = 1 ;
let _ = ui_tx . send ( UiMessage ::Info ( format! (
" [context overflow — compacting and retrying ({}/2)] " ,
overflow_retries ,
) ) ) ;
self . emergency_compact ( ) ;
continue ;
}
Err ( e ) if is_stream_error ( & e ) & & empty_retries < 2 = > {
empty_retries + = 1 ;
let _ = ui_tx . send ( UiMessage ::Info ( format! (
" [stream error: {} — retrying ({}/2)] " ,
e , empty_retries ,
) ) ) ;
tokio ::time ::sleep ( std ::time ::Duration ::from_secs ( 2 ) ) . await ;
continue ;
}
other = > other ? ,
} ;
// Strip ephemeral tool calls (journal) that the API has
// now processed. They're persisted to disk; no need to keep
// them in the conversation history burning tokens.
self . strip_ephemeral_tool_calls ( ) ;
if let Some ( usage ) = & usage {
self . last_prompt_tokens = usage . prompt_tokens ;
self . measure_budget ( ) ;
self . publish_context_state ( ) ;
let _ = ui_tx . send ( UiMessage ::StatusUpdate ( StatusInfo {
dmn_state : String ::new ( ) , // filled by main loop
dmn_turns : 0 ,
dmn_max_turns : 0 ,
prompt_tokens : usage . prompt_tokens ,
completion_tokens : usage . completion_tokens ,
model : self . client . model . clone ( ) ,
turn_tools : 0 , // tracked by TUI from ToolCall messages
context_budget : self . context_budget . status_string ( ) ,
} ) ) ;
}
// Empty response — model returned finish=stop with no content
// or tool calls. Inject a nudge so the retry has different input.
let has_content = msg . content . is_some ( ) ;
let has_tools = msg . tool_calls . as_ref ( ) . map_or ( false , | tc | ! tc . is_empty ( ) ) ;
if ! has_content & & ! has_tools {
if empty_retries < 2 {
empty_retries + = 1 ;
let _ = ui_tx . send ( UiMessage ::Debug ( format! (
" empty response, injecting nudge and retrying ({}/2) " ,
empty_retries ,
) ) ) ;
self . push_message ( Message ::user (
" [system] Your previous response was empty. \
Please respond with text or use a tool . "
) ) ;
continue ;
}
// After max retries, fall through — return the empty response
} else {
empty_retries = 0 ;
}
// Structured tool calls from the API
if let Some ( ref tool_calls ) = msg . tool_calls {
if ! tool_calls . is_empty ( ) {
self . push_message ( msg . clone ( ) ) ;
for call in tool_calls {
self . dispatch_tool_call ( call , None , ui_tx , & mut ds )
. await ;
}
continue ;
}
}
// No structured tool calls — check for leaked tool calls
// (Qwen sometimes outputs <tool_call> XML as text).
let text = msg . content_text ( ) . to_string ( ) ;
let leaked = parse_leaked_tool_calls ( & text ) ;
if ! leaked . is_empty ( ) {
let _ = ui_tx . send ( UiMessage ::Debug ( format! (
" recovered {} leaked tool call(s) from text " ,
leaked . len ( )
) ) ) ;
// Strip tool call XML and thinking tokens from the message
// so they don't clutter the conversation history.
let cleaned = strip_leaked_artifacts ( & text ) ;
let mut clean_msg = msg . clone ( ) ;
clean_msg . content = if cleaned . trim ( ) . is_empty ( ) {
None
} else {
Some ( MessageContent ::Text ( cleaned ) )
} ;
self . push_message ( clean_msg ) ;
for call in & leaked {
self . dispatch_tool_call ( call , Some ( " recovered " ) , ui_tx , & mut ds )
. await ;
}
continue ;
}
// Genuinely text-only response
let _ = ui_tx . send ( UiMessage ::Activity ( String ::new ( ) ) ) ;
self . push_message ( msg ) ;
return Ok ( TurnResult {
text ,
yield_requested : ds . yield_requested ,
had_tool_calls : ds . had_tool_calls ,
tool_errors : ds . tool_errors ,
model_switch : ds . model_switch ,
dmn_pause : ds . dmn_pause ,
} ) ;
}
}
/// Dispatch a single tool call: send UI annotations, run the tool,
/// push results into the conversation, handle images.
async fn dispatch_tool_call (
& mut self ,
call : & ToolCall ,
tag : Option < & str > ,
ui_tx : & UiSender ,
ds : & mut DispatchState ,
) {
let args : serde_json ::Value =
serde_json ::from_str ( & call . function . arguments ) . unwrap_or_default ( ) ;
let args_summary = summarize_args ( & call . function . name , & args ) ;
let label = match tag {
Some ( t ) = > format! ( " calling: {} ( {} ) " , call . function . name , t ) ,
None = > format! ( " calling: {} " , call . function . name ) ,
} ;
let _ = ui_tx . send ( UiMessage ::Activity ( label ) ) ;
let _ = ui_tx . send ( UiMessage ::ToolCall {
name : call . function . name . clone ( ) ,
args_summary : args_summary . clone ( ) ,
} ) ;
let _ = ui_tx . send ( UiMessage ::ToolStarted {
id : call . id . clone ( ) ,
name : call . function . name . clone ( ) ,
detail : args_summary ,
} ) ;
// Handle working_stack tool — needs &mut self for context state
if call . function . name = = " working_stack " {
2026-03-20 13:15:01 -04:00
let result = tools ::working_stack ::handle ( & args , & mut self . context . working_stack ) ;
2026-03-18 22:44:52 -04:00
let output = tools ::ToolOutput {
2026-03-20 13:15:01 -04:00
text : result . clone ( ) ,
2026-03-18 22:44:52 -04:00
is_yield : false ,
images : Vec ::new ( ) ,
model_switch : None ,
dmn_pause : false ,
} ;
let _ = ui_tx . send ( UiMessage ::ToolResult {
name : call . function . name . clone ( ) ,
result : output . text . clone ( ) ,
} ) ;
let _ = ui_tx . send ( UiMessage ::ToolFinished { id : call . id . clone ( ) } ) ;
self . push_message ( Message ::tool_result ( & call . id , & output . text ) ) ;
ds . had_tool_calls = true ;
2026-03-20 13:15:01 -04:00
// Re-render the context message so the model sees the updated stack
if ! result . starts_with ( " Error: " ) {
self . refresh_context_message ( ) ;
}
2026-03-18 22:44:52 -04:00
return ;
}
let output =
tools ::dispatch ( & call . function . name , & args , & self . process_tracker ) . await ;
if output . is_yield {
ds . yield_requested = true ;
} else {
ds . had_tool_calls = true ;
}
if output . model_switch . is_some ( ) {
ds . model_switch = output . model_switch ;
}
if output . dmn_pause {
ds . dmn_pause = true ;
}
if output . text . starts_with ( " Error: " ) {
ds . tool_errors + = 1 ;
}
let _ = ui_tx . send ( UiMessage ::ToolResult {
name : call . function . name . clone ( ) ,
result : output . text . clone ( ) ,
} ) ;
let _ = ui_tx . send ( UiMessage ::ToolFinished { id : call . id . clone ( ) } ) ;
self . push_message ( Message ::tool_result ( & call . id , & output . text ) ) ;
if ! output . images . is_empty ( ) {
// Only one live image in context at a time — age out any
// previous ones to avoid accumulating ~90KB+ per image.
self . age_out_images ( ) ;
self . push_message ( Message ::user_with_images (
" Here is the image you requested: " ,
& output . images ,
) ) ;
}
}
/// Build context state summary for the debug screen.
pub fn context_state_summary ( & self ) -> Vec < ContextSection > {
let count = | s : & str | self . tokenizer . encode_with_special_tokens ( s ) . len ( ) ;
let mut sections = Vec ::new ( ) ;
// System prompt
sections . push ( ContextSection {
name : " System prompt " . into ( ) ,
tokens : count ( & self . context . system_prompt ) ,
content : self . context . system_prompt . clone ( ) ,
children : Vec ::new ( ) ,
} ) ;
// Personality — parent with file children
let personality_children : Vec < ContextSection > = self . context . personality . iter ( )
. map ( | ( name , content ) | ContextSection {
name : name . clone ( ) ,
tokens : count ( content ) ,
content : content . clone ( ) ,
children : Vec ::new ( ) ,
} )
. collect ( ) ;
let personality_tokens : usize = personality_children . iter ( ) . map ( | c | c . tokens ) . sum ( ) ;
sections . push ( ContextSection {
name : format ! ( " Personality ({} files) " , personality_children . len ( ) ) ,
tokens : personality_tokens ,
content : String ::new ( ) ,
children : personality_children ,
} ) ;
// Journal — split into per-entry children
{
let mut journal_children = Vec ::new ( ) ;
let mut current_header = String ::new ( ) ;
let mut current_body = String ::new ( ) ;
for line in self . context . journal . lines ( ) {
if line . starts_with ( " ## " ) {
if ! current_header . is_empty ( ) {
let body = std ::mem ::take ( & mut current_body ) ;
let preview : String = body . lines ( ) . next ( ) . unwrap_or ( " " ) . chars ( ) . take ( 60 ) . collect ( ) ;
journal_children . push ( ContextSection {
name : format ! ( " {}: {} " , current_header , preview ) ,
tokens : count ( & body ) ,
content : body ,
children : Vec ::new ( ) ,
} ) ;
}
current_header = line . trim_start_matches ( " ## " ) . to_string ( ) ;
current_body . clear ( ) ;
} else {
if ! current_body . is_empty ( ) | | ! line . is_empty ( ) {
current_body . push_str ( line ) ;
current_body . push ( '\n' ) ;
}
}
}
if ! current_header . is_empty ( ) {
let preview : String = current_body . lines ( ) . next ( ) . unwrap_or ( " " ) . chars ( ) . take ( 60 ) . collect ( ) ;
journal_children . push ( ContextSection {
name : format ! ( " {}: {} " , current_header , preview ) ,
tokens : count ( & current_body ) ,
content : current_body ,
children : Vec ::new ( ) ,
} ) ;
}
let journal_tokens : usize = journal_children . iter ( ) . map ( | c | c . tokens ) . sum ( ) ;
sections . push ( ContextSection {
name : format ! ( " Journal ({} entries) " , journal_children . len ( ) ) ,
tokens : journal_tokens ,
content : String ::new ( ) ,
children : journal_children ,
} ) ;
}
// Working stack — instructions + items as children
let instructions = std ::fs ::read_to_string ( WORKING_STACK_INSTRUCTIONS )
. unwrap_or_default ( ) ;
let mut stack_children = vec! [ ContextSection {
name : " Instructions " . into ( ) ,
tokens : count ( & instructions ) ,
content : instructions ,
children : Vec ::new ( ) ,
} ] ;
for ( i , item ) in self . context . working_stack . iter ( ) . enumerate ( ) {
let marker = if i = = self . context . working_stack . len ( ) - 1 { " → " } else { " " } ;
stack_children . push ( ContextSection {
name : format ! ( " {} [{}] {} " , marker , i , item ) ,
tokens : count ( item ) ,
content : String ::new ( ) ,
children : Vec ::new ( ) ,
} ) ;
}
let stack_tokens : usize = stack_children . iter ( ) . map ( | c | c . tokens ) . sum ( ) ;
sections . push ( ContextSection {
name : format ! ( " Working stack ({} items) " , self . context . working_stack . len ( ) ) ,
tokens : stack_tokens ,
content : String ::new ( ) ,
children : stack_children ,
} ) ;
// Conversation — each message as a child
let conv_start = self . messages . iter ( )
. position ( | m | m . role = = Role ::Assistant | | m . role = = Role ::Tool )
. unwrap_or ( self . messages . len ( ) ) ;
let conv_messages = & self . messages [ conv_start .. ] ;
let conv_children : Vec < ContextSection > = conv_messages . iter ( ) . enumerate ( )
. map ( | ( i , msg ) | {
let text = msg . content . as_ref ( )
. map ( | c | c . as_text ( ) . to_string ( ) )
. unwrap_or_default ( ) ;
let tool_info = msg . tool_calls . as_ref ( ) . map ( | tc | {
tc . iter ( )
. map ( | c | c . function . name . clone ( ) )
. collect ::< Vec < _ > > ( )
. join ( " , " )
} ) ;
let label = match ( & msg . role , & tool_info ) {
( _ , Some ( tools ) ) = > format! ( " [tool_call: {} ] " , tools ) ,
_ = > {
let preview : String = text . chars ( ) . take ( 60 ) . collect ( ) ;
let preview = preview . replace ( '\n' , " " ) ;
if text . len ( ) > 60 { format! ( " {} ... " , preview ) } else { preview }
}
} ;
let tokens = count ( & text ) ;
let role_name = match msg . role {
Role ::Assistant = > " PoC " ,
Role ::User = > " Kent " ,
Role ::Tool = > " tool " ,
Role ::System = > " system " ,
} ;
ContextSection {
name : format ! ( " [{}] {}: {} " , conv_start + i , role_name , label ) ,
tokens ,
content : text ,
children : Vec ::new ( ) ,
}
} )
. collect ( ) ;
let conv_tokens : usize = conv_children . iter ( ) . map ( | c | c . tokens ) . sum ( ) ;
sections . push ( ContextSection {
name : format ! ( " Conversation ({} messages) " , conv_children . len ( ) ) ,
tokens : conv_tokens ,
content : String ::new ( ) ,
children : conv_children ,
} ) ;
sections
}
/// Load recent journal entries at startup for orientation.
/// Uses the same budget logic as compaction but with empty conversation.
/// Only parses the tail of the journal file (last 64KB) for speed.
fn load_startup_journal(&mut self) {
    let journal_path = journal::default_journal_path();
    let entries = journal::parse_journal_tail(&journal_path, 64 * 1024);
    if entries.is_empty() {
        return;
    }
    let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
    let context_message = self.context.render_context_message();
    let plan = plan_context(
        &self.context.system_prompt,
        &context_message,
        &[], // no conversation yet
        &entries,
        &self.client.model,
        &count,
    );
    self.context.journal = render_journal_text(&entries, &plan);
}
/// Re-render the context message in self.messages from live ContextState.
/// Called after any change to context state (working stack, etc).
fn refresh_context_message ( & mut self ) {
let rendered = self . context . render_context_message ( ) ;
// The context message is the first user message (index 1, after system prompt)
if self . messages . len ( ) > = 2 & & self . messages [ 1 ] . role = = Role ::User {
self . messages [ 1 ] = Message ::user ( rendered ) ;
}
self . publish_context_state ( ) ;
self . save_working_stack ( ) ;
}
/// Persist working stack to disk. Best-effort: serialization or
/// write failures are silently ignored.
fn save_working_stack(&self) {
    if let Ok(json) = serde_json::to_string(&self.context.working_stack) {
        let _ = std::fs::write(WORKING_STACK_FILE, json);
    }
}
/// Load working stack from disk. A missing or unparseable file
/// leaves the current stack untouched.
fn load_working_stack(&mut self) {
    let Ok(data) = std::fs::read_to_string(WORKING_STACK_FILE) else {
        return;
    };
    if let Ok(stack) = serde_json::from_str::<Vec<String>>(&data) {
        self.context.working_stack = stack;
    }
}
/// Push the current context summary to the shared state for the TUI to read.
/// A poisoned lock is silently skipped — the debug view just goes stale.
fn publish_context_state(&self) {
    if let Ok(mut state) = self.shared_context.write() {
        *state = self.context_state_summary();
    }
}
/// Replace base64 image data in older messages with text placeholders.
/// Only the most recent image stays live — each new image ages out
/// all previous ones. The tool result message (right before each image
/// message) already records what was loaded, so no info is lost.
fn age_out_images(&mut self) {
    for msg in &mut self.messages {
        if let Some(MessageContent::Parts(parts)) = &msg.content {
            let has_images = parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. }));
            if !has_images {
                continue;
            }
            // Rebuild the message as newline-joined plain text, with a
            // placeholder substituted for each image part.
            let mut replacement = String::new();
            for part in parts {
                match part {
                    ContentPart::Text { text } => {
                        if !replacement.is_empty() {
                            replacement.push('\n');
                        }
                        replacement.push_str(text);
                    }
                    ContentPart::ImageUrl { .. } => {
                        if !replacement.is_empty() {
                            replacement.push('\n');
                        }
                        replacement.push_str(
                            "[image aged out — see tool result above for details]",
                        );
                    }
                }
            }
            msg.content = Some(MessageContent::Text(replacement));
        }
    }
}
/// Strip ephemeral tool calls from the conversation history.
///
/// Ephemeral tools (like journal) persist their output to disk,
/// so the tool call + result don't need to stay in the context
/// window. We keep them for exactly one API round-trip (the model
/// needs to see the result was acknowledged), then strip them.
///
/// If an assistant message contains ONLY ephemeral tool calls,
/// the entire message and its tool results are removed. If mixed
/// with non-ephemeral calls, we leave it (rare case, small cost).
fn strip_ephemeral_tool_calls ( & mut self ) {
// Collect IDs of tool calls to strip
let mut strip_ids : Vec < String > = Vec ::new ( ) ;
let mut strip_msg_indices : Vec < usize > = Vec ::new ( ) ;
for ( i , msg ) in self . messages . iter ( ) . enumerate ( ) {
if msg . role ! = Role ::Assistant {
continue ;
}
let calls = match & msg . tool_calls {
Some ( c ) if ! c . is_empty ( ) = > c ,
_ = > continue ,
} ;
let all_ephemeral = calls . iter ( ) . all ( | c | {
c . function . name = = tools ::journal ::TOOL_NAME
} ) ;
if all_ephemeral {
strip_msg_indices . push ( i ) ;
for call in calls {
strip_ids . push ( call . id . clone ( ) ) ;
}
}
}
if strip_ids . is_empty ( ) {
return ;
}
// Remove in reverse order to preserve indices
self . messages . retain ( | msg | {
// Strip the assistant messages we identified
if msg . role = = Role ::Assistant {
if let Some ( calls ) = & msg . tool_calls {
if calls . iter ( ) . all ( | c | strip_ids . contains ( & c . id ) ) {
return false ;
}
}
}
// Strip matching tool results
if msg . role = = Role ::Tool {
if let Some ( ref id ) = msg . tool_call_id {
if strip_ids . contains ( id ) {
return false ;
}
}
}
true
} ) ;
}
/// Last prompt token count reported by the API.
pub fn last_prompt_tokens(&self) -> u32 {
    self.last_prompt_tokens
}
/// Compact the context window with a refreshed system prompt and
/// personality. The rebuilt window is always:
///   identity + journal summaries + raw recent messages
/// Shares its rebuild logic with restore_from_log() via do_compact().
pub fn compact(&mut self, new_system_prompt: String, new_personality: Vec<(String, String)>) {
    self.context.system_prompt = new_system_prompt;
    self.context.personality = new_personality;
    self.do_compact();
}
/// Internal compaction — rebuilds context window from current messages.
fn do_compact ( & mut self ) {
// Find where actual conversation starts (after system + context)
let conv_start = self
. messages
. iter ( )
. position ( | m | m . role = = Role ::Assistant | | m . role = = Role ::Tool )
. unwrap_or ( self . messages . len ( ) ) ;
let conversation : Vec < Message > = self . messages [ conv_start .. ] . to_vec ( ) ;
let ( messages , journal ) = build_context_window (
& self . context ,
& conversation ,
& self . client . model ,
& self . tokenizer ,
) ;
self . context . journal = journal ;
self . messages = messages ;
self . last_prompt_tokens = 0 ;
self . measure_budget ( ) ;
self . publish_context_state ( ) ;
}
/// Emergency compaction using stored config — called on context overflow.
fn emergency_compact(&mut self) {
    self.do_compact();
}
/// Restore from the conversation log. Builds the context window
/// the same way compact() does — journal summaries for old messages,
/// raw recent messages. This is the unified startup path.
/// Returns true if the log had content to restore.
///
/// `system_prompt` and `personality` are installed into the context
/// state before the window is rebuilt, so they take effect immediately.
pub fn restore_from_log(
    &mut self,
    system_prompt: String,
    personality: Vec<(String, String)>,
) -> bool {
    self.context.system_prompt = system_prompt;
    self.context.personality = personality;
    // Read only the tail of the on-disk log (512 KiB cap) so startup
    // stays fast even with a long history. Any miss — no log, empty
    // log, read error — means there is nothing to restore.
    let all_messages = match &self.conversation_log {
        Some(log) => match log.read_tail(512 * 1024) {
            Ok(msgs) if !msgs.is_empty() => {
                dbglog!("[restore] read {} messages from log tail", msgs.len());
                msgs
            }
            Ok(_) => {
                dbglog!("[restore] log exists but is empty");
                return false;
            }
            Err(e) => {
                dbglog!("[restore] failed to read log: {}", e);
                return false;
            }
        },
        None => {
            dbglog!("[restore] no conversation log configured");
            return false;
        }
    };
    // Filter out system/context messages — we only want the
    // actual conversation (user prompts, assistant responses,
    // tool calls/results). System messages are regenerated fresh
    // by build_context_window.
    let conversation: Vec<Message> = all_messages
        .into_iter()
        .filter(|m| m.role != Role::System)
        .collect();
    dbglog!("[restore] {} messages after filtering system", conversation.len());
    let (messages, journal) = build_context_window(
        &self.context,
        &conversation,
        &self.client.model,
        &self.tokenizer,
    );
    dbglog!("[restore] journal text: {} chars, {} lines",
        journal.len(), journal.lines().count());
    self.context.journal = journal;
    self.messages = messages;
    dbglog!("[restore] built context window: {} messages", self.messages.len());
    // Zero the cached prompt-token figure so the next measurement is
    // taken against the freshly rebuilt window.
    self.last_prompt_tokens = 0;
    self.measure_budget();
    self.publish_context_state();
    true
}
/// Replace the API client (for model switching). Conversation and
/// context state are kept; only the transport/model changes.
pub fn swap_client(&mut self, new_client: ApiClient) {
    self.client = new_client;
}
/// Get the model identifier of the currently active API client.
pub fn model(&self) -> &str {
    &self.client.model
}
/// Get the conversation history for persistence (read-only view).
pub fn messages(&self) -> &[Message] {
    &self.messages
}
/// Mutable access to conversation history (for /retry). Callers may
/// truncate or edit messages directly; no budget check happens here.
pub fn messages_mut(&mut self) -> &mut Vec<Message> {
    &mut self.messages
}
/// Restore from a saved conversation, replacing the current history
/// wholesale. Unlike `restore_from_log`, no context window rebuild
/// or compaction is performed.
pub fn restore(&mut self, messages: Vec<Message>) {
    self.messages = messages;
}
}
/// Look up a model's context window size in tokens.
///
/// Matching is case-insensitive and substring-based on the model id;
/// unrecognized models fall back to a conservative 128k window.
pub fn model_context_window(model: &str) -> usize {
    let id = model.to_lowercase();
    if id.contains("opus") || id.contains("sonnet") {
        return 200_000;
    }
    if id.contains("qwen") {
        return 131_072;
    }
    128_000
}
/// Context budget in tokens: 60% of the model's context window.
/// Leaves headroom for conversation to grow before compaction triggers.
///
/// Future direction: make this dynamic based on what the agent is
/// doing — deep coding work might allocate more to conversation,
/// consolidation might allocate more to journal/memory, idle might
/// shrink everything to save cost.
fn context_budget_tokens(model: &str) -> usize {
    let window = model_context_window(model);
    window * 60 / 100
}
/// Allocation plan for the context window. Separates the budget math
/// (which entries and messages to include) from the message assembly
/// (building the actual Vec<Message>). This makes the core algorithm
/// testable and inspectable — log the plan on compaction to see exactly
/// what allocation decisions were made.
///
/// Underscore-prefixed fields are diagnostic accounting only: they
/// record how the budget was spent but are not read during assembly.
///
/// NOTE(review): the original source for this struct was corrupted by
/// a spliced-in commit message repeated between fields; this is the
/// reconstruction from the surviving field names and doc comments.
struct ContextPlan {
    /// Index into all_entries: header-only entries start here
    header_start: usize,
    /// Index into all_entries: full entries start here (headers end here)
    full_start: usize,
    /// Total journal entries (header-only + full go up to this)
    entry_count: usize,
    /// Index into recent conversation: skip messages before this
    conv_trim: usize,
    /// Total recent conversation messages
    _conv_count: usize,
    /// Tokens used by full journal entries
    _full_tokens: usize,
    /// Tokens used by header-only journal entries
    _header_tokens: usize,
    /// Tokens used by conversation (after trimming)
    _conv_tokens: usize,
    /// Total budget available (after identity, memory, reserve)
    _available: usize,
}
/// Build a context window from conversation messages + journal entries.
/// This is the core algorithm shared by compact() and restore_from_log().
///
/// Allocation strategy: identity and memory are fixed costs. The
/// remaining budget (minus 25% reserve for model output) is split
/// between journal and conversation. Conversation gets priority —
/// it's what's happening now. Journal fills the rest, newest first.
///
/// When the budget is tight, journal entries are dropped first
/// (oldest entries go first). If conversation alone exceeds the
/// budget, oldest messages are trimmed to fit.
/// Returns (messages, journal_text) — caller stores journal_text in ContextState.
fn build_context_window(
    context: &ContextState,
    conversation: &[Message],
    model: &str,
    tokenizer: &CoreBPE,
) -> (Vec<Message>, String) {
    // Journal is re-read from disk on every rebuild so entries written
    // since the last compaction are picked up.
    let journal_path = journal::default_journal_path();
    let all_entries = journal::parse_journal(&journal_path);
    dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
    // Shared token counter used for all budget math below.
    let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
    let system_prompt = context.system_prompt.clone();
    let context_message = context.render_context_message();
    // Cap memory to 50% of the context budget so conversation always
    // gets space. Truncate at the last complete section boundary.
    let max_tokens = context_budget_tokens(model);
    let memory_cap = max_tokens / 2;
    let memory_tokens = count(&context_message);
    let context_message = if memory_tokens > memory_cap {
        dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap);
        truncate_at_section(&context_message, memory_cap, &count)
    } else {
        context_message
    };
    // Split the conversation at the newest journal entry's timestamp:
    // older messages are represented by journal text, newer stay raw.
    let recent_start = find_journal_cutoff(conversation, all_entries.last());
    dbglog!("[ctx] journal cutoff: {} of {} conversation messages are 'recent'",
        conversation.len() - recent_start, conversation.len());
    let recent = &conversation[recent_start..];
    // Pure budget math — no message assembly happens in plan_context.
    let plan = plan_context(
        &system_prompt,
        &context_message,
        recent,
        &all_entries,
        model,
        &count,
    );
    // Render journal text from the plan
    let journal_text = render_journal_text(&all_entries, &plan);
    dbglog!("[ctx] plan: header_start={} full_start={} entry_count={} conv_trim={} journal_text={} chars",
        plan.header_start, plan.full_start, plan.entry_count, plan.conv_trim, journal_text.len());
    let messages = assemble_context(
        system_prompt, context_message, &journal_text,
        recent, &plan,
    );
    (messages, journal_text)
}
/// Compute the allocation plan: how much budget goes to journal vs
/// conversation, which entries and messages to include.
fn plan_context (
system_prompt : & str ,
context_message : & str ,
recent : & [ Message ] ,
entries : & [ journal ::JournalEntry ] ,
model : & str ,
count : & dyn Fn ( & str ) -> usize ,
) -> ContextPlan {
let max_tokens = context_budget_tokens ( model ) ;
// Fixed costs — always included
let identity_cost = count ( system_prompt ) ;
let memory_cost = count ( context_message ) ;
let reserve = max_tokens / 4 ;
let available = max_tokens
. saturating_sub ( identity_cost )
. saturating_sub ( memory_cost )
. saturating_sub ( reserve ) ;
// Measure conversation
let conv_costs : Vec < usize > = recent . iter ( ) . map ( | m | msg_token_count_fn ( m , count ) ) . collect ( ) ;
let total_conv : usize = conv_costs . iter ( ) . sum ( ) ;
// Journal always gets at least 15% of available budget so it doesn't
// get squeezed out by large conversations.
let journal_min = available * 15 / 100 ;
let journal_budget = available . saturating_sub ( total_conv ) . max ( journal_min ) ;
// Fill journal entries newest-first within budget.
// Tiered: recent entries get full content, older entries get just
// a header line (timestamp + first line) for timeline awareness.
let full_budget = journal_budget * 70 / 100 ;
let header_budget = journal_budget . saturating_sub ( full_budget ) ;
// Phase 1: Full entries (newest first)
let mut full_used = 0 ;
let mut n_full = 0 ;
for entry in entries . iter ( ) . rev ( ) {
let cost = count ( & entry . content ) + 10 ;
if full_used + cost > full_budget {
break ;
}
full_used + = cost ;
n_full + = 1 ;
}
let full_start = entries . len ( ) . saturating_sub ( n_full ) ;
// Phase 2: Header-only entries (continuing backward from where full stopped)
let mut header_used = 0 ;
let mut n_headers = 0 ;
for entry in entries [ .. full_start ] . iter ( ) . rev ( ) {
let first_line = entry
. content
. lines ( )
. find ( | l | ! l . trim ( ) . is_empty ( ) )
. unwrap_or ( " (empty) " ) ;
let cost = count ( first_line ) + 10 ;
if header_used + cost > header_budget {
break ;
}
header_used + = cost ;
n_headers + = 1 ;
}
let header_start = full_start . saturating_sub ( n_headers ) ;
// If conversation exceeds available budget, trim oldest messages
let journal_used = full_used + header_used ;
let mut conv_trim = 0 ;
let mut trimmed_conv = total_conv ;
while trimmed_conv + journal_used > available & & conv_trim < recent . len ( ) {
trimmed_conv - = conv_costs [ conv_trim ] ;
conv_trim + = 1 ;
}
// Walk forward to user message boundary
while conv_trim < recent . len ( ) & & recent [ conv_trim ] . role ! = Role ::User {
conv_trim + = 1 ;
}
dbglog! ( " [plan] model={} max_tokens={} available={} (identity={} memory={} reserve={}) " ,
model , max_tokens , available , identity_cost , memory_cost , reserve ) ;
dbglog! ( " [plan] conv: {} msgs, {} tokens total, trimming {} msgs → {} tokens " ,
recent . len ( ) , total_conv , conv_trim , trimmed_conv ) ;
dbglog! ( " [plan] journal: {} full entries ({}t) + {} headers ({}t) " ,
n_full , full_used , n_headers , header_used ) ;
ContextPlan {
header_start ,
full_start ,
entry_count : entries . len ( ) ,
conv_trim ,
cleanup: fix all build warnings, delete dead DMN context code
- Delete poc-daemon/src/context.rs dead code (git_context, work_state,
irc_digest, recent_commits, uncommitted_files) — replaced by
where-am-i.md and memory graph
- Remove unused imports (BufWriter, Context, similarity)
- Prefix unused variables (_store, _avg_cc, _episodic_ratio, _message)
- #[allow(dead_code)] on public API surface that's not yet wired
(Message::assistant, ConversationLog::message_count/read_all,
Config::context_message, ContextInfo fields)
- Fix to_capnp macro dead_code warning
- Rename _rewrite_store_DISABLED to snake_case
Only remaining warnings are in generated capnp code (can't fix).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 14:20:34 -04:00
_conv_count : recent . len ( ) ,
_full_tokens : full_used ,
_header_tokens : header_used ,
_conv_tokens : trimmed_conv ,
_available : available ,
2026-03-18 22:44:52 -04:00
}
}
/// Render journal entries into text from a context plan.
///
/// Older entries (header_start..full_start) are shown as one-line
/// headers; recent entries (full_start..) get their full content.
/// Returns an empty string when the plan includes no journal at all.
fn render_journal_text(
    entries: &[journal::JournalEntry],
    plan: &ContextPlan,
) -> String {
    if plan.header_start >= plan.entry_count {
        return String::new();
    }
    let mut out = String::from("[Earlier in this conversation — from your journal]\n\n");
    // Header-only entries (older) — just timestamp + first line
    for entry in &entries[plan.header_start..plan.full_start] {
        let summary = entry
            .content
            .lines()
            .find(|line| !line.trim().is_empty())
            .unwrap_or("(empty)");
        out.push_str(&format!(
            "## {} — {}\n",
            entry.timestamp.format("%Y-%m-%dT%H:%M"),
            summary,
        ));
    }
    // Separator between headers and full entries
    let n_headers = plan.full_start - plan.header_start;
    let n_full = plan.entry_count - plan.full_start;
    if n_headers > 0 && n_full > 0 {
        out.push_str("\n---\n\n");
    }
    // Full entries (recent)
    for entry in &entries[plan.full_start..] {
        out.push_str(&format!(
            "## {}\n\n{}\n\n",
            entry.timestamp.format("%Y-%m-%dT%H:%M"),
            entry.content
        ));
    }
    out
}
/// Assemble the context window from a plan. No allocation decisions
/// happen here — just follow the plan to build messages.
///
/// Order: system prompt, memory/context message (if any), journal text
/// or a rebuild notice, then the untrimmed tail of recent conversation.
fn assemble_context(
    system_prompt: String,
    context_message: String,
    journal_text: &str,
    recent: &[Message],
    plan: &ContextPlan,
) -> Vec<Message> {
    let mut window = vec![Message::system(system_prompt)];
    if !context_message.is_empty() {
        window.push(Message::user(context_message));
    }
    let kept = &recent[plan.conv_trim..];
    if !journal_text.is_empty() {
        window.push(Message::user(journal_text.to_string()));
    } else if !kept.is_empty() {
        // No journal to bridge the gap — tell the model why its
        // history looks freshly rebuilt.
        window.push(Message::user(
            "Your context was just rebuilt. Memory files have been \
             reloaded. Your recent conversation continues below. \
             Earlier context is in your journal and memory files."
                .to_string(),
        ));
    }
    window.extend(kept.iter().cloned());
    window
}
/// Find the conversation index where messages are no longer covered
/// Truncate a context message to fit within a token budget. Cuts at
/// section boundaries (lines starting with `---` or `## `) to avoid
/// splitting mid-section. Drops sections from the end first since
/// earlier sections (identity, instructions) matter more.
fn truncate_at_section ( text : & str , max_tokens : usize , count : & dyn Fn ( & str ) -> usize ) -> String {
// Find section boundaries (--- separators between assembled parts)
let mut boundaries = vec! [ 0 usize ] ;
for ( i , line ) in text . lines ( ) . enumerate ( ) {
if line . trim ( ) = = " --- " | | line . starts_with ( " ## " ) {
// Find byte offset of this line
let offset = text . lines ( ) . take ( i ) . map ( | l | l . len ( ) + 1 ) . sum ::< usize > ( ) ;
boundaries . push ( offset ) ;
}
}
boundaries . push ( text . len ( ) ) ;
// Binary search: find the largest prefix of sections that fits
let mut best = 0 ;
for & end in & boundaries [ 1 .. ] {
let slice = & text [ .. end ] ;
if count ( slice ) < = max_tokens {
best = end ;
} else {
break ;
}
}
if best = = 0 {
// Even the first section doesn't fit — hard truncate
best = text . len ( ) . min ( max_tokens * 3 ) ; // ~3 chars/token rough estimate
}
let truncated = & text [ .. best ] ;
dbglog! ( " [ctx] truncated memory from {} to {} chars ({} tokens) " ,
text . len ( ) , truncated . len ( ) , count ( truncated ) ) ;
truncated . to_string ( )
}
/// Find the conversation index where messages are no longer covered
/// by journal entries. Messages before this index are summarized by
/// the journal; messages from this index onward stay as raw conversation.
/// Walks back to a user message boundary to avoid splitting tool
/// call/result sequences.
fn find_journal_cutoff(
    conversation: &[Message],
    newest_entry: Option<&journal::JournalEntry>,
) -> usize {
    // No journal at all → nothing is summarized, keep everything raw.
    let cutoff = match newest_entry {
        Some(entry) => entry.timestamp,
        None => return 0,
    };
    // First message strictly newer than the newest journal entry marks
    // the start of the "recent" region. Messages with missing or
    // unparseable timestamps are skipped, not treated as recent.
    let mut split = conversation.len();
    for (i, msg) in conversation.iter().enumerate() {
        if let Some(ts) = parse_msg_timestamp(msg) {
            if ts > cutoff {
                split = i;
                break;
            }
        }
    }
    // Walk back to user message boundary
    while split > 0 && split < conversation.len() && conversation[split].role != Role::User {
        split -= 1;
    }
    split
}
/// Count the token footprint of a message using a token counting function.
fn msg_token_count_fn ( msg : & Message , count : & dyn Fn ( & str ) -> usize ) -> usize {
let content = msg . content . as_ref ( ) . map_or ( 0 , | c | match c {
MessageContent ::Text ( s ) = > count ( s ) ,
MessageContent ::Parts ( parts ) = > parts
. iter ( )
. map ( | p | match p {
ContentPart ::Text { text } = > count ( text ) ,
ContentPart ::ImageUrl { .. } = > 85 ,
} )
. sum ( ) ,
} ) ;
let tools = msg . tool_calls . as_ref ( ) . map_or ( 0 , | calls | {
calls
. iter ( )
. map ( | c | count ( & c . function . arguments ) + count ( & c . function . name ) )
. sum ( )
} ) ;
content + tools
}
/// Count the token footprint of a message using BPE tokenization.
/// Thin wrapper over `msg_token_count_fn` with the tokenizer's
/// special-token-aware encoder as the counting function.
fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize {
    msg_token_count_fn(msg, &|s| tokenizer.encode_with_special_tokens(s).len())
}
/// Detect context window overflow errors from the API.
/// Different providers phrase this differently; we check for common patterns.
/// OpenRouter wraps upstream errors, so we check both the wrapper and the raw message.
fn is_context_overflow ( err : & anyhow ::Error ) -> bool {
let msg = err . to_string ( ) . to_lowercase ( ) ;
msg . contains ( " context length " )
| | msg . contains ( " token limit " )
| | msg . contains ( " too many tokens " )
| | msg . contains ( " maximum context " )
| | msg . contains ( " prompt is too long " )
| | msg . contains ( " request too large " )
| | msg . contains ( " input validation error " )
| | msg . contains ( " content length limit " )
| | ( msg . contains ( " 400 " ) & & msg . contains ( " tokens " ) )
}
/// Detect model/provider errors delivered inside the SSE stream.
/// OpenRouter returns HTTP 200 but finish_reason="error" with
/// partial content (e.g. "system") — we surface this as an error
/// so the turn loop can retry.
///
/// Matches on the "model stream error" sentinel text — presumably
/// attached where the streaming layer constructs the error; verify
/// against that construction site if the sentinel changes.
fn is_stream_error(err: &anyhow::Error) -> bool {
    err.to_string().contains("model stream error")
}
/// Parse a message's timestamp field into a DateTime.
///
/// Returns None when the message has no timestamp or the stored
/// string is not valid RFC 3339.
fn parse_msg_timestamp(msg: &Message) -> Option<DateTime<Utc>> {
    let raw = msg.timestamp.as_ref()?;
    let parsed = DateTime::parse_from_rfc3339(raw).ok()?;
    Some(parsed.with_timezone(&Utc))
}
/// Create a short summary of tool args for the tools pane header.
fn summarize_args ( tool_name : & str , args : & serde_json ::Value ) -> String {
match tool_name {
" read_file " | " write_file " | " edit_file " = > args [ " file_path " ]
. as_str ( )
. unwrap_or ( " " )
. to_string ( ) ,
" bash " = > {
let cmd = args [ " command " ] . as_str ( ) . unwrap_or ( " " ) ;
if cmd . len ( ) > 60 {
let end = cmd . char_indices ( )
. map ( | ( i , _ ) | i )
. take_while ( | & i | i < = 60 )
. last ( )
. unwrap_or ( 0 ) ;
format! ( " {} ... " , & cmd [ .. end ] )
} else {
cmd . to_string ( )
}
}
" grep " = > {
let pattern = args [ " pattern " ] . as_str ( ) . unwrap_or ( " " ) ;
let path = args [ " path " ] . as_str ( ) . unwrap_or ( " . " ) ;
format! ( " {} in {} " , pattern , path )
}
" glob " = > args [ " pattern " ]
. as_str ( )
. unwrap_or ( " " )
. to_string ( ) ,
" view_image " = > {
if let Some ( pane ) = args [ " pane_id " ] . as_str ( ) {
format! ( " pane {} " , pane )
} else {
args [ " file_path " ] . as_str ( ) . unwrap_or ( " " ) . to_string ( )
}
}
" journal " = > {
let entry = args [ " entry " ] . as_str ( ) . unwrap_or ( " " ) ;
if entry . len ( ) > 60 {
format! ( " {} ... " , & entry [ .. 60 ] )
} else {
entry . to_string ( )
}
}
" yield_to_user " = > args [ " message " ]
. as_str ( )
. unwrap_or ( " " )
. to_string ( ) ,
" switch_model " = > args [ " model " ]
. as_str ( )
. unwrap_or ( " " )
. to_string ( ) ,
" pause " = > String ::new ( ) ,
_ = > String ::new ( ) ,
}
}
/// Parse tool calls leaked as text by models that don't always use the
/// structured function calling API (notably Qwen).
///
/// Handles the XML format:
/// <tool_call>
///   <function=bash>
///     <parameter=command>echo hello</parameter>
///   </function>
/// </tool_call>
///
/// Also handles JSON-in-text format:
/// <tool_call>
///   {"name": "bash", "arguments": {"command": "echo hello"}}
/// </tool_call>
fn parse_leaked_tool_calls(text: &str) -> Vec<ToolCall> {
    const OPEN: &str = "<tool_call>";
    const CLOSE: &str = "</tool_call>";
    // Normalize whitespace inside XML tags: "<\nfunction\n=\nbash\n>" → "<function=bash>"
    // This handles streaming tokenizers that split tags across tokens.
    let normalized = normalize_xml_tags(text);
    let haystack = normalized.as_str();
    let mut calls = Vec::new();
    let mut call_counter: u32 = 0;
    let mut cursor = 0;
    while let Some(rel_open) = haystack[cursor..].find(OPEN) {
        let body_start = cursor + rel_open + OPEN.len();
        let body_end = match haystack[body_start..].find(CLOSE) {
            Some(rel) => body_start + rel,
            None => break, // unterminated block — stop scanning
        };
        let body = haystack[body_start..body_end].trim();
        cursor = body_end + CLOSE.len();
        // Try XML format first, then JSON
        let parsed = parse_xml_tool_call(body, &mut call_counter)
            .or_else(|| parse_json_tool_call(body, &mut call_counter));
        if let Some(call) = parsed {
            calls.push(call);
        }
    }
    calls
}
/// Normalize whitespace inside XML-like tags for streaming tokenizers.
/// Collapses whitespace between `<` and `>` so that `<\nfunction\n=\nbash\n>`
/// becomes `<function=bash>`, and `</\nparameter\n>` becomes `</parameter>`.
/// Leaves content between tags untouched.
fn normalize_xml_tags(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut stream = text.chars();
    while let Some(c) = stream.next() {
        if c != '<' {
            out.push(c);
            continue;
        }
        // Inside a tag: copy non-whitespace chars until the closing '>'.
        // An unclosed '<' consumes the rest of the input, matching the
        // original behavior.
        out.push('<');
        for inner in stream.by_ref() {
            if inner == '>' {
                out.push('>');
                break;
            }
            if !inner.is_whitespace() {
                out.push(inner);
            }
        }
    }
    out
}
/// Parse a Qwen-style `<tag=value>body</tag>` pseudo-XML element.
/// Returns `(value, body, rest)` on success: the attribute value and
/// body are trimmed, `rest` is everything after the closing tag.
fn parse_qwen_tag<'a>(s: &'a str, tag: &str) -> Option<(&'a str, &'a str, &'a str)> {
    let open_marker = format!("<{}=", tag);
    let close_marker = format!("</{}>", tag);
    let value_start = s.find(&open_marker)? + open_marker.len();
    let value_len = s[value_start..].find('>')?;
    let value = s[value_start..value_start + value_len].trim();
    let body_start = value_start + value_len + 1;
    let body_len = s[body_start..].find(&close_marker)?;
    let body = s[body_start..body_start + body_len].trim();
    let rest = &s[body_start + body_len + close_marker.len()..];
    Some((value, body, rest))
}
/// Parse Qwen's XML tool call format.
fn parse_xml_tool_call ( body : & str , counter : & mut u32 ) -> Option < ToolCall > {
let ( func_name , func_body , _ ) = parse_qwen_tag ( body , " function " ) ? ;
let func_name = func_name . to_string ( ) ;
let mut args = serde_json ::Map ::new ( ) ;
let mut rest = func_body ;
while let Some ( ( key , val , remainder ) ) = parse_qwen_tag ( rest , " parameter " ) {
args . insert ( key . to_string ( ) , serde_json ::Value ::String ( val . to_string ( ) ) ) ;
rest = remainder ;
}
* counter + = 1 ;
Some ( ToolCall {
id : format ! ( " leaked_{} " , counter ) ,
call_type : " function " . to_string ( ) ,
function : FunctionCall {
name : func_name ,
arguments : serde_json ::to_string ( & args ) . unwrap_or_default ( ) ,
} ,
} )
}
/// Parse JSON tool call format (some models emit this).
fn parse_json_tool_call ( body : & str , counter : & mut u32 ) -> Option < ToolCall > {
let v : serde_json ::Value = serde_json ::from_str ( body ) . ok ( ) ? ;
let name = v [ " name " ] . as_str ( ) ? ;
let arguments = & v [ " arguments " ] ;
* counter + = 1 ;
Some ( ToolCall {
id : format ! ( " leaked_{} " , counter ) ,
call_type : " function " . to_string ( ) ,
function : FunctionCall {
name : name . to_string ( ) ,
arguments : serde_json ::to_string ( arguments ) . unwrap_or_default ( ) ,
} ,
} )
}
/// Strip tool call XML and thinking tokens from text so the conversation
/// history stays clean. Removes `<tool_call>...</tool_call>` blocks and
/// `</think>` tags (thinking content before them is kept — it's useful context).
fn strip_leaked_artifacts(text: &str) -> String {
    // normalize_xml_tags already returns an owned String; mutate it
    // directly (the old code cloned it redundantly).
    let mut result = normalize_xml_tags(text);
    // Remove <tool_call>...</tool_call> blocks in place. replace_range
    // shifts the tail once per match instead of rebuilding the whole
    // string with format! (which was O(n²) across many blocks).
    while let Some(start) = result.find("<tool_call>") {
        match result[start..].find("</tool_call>") {
            Some(rel_end) => {
                let end = start + rel_end + "</tool_call>".len();
                result.replace_range(start..end, "");
            }
            // Unterminated block — leave it and stop scanning.
            None => break,
        }
    }
    // Remove </think> tags (but keep the thinking text before them)
    result = result.replace("</think>", "");
    result.trim().to_string()
}
#[ cfg(test) ]
mod tests {
use super ::* ;
#[ test ]
fn test_leaked_tool_call_clean ( ) {
let text = " thinking \n </think> \n <tool_call> \n <function=bash> \n <parameter=command>poc-memory used core-personality</parameter> \n </function> \n </tool_call> " ;
let calls = parse_leaked_tool_calls ( text ) ;
assert_eq! ( calls . len ( ) , 1 ) ;
assert_eq! ( calls [ 0 ] . function . name , " bash " ) ;
let args : serde_json ::Value = serde_json ::from_str ( & calls [ 0 ] . function . arguments ) . unwrap ( ) ;
assert_eq! ( args [ " command " ] , " poc-memory used core-personality " ) ;
}
#[ test ]
fn test_leaked_tool_call_streamed_whitespace ( ) {
// Streaming tokenizer splits XML tags across tokens with newlines
let text = " <tool_call> \n < \n function \n = \n bash \n > \n < \n parameter \n = \n command \n >pwd</ \n parameter \n > \n </ \n function \n > \n </tool_call> " ;
let calls = parse_leaked_tool_calls ( text ) ;
assert_eq! ( calls . len ( ) , 1 , " should parse streamed format " ) ;
assert_eq! ( calls [ 0 ] . function . name , " bash " ) ;
let args : serde_json ::Value = serde_json ::from_str ( & calls [ 0 ] . function . arguments ) . unwrap ( ) ;
assert_eq! ( args [ " command " ] , " pwd " ) ;
}
#[ test ]
fn test_normalize_preserves_content ( ) {
let text = " <function=bash> \n <parameter=command>echo hello world</parameter> \n </function> " ;
let normalized = normalize_xml_tags ( text ) ;
// Newlines between tags are not inside tags, so preserved
assert_eq! ( normalized , " <function=bash> \n <parameter=command>echo hello world</parameter> \n </function> " ) ;
}
#[ test ]
fn test_normalize_strips_tag_internal_whitespace ( ) {
let text = " < \n function \n = \n bash \n > " ;
let normalized = normalize_xml_tags ( text ) ;
assert_eq! ( normalized , " <function=bash> " ) ;
}
}