diff --git a/src/agent/context.rs b/src/agent/context.rs index bee049d..8dcc6a3 100644 --- a/src/agent/context.rs +++ b/src/agent/context.rs @@ -1,12 +1,8 @@ -// context.rs — Context window building and management +// context.rs — Context window management // -// Pure functions for building the agent's context window from journal -// entries and conversation messages. No mutable state — all functions -// take inputs and return new values. State mutation happens in agent.rs. +// Token counting and conversation trimming for the context window. -use crate::agent::journal; use crate::agent::types::*; -use chrono::{DateTime, Utc}; use tiktoken_rs::CoreBPE; /// Look up a model's context window size in tokens. @@ -26,27 +22,6 @@ fn context_budget_tokens(model: &str) -> usize { model_context_window(model) * 60 / 100 } -/// Allocation plan for the context window. -pub struct ContextPlan { - header_start: usize, - full_start: usize, - entry_count: usize, - conv_trim: usize, - _conv_count: usize, - _full_tokens: usize, - _header_tokens: usize, - _conv_tokens: usize, - _available: usize, -} - -/// Build a context window from conversation messages + journal entries. -/// -/// Allocation strategy: identity and memory are fixed costs. The -/// remaining budget (minus 25% reserve for model output) is split -/// between journal and conversation. Conversation gets priority — -/// it's what's happening now. Journal fills the rest, newest first. -/// -/// Returns (messages, journal_text) — caller stores journal_text in ContextState. /// Trim conversation to fit within the context budget. /// Returns the trimmed conversation messages (oldest dropped first). pub fn trim_conversation( @@ -67,7 +42,6 @@ pub fn trim_conversation( .saturating_sub(journal_cost) .saturating_sub(reserve); - // Trim oldest messages until we fit let msg_costs: Vec = conversation.iter() .map(|m| msg_token_count(tokenizer, m)).collect(); let total: usize = msg_costs.iter().sum(); @@ -87,246 +61,26 @@ pub fn trim_conversation( conversation[skip..].to_vec() } -pub fn plan_context( - system_prompt: &str, - context_message: &str, - recent: &[Message], - entries: &[journal::JournalEntry], - model: &str, - count: &dyn Fn(&str) -> usize, -) -> ContextPlan { - let max_tokens = context_budget_tokens(model); - - let identity_cost = count(system_prompt); - let memory_cost = count(context_message); - let reserve = max_tokens / 4; - let available = max_tokens - .saturating_sub(identity_cost) - .saturating_sub(memory_cost) - .saturating_sub(reserve); - - let conv_costs: Vec = recent.iter().map(|m| msg_token_count_fn(m, count)).collect(); - let total_conv: usize = conv_costs.iter().sum(); - - let journal_min = available * 15 / 100; - let journal_budget = available.saturating_sub(total_conv).max(journal_min); - - let full_budget = journal_budget * 70 / 100; - let header_budget = journal_budget.saturating_sub(full_budget); - - // Phase 1: Full entries (newest first) - let mut full_used = 0; - let mut n_full = 0; - for entry in entries.iter().rev() { - let cost = count(&entry.content) + 10; - if full_used + cost > full_budget { - break; - } - full_used += cost; - n_full += 1; - } - let full_start = entries.len().saturating_sub(n_full); - - // Phase 2: Header-only entries (continuing backward) - let mut header_used = 0; - let mut n_headers = 0; - for entry in entries[..full_start].iter().rev() { - let first_line = entry - .content - .lines() - .find(|l| !l.trim().is_empty()) - .unwrap_or("(empty)"); - let cost = count(first_line) + 10; - if header_used + cost > header_budget { - break; - } - header_used += cost; - n_headers += 1; - } - let header_start = full_start.saturating_sub(n_headers); - - // Trim oldest conversation if it exceeds budget - let journal_used = full_used + header_used; - let mut conv_trim = 0; - let mut trimmed_conv = total_conv; - while trimmed_conv + journal_used > available && conv_trim < recent.len() { - trimmed_conv -= conv_costs[conv_trim]; - conv_trim += 1; - } - // Walk forward to user message boundary - while conv_trim < recent.len() && recent[conv_trim].role != Role::User { - conv_trim += 1; - } - - dbglog!("[plan] model={} max_tokens={} available={} (identity={} memory={} reserve={})", - model, max_tokens, available, identity_cost, memory_cost, reserve); - dbglog!("[plan] conv: {} msgs, {} tokens total, trimming {} msgs → {} tokens", - recent.len(), total_conv, conv_trim, trimmed_conv); - dbglog!("[plan] journal: {} full entries ({}t) + {} headers ({}t)", - n_full, full_used, n_headers, header_used); - - ContextPlan { - header_start, - full_start, - entry_count: entries.len(), - conv_trim, - _conv_count: recent.len(), - _full_tokens: full_used, - _header_tokens: header_used, - _conv_tokens: trimmed_conv, - _available: available, - } -} - -pub fn render_journal_text( - entries: &[journal::JournalEntry], - plan: &ContextPlan, -) -> String { - let has_journal = plan.header_start < plan.entry_count; - if !has_journal { - return String::new(); - } - - let mut text = String::from("[Earlier in this conversation — from your journal]\n\n"); - - for entry in &entries[plan.header_start..plan.full_start] { - let first_line = entry - .content - .lines() - .find(|l| !l.trim().is_empty()) - .unwrap_or("(empty)"); - text.push_str(&format!( - "## {} — {}\n", - entry.timestamp.format("%Y-%m-%dT%H:%M"), - first_line, - )); - } - - let n_headers = plan.full_start - plan.header_start; - let n_full = plan.entry_count - plan.full_start; - if n_headers > 0 && n_full > 0 { - text.push_str("\n---\n\n"); - } - - for entry in &entries[plan.full_start..] { - text.push_str(&format!( - "## {}\n\n{}\n\n", - entry.timestamp.format("%Y-%m-%dT%H:%M"), - entry.content - )); - } - - text -} - -fn assemble_context( - system_prompt: String, - context_message: String, - journal_text: &str, - recent: &[Message], - plan: &ContextPlan, -) -> Vec { - let mut messages = vec![Message::system(system_prompt)]; - if !context_message.is_empty() { - messages.push(Message::user(context_message)); - } - - let final_recent = &recent[plan.conv_trim..]; - - if !journal_text.is_empty() { - messages.push(Message::user(journal_text.to_string())); - } else if !final_recent.is_empty() { - messages.push(Message::user( - "Your context was just rebuilt. Memory files have been \ - reloaded. Your recent conversation continues below. \ - Earlier context is in your journal and memory files." - .to_string(), - )); - } - - messages.extend(final_recent.iter().cloned()); - messages -} - -fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> usize) -> String { - let mut boundaries = vec![0usize]; - for (i, line) in text.lines().enumerate() { - if line.trim() == "---" || line.starts_with("## ") { - let offset = text.lines().take(i).map(|l| l.len() + 1).sum::(); - boundaries.push(offset); - } - } - boundaries.push(text.len()); - - let mut best = 0; - for &end in &boundaries[1..] { - let slice = &text[..end]; - if count(slice) <= max_tokens { - best = end; - } else { - break; - } - } - - if best == 0 { - best = text.len().min(max_tokens * 3); - } - - let truncated = &text[..best]; - dbglog!("[ctx] truncated memory from {} to {} chars ({} tokens)", - text.len(), truncated.len(), count(truncated)); - truncated.to_string() -} - -fn find_journal_cutoff( - conversation: &[Message], - newest_entry: Option<&journal::JournalEntry>, -) -> usize { - let cutoff = match newest_entry { - Some(entry) => entry.timestamp, - None => return 0, - }; - - let mut split = conversation.len(); - for (i, msg) in conversation.iter().enumerate() { - if let Some(ts) = parse_msg_timestamp(msg) { - if ts > cutoff { - split = i; - break; - } - } - } - while split > 0 && split < conversation.len() && conversation[split].role != Role::User { - split -= 1; - } - split -} - -fn msg_token_count_fn(msg: &Message, count: &dyn Fn(&str) -> usize) -> usize { +/// Count the token footprint of a message using BPE tokenization. +pub fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize { let content = msg.content.as_ref().map_or(0, |c| match c { - MessageContent::Text(s) => count(s), - MessageContent::Parts(parts) => parts - .iter() + MessageContent::Text(s) => tokenizer.encode_with_special_tokens(s).len(), + MessageContent::Parts(parts) => parts.iter() .map(|p| match p { - ContentPart::Text { text } => count(text), + ContentPart::Text { text } => tokenizer.encode_with_special_tokens(text).len(), ContentPart::ImageUrl { .. } => 85, }) .sum(), }); let tools = msg.tool_calls.as_ref().map_or(0, |calls| { - calls - .iter() - .map(|c| count(&c.function.arguments) + count(&c.function.name)) + calls.iter() + .map(|c| tokenizer.encode_with_special_tokens(&c.function.arguments).len() + + tokenizer.encode_with_special_tokens(&c.function.name).len()) .sum() }); content + tools } -/// Count the token footprint of a message using BPE tokenization. -pub fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize { - msg_token_count_fn(msg, &|s| tokenizer.encode_with_special_tokens(s).len()) -} - /// Detect context window overflow errors from the API. pub fn is_context_overflow(err: &anyhow::Error) -> bool { let msg = err.to_string().to_lowercase(); @@ -345,10 +99,3 @@ pub fn is_context_overflow(err: &anyhow::Error) -> bool { pub fn is_stream_error(err: &anyhow::Error) -> bool { err.to_string().contains("model stream error") } - -fn parse_msg_timestamp(msg: &Message) -> Option> { - msg.timestamp - .as_ref() - .and_then(|ts| DateTime::parse_from_rfc3339(ts).ok()) - .map(|dt| dt.with_timezone(&Utc)) -} diff --git a/src/agent/tui.rs b/src/agent/tui.rs index 7483e96..ceb0937 100644 --- a/src/agent/tui.rs +++ b/src/agent/tui.rs @@ -1044,7 +1044,7 @@ impl App { let mut lines: Vec = Vec::new(); let section = Style::default().fg(Color::Yellow); - let dim = Style::default().fg(Color::DarkGray); + let _dim = Style::default().fg(Color::DarkGray); let hint = Style::default().fg(Color::DarkGray).add_modifier(Modifier::ITALIC); lines.push(Line::raw("")); @@ -1150,34 +1150,6 @@ impl App { frame.render_widget(para, size); } - fn most_recent_file(dir: &std::path::Path) -> Option<(String, String)> { - let entries = std::fs::read_dir(dir).ok()?; - let mut latest: Option<(String, std::time::SystemTime)> = None; - for entry in entries.flatten() { - let name = entry.file_name().to_string_lossy().to_string(); - if name.starts_with("pid-") || name.starts_with("transcript-offset") { continue; } - if let Ok(meta) = entry.metadata() { - if let Ok(modified) = meta.modified() { - if latest.as_ref().map_or(true, |(_, t)| modified > *t) { - latest = Some((name, modified)); - } - } - } - } - latest.map(|(name, time)| { - let ago = time.elapsed().map(|d| Self::format_duration(d)) - .unwrap_or_else(|_| "?".into()); - (name, ago) - }) - } - - fn format_duration(d: std::time::Duration) -> String { - let secs = d.as_secs(); - if secs < 60 { format!("{}s ago", secs) } - else if secs < 3600 { format!("{}m ago", secs / 60) } - else { format!("{}h ago", secs / 3600) } - } - fn draw_debug(&self, frame: &mut Frame, size: Rect) { let mut lines: Vec = Vec::new(); let section = Style::default().fg(Color::Yellow); diff --git a/src/subconscious/hook.rs b/src/subconscious/hook.rs index b9a2cee..2d8844d 100644 --- a/src/subconscious/hook.rs +++ b/src/subconscious/hook.rs @@ -6,7 +6,6 @@ use std::collections::HashSet; use std::fs; -use std::fs::File; use std::io::Write; use std::path::Path; use std::process::Command;