From aceaf0410e0f14cefe9f45424178822f3e4036d1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Apr 2026 15:31:12 -0400 Subject: [PATCH] delete dead flat-file journal code from thought/context.rs Journal entries are loaded from the memory graph store, not from the flat journal file. Remove build_context_window, plan_context, render_journal_text, assemble_context, truncate_at_section, find_journal_cutoff, parse_journal*, ContextPlan, and stale TODOs. Keep JournalEntry, default_journal_path (write path), and the live context management functions. -363 lines. Co-Authored-By: Proof of Concept --- src/agent/runner.rs | 3 - src/thought/context.rs | 445 ++++------------------------------------- src/thought/journal.rs | 3 +- 3 files changed, 44 insertions(+), 407 deletions(-) diff --git a/src/agent/runner.rs b/src/agent/runner.rs index 42a7cf6..dca6e22 100644 --- a/src/agent/runner.rs +++ b/src/agent/runner.rs @@ -982,9 +982,6 @@ impl Agent { &self.client.model, &self.tokenizer, ); - // Don't overwrite journal — it was loaded from the memory graph - // in load_startup_journal. The old build_context_window reads - // from a stale flat file. TODO: remove build_context_window. self.context.entries = messages.into_iter() .map(ConversationEntry::Message).collect(); self.last_prompt_tokens = 0; diff --git a/src/thought/context.rs b/src/thought/context.rs index f2266ec..0dd5dc2 100644 --- a/src/thought/context.rs +++ b/src/thought/context.rs @@ -1,14 +1,12 @@ -// context.rs — Context window building and management +// context.rs — Context window management // -// Pure functions for building the agent's context window from journal -// entries and conversation messages. No mutable state — all functions -// take inputs and return new values. State mutation happens in agent.rs. +// Token counting, conversation trimming, and error classification. +// Journal entries are loaded from the memory graph store, not from +// a flat file — the parse functions are gone. -// TODO: move Message, ContextState, etc. to thought layer use crate::agent::types::*; -use chrono::{DateTime, NaiveDateTime, Utc}; +use chrono::{DateTime, Utc}; use tiktoken_rs::CoreBPE; -use std::path::Path; /// A single journal entry with its timestamp and content. #[derive(Debug, Clone)] @@ -17,65 +15,7 @@ pub struct JournalEntry { pub content: String, } -/// Parse journal entries from the journal file. Returns entries sorted -/// by timestamp (oldest first). Entries with unparseable timestamps -/// are skipped. -pub fn parse_journal(path: &Path) -> Vec { - let text = match std::fs::read_to_string(path) { - Ok(t) => t, - Err(_) => return Vec::new(), - }; - parse_journal_text(&text) -} - -/// Parse journal entries from text. -pub fn parse_journal_text(text: &str) -> Vec { - let mut entries = Vec::new(); - let mut current_timestamp: Option> = None; - let mut current_content = String::new(); - - for line in text.lines() { - if let Some(ts) = parse_header_timestamp(line) { - if let Some(prev_ts) = current_timestamp.take() { - let content = current_content.trim().to_string(); - if !content.is_empty() { - entries.push(JournalEntry { timestamp: prev_ts, content }); - } - } - current_timestamp = Some(ts); - current_content.clear(); - } else if current_timestamp.is_some() { - current_content.push_str(line); - current_content.push('\n'); - } - } - - if let Some(ts) = current_timestamp { - let content = current_content.trim().to_string(); - if !content.is_empty() { - entries.push(JournalEntry { timestamp: ts, content }); - } - } - - entries -} - -/// Try to parse a line as a journal header (## TIMESTAMP [— title]). -fn parse_header_timestamp(line: &str) -> Option> { - let line = line.trim(); - if !line.starts_with("## ") { return None; } - let rest = line[3..].trim(); - if !rest.starts_with(|c: char| c.is_ascii_digit()) { return None; } - let ts_str = rest.split_once(' ').map_or(rest, |(ts, _)| ts); - for fmt in ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M"] { - if let Ok(naive) = NaiveDateTime::parse_from_str(ts_str, fmt) { - return Some(naive.and_utc()); - } - } - None -} - -/// Default journal file path. +/// Default journal file path (used by the write path only). pub fn default_journal_path() -> std::path::PathBuf { dirs::home_dir() .unwrap_or_default() @@ -92,337 +32,6 @@ fn context_budget_tokens(model: &str) -> usize { model_context_window(model) * 60 / 100 } -/// Allocation plan for the context window. -pub struct ContextPlan { - header_start: usize, - full_start: usize, - entry_count: usize, - conv_trim: usize, - _conv_count: usize, - _full_tokens: usize, - _header_tokens: usize, - _conv_tokens: usize, - _available: usize, -} - -/// Build a context window from conversation messages + journal entries. -/// -/// Allocation strategy: identity and memory are fixed costs. The -/// remaining budget (minus 25% reserve for model output) is split -/// between journal and conversation. Conversation gets priority — -/// it's what's happening now. Journal fills the rest, newest first. -/// -/// Returns (messages, journal_text) — caller stores journal_text in ContextState. -pub fn build_context_window( - context: &ContextState, - conversation: &[Message], - model: &str, - tokenizer: &CoreBPE, -) -> (Vec, String) { - let journal_path = default_journal_path(); - let all_entries = parse_journal(&journal_path); - dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display()); - let count = |s: &str| tokenizer.encode_with_special_tokens(s).len(); - - let system_prompt = context.system_prompt.clone(); - let context_message = context.render_context_message(); - - // Cap memory to 50% of the context budget so conversation always - // gets space. Truncate at the last complete section boundary. - let max_tokens = context_budget_tokens(model); - let memory_cap = max_tokens / 2; - let memory_tokens = count(&context_message); - let context_message = if memory_tokens > memory_cap { - dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap); - truncate_at_section(&context_message, memory_cap, &count) - } else { - context_message - }; - - let recent_start = find_journal_cutoff(conversation, all_entries.last()); - dbglog!("[ctx] journal cutoff: {} of {} conversation messages are 'recent'", - conversation.len() - recent_start, conversation.len()); - let recent = &conversation[recent_start..]; - - let plan = plan_context( - &system_prompt, - &context_message, - recent, - &all_entries, - model, - &count, - ); - - let journal_text = render_journal_text(&all_entries, &plan); - dbglog!("[ctx] plan: header_start={} full_start={} entry_count={} conv_trim={} journal_text={} chars", - plan.header_start, plan.full_start, plan.entry_count, plan.conv_trim, journal_text.len()); - - let messages = assemble_context( - system_prompt, context_message, &journal_text, - recent, &plan, - ); - (messages, journal_text) -} - -pub fn plan_context( - system_prompt: &str, - context_message: &str, - recent: &[Message], - entries: &[JournalEntry], - model: &str, - count: &dyn Fn(&str) -> usize, -) -> ContextPlan { - let max_tokens = context_budget_tokens(model); - - let identity_cost = count(system_prompt); - let memory_cost = count(context_message); - let reserve = max_tokens / 4; - let available = max_tokens - .saturating_sub(identity_cost) - .saturating_sub(memory_cost) - .saturating_sub(reserve); - - let conv_costs: Vec = recent.iter().map(|m| msg_token_count_fn(m, count)).collect(); - let total_conv: usize = conv_costs.iter().sum(); - - let journal_min = available * 15 / 100; - let journal_budget = available.saturating_sub(total_conv).max(journal_min); - - let full_budget = journal_budget * 70 / 100; - let header_budget = journal_budget.saturating_sub(full_budget); - - // Phase 1: Full entries (newest first) - let mut full_used = 0; - let mut n_full = 0; - for entry in entries.iter().rev() { - let cost = count(&entry.content) + 10; - if full_used + cost > full_budget { - break; - } - full_used += cost; - n_full += 1; - } - let full_start = entries.len().saturating_sub(n_full); - - // Phase 2: Header-only entries (continuing backward) - let mut header_used = 0; - let mut n_headers = 0; - for entry in entries[..full_start].iter().rev() { - let first_line = entry - .content - .lines() - .find(|l| !l.trim().is_empty()) - .unwrap_or("(empty)"); - let cost = count(first_line) + 10; - if header_used + cost > header_budget { - break; - } - header_used += cost; - n_headers += 1; - } - let header_start = full_start.saturating_sub(n_headers); - - // Trim oldest conversation if it exceeds budget - let journal_used = full_used + header_used; - let mut conv_trim = 0; - let mut trimmed_conv = total_conv; - while trimmed_conv + journal_used > available && conv_trim < recent.len() { - trimmed_conv -= conv_costs[conv_trim]; - conv_trim += 1; - } - // Walk forward to user message boundary - while conv_trim < recent.len() && recent[conv_trim].role != Role::User { - conv_trim += 1; - } - - dbglog!("[plan] model={} max_tokens={} available={} (identity={} memory={} reserve={})", - model, max_tokens, available, identity_cost, memory_cost, reserve); - dbglog!("[plan] conv: {} msgs, {} tokens total, trimming {} msgs → {} tokens", - recent.len(), total_conv, conv_trim, trimmed_conv); - dbglog!("[plan] journal: {} full entries ({}t) + {} headers ({}t)", - n_full, full_used, n_headers, header_used); - - ContextPlan { - header_start, - full_start, - entry_count: entries.len(), - conv_trim, - _conv_count: recent.len(), - _full_tokens: full_used, - _header_tokens: header_used, - _conv_tokens: trimmed_conv, - _available: available, - } -} - -pub fn render_journal_text( - entries: &[JournalEntry], - plan: &ContextPlan, -) -> String { - let has_journal = plan.header_start < plan.entry_count; - if !has_journal { - return String::new(); - } - - let mut text = String::from("[Earlier in this conversation — from your journal]\n\n"); - - for entry in &entries[plan.header_start..plan.full_start] { - let first_line = entry - .content - .lines() - .find(|l| !l.trim().is_empty()) - .unwrap_or("(empty)"); - text.push_str(&format!( - "## {} — {}\n", - entry.timestamp.format("%Y-%m-%dT%H:%M"), - first_line, - )); - } - - let n_headers = plan.full_start - plan.header_start; - let n_full = plan.entry_count - plan.full_start; - if n_headers > 0 && n_full > 0 { - text.push_str("\n---\n\n"); - } - - for entry in &entries[plan.full_start..] { - text.push_str(&format!( - "## {}\n\n{}\n\n", - entry.timestamp.format("%Y-%m-%dT%H:%M"), - entry.content - )); - } - - text -} - -fn assemble_context( - system_prompt: String, - context_message: String, - journal_text: &str, - recent: &[Message], - plan: &ContextPlan, -) -> Vec { - let mut messages = vec![Message::system(system_prompt)]; - if !context_message.is_empty() { - messages.push(Message::user(context_message)); - } - - let final_recent = &recent[plan.conv_trim..]; - - if !journal_text.is_empty() { - messages.push(Message::user(journal_text.to_string())); - } else if !final_recent.is_empty() { - messages.push(Message::user( - "Your context was just rebuilt. Memory files have been \ - reloaded. Your recent conversation continues below. \ - Earlier context is in your journal and memory files." - .to_string(), - )); - } - - messages.extend(final_recent.iter().cloned()); - messages -} - -fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> usize) -> String { - let mut boundaries = vec![0usize]; - for (i, line) in text.lines().enumerate() { - if line.trim() == "---" || line.starts_with("## ") { - let offset = text.lines().take(i).map(|l| l.len() + 1).sum::(); - boundaries.push(offset); - } - } - boundaries.push(text.len()); - - let mut best = 0; - for &end in &boundaries[1..] { - let slice = &text[..end]; - if count(slice) <= max_tokens { - best = end; - } else { - break; - } - } - - if best == 0 { - best = text.len().min(max_tokens * 3); - } - - let truncated = &text[..best]; - dbglog!("[ctx] truncated memory from {} to {} chars ({} tokens)", - text.len(), truncated.len(), count(truncated)); - truncated.to_string() -} - -fn find_journal_cutoff( - conversation: &[Message], - newest_entry: Option<&JournalEntry>, -) -> usize { - let cutoff = match newest_entry { - Some(entry) => entry.timestamp, - None => return 0, - }; - - let mut split = conversation.len(); - for (i, msg) in conversation.iter().enumerate() { - if let Some(ts) = parse_msg_timestamp(msg) { - if ts > cutoff { - split = i; - break; - } - } - } - while split > 0 && split < conversation.len() && conversation[split].role != Role::User { - split -= 1; - } - split -} - -fn msg_token_count_fn(msg: &Message, count: &dyn Fn(&str) -> usize) -> usize { - let content = msg.content.as_ref().map_or(0, |c| match c { - MessageContent::Text(s) => count(s), - MessageContent::Parts(parts) => parts - .iter() - .map(|p| match p { - ContentPart::Text { text } => count(text), - ContentPart::ImageUrl { .. } => 85, - }) - .sum(), - }); - let tools = msg.tool_calls.as_ref().map_or(0, |calls| { - calls - .iter() - .map(|c| count(&c.function.arguments) + count(&c.function.name)) - .sum() - }); - content + tools -} - -/// Count the token footprint of a message using BPE tokenization. -pub fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize { - msg_token_count_fn(msg, &|s| tokenizer.encode_with_special_tokens(s).len()) -} - -/// Detect context window overflow errors from the API. -pub fn is_context_overflow(err: &anyhow::Error) -> bool { - let msg = err.to_string().to_lowercase(); - msg.contains("context length") - || msg.contains("token limit") - || msg.contains("too many tokens") - || msg.contains("maximum context") - || msg.contains("prompt is too long") - || msg.contains("request too large") - || msg.contains("input validation error") - || msg.contains("content length limit") - || (msg.contains("400") && msg.contains("tokens")) -} - -/// Detect model/provider errors delivered inside the SSE stream. -pub fn is_stream_error(err: &anyhow::Error) -> bool { - err.to_string().contains("model stream error") -} - /// Trim conversation to fit within the context budget. /// Returns the trimmed conversation messages (oldest dropped first). pub fn trim_conversation( @@ -462,9 +71,41 @@ pub fn trim_conversation( conversation[skip..].to_vec() } -fn parse_msg_timestamp(msg: &Message) -> Option> { - msg.timestamp - .as_ref() - .and_then(|ts| DateTime::parse_from_rfc3339(ts).ok()) - .map(|dt| dt.with_timezone(&Utc)) +/// Count the token footprint of a message using BPE tokenization. +pub fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize { + let count = |s: &str| tokenizer.encode_with_special_tokens(s).len(); + let content = msg.content.as_ref().map_or(0, |c| match c { + MessageContent::Text(s) => count(s), + MessageContent::Parts(parts) => parts.iter() + .map(|p| match p { + ContentPart::Text { text } => count(text), + ContentPart::ImageUrl { .. } => 85, + }) + .sum(), + }); + let tools = msg.tool_calls.as_ref().map_or(0, |calls| { + calls.iter() + .map(|c| count(&c.function.arguments) + count(&c.function.name)) + .sum() + }); + content + tools +} + +/// Detect context window overflow errors from the API. +pub fn is_context_overflow(err: &anyhow::Error) -> bool { + let msg = err.to_string().to_lowercase(); + msg.contains("context length") + || msg.contains("token limit") + || msg.contains("too many tokens") + || msg.contains("maximum context") + || msg.contains("prompt is too long") + || msg.contains("request too large") + || msg.contains("input validation error") + || msg.contains("content length limit") + || (msg.contains("400") && msg.contains("tokens")) +} + +/// Detect model/provider errors delivered inside the SSE stream. +pub fn is_stream_error(err: &anyhow::Error) -> bool { + err.to_string().contains("model stream error") } diff --git a/src/thought/journal.rs b/src/thought/journal.rs index b97a277..306b286 100644 --- a/src/thought/journal.rs +++ b/src/thought/journal.rs @@ -1,8 +1,7 @@ // tools/journal.rs — Native journal tool // // Appends entries directly to the journal file without spawning a -// shell. The entry is persisted to disk immediately; -// build_context_window() picks it up on the next compaction. +// shell. The entry is persisted to disk immediately. // // This tool is "ephemeral" — after the API processes the tool call // and result, the agent strips them from the conversation history.