delete dead flat-file journal code from thought/context.rs
Journal entries are loaded from the memory graph store, not from the flat journal file. Remove build_context_window, plan_context, render_journal_text, assemble_context, truncate_at_section, find_journal_cutoff, parse_journal*, ContextPlan, and stale TODOs. Keep JournalEntry, default_journal_path (write path), and the live context management functions. -363 lines. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
214806cb90
commit
aceaf0410e
3 changed files with 44 additions and 407 deletions
|
|
@ -982,9 +982,6 @@ impl Agent {
|
|||
&self.client.model,
|
||||
&self.tokenizer,
|
||||
);
|
||||
// Don't overwrite journal — it was loaded from the memory graph
|
||||
// in load_startup_journal. The old build_context_window reads
|
||||
// from a stale flat file. TODO: remove build_context_window.
|
||||
self.context.entries = messages.into_iter()
|
||||
.map(ConversationEntry::Message).collect();
|
||||
self.last_prompt_tokens = 0;
|
||||
|
|
|
|||
|
|
@ -1,14 +1,12 @@
|
|||
// context.rs — Context window building and management
|
||||
// context.rs — Context window management
|
||||
//
|
||||
// Pure functions for building the agent's context window from journal
|
||||
// entries and conversation messages. No mutable state — all functions
|
||||
// take inputs and return new values. State mutation happens in agent.rs.
|
||||
// Token counting, conversation trimming, and error classification.
|
||||
// Journal entries are loaded from the memory graph store, not from
|
||||
// a flat file — the parse functions are gone.
|
||||
|
||||
// TODO: move Message, ContextState, etc. to thought layer
|
||||
use crate::agent::types::*;
|
||||
use chrono::{DateTime, NaiveDateTime, Utc};
|
||||
use chrono::{DateTime, Utc};
|
||||
use tiktoken_rs::CoreBPE;
|
||||
use std::path::Path;
|
||||
|
||||
/// A single journal entry with its timestamp and content.
|
||||
#[derive(Debug, Clone)]
|
||||
|
|
@ -17,65 +15,7 @@ pub struct JournalEntry {
|
|||
pub content: String,
|
||||
}
|
||||
|
||||
/// Parse journal entries from the journal file. Returns entries sorted
|
||||
/// by timestamp (oldest first). Entries with unparseable timestamps
|
||||
/// are skipped.
|
||||
pub fn parse_journal(path: &Path) -> Vec<JournalEntry> {
|
||||
let text = match std::fs::read_to_string(path) {
|
||||
Ok(t) => t,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
parse_journal_text(&text)
|
||||
}
|
||||
|
||||
/// Parse journal entries from text.
|
||||
pub fn parse_journal_text(text: &str) -> Vec<JournalEntry> {
|
||||
let mut entries = Vec::new();
|
||||
let mut current_timestamp: Option<DateTime<Utc>> = None;
|
||||
let mut current_content = String::new();
|
||||
|
||||
for line in text.lines() {
|
||||
if let Some(ts) = parse_header_timestamp(line) {
|
||||
if let Some(prev_ts) = current_timestamp.take() {
|
||||
let content = current_content.trim().to_string();
|
||||
if !content.is_empty() {
|
||||
entries.push(JournalEntry { timestamp: prev_ts, content });
|
||||
}
|
||||
}
|
||||
current_timestamp = Some(ts);
|
||||
current_content.clear();
|
||||
} else if current_timestamp.is_some() {
|
||||
current_content.push_str(line);
|
||||
current_content.push('\n');
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ts) = current_timestamp {
|
||||
let content = current_content.trim().to_string();
|
||||
if !content.is_empty() {
|
||||
entries.push(JournalEntry { timestamp: ts, content });
|
||||
}
|
||||
}
|
||||
|
||||
entries
|
||||
}
|
||||
|
||||
/// Try to parse a line as a journal header (## TIMESTAMP [— title]).
|
||||
fn parse_header_timestamp(line: &str) -> Option<DateTime<Utc>> {
|
||||
let line = line.trim();
|
||||
if !line.starts_with("## ") { return None; }
|
||||
let rest = line[3..].trim();
|
||||
if !rest.starts_with(|c: char| c.is_ascii_digit()) { return None; }
|
||||
let ts_str = rest.split_once(' ').map_or(rest, |(ts, _)| ts);
|
||||
for fmt in ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M"] {
|
||||
if let Ok(naive) = NaiveDateTime::parse_from_str(ts_str, fmt) {
|
||||
return Some(naive.and_utc());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Default journal file path.
|
||||
/// Default journal file path (used by the write path only).
|
||||
pub fn default_journal_path() -> std::path::PathBuf {
|
||||
dirs::home_dir()
|
||||
.unwrap_or_default()
|
||||
|
|
@ -92,337 +32,6 @@ fn context_budget_tokens(model: &str) -> usize {
|
|||
model_context_window(model) * 60 / 100
|
||||
}
|
||||
|
||||
/// Allocation plan for the context window.
///
/// The index fields partition the journal entry list into three regions:
/// `[0, header_start)` is omitted entirely, `[header_start, full_start)`
/// is rendered as one-line headers, and `[full_start, entry_count)` is
/// rendered with full content. Underscore-prefixed fields are planning
/// bookkeeping kept for debug logging; render/assemble do not read them.
pub struct ContextPlan {
    // Index of the first journal entry that appears at all (header-only).
    header_start: usize,
    // Index of the first journal entry rendered with its full content.
    full_start: usize,
    // Total number of journal entries the plan was computed over.
    entry_count: usize,
    // Number of oldest messages to drop from the "recent" conversation slice.
    conv_trim: usize,
    // Diagnostics only — token/count tallies from planning.
    _conv_count: usize,
    _full_tokens: usize,
    _header_tokens: usize,
    _conv_tokens: usize,
    _available: usize,
}
|
||||
|
||||
/// Build a context window from conversation messages + journal entries.
///
/// Allocation strategy: identity and memory are fixed costs. The
/// remaining budget (minus 25% reserve for model output) is split
/// between journal and conversation. Conversation gets priority —
/// it's what's happening now. Journal fills the rest, newest first.
///
/// Returns (messages, journal_text) — caller stores journal_text in ContextState.
///
/// NOTE(review): this reads the flat journal file via `parse_journal`;
/// per the commit message, journal entries now come from the memory
/// graph store, making this the dead read path slated for removal.
pub fn build_context_window(
    context: &ContextState,
    conversation: &[Message],
    model: &str,
    tokenizer: &CoreBPE,
) -> (Vec<Message>, String) {
    // Read every journal entry from the flat file (oldest first).
    let journal_path = default_journal_path();
    let all_entries = parse_journal(&journal_path);
    dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
    // Token counter shared by all sizing decisions below.
    let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();

    let system_prompt = context.system_prompt.clone();
    let context_message = context.render_context_message();

    // Cap memory to 50% of the context budget so conversation always
    // gets space. Truncate at the last complete section boundary.
    let max_tokens = context_budget_tokens(model);
    let memory_cap = max_tokens / 2;
    let memory_tokens = count(&context_message);
    let context_message = if memory_tokens > memory_cap {
        dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap);
        truncate_at_section(&context_message, memory_cap, &count)
    } else {
        context_message
    };

    // Messages newer than the newest journal entry are "recent" and kept
    // verbatim; older ones are assumed to be summarized in the journal.
    let recent_start = find_journal_cutoff(conversation, all_entries.last());
    dbglog!("[ctx] journal cutoff: {} of {} conversation messages are 'recent'",
        conversation.len() - recent_start, conversation.len());
    let recent = &conversation[recent_start..];

    // Decide how many journal entries (full vs. header-only) fit, and
    // how much of the recent conversation must be trimmed.
    let plan = plan_context(
        &system_prompt,
        &context_message,
        recent,
        &all_entries,
        model,
        &count,
    );

    let journal_text = render_journal_text(&all_entries, &plan);
    dbglog!("[ctx] plan: header_start={} full_start={} entry_count={} conv_trim={} journal_text={} chars",
        plan.header_start, plan.full_start, plan.entry_count, plan.conv_trim, journal_text.len());

    // Stitch system prompt + memory + journal + trimmed conversation.
    let messages = assemble_context(
        system_prompt, context_message, &journal_text,
        recent, &plan,
    );
    (messages, journal_text)
}
|
||||
|
||||
/// Compute the context-window allocation plan.
///
/// Budget flow: start from the model's context budget, subtract the
/// fixed costs (system prompt, memory message) and a 25% output reserve;
/// what remains is `available`. The journal gets whatever conversation
/// does not use, but never less than 15% of `available`. Within the
/// journal budget, 70% goes to full entries (newest first) and the rest
/// to one-line headers continuing backward. Finally, the oldest
/// conversation messages are trimmed until everything fits, snapped
/// forward to a user-message boundary.
pub fn plan_context(
    system_prompt: &str,
    context_message: &str,
    recent: &[Message],
    entries: &[JournalEntry],
    model: &str,
    count: &dyn Fn(&str) -> usize,
) -> ContextPlan {
    let max_tokens = context_budget_tokens(model);

    // Fixed costs plus a 25% reserve for the model's own output.
    let identity_cost = count(system_prompt);
    let memory_cost = count(context_message);
    let reserve = max_tokens / 4;
    let available = max_tokens
        .saturating_sub(identity_cost)
        .saturating_sub(memory_cost)
        .saturating_sub(reserve);

    // Per-message conversation costs, in conversation order.
    let conv_costs: Vec<usize> = recent.iter().map(|m| msg_token_count_fn(m, count)).collect();
    let total_conv: usize = conv_costs.iter().sum();

    // Journal gets the leftover after conversation, floored at 15%.
    let journal_min = available * 15 / 100;
    let journal_budget = available.saturating_sub(total_conv).max(journal_min);

    // 70/30 split between full entries and header-only entries.
    let full_budget = journal_budget * 70 / 100;
    let header_budget = journal_budget.saturating_sub(full_budget);

    // Phase 1: Full entries (newest first)
    // The +10 pads each entry for its rendered header/separators.
    let mut full_used = 0;
    let mut n_full = 0;
    for entry in entries.iter().rev() {
        let cost = count(&entry.content) + 10;
        if full_used + cost > full_budget {
            break;
        }
        full_used += cost;
        n_full += 1;
    }
    let full_start = entries.len().saturating_sub(n_full);

    // Phase 2: Header-only entries (continuing backward)
    // A header is the entry's first non-blank line.
    let mut header_used = 0;
    let mut n_headers = 0;
    for entry in entries[..full_start].iter().rev() {
        let first_line = entry
            .content
            .lines()
            .find(|l| !l.trim().is_empty())
            .unwrap_or("(empty)");
        let cost = count(first_line) + 10;
        if header_used + cost > header_budget {
            break;
        }
        header_used += cost;
        n_headers += 1;
    }
    let header_start = full_start.saturating_sub(n_headers);

    // Trim oldest conversation if it exceeds budget
    let journal_used = full_used + header_used;
    let mut conv_trim = 0;
    let mut trimmed_conv = total_conv;
    while trimmed_conv + journal_used > available && conv_trim < recent.len() {
        trimmed_conv -= conv_costs[conv_trim];
        conv_trim += 1;
    }
    // Walk forward to user message boundary
    // (so the kept slice never opens mid-exchange).
    while conv_trim < recent.len() && recent[conv_trim].role != Role::User {
        conv_trim += 1;
    }

    dbglog!("[plan] model={} max_tokens={} available={} (identity={} memory={} reserve={})",
        model, max_tokens, available, identity_cost, memory_cost, reserve);
    dbglog!("[plan] conv: {} msgs, {} tokens total, trimming {} msgs → {} tokens",
        recent.len(), total_conv, conv_trim, trimmed_conv);
    dbglog!("[plan] journal: {} full entries ({}t) + {} headers ({}t)",
        n_full, full_used, n_headers, header_used);

    ContextPlan {
        header_start,
        full_start,
        entry_count: entries.len(),
        conv_trim,
        _conv_count: recent.len(),
        _full_tokens: full_used,
        _header_tokens: header_used,
        _conv_tokens: trimmed_conv,
        _available: available,
    }
}
|
||||
|
||||
pub fn render_journal_text(
|
||||
entries: &[JournalEntry],
|
||||
plan: &ContextPlan,
|
||||
) -> String {
|
||||
let has_journal = plan.header_start < plan.entry_count;
|
||||
if !has_journal {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let mut text = String::from("[Earlier in this conversation — from your journal]\n\n");
|
||||
|
||||
for entry in &entries[plan.header_start..plan.full_start] {
|
||||
let first_line = entry
|
||||
.content
|
||||
.lines()
|
||||
.find(|l| !l.trim().is_empty())
|
||||
.unwrap_or("(empty)");
|
||||
text.push_str(&format!(
|
||||
"## {} — {}\n",
|
||||
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
||||
first_line,
|
||||
));
|
||||
}
|
||||
|
||||
let n_headers = plan.full_start - plan.header_start;
|
||||
let n_full = plan.entry_count - plan.full_start;
|
||||
if n_headers > 0 && n_full > 0 {
|
||||
text.push_str("\n---\n\n");
|
||||
}
|
||||
|
||||
for entry in &entries[plan.full_start..] {
|
||||
text.push_str(&format!(
|
||||
"## {}\n\n{}\n\n",
|
||||
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
||||
entry.content
|
||||
));
|
||||
}
|
||||
|
||||
text
|
||||
}
|
||||
|
||||
fn assemble_context(
|
||||
system_prompt: String,
|
||||
context_message: String,
|
||||
journal_text: &str,
|
||||
recent: &[Message],
|
||||
plan: &ContextPlan,
|
||||
) -> Vec<Message> {
|
||||
let mut messages = vec![Message::system(system_prompt)];
|
||||
if !context_message.is_empty() {
|
||||
messages.push(Message::user(context_message));
|
||||
}
|
||||
|
||||
let final_recent = &recent[plan.conv_trim..];
|
||||
|
||||
if !journal_text.is_empty() {
|
||||
messages.push(Message::user(journal_text.to_string()));
|
||||
} else if !final_recent.is_empty() {
|
||||
messages.push(Message::user(
|
||||
"Your context was just rebuilt. Memory files have been \
|
||||
reloaded. Your recent conversation continues below. \
|
||||
Earlier context is in your journal and memory files."
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
messages.extend(final_recent.iter().cloned());
|
||||
messages
|
||||
}
|
||||
|
||||
fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> usize) -> String {
|
||||
let mut boundaries = vec![0usize];
|
||||
for (i, line) in text.lines().enumerate() {
|
||||
if line.trim() == "---" || line.starts_with("## ") {
|
||||
let offset = text.lines().take(i).map(|l| l.len() + 1).sum::<usize>();
|
||||
boundaries.push(offset);
|
||||
}
|
||||
}
|
||||
boundaries.push(text.len());
|
||||
|
||||
let mut best = 0;
|
||||
for &end in &boundaries[1..] {
|
||||
let slice = &text[..end];
|
||||
if count(slice) <= max_tokens {
|
||||
best = end;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if best == 0 {
|
||||
best = text.len().min(max_tokens * 3);
|
||||
}
|
||||
|
||||
let truncated = &text[..best];
|
||||
dbglog!("[ctx] truncated memory from {} to {} chars ({} tokens)",
|
||||
text.len(), truncated.len(), count(truncated));
|
||||
truncated.to_string()
|
||||
}
|
||||
|
||||
fn find_journal_cutoff(
|
||||
conversation: &[Message],
|
||||
newest_entry: Option<&JournalEntry>,
|
||||
) -> usize {
|
||||
let cutoff = match newest_entry {
|
||||
Some(entry) => entry.timestamp,
|
||||
None => return 0,
|
||||
};
|
||||
|
||||
let mut split = conversation.len();
|
||||
for (i, msg) in conversation.iter().enumerate() {
|
||||
if let Some(ts) = parse_msg_timestamp(msg) {
|
||||
if ts > cutoff {
|
||||
split = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
while split > 0 && split < conversation.len() && conversation[split].role != Role::User {
|
||||
split -= 1;
|
||||
}
|
||||
split
|
||||
}
|
||||
|
||||
fn msg_token_count_fn(msg: &Message, count: &dyn Fn(&str) -> usize) -> usize {
|
||||
let content = msg.content.as_ref().map_or(0, |c| match c {
|
||||
MessageContent::Text(s) => count(s),
|
||||
MessageContent::Parts(parts) => parts
|
||||
.iter()
|
||||
.map(|p| match p {
|
||||
ContentPart::Text { text } => count(text),
|
||||
ContentPart::ImageUrl { .. } => 85,
|
||||
})
|
||||
.sum(),
|
||||
});
|
||||
let tools = msg.tool_calls.as_ref().map_or(0, |calls| {
|
||||
calls
|
||||
.iter()
|
||||
.map(|c| count(&c.function.arguments) + count(&c.function.name))
|
||||
.sum()
|
||||
});
|
||||
content + tools
|
||||
}
|
||||
|
||||
/// Count the token footprint of a message using BPE tokenization.
|
||||
pub fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize {
|
||||
msg_token_count_fn(msg, &|s| tokenizer.encode_with_special_tokens(s).len())
|
||||
}
|
||||
|
||||
/// Detect context window overflow errors from the API.
|
||||
pub fn is_context_overflow(err: &anyhow::Error) -> bool {
|
||||
let msg = err.to_string().to_lowercase();
|
||||
msg.contains("context length")
|
||||
|| msg.contains("token limit")
|
||||
|| msg.contains("too many tokens")
|
||||
|| msg.contains("maximum context")
|
||||
|| msg.contains("prompt is too long")
|
||||
|| msg.contains("request too large")
|
||||
|| msg.contains("input validation error")
|
||||
|| msg.contains("content length limit")
|
||||
|| (msg.contains("400") && msg.contains("tokens"))
|
||||
}
|
||||
|
||||
/// Detect model/provider errors delivered inside the SSE stream.
|
||||
pub fn is_stream_error(err: &anyhow::Error) -> bool {
|
||||
err.to_string().contains("model stream error")
|
||||
}
|
||||
|
||||
/// Trim conversation to fit within the context budget.
|
||||
/// Returns the trimmed conversation messages (oldest dropped first).
|
||||
pub fn trim_conversation(
|
||||
|
|
@ -462,9 +71,41 @@ pub fn trim_conversation(
|
|||
conversation[skip..].to_vec()
|
||||
}
|
||||
|
||||
fn parse_msg_timestamp(msg: &Message) -> Option<DateTime<Utc>> {
|
||||
msg.timestamp
|
||||
.as_ref()
|
||||
.and_then(|ts| DateTime::parse_from_rfc3339(ts).ok())
|
||||
.map(|dt| dt.with_timezone(&Utc))
|
||||
/// Count the token footprint of a message using BPE tokenization.
|
||||
pub fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize {
|
||||
let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
|
||||
let content = msg.content.as_ref().map_or(0, |c| match c {
|
||||
MessageContent::Text(s) => count(s),
|
||||
MessageContent::Parts(parts) => parts.iter()
|
||||
.map(|p| match p {
|
||||
ContentPart::Text { text } => count(text),
|
||||
ContentPart::ImageUrl { .. } => 85,
|
||||
})
|
||||
.sum(),
|
||||
});
|
||||
let tools = msg.tool_calls.as_ref().map_or(0, |calls| {
|
||||
calls.iter()
|
||||
.map(|c| count(&c.function.arguments) + count(&c.function.name))
|
||||
.sum()
|
||||
});
|
||||
content + tools
|
||||
}
|
||||
|
||||
/// Detect context window overflow errors from the API.
|
||||
pub fn is_context_overflow(err: &anyhow::Error) -> bool {
|
||||
let msg = err.to_string().to_lowercase();
|
||||
msg.contains("context length")
|
||||
|| msg.contains("token limit")
|
||||
|| msg.contains("too many tokens")
|
||||
|| msg.contains("maximum context")
|
||||
|| msg.contains("prompt is too long")
|
||||
|| msg.contains("request too large")
|
||||
|| msg.contains("input validation error")
|
||||
|| msg.contains("content length limit")
|
||||
|| (msg.contains("400") && msg.contains("tokens"))
|
||||
}
|
||||
|
||||
/// Detect model/provider errors delivered inside the SSE stream.
|
||||
pub fn is_stream_error(err: &anyhow::Error) -> bool {
|
||||
err.to_string().contains("model stream error")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
// tools/journal.rs — Native journal tool
|
||||
//
|
||||
// Appends entries directly to the journal file without spawning a
|
||||
// shell. The entry is persisted to disk immediately;
|
||||
// build_context_window() picks it up on the next compaction.
|
||||
// shell. The entry is persisted to disk immediately.
|
||||
//
|
||||
// This tool is "ephemeral" — after the API processes the tool call
|
||||
// and result, the agent strips them from the conversation history.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue