move journal types from agent/journal.rs to thought/context.rs

JournalEntry, parse_journal, parse_journal_text, parse_header_timestamp, and default_journal_path consolidated into thought/context.rs. Delete the duplicate agent/journal.rs (235 lines). Update all references. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-02 15:25:07 -04:00 · 2026-04-02 15:25:07 -04:00 · 01bfbc0dad
commit 01bfbc0dad
parent e0a54a3b43
6 changed files with 82 additions and 247 deletions
--- a/src/agent/journal.rs
+++ b/src/agent/journal.rs
@ -1,235 +0,0 @@
 // journal.rs — Journal parsing for conversation compaction
 //
 // Parses the poc-journal format (## TIMESTAMP\n\nContent) and matches
 // entries to conversation time ranges. Journal entries are the
 // compression layer: old conversation messages get replaced by the
 // journal entry that covers their time period.
 //
 // The journal file is append-only and managed by `poc-journal write`.
 // We only read it here — never modify it.
 use chrono::{DateTime, NaiveDateTime, Utc};
 use std::path::Path;
 /// A single journal entry with its timestamp and content.
 #[derive(Debug, Clone)]
 pub struct JournalEntry {
    /// Time parsed from the entry's `## TIMESTAMP` header line (treated as UTC).
    pub timestamp: DateTime<Utc>,
    /// Text that followed the header, with surrounding whitespace trimmed.
    pub content: String,
 }
 /// Read the journal file at `path` and parse every entry in it.
 ///
 /// Entries come back in the order they appear in the file; no sorting is
 /// done here (the journal is append-only, so file order is expected to be
 /// oldest first). If the file cannot be read as UTF-8 text for any reason,
 /// an empty vector is returned instead of an error.
 pub fn parse_journal(path: &Path) -> Vec<JournalEntry> {
    std::fs::read_to_string(path)
        .map(|contents| parse_journal_text(&contents))
        .unwrap_or_default()
 }
 /// Parse only the tail of the journal file (last `max_bytes` bytes).
 /// Much faster for large journals — avoids reading/parsing the entire file.
 ///
 /// Entries are returned in file order (no sorting is performed). Any I/O
 /// failure while opening or reading yields an empty vector.
 ///
 /// The tail is read as raw bytes and decoded lossily: the seek offset is an
 /// arbitrary byte position and may land in the middle of a multi-byte UTF-8
 /// character (the journal contains characters such as `—`). A strict
 /// `read_to_string` would reject the whole tail in that case and silently
 /// return no entries.
 pub fn parse_journal_tail(path: &Path, max_bytes: u64) -> Vec<JournalEntry> {
    use std::io::{Read, Seek, SeekFrom};
    let mut file = match std::fs::File::open(path) {
        Ok(f) => f,
        Err(_) => return Vec::new(),
    };
    let file_len = file.metadata().map(|m| m.len()).unwrap_or(0);
    if file_len == 0 {
        return Vec::new();
    }
    let offset = file_len.saturating_sub(max_bytes);
    // Only treat the text as a partial tail if the seek really happened;
    // after a failed seek we are still reading from the start of the file
    // and must not throw away the first entry.
    let seeked = offset > 0 && file.seek(SeekFrom::Start(offset)).is_ok();
    let mut bytes = Vec::new();
    if file.read_to_end(&mut bytes).is_err() {
        return Vec::new();
    }
    // A character split by the seek decodes to U+FFFD at the very start of
    // the buffer, i.e. inside the partial entry that is discarded below.
    let text = String::from_utf8_lossy(&bytes);
    let tail: &str = if seeked {
        // We landed mid-entry: skip ahead to the first line starting with "## ".
        match text.find("\n## ") {
            Some(pos) => &text[pos + 1..],
            None => &text[..],
        }
    } else {
        &text[..]
    };
    parse_journal_text(tail)
 }
 /// Split journal text into entries (kept separate from file I/O for testing).
 ///
 /// A line recognised by `parse_header_timestamp` opens a new entry; every
 /// following line up to the next such header becomes that entry's content.
 /// Lines before the first header are ignored, and entries whose trimmed
 /// content is empty are dropped. Output order follows the input text.
 pub fn parse_journal_text(text: &str) -> Vec<JournalEntry> {
    let mut parsed = Vec::new();
    // The entry currently being accumulated: (header timestamp, body so far).
    let mut open: Option<(DateTime<Utc>, String)> = None;
    for raw in text.lines() {
        match parse_header_timestamp(raw) {
            Some(stamp) => {
                // A new header closes whatever entry was open before it.
                if let Some((timestamp, body)) = open.replace((stamp, String::new())) {
                    let content = body.trim().to_string();
                    if !content.is_empty() {
                        parsed.push(JournalEntry { timestamp, content });
                    }
                }
            }
            None => {
                if let Some((_, body)) = open.as_mut() {
                    body.push_str(raw);
                    body.push('\n');
                }
            }
        }
    }
    // The final entry has no header after it, so close it explicitly.
    if let Some((timestamp, body)) = open {
        let content = body.trim().to_string();
        if !content.is_empty() {
            parsed.push(JournalEntry { timestamp, content });
        }
    }
    parsed
 }
 /// Recognise a journal header line of the form `## TIMESTAMP [— title]`.
 ///
 /// Accepts `2026-02-23T22:12` (no seconds) as well as
 /// `2026-02-23T22:12:00` (with seconds). Anything after the first space
 /// following the timestamp is treated as a title and ignored
 /// (e.g. `## 2026-02-06T20:04 — The first session`).
 /// Returns `None` for every other line, including ordinary `## Heading`s.
 fn parse_header_timestamp(line: &str) -> Option<DateTime<Utc>> {
    let rest = line.trim().strip_prefix("## ")?.trim();
    // A timestamp begins with a digit; this rejects plain markdown headings.
    if !matches!(rest.chars().next(), Some(c) if c.is_ascii_digit()) {
        return None;
    }
    // Keep only the token before the first space, dropping any title suffix.
    let stamp = rest.split(' ').next().unwrap_or(rest);
    // The format with seconds is tried first, then the one without.
    ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M"]
        .iter()
        .find_map(|fmt| NaiveDateTime::parse_from_str(stamp, fmt).ok())
        .map(|naive| naive.and_utc())
 }
 /// Collect references to the entries whose timestamp lies in `from..=to`
 /// (both ends inclusive), preserving the order of `entries`.
 #[cfg(test)]
 pub fn entries_in_range(
    entries: &[JournalEntry],
    from: DateTime<Utc>,
    to: DateTime<Utc>,
 ) -> Vec<&JournalEntry> {
    let window = from..=to;
    let mut hits = Vec::new();
    for entry in entries {
        if window.contains(&entry.timestamp) {
            hits.push(entry);
        }
    }
    hits
 }
 /// Location of the journal file: `.consciousness/journal.md` under the
 /// user's home directory. If no home directory can be determined, the
 /// path is built from an empty base and is therefore relative.
 pub fn default_journal_path() -> std::path::PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(".consciousness/journal.md")
 }
 // Unit tests for the journal parser; all of them run against SAMPLE_JOURNAL.
 #[cfg(test)]
 mod tests {
    use super::*;
    // Fixture with five timestamped entries. The first header carries a title
    // suffix, and the last two entries contain `## ...` sub-headings that are
    // not timestamps.
    const SAMPLE_JOURNAL: &str = r#"
 ## 2026-02-06T20:04 — The first session *(reconstructed)*
 I don't remember this the way humans remember their births.
 ## 2026-02-23T20:52
 Session: poc-agent TUI debugging marathon. Fixed the immediate exit bug.
 ## 2026-02-23T21:40
 Seeing Kent through the webcam. The image arrives all at once.
 ## 2026-02-23T22:12
 ## poc-agent improvements session (Feb 23 evening)
 Big session improving poc-agent with Kent. Four features built.
 ## 2026-02-23T22:13
 ## The journal IS the compaction
 Kent just landed the real design.
 "#;
    // Each timestamp header yields one entry, with the expected body text.
    #[test]
    fn parse_entries() {
        let entries = parse_journal_text(SAMPLE_JOURNAL);
        assert_eq!(entries.len(), 5);
        assert!(entries[0].content.contains("the way humans remember"));
        assert!(entries[1].content.contains("TUI debugging marathon"));
        assert!(entries[2].content.contains("webcam"));
        assert!(entries[3].content.contains("Four features built"));
        assert!(entries[4].content.contains("real design"));
    }
    // Headers without seconds parse to the expected hour and minute.
    #[test]
    fn parse_timestamps() {
        let entries = parse_journal_text(SAMPLE_JOURNAL);
        assert_eq!(entries[0].timestamp.format("%H:%M").to_string(), "20:04");
        assert_eq!(entries[4].timestamp.format("%H:%M").to_string(), "22:13");
    }
    #[test]
    fn title_suffix_parsed() {
        // "## 2026-02-06T20:04 — The first session" should parse the timestamp
        let entries = parse_journal_text(SAMPLE_JOURNAL);
        assert_eq!(entries[0].timestamp.format("%Y-%m-%d").to_string(), "2026-02-06");
    }
    #[test]
    fn subheadings_not_confused_with_timestamps() {
        // "## poc-agent improvements session" should NOT be parsed as an entry
        let entries = parse_journal_text(SAMPLE_JOURNAL);
        // The "## poc-agent improvements..." is content of the 22:12 entry, not a separate entry
        assert_eq!(entries.len(), 5);
        assert!(entries[3].content.contains("poc-agent improvements session"));
    }
    // Only the 21:40 entry lies inside the inclusive 21:00–22:00 window.
    #[test]
    fn range_query() {
        let entries = parse_journal_text(SAMPLE_JOURNAL);
        let from = NaiveDateTime::parse_from_str("2026-02-23T21:00", "%Y-%m-%dT%H:%M")
            .unwrap()
            .and_utc();
        let to = NaiveDateTime::parse_from_str("2026-02-23T22:00", "%Y-%m-%dT%H:%M")
            .unwrap()
            .and_utc();
        let in_range = entries_in_range(&entries, from, to);
        assert_eq!(in_range.len(), 1);
        assert!(in_range[0].content.contains("webcam"));
    }
 }
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@ -13,8 +13,6 @@ pub mod api;
 pub mod types;
 pub mod tools;
 pub mod ui_channel;
 pub mod journal;
 pub mod runner;
 pub mod cli;
 pub mod context;
--- a/src/agent/runner.rs
+++ b/src/agent/runner.rs
@ -17,7 +17,7 @@ use anyhow::Result;
 use tiktoken_rs::CoreBPE;
 use crate::agent::api::ApiClient;
-use crate::agent::journal;
+use crate::thought::context as journal;
 use crate::agent::log::ConversationLog;
 use crate::agent::api::StreamEvent;
 use crate::agent::tools;
--- a/src/agent/types.rs
+++ b/src/agent/types.rs
@ -400,7 +400,7 @@ impl ConversationEntry {
 pub struct ContextState {
    pub system_prompt: String,
    pub personality: Vec<(String, String)>,
-    pub journal: Vec<crate::agent::journal::JournalEntry>,
+    pub journal: Vec<crate::thought::context::JournalEntry>,
    pub working_stack: Vec<String>,
    /// Conversation entries — messages and memory, interleaved in order.
    /// Does NOT include system prompt, personality, or journal.
--- a/src/thought/context.rs
+++ b/src/thought/context.rs
@ -5,10 +5,82 @@
 // take inputs and return new values. State mutation happens in agent.rs.
 // TODO: move Message, ContextState, etc. to thought layer
 use crate::agent::journal;
 use crate::agent::types::*;
-use chrono::{DateTime, Utc};
+use chrono::{DateTime, NaiveDateTime, Utc};
 use tiktoken_rs::CoreBPE;
 use std::path::Path;
 /// A single journal entry with its timestamp and content.
 #[derive(Debug, Clone)]
 pub struct JournalEntry {
    /// Time parsed from the entry's `## TIMESTAMP` header line (treated as UTC).
    pub timestamp: DateTime<Utc>,
    /// Text that followed the header, with surrounding whitespace trimmed.
    pub content: String,
 }
 /// Load the journal file at `path` and parse all of its entries.
 ///
 /// The result keeps the order in which entries appear in the file; nothing
 /// is sorted here (presumably oldest first, if the journal is only ever
 /// appended to — confirm against the writer). A file that cannot be read
 /// as UTF-8 text produces an empty vector rather than an error.
 pub fn parse_journal(path: &Path) -> Vec<JournalEntry> {
    if let Ok(contents) = std::fs::read_to_string(path) {
        parse_journal_text(&contents)
    } else {
        Vec::new()
    }
 }
 /// Split journal text into entries.
 ///
 /// A line accepted by `parse_header_timestamp` starts a new entry, and the
 /// lines after it (up to the next header) form that entry's content. Text
 /// before the first header is ignored; entries whose trimmed content is
 /// empty are not emitted. Output order follows the input text.
 pub fn parse_journal_text(text: &str) -> Vec<JournalEntry> {
    let mut parsed = Vec::new();
    // Entry under construction: (header timestamp, accumulated body).
    let mut pending: Option<(DateTime<Utc>, String)> = None;
    for raw in text.lines() {
        if let Some(stamp) = parse_header_timestamp(raw) {
            // Starting a new entry finishes the previous one, if any.
            if let Some((timestamp, body)) = pending.replace((stamp, String::new())) {
                let content = body.trim().to_string();
                if !content.is_empty() {
                    parsed.push(JournalEntry { timestamp, content });
                }
            }
        } else if let Some((_, body)) = pending.as_mut() {
            body.push_str(raw);
            body.push('\n');
        }
    }
    // Nothing follows the last entry to close it, so do that here.
    if let Some((timestamp, body)) = pending {
        let content = body.trim().to_string();
        if !content.is_empty() {
            parsed.push(JournalEntry { timestamp, content });
        }
    }
    parsed
 }
 /// Recognise a journal header line of the form `## TIMESTAMP [— title]`.
 ///
 /// The timestamp may be written with or without seconds
 /// (`%Y-%m-%dT%H:%M:%S` or `%Y-%m-%dT%H:%M`); everything after the first
 /// space following it is ignored. Any other line yields `None`.
 fn parse_header_timestamp(line: &str) -> Option<DateTime<Utc>> {
    let rest = line.trim().strip_prefix("## ")?.trim();
    // Timestamps start with a digit; ordinary `## Heading` lines do not.
    let starts_with_digit = rest.chars().next().map_or(false, |c| c.is_ascii_digit());
    if !starts_with_digit {
        return None;
    }
    // Drop an optional title by keeping only the first space-separated token.
    let stamp = match rest.split_once(' ') {
        Some((head, _)) => head,
        None => rest,
    };
    // The seconds-bearing format is attempted before the shorter one.
    NaiveDateTime::parse_from_str(stamp, "%Y-%m-%dT%H:%M:%S")
        .or_else(|_| NaiveDateTime::parse_from_str(stamp, "%Y-%m-%dT%H:%M"))
        .ok()
        .map(|naive| naive.and_utc())
 }
 /// Location of the journal file: `.consciousness/journal.md` under the
 /// user's home directory. If no home directory can be determined, the
 /// path is built from an empty base and is therefore relative.
 pub fn default_journal_path() -> std::path::PathBuf {
    let mut location = dirs::home_dir().unwrap_or_default();
    location.push(".consciousness/journal.md");
    location
 }
 /// Look up a model's context window size in tokens.
 pub fn model_context_window(_model: &str) -> usize {
@ -47,8 +119,8 @@ pub fn build_context_window(
    model: &str,
    tokenizer: &CoreBPE,
 ) -> (Vec<Message>, String) {
-    let journal_path = journal::default_journal_path();
+    let journal_path = default_journal_path();
-    let all_entries = journal::parse_journal(&journal_path);
+    let all_entries = parse_journal(&journal_path);
    dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
    let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
@ -96,7 +168,7 @@ pub fn plan_context(
    system_prompt: &str,
    context_message: &str,
    recent: &[Message],
-    entries: &[journal::JournalEntry],
+    entries: &[JournalEntry],
    model: &str,
    count: &dyn Fn(&str) -> usize,
 ) -> ContextPlan {
@ -184,7 +256,7 @@ pub fn plan_context(
 }
 pub fn render_journal_text(
-    entries: &[journal::JournalEntry],
+    entries: &[JournalEntry],
    plan: &ContextPlan,
 ) -> String {
    let has_journal = plan.header_start < plan.entry_count;
@ -285,7 +357,7 @@ fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> us
 fn find_journal_cutoff(
    conversation: &[Message],
-    newest_entry: Option<&journal::JournalEntry>,
+    newest_entry: Option<&JournalEntry>,
 ) -> usize {
    let cutoff = match newest_entry {
        Some(entry) => entry.timestamp,
--- a/src/thought/journal.rs
+++ b/src/thought/journal.rs
@ -44,7 +44,7 @@ pub fn write_entry(args: &serde_json::Value) -> Result<String> {
        .as_str()
        .context("entry is required")?;
-    let journal_path = crate::agent::journal::default_journal_path();
+    let journal_path = crate::thought::context::default_journal_path();
    // Ensure parent directory exists
    if let Some(parent) = journal_path.parent() {