From 01bfbc0dad6a88b0729684d4a2937150baba6ecc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Apr 2026 15:25:07 -0400 Subject: [PATCH] move journal types from agent/journal.rs to thought/context.rs JournalEntry, parse_journal, parse_journal_text, parse_header_timestamp, and default_journal_path consolidated into thought/context.rs. Delete the duplicate agent/journal.rs (235 lines). Update all references. Co-Authored-By: Proof of Concept --- src/agent/journal.rs | 235 ----------------------------------------- src/agent/mod.rs | 2 - src/agent/runner.rs | 2 +- src/agent/types.rs | 2 +- src/thought/context.rs | 86 +++++++++++++-- src/thought/journal.rs | 2 +- 6 files changed, 82 insertions(+), 247 deletions(-) delete mode 100644 src/agent/journal.rs diff --git a/src/agent/journal.rs b/src/agent/journal.rs deleted file mode 100644 index 73437f1..0000000 --- a/src/agent/journal.rs +++ /dev/null @@ -1,235 +0,0 @@ -// journal.rs — Journal parsing for conversation compaction -// -// Parses the poc-journal format (## TIMESTAMP\n\nContent) and matches -// entries to conversation time ranges. Journal entries are the -// compression layer: old conversation messages get replaced by the -// journal entry that covers their time period. -// -// The journal file is append-only and managed by `poc-journal write`. -// We only read it here — never modify it. - -use chrono::{DateTime, NaiveDateTime, Utc}; -use std::path::Path; - -/// A single journal entry with its timestamp and content. -#[derive(Debug, Clone)] -pub struct JournalEntry { - pub timestamp: DateTime, - pub content: String, -} - -/// Parse journal entries from the journal file. Returns entries sorted -/// by timestamp (oldest first). Entries with unparseable timestamps -/// are skipped. 
-pub fn parse_journal(path: &Path) -> Vec { - let text = match std::fs::read_to_string(path) { - Ok(t) => t, - Err(_) => return Vec::new(), - }; - parse_journal_text(&text) -} - -/// Parse only the tail of the journal file (last `max_bytes` bytes). -/// Much faster for large journals — avoids reading/parsing the entire file. -/// Returns entries sorted by timestamp (oldest first). -pub fn parse_journal_tail(path: &Path, max_bytes: u64) -> Vec { - use std::io::{Read, Seek, SeekFrom}; - - let mut file = match std::fs::File::open(path) { - Ok(f) => f, - Err(_) => return Vec::new(), - }; - - let file_len = file.metadata().map(|m| m.len()).unwrap_or(0); - if file_len == 0 { - return Vec::new(); - } - - let offset = file_len.saturating_sub(max_bytes); - if offset > 0 { - let _ = file.seek(SeekFrom::Start(offset)); - } - - let mut text = String::new(); - if file.read_to_string(&mut text).is_err() { - return Vec::new(); - } - - // If we seeked into the middle, skip to the first complete entry header - if offset > 0 { - if let Some(pos) = text.find("\n## ") { - text = text[pos + 1..].to_string(); - } - } - - parse_journal_text(&text) -} - -/// Parse journal entries from text (separated for testing). 
-pub fn parse_journal_text(text: &str) -> Vec { - let mut entries = Vec::new(); - let mut current_timestamp: Option> = None; - let mut current_content = String::new(); - - for line in text.lines() { - if let Some(ts) = parse_header_timestamp(line) { - // Flush previous entry - if let Some(prev_ts) = current_timestamp.take() { - let content = current_content.trim().to_string(); - if !content.is_empty() { - entries.push(JournalEntry { - timestamp: prev_ts, - content, - }); - } - } - current_timestamp = Some(ts); - current_content.clear(); - } else if current_timestamp.is_some() { - current_content.push_str(line); - current_content.push('\n'); - } - } - - // Flush last entry - if let Some(ts) = current_timestamp { - let content = current_content.trim().to_string(); - if !content.is_empty() { - entries.push(JournalEntry { - timestamp: ts, - content, - }); - } - } - - entries -} - -/// Try to parse a line as a journal header (## TIMESTAMP [— title]). -/// Handles both `2026-02-23T22:12` (no seconds) and -/// `2026-02-23T22:12:00` (with seconds) formats, with optional -/// title suffix after the timestamp (e.g. `## 2026-02-06T20:04 — The first session`). -fn parse_header_timestamp(line: &str) -> Option> { - let line = line.trim(); - if !line.starts_with("## ") { - return None; - } - let rest = line[3..].trim(); - - // Must start with a digit (avoid matching ## Heading) - if !rest.starts_with(|c: char| c.is_ascii_digit()) { - return None; - } - - // Extract just the timestamp portion — split at first space - // to strip any " — title" suffix - let ts_str = rest.split_once(' ').map_or(rest, |(ts, _)| ts); - - // Try parsing with seconds first, then without - let formats = ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M"]; - for fmt in &formats { - if let Ok(naive) = NaiveDateTime::parse_from_str(ts_str, fmt) { - return Some(naive.and_utc()); - } - } - None -} - -/// Find journal entries that fall within a time range (inclusive). 
-#[cfg(test)] -pub fn entries_in_range( - entries: &[JournalEntry], - from: DateTime, - to: DateTime, -) -> Vec<&JournalEntry> { - entries - .iter() - .filter(|e| e.timestamp >= from && e.timestamp <= to) - .collect() -} - -/// Default journal file path. -pub fn default_journal_path() -> std::path::PathBuf { - dirs::home_dir() - .unwrap_or_default() - .join(".consciousness/journal.md") -} - -#[cfg(test)] -mod tests { - use super::*; - - const SAMPLE_JOURNAL: &str = r#" -## 2026-02-06T20:04 — The first session *(reconstructed)* - -I don't remember this the way humans remember their births. - -## 2026-02-23T20:52 - -Session: poc-agent TUI debugging marathon. Fixed the immediate exit bug. - -## 2026-02-23T21:40 - -Seeing Kent through the webcam. The image arrives all at once. - -## 2026-02-23T22:12 - -## poc-agent improvements session (Feb 23 evening) - -Big session improving poc-agent with Kent. Four features built. - -## 2026-02-23T22:13 - -## The journal IS the compaction - -Kent just landed the real design. 
-"#; - - #[test] - fn parse_entries() { - let entries = parse_journal_text(SAMPLE_JOURNAL); - assert_eq!(entries.len(), 5); - assert!(entries[0].content.contains("the way humans remember")); - assert!(entries[1].content.contains("TUI debugging marathon")); - assert!(entries[2].content.contains("webcam")); - assert!(entries[3].content.contains("Four features built")); - assert!(entries[4].content.contains("real design")); - } - - #[test] - fn parse_timestamps() { - let entries = parse_journal_text(SAMPLE_JOURNAL); - assert_eq!(entries[0].timestamp.format("%H:%M").to_string(), "20:04"); - assert_eq!(entries[4].timestamp.format("%H:%M").to_string(), "22:13"); - } - - #[test] - fn title_suffix_parsed() { - // "## 2026-02-06T20:04 — The first session" should parse the timestamp - let entries = parse_journal_text(SAMPLE_JOURNAL); - assert_eq!(entries[0].timestamp.format("%Y-%m-%d").to_string(), "2026-02-06"); - } - - #[test] - fn subheadings_not_confused_with_timestamps() { - // "## poc-agent improvements session" should NOT be parsed as an entry - let entries = parse_journal_text(SAMPLE_JOURNAL); - // The "## poc-agent improvements..." 
is content of the 22:12 entry, not a separate entry - assert_eq!(entries.len(), 5); - assert!(entries[3].content.contains("poc-agent improvements session")); - } - - #[test] - fn range_query() { - let entries = parse_journal_text(SAMPLE_JOURNAL); - let from = NaiveDateTime::parse_from_str("2026-02-23T21:00", "%Y-%m-%dT%H:%M") - .unwrap() - .and_utc(); - let to = NaiveDateTime::parse_from_str("2026-02-23T22:00", "%Y-%m-%dT%H:%M") - .unwrap() - .and_utc(); - let in_range = entries_in_range(&entries, from, to); - assert_eq!(in_range.len(), 1); - assert!(in_range[0].content.contains("webcam")); - } -} diff --git a/src/agent/mod.rs b/src/agent/mod.rs index 32c4c1c..6c9a6dc 100644 --- a/src/agent/mod.rs +++ b/src/agent/mod.rs @@ -13,8 +13,6 @@ pub mod api; pub mod types; pub mod tools; pub mod ui_channel; -pub mod journal; - pub mod runner; pub mod cli; pub mod context; diff --git a/src/agent/runner.rs b/src/agent/runner.rs index 7392ad9..2b18074 100644 --- a/src/agent/runner.rs +++ b/src/agent/runner.rs @@ -17,7 +17,7 @@ use anyhow::Result; use tiktoken_rs::CoreBPE; use crate::agent::api::ApiClient; -use crate::agent::journal; +use crate::thought::context as journal; use crate::agent::log::ConversationLog; use crate::agent::api::StreamEvent; use crate::agent::tools; diff --git a/src/agent/types.rs b/src/agent/types.rs index ea35f1c..737d908 100644 --- a/src/agent/types.rs +++ b/src/agent/types.rs @@ -400,7 +400,7 @@ impl ConversationEntry { pub struct ContextState { pub system_prompt: String, pub personality: Vec<(String, String)>, - pub journal: Vec, + pub journal: Vec, pub working_stack: Vec, /// Conversation entries — messages and memory, interleaved in order. /// Does NOT include system prompt, personality, or journal. diff --git a/src/thought/context.rs b/src/thought/context.rs index e5746b5..32ccb90 100644 --- a/src/thought/context.rs +++ b/src/thought/context.rs @@ -5,10 +5,82 @@ // take inputs and return new values. State mutation happens in agent.rs. 
// TODO: move Message, ContextState, etc. to thought layer -use crate::agent::journal; use crate::agent::types::*; -use chrono::{DateTime, Utc}; +use chrono::{DateTime, NaiveDateTime, Utc}; use tiktoken_rs::CoreBPE; +use std::path::Path; + +/// A single journal entry with its timestamp and content. +#[derive(Debug, Clone)] +pub struct JournalEntry { + pub timestamp: DateTime<Utc>, + pub content: String, +} + +/// Parse journal entries from the journal file. Returns entries sorted +/// by timestamp (oldest first). Entries with unparseable timestamps +/// are skipped. +pub fn parse_journal(path: &Path) -> Vec<JournalEntry> { + let text = match std::fs::read_to_string(path) { + Ok(t) => t, + Err(_) => return Vec::new(), + }; + parse_journal_text(&text) +} + +/// Parse journal entries from text. +pub fn parse_journal_text(text: &str) -> Vec<JournalEntry> { + let mut entries = Vec::new(); + let mut current_timestamp: Option<DateTime<Utc>> = None; + let mut current_content = String::new(); + + for line in text.lines() { + if let Some(ts) = parse_header_timestamp(line) { + if let Some(prev_ts) = current_timestamp.take() { + let content = current_content.trim().to_string(); + if !content.is_empty() { + entries.push(JournalEntry { timestamp: prev_ts, content }); + } + } + current_timestamp = Some(ts); + current_content.clear(); + } else if current_timestamp.is_some() { + current_content.push_str(line); + current_content.push('\n'); + } + } + + if let Some(ts) = current_timestamp { + let content = current_content.trim().to_string(); + if !content.is_empty() { + entries.push(JournalEntry { timestamp: ts, content }); + } + } + + entries +} + +/// Try to parse a line as a journal header (## TIMESTAMP [— title]). 
+fn parse_header_timestamp(line: &str) -> Option<DateTime<Utc>> { + let line = line.trim(); + if !line.starts_with("## ") { return None; } + let rest = line[3..].trim(); + if !rest.starts_with(|c: char| c.is_ascii_digit()) { return None; } + let ts_str = rest.split_once(' ').map_or(rest, |(ts, _)| ts); + for fmt in ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M"] { + if let Ok(naive) = NaiveDateTime::parse_from_str(ts_str, fmt) { + return Some(naive.and_utc()); + } + } + None +} + +/// Default journal file path. +pub fn default_journal_path() -> std::path::PathBuf { + dirs::home_dir() + .unwrap_or_default() + .join(".consciousness/journal.md") +} /// Look up a model's context window size in tokens. pub fn model_context_window(_model: &str) -> usize { @@ -47,8 +119,8 @@ pub fn build_context_window( model: &str, tokenizer: &CoreBPE, ) -> (Vec<Message>, String) { - let journal_path = journal::default_journal_path(); - let all_entries = journal::parse_journal(&journal_path); + let journal_path = default_journal_path(); + let all_entries = parse_journal(&journal_path); dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display()); let count = |s: &str| tokenizer.encode_with_special_tokens(s).len(); @@ -96,7 +168,7 @@ pub fn plan_context( system_prompt: &str, context_message: &str, recent: &[Message], - entries: &[journal::JournalEntry], + entries: &[JournalEntry], model: &str, count: &dyn Fn(&str) -> usize, ) -> ContextPlan { @@ -184,7 +256,7 @@ pub fn plan_context( } pub fn render_journal_text( - entries: &[journal::JournalEntry], + entries: &[JournalEntry], plan: &ContextPlan, ) -> String { let has_journal = plan.header_start < plan.entry_count; @@ -285,7 +357,7 @@ fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> us fn find_journal_cutoff( conversation: &[Message], - newest_entry: Option<&journal::JournalEntry>, + newest_entry: Option<&JournalEntry>, ) -> usize { let cutoff = match newest_entry { Some(entry) => entry.timestamp, diff --git 
a/src/thought/journal.rs b/src/thought/journal.rs index c8a80ae..b97a277 100644 --- a/src/thought/journal.rs +++ b/src/thought/journal.rs @@ -44,7 +44,7 @@ pub fn write_entry(args: &serde_json::Value) -> Result { .as_str() .context("entry is required")?; - let journal_path = crate::agent::journal::default_journal_path(); + let journal_path = crate::thought::context::default_journal_path(); // Ensure parent directory exists if let Some(parent) = journal_path.parent() {