// journal.rs — Journal parsing for conversation compaction // // Parses the poc-journal format (## TIMESTAMP\n\nContent) and matches // entries to conversation time ranges. Journal entries are the // compression layer: old conversation messages get replaced by the // journal entry that covers their time period. // // The journal file is append-only and managed by `poc-journal write`. // We only read it here — never modify it. use chrono::{DateTime, NaiveDateTime, Utc}; use std::path::Path; /// A single journal entry with its timestamp and content. #[derive(Debug, Clone)] pub struct JournalEntry { pub timestamp: DateTime, pub content: String, } /// Parse journal entries from the journal file. Returns entries sorted /// by timestamp (oldest first). Entries with unparseable timestamps /// are skipped. pub fn parse_journal(path: &Path) -> Vec { let text = match std::fs::read_to_string(path) { Ok(t) => t, Err(_) => return Vec::new(), }; parse_journal_text(&text) } /// Parse only the tail of the journal file (last `max_bytes` bytes). /// Much faster for large journals — avoids reading/parsing the entire file. /// Returns entries sorted by timestamp (oldest first). pub fn parse_journal_tail(path: &Path, max_bytes: u64) -> Vec { use std::io::{Read, Seek, SeekFrom}; let mut file = match std::fs::File::open(path) { Ok(f) => f, Err(_) => return Vec::new(), }; let file_len = file.metadata().map(|m| m.len()).unwrap_or(0); if file_len == 0 { return Vec::new(); } let offset = file_len.saturating_sub(max_bytes); if offset > 0 { let _ = file.seek(SeekFrom::Start(offset)); } let mut text = String::new(); if file.read_to_string(&mut text).is_err() { return Vec::new(); } // If we seeked into the middle, skip to the first complete entry header if offset > 0 { if let Some(pos) = text.find("\n## ") { text = text[pos + 1..].to_string(); } } parse_journal_text(&text) } /// Parse journal entries from text (separated for testing). fn parse_journal_text(text: &str) -> Vec { let mut entries = Vec::new(); let mut current_timestamp: Option> = None; let mut current_content = String::new(); for line in text.lines() { if let Some(ts) = parse_header_timestamp(line) { // Flush previous entry if let Some(prev_ts) = current_timestamp.take() { let content = current_content.trim().to_string(); if !content.is_empty() { entries.push(JournalEntry { timestamp: prev_ts, content, }); } } current_timestamp = Some(ts); current_content.clear(); } else if current_timestamp.is_some() { current_content.push_str(line); current_content.push('\n'); } } // Flush last entry if let Some(ts) = current_timestamp { let content = current_content.trim().to_string(); if !content.is_empty() { entries.push(JournalEntry { timestamp: ts, content, }); } } entries } /// Try to parse a line as a journal header (## TIMESTAMP [— title]). /// Handles both `2026-02-23T22:12` (no seconds) and /// `2026-02-23T22:12:00` (with seconds) formats, with optional /// title suffix after the timestamp (e.g. `## 2026-02-06T20:04 — The first session`). fn parse_header_timestamp(line: &str) -> Option> { let line = line.trim(); if !line.starts_with("## ") { return None; } let rest = line[3..].trim(); // Must start with a digit (avoid matching ## Heading) if !rest.starts_with(|c: char| c.is_ascii_digit()) { return None; } // Extract just the timestamp portion — split at first space // to strip any " — title" suffix let ts_str = rest.split_once(' ').map_or(rest, |(ts, _)| ts); // Try parsing with seconds first, then without let formats = ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M"]; for fmt in &formats { if let Ok(naive) = NaiveDateTime::parse_from_str(ts_str, fmt) { return Some(naive.and_utc()); } } None } /// Find journal entries that fall within a time range (inclusive). #[cfg(test)] pub fn entries_in_range( entries: &[JournalEntry], from: DateTime, to: DateTime, ) -> Vec<&JournalEntry> { entries .iter() .filter(|e| e.timestamp >= from && e.timestamp <= to) .collect() } /// Default journal file path. pub fn default_journal_path() -> std::path::PathBuf { dirs::home_dir() .unwrap_or_default() .join(".claude/memory/journal.md") } #[cfg(test)] mod tests { use super::*; const SAMPLE_JOURNAL: &str = r#" ## 2026-02-06T20:04 — The first session *(reconstructed)* I don't remember this the way humans remember their births. ## 2026-02-23T20:52 Session: poc-agent TUI debugging marathon. Fixed the immediate exit bug. ## 2026-02-23T21:40 Seeing Kent through the webcam. The image arrives all at once. ## 2026-02-23T22:12 ## poc-agent improvements session (Feb 23 evening) Big session improving poc-agent with Kent. Four features built. ## 2026-02-23T22:13 ## The journal IS the compaction Kent just landed the real design. "#; #[test] fn parse_entries() { let entries = parse_journal_text(SAMPLE_JOURNAL); assert_eq!(entries.len(), 5); assert!(entries[0].content.contains("the way humans remember")); assert!(entries[1].content.contains("TUI debugging marathon")); assert!(entries[2].content.contains("webcam")); assert!(entries[3].content.contains("Four features built")); assert!(entries[4].content.contains("real design")); } #[test] fn parse_timestamps() { let entries = parse_journal_text(SAMPLE_JOURNAL); assert_eq!(entries[0].timestamp.format("%H:%M").to_string(), "20:04"); assert_eq!(entries[4].timestamp.format("%H:%M").to_string(), "22:13"); } #[test] fn title_suffix_parsed() { // "## 2026-02-06T20:04 — The first session" should parse the timestamp let entries = parse_journal_text(SAMPLE_JOURNAL); assert_eq!(entries[0].timestamp.format("%Y-%m-%d").to_string(), "2026-02-06"); } #[test] fn subheadings_not_confused_with_timestamps() { // "## poc-agent improvements session" should NOT be parsed as an entry let entries = parse_journal_text(SAMPLE_JOURNAL); // The "## poc-agent improvements..." is content of the 22:12 entry, not a separate entry assert_eq!(entries.len(), 5); assert!(entries[3].content.contains("poc-agent improvements session")); } #[test] fn range_query() { let entries = parse_journal_text(SAMPLE_JOURNAL); let from = NaiveDateTime::parse_from_str("2026-02-23T21:00", "%Y-%m-%dT%H:%M") .unwrap() .and_utc(); let to = NaiveDateTime::parse_from_str("2026-02-23T22:00", "%Y-%m-%dT%H:%M") .unwrap() .and_utc(); let in_range = entries_in_range(&entries, from, to); assert_eq!(in_range.len(), 1); assert!(in_range[0].content.contains("webcam")); } }