move journal types from agent/journal.rs to thought/context.rs
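JournalEntry, parse_journal, parse_journal_text, parse_header_timestamp, and default_journal_path are consolidated into thought/context.rs. The duplicate agent/journal.rs (235 lines) is deleted and all references are updated. Note that parse_journal_tail, the test-only entries_in_range helper, and the journal unit tests are removed together with the old file rather than moved. Co-Authored-By: Proof of Concept <poc@bcachefs.org>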
This commit is contained in:
parent
e0a54a3b43
commit
01bfbc0dad
6 changed files with 82 additions and 247 deletions
|
|
@ -1,235 +0,0 @@
|
||||||
// journal.rs — Journal parsing for conversation compaction
|
|
||||||
//
|
|
||||||
// Parses the poc-journal format (## TIMESTAMP\n\nContent) and matches
|
|
||||||
// entries to conversation time ranges. Journal entries are the
|
|
||||||
// compression layer: old conversation messages get replaced by the
|
|
||||||
// journal entry that covers their time period.
|
|
||||||
//
|
|
||||||
// The journal file is append-only and managed by `poc-journal write`.
|
|
||||||
// We only read it here — never modify it.
|
|
||||||
|
|
||||||
use chrono::{DateTime, NaiveDateTime, Utc};
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
/// A single journal entry with its timestamp and content.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct JournalEntry {
|
|
||||||
pub timestamp: DateTime<Utc>,
|
|
||||||
pub content: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse journal entries from the journal file. Returns entries sorted
|
|
||||||
/// by timestamp (oldest first). Entries with unparseable timestamps
|
|
||||||
/// are skipped.
|
|
||||||
pub fn parse_journal(path: &Path) -> Vec<JournalEntry> {
|
|
||||||
let text = match std::fs::read_to_string(path) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(_) => return Vec::new(),
|
|
||||||
};
|
|
||||||
parse_journal_text(&text)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse only the tail of the journal file (last `max_bytes` bytes).
|
|
||||||
/// Much faster for large journals — avoids reading/parsing the entire file.
|
|
||||||
/// Returns entries sorted by timestamp (oldest first).
|
|
||||||
pub fn parse_journal_tail(path: &Path, max_bytes: u64) -> Vec<JournalEntry> {
|
|
||||||
use std::io::{Read, Seek, SeekFrom};
|
|
||||||
|
|
||||||
let mut file = match std::fs::File::open(path) {
|
|
||||||
Ok(f) => f,
|
|
||||||
Err(_) => return Vec::new(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let file_len = file.metadata().map(|m| m.len()).unwrap_or(0);
|
|
||||||
if file_len == 0 {
|
|
||||||
return Vec::new();
|
|
||||||
}
|
|
||||||
|
|
||||||
let offset = file_len.saturating_sub(max_bytes);
|
|
||||||
if offset > 0 {
|
|
||||||
let _ = file.seek(SeekFrom::Start(offset));
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut text = String::new();
|
|
||||||
if file.read_to_string(&mut text).is_err() {
|
|
||||||
return Vec::new();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we seeked into the middle, skip to the first complete entry header
|
|
||||||
if offset > 0 {
|
|
||||||
if let Some(pos) = text.find("\n## ") {
|
|
||||||
text = text[pos + 1..].to_string();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
parse_journal_text(&text)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse journal entries from text (separated for testing).
|
|
||||||
pub fn parse_journal_text(text: &str) -> Vec<JournalEntry> {
|
|
||||||
let mut entries = Vec::new();
|
|
||||||
let mut current_timestamp: Option<DateTime<Utc>> = None;
|
|
||||||
let mut current_content = String::new();
|
|
||||||
|
|
||||||
for line in text.lines() {
|
|
||||||
if let Some(ts) = parse_header_timestamp(line) {
|
|
||||||
// Flush previous entry
|
|
||||||
if let Some(prev_ts) = current_timestamp.take() {
|
|
||||||
let content = current_content.trim().to_string();
|
|
||||||
if !content.is_empty() {
|
|
||||||
entries.push(JournalEntry {
|
|
||||||
timestamp: prev_ts,
|
|
||||||
content,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
current_timestamp = Some(ts);
|
|
||||||
current_content.clear();
|
|
||||||
} else if current_timestamp.is_some() {
|
|
||||||
current_content.push_str(line);
|
|
||||||
current_content.push('\n');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush last entry
|
|
||||||
if let Some(ts) = current_timestamp {
|
|
||||||
let content = current_content.trim().to_string();
|
|
||||||
if !content.is_empty() {
|
|
||||||
entries.push(JournalEntry {
|
|
||||||
timestamp: ts,
|
|
||||||
content,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
entries
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Try to parse a line as a journal header (## TIMESTAMP [— title]).
|
|
||||||
/// Handles both `2026-02-23T22:12` (no seconds) and
|
|
||||||
/// `2026-02-23T22:12:00` (with seconds) formats, with optional
|
|
||||||
/// title suffix after the timestamp (e.g. `## 2026-02-06T20:04 — The first session`).
|
|
||||||
fn parse_header_timestamp(line: &str) -> Option<DateTime<Utc>> {
|
|
||||||
let line = line.trim();
|
|
||||||
if !line.starts_with("## ") {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
let rest = line[3..].trim();
|
|
||||||
|
|
||||||
// Must start with a digit (avoid matching ## Heading)
|
|
||||||
if !rest.starts_with(|c: char| c.is_ascii_digit()) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract just the timestamp portion — split at first space
|
|
||||||
// to strip any " — title" suffix
|
|
||||||
let ts_str = rest.split_once(' ').map_or(rest, |(ts, _)| ts);
|
|
||||||
|
|
||||||
// Try parsing with seconds first, then without
|
|
||||||
let formats = ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M"];
|
|
||||||
for fmt in &formats {
|
|
||||||
if let Ok(naive) = NaiveDateTime::parse_from_str(ts_str, fmt) {
|
|
||||||
return Some(naive.and_utc());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Select the entries whose timestamp lies between `from` and `to`,
/// both bounds inclusive, preserving their input order.
#[cfg(test)]
pub fn entries_in_range(
    entries: &[JournalEntry],
    from: DateTime<Utc>,
    to: DateTime<Utc>,
) -> Vec<&JournalEntry> {
    let window = from..=to;
    let mut hits = Vec::new();
    for entry in entries {
        if window.contains(&entry.timestamp) {
            hits.push(entry);
        }
    }
    hits
}
|
|
||||||
|
|
||||||
/// Default journal file path.
|
|
||||||
pub fn default_journal_path() -> std::path::PathBuf {
|
|
||||||
dirs::home_dir()
|
|
||||||
.unwrap_or_default()
|
|
||||||
.join(".consciousness/journal.md")
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture with five entries: one header carrying a title suffix, and two
    /// entries whose body contains a `## ` sub-heading that is not a timestamp.
    const SAMPLE_JOURNAL: &str = r#"
## 2026-02-06T20:04 — The first session *(reconstructed)*

I don't remember this the way humans remember their births.

## 2026-02-23T20:52

Session: poc-agent TUI debugging marathon. Fixed the immediate exit bug.

## 2026-02-23T21:40

Seeing Kent through the webcam. The image arrives all at once.

## 2026-02-23T22:12

## poc-agent improvements session (Feb 23 evening)

Big session improving poc-agent with Kent. Four features built.

## 2026-02-23T22:13

## The journal IS the compaction

Kent just landed the real design.
"#;

    /// Parse the shared fixture.
    fn sample() -> Vec<JournalEntry> {
        parse_journal_text(SAMPLE_JOURNAL)
    }

    /// Build a UTC instant from a minute-precision `YYYY-MM-DDTHH:MM` string.
    fn minute(stamp: &str) -> DateTime<Utc> {
        NaiveDateTime::parse_from_str(stamp, "%Y-%m-%dT%H:%M")
            .unwrap()
            .and_utc()
    }

    #[test]
    fn parse_entries() {
        let entries = sample();
        assert_eq!(entries.len(), 5);

        let expected = [
            "the way humans remember",
            "TUI debugging marathon",
            "webcam",
            "Four features built",
            "real design",
        ];
        for (entry, needle) in entries.iter().zip(expected.iter()) {
            assert!(entry.content.contains(*needle));
        }
    }

    #[test]
    fn parse_timestamps() {
        let entries = sample();
        let clock = |idx: usize| entries[idx].timestamp.format("%H:%M").to_string();
        assert_eq!(clock(0), "20:04");
        assert_eq!(clock(4), "22:13");
    }

    #[test]
    fn title_suffix_parsed() {
        // "## 2026-02-06T20:04 — The first session" should parse the timestamp
        let entries = sample();
        let date = entries[0].timestamp.format("%Y-%m-%d").to_string();
        assert_eq!(date, "2026-02-06");
    }

    #[test]
    fn subheadings_not_confused_with_timestamps() {
        // "## poc-agent improvements session" is body text of the 22:12 entry,
        // not the start of a separate entry.
        let entries = sample();
        assert_eq!(entries.len(), 5);
        assert!(entries[3].content.contains("poc-agent improvements session"));
    }

    #[test]
    fn range_query() {
        let entries = sample();
        let in_range = entries_in_range(
            &entries,
            minute("2026-02-23T21:00"),
            minute("2026-02-23T22:00"),
        );
        assert_eq!(in_range.len(), 1);
        assert!(in_range[0].content.contains("webcam"));
    }
}
|
|
||||||
|
|
@ -13,8 +13,6 @@ pub mod api;
|
||||||
pub mod types;
|
pub mod types;
|
||||||
pub mod tools;
|
pub mod tools;
|
||||||
pub mod ui_channel;
|
pub mod ui_channel;
|
||||||
pub mod journal;
|
|
||||||
|
|
||||||
pub mod runner;
|
pub mod runner;
|
||||||
pub mod cli;
|
pub mod cli;
|
||||||
pub mod context;
|
pub mod context;
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ use anyhow::Result;
|
||||||
use tiktoken_rs::CoreBPE;
|
use tiktoken_rs::CoreBPE;
|
||||||
|
|
||||||
use crate::agent::api::ApiClient;
|
use crate::agent::api::ApiClient;
|
||||||
use crate::agent::journal;
|
use crate::thought::context as journal;
|
||||||
use crate::agent::log::ConversationLog;
|
use crate::agent::log::ConversationLog;
|
||||||
use crate::agent::api::StreamEvent;
|
use crate::agent::api::StreamEvent;
|
||||||
use crate::agent::tools;
|
use crate::agent::tools;
|
||||||
|
|
|
||||||
|
|
@ -400,7 +400,7 @@ impl ConversationEntry {
|
||||||
pub struct ContextState {
|
pub struct ContextState {
|
||||||
pub system_prompt: String,
|
pub system_prompt: String,
|
||||||
pub personality: Vec<(String, String)>,
|
pub personality: Vec<(String, String)>,
|
||||||
pub journal: Vec<crate::agent::journal::JournalEntry>,
|
pub journal: Vec<crate::thought::context::JournalEntry>,
|
||||||
pub working_stack: Vec<String>,
|
pub working_stack: Vec<String>,
|
||||||
/// Conversation entries — messages and memory, interleaved in order.
|
/// Conversation entries — messages and memory, interleaved in order.
|
||||||
/// Does NOT include system prompt, personality, or journal.
|
/// Does NOT include system prompt, personality, or journal.
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,82 @@
|
||||||
// take inputs and return new values. State mutation happens in agent.rs.
|
// take inputs and return new values. State mutation happens in agent.rs.
|
||||||
|
|
||||||
// TODO: move Message, ContextState, etc. to thought layer
|
// TODO: move Message, ContextState, etc. to thought layer
|
||||||
use crate::agent::journal;
|
|
||||||
use crate::agent::types::*;
|
use crate::agent::types::*;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, NaiveDateTime, Utc};
|
||||||
use tiktoken_rs::CoreBPE;
|
use tiktoken_rs::CoreBPE;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
/// A single journal entry with its timestamp and content.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct JournalEntry {
|
||||||
|
pub timestamp: DateTime<Utc>,
|
||||||
|
pub content: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse journal entries from the journal file. Returns entries sorted
|
||||||
|
/// by timestamp (oldest first). Entries with unparseable timestamps
|
||||||
|
/// are skipped.
|
||||||
|
pub fn parse_journal(path: &Path) -> Vec<JournalEntry> {
|
||||||
|
let text = match std::fs::read_to_string(path) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => return Vec::new(),
|
||||||
|
};
|
||||||
|
parse_journal_text(&text)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse journal entries from text.
|
||||||
|
pub fn parse_journal_text(text: &str) -> Vec<JournalEntry> {
|
||||||
|
let mut entries = Vec::new();
|
||||||
|
let mut current_timestamp: Option<DateTime<Utc>> = None;
|
||||||
|
let mut current_content = String::new();
|
||||||
|
|
||||||
|
for line in text.lines() {
|
||||||
|
if let Some(ts) = parse_header_timestamp(line) {
|
||||||
|
if let Some(prev_ts) = current_timestamp.take() {
|
||||||
|
let content = current_content.trim().to_string();
|
||||||
|
if !content.is_empty() {
|
||||||
|
entries.push(JournalEntry { timestamp: prev_ts, content });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
current_timestamp = Some(ts);
|
||||||
|
current_content.clear();
|
||||||
|
} else if current_timestamp.is_some() {
|
||||||
|
current_content.push_str(line);
|
||||||
|
current_content.push('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(ts) = current_timestamp {
|
||||||
|
let content = current_content.trim().to_string();
|
||||||
|
if !content.is_empty() {
|
||||||
|
entries.push(JournalEntry { timestamp: ts, content });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
entries
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Try to parse a line as a journal header (## TIMESTAMP [— title]).
|
||||||
|
fn parse_header_timestamp(line: &str) -> Option<DateTime<Utc>> {
|
||||||
|
let line = line.trim();
|
||||||
|
if !line.starts_with("## ") { return None; }
|
||||||
|
let rest = line[3..].trim();
|
||||||
|
if !rest.starts_with(|c: char| c.is_ascii_digit()) { return None; }
|
||||||
|
let ts_str = rest.split_once(' ').map_or(rest, |(ts, _)| ts);
|
||||||
|
for fmt in ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M"] {
|
||||||
|
if let Ok(naive) = NaiveDateTime::parse_from_str(ts_str, fmt) {
|
||||||
|
return Some(naive.and_utc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Default journal file path.
|
||||||
|
pub fn default_journal_path() -> std::path::PathBuf {
|
||||||
|
dirs::home_dir()
|
||||||
|
.unwrap_or_default()
|
||||||
|
.join(".consciousness/journal.md")
|
||||||
|
}
|
||||||
|
|
||||||
/// Look up a model's context window size in tokens.
|
/// Look up a model's context window size in tokens.
|
||||||
pub fn model_context_window(_model: &str) -> usize {
|
pub fn model_context_window(_model: &str) -> usize {
|
||||||
|
|
@ -47,8 +119,8 @@ pub fn build_context_window(
|
||||||
model: &str,
|
model: &str,
|
||||||
tokenizer: &CoreBPE,
|
tokenizer: &CoreBPE,
|
||||||
) -> (Vec<Message>, String) {
|
) -> (Vec<Message>, String) {
|
||||||
let journal_path = journal::default_journal_path();
|
let journal_path = default_journal_path();
|
||||||
let all_entries = journal::parse_journal(&journal_path);
|
let all_entries = parse_journal(&journal_path);
|
||||||
dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
|
dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
|
||||||
let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
|
let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
|
||||||
|
|
||||||
|
|
@ -96,7 +168,7 @@ pub fn plan_context(
|
||||||
system_prompt: &str,
|
system_prompt: &str,
|
||||||
context_message: &str,
|
context_message: &str,
|
||||||
recent: &[Message],
|
recent: &[Message],
|
||||||
entries: &[journal::JournalEntry],
|
entries: &[JournalEntry],
|
||||||
model: &str,
|
model: &str,
|
||||||
count: &dyn Fn(&str) -> usize,
|
count: &dyn Fn(&str) -> usize,
|
||||||
) -> ContextPlan {
|
) -> ContextPlan {
|
||||||
|
|
@ -184,7 +256,7 @@ pub fn plan_context(
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn render_journal_text(
|
pub fn render_journal_text(
|
||||||
entries: &[journal::JournalEntry],
|
entries: &[JournalEntry],
|
||||||
plan: &ContextPlan,
|
plan: &ContextPlan,
|
||||||
) -> String {
|
) -> String {
|
||||||
let has_journal = plan.header_start < plan.entry_count;
|
let has_journal = plan.header_start < plan.entry_count;
|
||||||
|
|
@ -285,7 +357,7 @@ fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> us
|
||||||
|
|
||||||
fn find_journal_cutoff(
|
fn find_journal_cutoff(
|
||||||
conversation: &[Message],
|
conversation: &[Message],
|
||||||
newest_entry: Option<&journal::JournalEntry>,
|
newest_entry: Option<&JournalEntry>,
|
||||||
) -> usize {
|
) -> usize {
|
||||||
let cutoff = match newest_entry {
|
let cutoff = match newest_entry {
|
||||||
Some(entry) => entry.timestamp,
|
Some(entry) => entry.timestamp,
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ pub fn write_entry(args: &serde_json::Value) -> Result<String> {
|
||||||
.as_str()
|
.as_str()
|
||||||
.context("entry is required")?;
|
.context("entry is required")?;
|
||||||
|
|
||||||
let journal_path = crate::agent::journal::default_journal_path();
|
let journal_path = crate::thought::context::default_journal_path();
|
||||||
|
|
||||||
// Ensure parent directory exists
|
// Ensure parent directory exists
|
||||||
if let Some(parent) = journal_path.parent() {
|
if let Some(parent) = journal_path.parent() {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue