unify conversation persistence to append-only jsonl

Log ConversationEntry (with Memory/Message typing) instead of
raw Message. restore_from_log reads typed entries directly,
preserving Memory vs Message distinction across restarts.

Remove current.json snapshot and save_session — the append-only
log is the single source of truth. Remove dead read_all and
message_count methods. Add push_entry for logging typed entries.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-02 14:31:19 -04:00
parent 1f7b585d41
commit a21cf31ad2
3 changed files with 47 additions and 125 deletions

View file

@ -14,7 +14,7 @@ use std::fs::{File, OpenOptions};
use std::io::{BufRead, BufReader, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use crate::agent::types::Message;
use crate::agent::types::ConversationEntry;
pub struct ConversationLog {
path: PathBuf,
@ -30,16 +30,16 @@ impl ConversationLog {
Ok(Self { path })
}
/// Append a single message to the log.
pub fn append(&self, msg: &Message) -> Result<()> {
/// Append a conversation entry to the log.
pub fn append(&self, entry: &ConversationEntry) -> Result<()> {
let mut file = OpenOptions::new()
.create(true)
.append(true)
.open(&self.path)
.with_context(|| format!("opening log {}", self.path.display()))?;
let line = serde_json::to_string(msg)
.context("serializing message for log")?;
let line = serde_json::to_string(entry)
.context("serializing entry for log")?;
writeln!(file, "{}", line)
.context("writing to conversation log")?;
Ok(())
@ -48,7 +48,7 @@ impl ConversationLog {
/// Read the tail of the log (last `max_bytes` bytes).
/// Seeks to `file_len - max_bytes`, skips the first partial line,
/// then parses forward. For logs smaller than `max_bytes`, reads everything.
pub fn read_tail(&self, max_bytes: u64) -> Result<Vec<Message>> {
pub fn read_tail(&self, max_bytes: u64) -> Result<Vec<ConversationEntry>> {
if !self.path.exists() {
return Ok(Vec::new());
}
@ -64,62 +64,19 @@ impl ConversationLog {
reader.read_line(&mut discard)?;
}
let mut messages = Vec::new();
let mut entries = Vec::new();
for line in reader.lines() {
let line = line.context("reading log tail")?;
let line = line.trim();
if line.is_empty() {
continue;
}
match serde_json::from_str::<Message>(line) {
Ok(msg) => messages.push(msg),
Err(_) => {} // skip corrupt/partial lines
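// Parse each line as a ConversationEntry (new format). No explicit fallback to a
// bare Message happens at this call site; old-format logs are only readable if
// ConversationEntry's Deserialize impl accepts them (NOTE(review): confirm).
// Lines that fail to parse (corrupt/partial) are silently skipped.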
if let Ok(entry) = serde_json::from_str::<ConversationEntry>(line) {
entries.push(entry);
}
}
Ok(messages)
}
/// Count messages in the log without loading content.
#[allow(dead_code)]
pub fn message_count(&self) -> Result<usize> {
if !self.path.exists() {
return Ok(0);
}
let file = File::open(&self.path)
.with_context(|| format!("opening log {}", self.path.display()))?;
let reader = BufReader::new(file);
Ok(reader.lines()
.filter(|l| l.as_ref().map_or(false, |s| !s.trim().is_empty()))
.count())
}
/// Read all messages from the log. Returns empty vec if log doesn't exist.
/// NOTE: Don't use this in hot paths — use read_tail() instead.
#[allow(dead_code)]
pub fn read_all(&self) -> Result<Vec<Message>> {
if !self.path.exists() {
return Ok(Vec::new());
}
let file = File::open(&self.path)
.with_context(|| format!("opening log {}", self.path.display()))?;
let reader = BufReader::new(file);
let mut messages = Vec::new();
for (i, line) in reader.lines().enumerate() {
let line = line.with_context(|| format!("reading log line {}", i))?;
let line = line.trim();
if line.is_empty() {
continue;
}
match serde_json::from_str::<Message>(line) {
Ok(msg) => messages.push(msg),
Err(e) => {
// Log corruption — skip bad lines rather than failing
eprintln!("warning: skipping corrupt log line {}: {}", i, e);
}
}
}
Ok(messages)
Ok(entries)
}
pub fn path(&self) -> &Path {
@ -133,8 +90,8 @@ impl ConversationLog {
for line in reader.lines().flatten() {
let line = line.trim().to_string();
if line.is_empty() { continue; }
if let Ok(msg) = serde_json::from_str::<Message>(&line) {
if let Some(ts) = &msg.timestamp {
if let Ok(entry) = serde_json::from_str::<ConversationEntry>(&line) {
if let Some(ts) = &entry.message().timestamp {
if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(ts) {
return Some(dt.to_utc());
}

View file

@ -168,12 +168,17 @@ impl Agent {
/// Stamp `msg`, wrap it as a `ConversationEntry::Message`, and hand the
/// resulting entry to `push_entry`.
fn push_message(&mut self, mut msg: Message) {
    msg.stamp();
    self.push_entry(ConversationEntry::Message(msg));
}
fn push_entry(&mut self, entry: ConversationEntry) {
if let Some(ref log) = self.conversation_log {
if let Err(e) = log.append(&msg) {
eprintln!("warning: failed to log message: {:#}", e);
if let Err(e) = log.append(&entry) {
eprintln!("warning: failed to log entry: {:#}", e);
}
}
self.context.entries.push(ConversationEntry::Message(msg));
self.context.entries.push(entry);
}
/// Push a context-only message (system prompt, identity context,
@ -1000,11 +1005,11 @@ impl Agent {
self.context.system_prompt = system_prompt;
self.context.personality = personality;
let all_messages = match &self.conversation_log {
let entries = match &self.conversation_log {
Some(log) => match log.read_tail(512 * 1024) {
Ok(msgs) if !msgs.is_empty() => {
dbglog!("[restore] read {} messages from log tail", msgs.len());
msgs
Ok(entries) if !entries.is_empty() => {
dbglog!("[restore] read {} entries from log tail", entries.len());
entries
}
Ok(_) => {
dbglog!("[restore] log exists but is empty");
@ -1021,29 +1026,31 @@ impl Agent {
}
};
// Filter out system/context messages — we only want the
// actual conversation (user prompts, assistant responses,
// tool calls/results)
let conversation: Vec<Message> = all_messages
// Filter out system messages, keep everything else (including Memory entries)
let entries: Vec<ConversationEntry> = entries
.into_iter()
.filter(|m| m.role != Role::System)
.filter(|e| e.message().role != Role::System)
.collect();
dbglog!("[restore] {} messages after filtering system", conversation.len());
let messages = crate::agent::context::trim_conversation(
// Trim to fit context budget
let n = entries.len();
let conversation: Vec<Message> = entries.iter()
.map(|e| e.api_message().clone()).collect();
let trimmed = crate::agent::context::trim_conversation(
&self.context,
&conversation,
&self.client.model,
&self.tokenizer,
);
dbglog!("[restore] journal preserved: {} entries",
self.context.journal.len());
// Don't overwrite journal — already loaded from memory graph
self.context.entries = messages.into_iter()
.map(ConversationEntry::Message).collect();
dbglog!("[restore] built context window: {} entries", self.context.entries.len());
// Keep only the entries that survived trimming (by count from the end)
let keep = trimmed.len();
self.context.entries = entries.into_iter()
.skip(n.saturating_sub(keep))
.collect();
dbglog!("[restore] {} entries, journal: {} entries",
self.context.entries.len(), self.context.journal.len());
self.last_prompt_tokens = 0;
self.publish_context_state();
true
}
@ -1068,10 +1075,6 @@ impl Agent {
&mut self.context.entries
}
/// Restore from saved conversation entries.
pub fn restore(&mut self, entries: Vec<ConversationEntry>) {
self.context.entries = entries;
}
}
// Context window building, token counting, and error classification