Split conversation transcript parsing

This commit is contained in:
Kent Overstreet 2026-06-15 11:24:18 -05:00
commit 54df271308
9 changed files with 614 additions and 348 deletions

113
src/conversation/claude.rs Normal file
View file

@ -0,0 +1,113 @@
use serde_json::Value;
use super::{ConversationSource, TranscriptMessage, TranscriptRole};
pub struct ClaudeSource;
impl ConversationSource for ClaudeSource {
fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
parse_message(obj, offset)
}
fn is_compaction(&self, obj: &Value) -> bool {
is_compaction(obj)
}
fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
contains_bytes(obj_bytes, b"This session is being continued")
}
}
/// Extract the textual content of a message `content` value.
///
/// A string is returned as-is. For an array, the `"text"` field of every
/// element whose `"type"` is `"text"` is collected and joined with single
/// spaces. Any other JSON type, or an empty result, yields `None`.
fn text_content(value: &Value) -> Option<String> {
    let text = if let Some(s) = value.as_str() {
        s.to_owned()
    } else if let Some(blocks) = value.as_array() {
        let mut pieces: Vec<&str> = Vec::new();
        for block in blocks {
            if block.get("type").and_then(Value::as_str) != Some("text") {
                continue;
            }
            if let Some(piece) = block.get("text").and_then(Value::as_str) {
                pieces.push(piece);
            }
        }
        pieces.join(" ")
    } else {
        return None;
    };

    if text.is_empty() {
        None
    } else {
        Some(text)
    }
}
pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
let role = match obj.get("type").and_then(|v| v.as_str()) {
Some("user") => TranscriptRole::User,
Some("assistant") => TranscriptRole::Assistant,
_ => return None,
};
let msg = obj.get("message").unwrap_or(obj);
let text = msg.get("content").and_then(text_content)?;
let timestamp = obj.get("timestamp")
.and_then(|v| v.as_str())
.map(str::to_string);
Some(TranscriptMessage { role, text, timestamp, offset })
}
pub(crate) fn is_compaction(obj: &Value) -> bool {
obj.get("type").and_then(|v| v.as_str()) == Some("user")
&& obj.get("message")
.and_then(|m| m.get("content"))
.and_then(|c| c.as_str())
.is_some_and(|content| content.starts_with("This session is being continued"))
}
/// Report whether `needle` occurs as a contiguous subsequence of `haystack`.
///
/// An empty `needle` is considered to be contained in every haystack.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics when asked for zero-sized windows, so the
    // empty needle has to be answered before calling it.
    needle.is_empty() || haystack.windows(needle.len()).any(|window| window == needle)
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// String content is taken verbatim; array content keeps only the
    /// `text` blocks, joined with spaces.
    #[test]
    fn parses_string_and_array_content() {
        let user_entry = json!({
            "timestamp": "2026-06-15T15:00:00.000Z",
            "type": "user",
            "message": { "content": "hello" }
        });
        let assistant_entry = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "assistant",
            "message": {
                "content": [
                    { "type": "text", "text": "hi" },
                    { "type": "tool_use", "name": "ignored" },
                    { "type": "text", "text": "there" }
                ]
            }
        });

        let expected_user = TranscriptMessage {
            role: TranscriptRole::User,
            text: "hello".to_string(),
            timestamp: Some("2026-06-15T15:00:00.000Z".to_string()),
            offset: 7,
        };
        assert_eq!(parse_message(&user_entry, 7).unwrap(), expected_user);

        let assistant_message = parse_message(&assistant_entry, 9).unwrap();
        assert_eq!(assistant_message.text, "hi there");
    }

    /// A user entry starting with the marker text counts as a compaction.
    #[test]
    fn detects_compaction_marker() {
        let entry = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "user",
            "message": {
                "content": "This session is being continued from a previous conversation."
            }
        });
        assert!(is_compaction(&entry));
    }
}

105
src/conversation/codex.rs Normal file
View file

@ -0,0 +1,105 @@
use serde_json::Value;
use super::{ConversationSource, TranscriptMessage, TranscriptRole};
pub struct CodexSource;
impl ConversationSource for CodexSource {
fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
parse_message(obj, offset)
}
fn is_compaction(&self, obj: &Value) -> bool {
is_compaction(obj)
}
fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
contains_bytes(obj_bytes, b"context_compacted")
}
}
pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
if obj.get("type").and_then(|v| v.as_str()) != Some("event_msg") {
return None;
}
let payload = obj.get("payload")?;
let (role, text) = match payload.get("type").and_then(|v| v.as_str()) {
Some("user_message") => (
TranscriptRole::User,
payload.get("message").and_then(|v| v.as_str())?.to_string(),
),
Some("agent_message") => (
TranscriptRole::Assistant,
payload.get("message").and_then(|v| v.as_str())?.to_string(),
),
_ => return None,
};
if text.is_empty() {
return None;
}
let timestamp = obj.get("timestamp")
.and_then(|v| v.as_str())
.map(str::to_string);
Some(TranscriptMessage { role, text, timestamp, offset })
}
pub(crate) fn is_compaction(obj: &Value) -> bool {
obj.get("type").and_then(|v| v.as_str()) == Some("event_msg")
&& obj.get("payload")
.and_then(|p| p.get("type"))
.and_then(|v| v.as_str()) == Some("context_compacted")
}
/// Report whether `needle` occurs as a contiguous subsequence of `haystack`.
///
/// An empty `needle` is considered to be contained in every haystack.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics when asked for zero-sized windows, so the
    // empty needle has to be answered before calling it.
    needle.is_empty() || haystack.windows(needle.len()).any(|window| window == needle)
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// User/agent event messages parse; other event payloads and non-event
    /// entries are skipped.
    #[test]
    fn parses_event_messages_and_skips_noise() {
        let user_entry = json!({
            "timestamp": "2026-06-15T15:00:00.000Z",
            "type": "event_msg",
            "payload": { "type": "user_message", "message": "start here" }
        });
        let assistant_entry = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "event_msg",
            "payload": { "type": "agent_message", "message": "working" }
        });
        let tool_entry = json!({
            "timestamp": "2026-06-15T15:00:02.000Z",
            "type": "event_msg",
            "payload": { "type": "task_started" }
        });
        let raw_entry = json!({
            "timestamp": "2026-06-15T15:00:03.000Z",
            "type": "response_item",
            "payload": { "type": "message", "role": "user" }
        });

        let user_message = parse_message(&user_entry, 1).unwrap();
        assert_eq!(user_message.role, TranscriptRole::User);

        let assistant_message = parse_message(&assistant_entry, 2).unwrap();
        assert_eq!(assistant_message.text, "working");

        assert!(parse_message(&tool_entry, 3).is_none());
        assert!(parse_message(&raw_entry, 4).is_none());
    }

    /// A `context_compacted` event payload counts as a compaction.
    #[test]
    fn detects_compaction_event() {
        let entry = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "event_msg",
            "payload": { "type": "context_compacted" }
        });
        assert!(is_compaction(&entry));
    }
}

110
src/conversation/jsonl.rs Normal file
View file

@ -0,0 +1,110 @@
use memchr::memrchr3;
/// Scan backwards through a byte buffer, yielding complete top-level JSON
/// objects (outermost { to matching }).
///
/// Each item is `(offset, bytes)`: `offset` is the index of the object's
/// opening brace within the buffer passed to `new`, and `bytes` is the
/// slice from that brace through the matching closing brace.
///
/// Uses memrchr3 to jump between structurally significant bytes
/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
/// skipping braces inside JSON strings. Returns objects in reverse order
/// (last object in the buffer first).
pub struct JsonlBackwardIter<'a> {
// Buffer being scanned.
data: &'a [u8],
// Exclusive upper bound of the region still to be scanned; moves toward 0.
pos: usize,
}
impl<'a> JsonlBackwardIter<'a> {
/// Create an iterator positioned at the end of `data`.
pub fn new(data: &'a [u8]) -> Self {
Self { data, pos: data.len() }
}
}
impl<'a> Iterator for JsonlBackwardIter<'a> {
type Item = (usize, &'a [u8]);
// Delegates to `next_json_object`, which advances `self.pos` backwards.
fn next(&mut self) -> Option<Self::Item> {
next_json_object(self.data, &mut self.pos)
}
}
/// Decide whether the byte at index `p` is an unescaped quote by counting
/// the run of backslashes immediately before it: an even count (including
/// zero) means the quote itself is not escaped.
fn is_unescaped_quote(data: &[u8], p: usize) -> bool {
    let preceding_backslashes = data[..p]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    preceding_backslashes % 2 == 0
}
/// Find the last complete top-level `{ ... }` object that ends before `*pos`.
///
/// On success returns `(open, &data[open..=close])`, where `open` is the
/// index of the opening brace, and leaves `*pos == open` so the next call
/// continues with the bytes before this object. Returns `None` when no
/// further structural byte is found; `*pos` is then left at the last
/// position examined.
fn next_json_object<'a>(data: &'a [u8], pos: &mut usize) -> Option<(usize, &'a [u8])> {
// Phase 1: find the closing } of the next object, skipping } inside strings.
let close = {
// Scanning backwards, the first quote met outside a string is treated as
// a string's closing quote; the string ends at the next unescaped quote.
let mut in_string = false;
loop {
let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
*pos = p;
let ch = data[p];
if in_string {
if ch == b'"' && is_unescaped_quote(data, p) {
in_string = false;
}
continue;
}
match ch {
b'}' => break p,
b'"' => in_string = true,
// A `{` seen before any closing brace is ignored.
_ => {}
}
}
};
// Phase 2: track brace depth to find the { matching `close`.
// `depth` starts at 1 for `close` itself and is only decremented on `{`,
// returning as soon as it reaches 0, so it cannot underflow.
let mut depth: usize = 1;
let mut in_string = false;
loop {
let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
*pos = p;
let ch = data[p];
if in_string {
if ch == b'"' && is_unescaped_quote(data, p) {
in_string = false;
}
// Braces inside strings do not affect depth.
continue;
}
match ch {
b'"' => { in_string = true; }
b'}' => { depth += 1; }
b'{' => {
depth -= 1;
if depth == 0 {
return Some((*pos, &data[*pos..=close]));
}
}
_ => {}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
// Two objects followed by non-JSON text: one has a `}` inside a string,
// the other a nested object whose string holds an escaped quote and a `{`.
// Both must come back whole, last object first.
#[test]
fn handles_nested_json_and_quoted_braces() {
let data = br#"{"n":1,"s":"literal } brace"}
{"n":2,"nested":{"s":"escaped quote: \" and { brace"}}
trailing garbage
"#;
let objs: Vec<_> = JsonlBackwardIter::new(data)
.map(|(_, bytes)| std::str::from_utf8(bytes).unwrap().to_string())
.collect();
assert_eq!(objs.len(), 2);
assert!(objs[0].contains(r#""n":2"#));
assert!(objs[1].contains(r#""n":1"#));
}
}

271
src/conversation/mod.rs Normal file
View file

@ -0,0 +1,271 @@
// Conversation transcript abstraction.
//
// Core code consumes normalized user/assistant messages through this module.
// Product-specific log formats live in the small compatibility sources below.
use memmap2::Mmap;
use serde_json::Value;
use std::fs;
use std::path::Path;
pub mod claude;
pub mod codex;
pub mod jsonl;
pub use jsonl::JsonlBackwardIter;
/// Speaker of a normalized transcript message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TranscriptRole {
/// Message attributed to the user.
User,
/// Message attributed to the assistant.
Assistant,
}
/// A transcript entry normalized to role, text and position, independent of
/// the log format it was parsed from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscriptMessage {
/// Who produced the message.
pub role: TranscriptRole,
/// Message text; the parsers in this module never produce an empty string.
pub text: String,
/// The entry's `"timestamp"` string, copied as-is when present.
pub timestamp: Option<String>,
/// Offset passed to the parser. `TailMessages` supplies the byte offset of
/// the entry's opening brace in the transcript file.
pub offset: u64,
}
/// A transcript log format that can be mapped onto normalized messages.
pub trait ConversationSource {
/// Parse one JSON entry into a message, or `None` if the entry is not a
/// user/assistant message in this format. `offset` is carried into the result.
fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage>;
/// Whether the parsed entry is a compaction marker in this format.
fn is_compaction(&self, obj: &Value) -> bool;
/// Byte-level prefilter consulted before an entry is parsed as JSON;
/// returning `false` makes `find_last_compaction_with` skip the entry.
/// The default accepts everything.
fn may_contain_compaction(&self, _obj_bytes: &[u8]) -> bool {
true
}
}
/// Source that accepts any supported format by trying the Claude source
/// first and the Codex source second.
pub struct AnyConversationSource;

impl ConversationSource for AnyConversationSource {
    /// First successful parse wins; Claude is attempted before Codex.
    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
        if let Some(message) = claude::ClaudeSource.parse_message(obj, offset) {
            return Some(message);
        }
        codex::CodexSource.parse_message(obj, offset)
    }

    /// True when either format considers the entry a compaction marker.
    fn is_compaction(&self, obj: &Value) -> bool {
        if claude::ClaudeSource.is_compaction(obj) {
            return true;
        }
        codex::CodexSource.is_compaction(obj)
    }

    /// True when either format's byte-level prefilter matches.
    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
        if claude::ClaudeSource.may_contain_compaction(obj_bytes) {
            return true;
        }
        codex::CodexSource.may_contain_compaction(obj_bytes)
    }
}
/// Locate the last compaction marker in transcript bytes, accepting any
/// supported log format.
///
/// Returns the byte offset of the marker object's opening brace, or `None`
/// when no entry is recognized as a compaction.
pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
    let source = AnyConversationSource;
    find_last_compaction_with(data, &source)
}
/// Walk `data` backwards and return the offset of the first entry that
/// `source` identifies as a compaction marker.
///
/// Entries rejected by `source.may_contain_compaction` and entries that are
/// not valid JSON are skipped.
pub(crate) fn find_last_compaction_with(
    data: &[u8],
    source: &impl ConversationSource,
) -> Option<usize> {
    JsonlBackwardIter::new(data)
        // Quick byte check so large transcript entries are not parsed needlessly.
        .filter(|(_, obj_bytes)| source.may_contain_compaction(obj_bytes))
        .find(|(_, obj_bytes)| {
            serde_json::from_slice::<Value>(obj_bytes)
                .is_ok_and(|obj| source.is_compaction(&obj))
        })
        .map(|(offset, _)| offset)
}
/// Find the byte offset of the last compaction in a transcript file.
/// Returns None if the file can't be opened or has no compaction.
pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
if path.is_empty() { return None; }
let file = fs::File::open(path).ok()?;
let meta = file.metadata().ok()?;
if meta.len() == 0 { return None; }
let mmap = unsafe { Mmap::map(&file).ok()? };
find_last_compaction(&mmap).map(|off| off as u64)
}
/// Memory-map a transcript file for reading.
///
/// Returns the mapping together with the open file handle, or `None` when
/// the file cannot be opened, its metadata cannot be read, it is empty, or
/// mapping fails.
pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
    let file = fs::File::open(path).ok()?;
    if file.metadata().ok()?.len() == 0 {
        return None;
    }
    // SAFETY: NOTE(review) — mapping a file is only sound while nothing
    // truncates or rewrites it underneath us; this presumably relies on
    // transcripts being append-only. Confirm against the writers.
    let mmap = unsafe { Mmap::map(&file) }.ok()?;
    Some((mmap, file))
}
/// Reverse iterator over the user/assistant messages of a transcript file.
///
/// Messages come out normalized and newest-first; the caller decides when
/// to stop (byte budget, count, etc).
pub struct TailMessages {
    _file: fs::File,
    mmap: Mmap,
    // Exclusive upper bound of the bytes not yet scanned.
    pos: usize,
}

impl TailMessages {
    /// Map `path` and start at its end. `None` if `mmap_transcript` fails.
    pub fn open(path: &str) -> Option<Self> {
        mmap_transcript(path).map(|(mmap, file)| {
            let pos = mmap.len();
            Self { _file: file, mmap, pos }
        })
    }
}
impl Iterator for TailMessages {
type Item = TranscriptMessage;
fn next(&mut self) -> Option<Self::Item> {
loop {
let (offset, obj_bytes) = jsonl::JsonlBackwardIter::new(&self.mmap[..self.pos]).next()?;
self.pos = offset;
let obj: Value = match serde_json::from_slice(obj_bytes) {
Ok(v) => v,
Err(_) => continue,
};
if let Some(message) = AnyConversationSource.parse_message(&obj, offset as u64) {
return Some(message);
}
}
}
}
/// Get the timestamp of the compaction message at a given byte offset.
/// Returns a human-readable datetime string, or None if unavailable.
pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
let (mmap, _file) = mmap_transcript(path)?;
let start = offset as usize;
if start >= mmap.len() { return None; }
// Find the end of this JSONL line
let end = mmap[start..].iter().position(|&b| b == b'\n')
.map(|p| start + p)
.unwrap_or(mmap.len());
let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
for field in &["createdAt", "created_at", "time"] {
if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
}
None
}
/// Detect whether a compaction has occurred since the last check.
///
/// The offset of the transcript's last compaction is compared with the
/// value stored in `state_dir/compaction-{session_id}`. Returns true when
/// a compaction exists and its offset differs from the stored one (or
/// nothing usable is stored). Whenever a compaction is found, its offset
/// is written back to that file; write failures are ignored.
pub fn detect_new_compaction(
    state_dir: &Path,
    session_id: &str,
    transcript_path: &str,
) -> bool {
    let current = find_last_compaction_in_file(transcript_path);
    let save_path = state_dir.join(format!("compaction-{}", session_id));

    // Without a compaction there is nothing new and nothing to record.
    let Some(current) = current else {
        return false;
    };

    let previous = fs::read_to_string(&save_path)
        .ok()
        .and_then(|saved| saved.trim().parse::<u64>().ok());

    // Best-effort persistence of the current offset.
    let _ = fs::write(&save_path, current.to_string());

    previous != Some(current)
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
// Write `content` to a fresh temporary file and return its handle.
fn write_temp_jsonl(content: &str) -> tempfile::NamedTempFile {
let mut file = tempfile::NamedTempFile::new().unwrap();
file.write_all(content.as_bytes()).unwrap();
file.flush().unwrap();
file
}
// A transcript mixing both formats: the four message entries come back
// newest-first and the `task_started` event is skipped.
#[test]
fn tail_messages_yields_normalized_messages_newest_first() {
let file = write_temp_jsonl(
r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"claude user"}}
{"timestamp":"2026-06-15T15:00:01.000Z","type":"assistant","message":{"content":[{"type":"text","text":"claude assistant"}]}}
{"timestamp":"2026-06-15T15:00:02.000Z","type":"event_msg","payload":{"type":"user_message","message":"codex user"}}
{"timestamp":"2026-06-15T15:00:03.000Z","type":"event_msg","payload":{"type":"task_started"}}
{"timestamp":"2026-06-15T15:00:04.000Z","type":"event_msg","payload":{"type":"agent_message","message":"codex assistant"}}
"#,
);
let messages: Vec<_> = TailMessages::open(&file.path().to_string_lossy())
.unwrap()
.collect();
assert_eq!(messages.len(), 4);
assert_eq!(messages[0].text, "codex assistant");
assert_eq!(messages[1].text, "codex user");
assert_eq!(messages[2].text, "claude assistant");
assert_eq!(messages[3].text, "claude user");
// Later entries sit at larger byte offsets.
assert!(messages[0].offset > messages[1].offset);
}
// Each format's compaction marker is found by the combined source.
#[test]
fn detects_claude_and_codex_compactions() {
let claude = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"normal"}}
{"timestamp":"2026-06-15T15:00:01.000Z","type":"user","message":{"content":"This session is being continued from a previous conversation."}}
"#;
let codex = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"user_message","message":"normal"}}
{"timestamp":"2026-06-15T15:00:01.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
"#;
assert!(find_last_compaction(claude).is_some());
assert!(find_last_compaction(codex).is_some());
}
// First call sees the compaction as new; the second call finds the same
// offset already saved and reports nothing new.
#[test]
fn detect_new_compaction_tracks_offset_changes() {
let transcript = write_temp_jsonl(
r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
"#,
);
let state = tempfile::tempdir().unwrap();
assert!(detect_new_compaction(
state.path(),
"session",
&transcript.path().to_string_lossy(),
));
assert!(!detect_new_compaction(
state.path(),
"session",
&transcript.path().to_string_lossy(),
));
}
}

View file

@ -17,7 +17,6 @@ pub mod query;
pub mod spectral;
pub mod neuro;
pub mod counters;
pub mod transcript;
use std::cell::RefCell;
use std::path::PathBuf;

View file

@ -1,340 +0,0 @@
// Transcript JSONL parsing utilities.
//
// Provides mmap-based backward scanning of Claude Code transcript files
// and compaction detection. Used by memory-search (hook mode) and
// parse-claude-conversation (debug tool).
use memchr::memrchr3;
use memmap2::Mmap;
use serde_json::Value;
use std::fs;
use std::path::Path;
/// Scan backwards through mmap'd bytes, yielding byte slices of complete
/// top-level JSON objects (outermost { to matching }).
///
/// Uses memrchr3 (SIMD) to jump between structurally significant bytes
/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
/// skipping braces inside JSON strings. Returns objects in reverse order
/// (newest first).
pub struct JsonlBackwardIter<'a> {
data: &'a [u8],
pos: usize,
}
impl<'a> JsonlBackwardIter<'a> {
pub fn new(data: &'a [u8]) -> Self {
Self { data, pos: data.len() }
}
}
impl<'a> Iterator for JsonlBackwardIter<'a> {
type Item = &'a [u8];
fn next(&mut self) -> Option<Self::Item> {
// Find the closing } of the next object, skipping } inside strings
let close = {
let mut in_string = false;
loop {
let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?;
self.pos = p;
let ch = self.data[p];
if in_string {
if ch == b'"' {
let mut bs = 0;
while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
bs += 1;
}
if bs % 2 == 0 { in_string = false; }
}
continue;
}
match ch {
b'}' => break p,
b'"' => in_string = true,
_ => {}
}
}
};
// Track brace depth to find matching {
let mut depth: usize = 1;
let mut in_string = false;
loop {
let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?;
self.pos = p;
let ch = self.data[p];
if in_string {
if ch == b'"' {
// Check for escaped quote (count preceding backslashes)
let mut bs = 0;
while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
bs += 1;
}
if bs % 2 == 0 {
in_string = false;
}
}
// { and } inside strings don't affect depth
continue;
}
match ch {
b'"' => { in_string = true; }
b'}' => { depth += 1; }
b'{' => {
depth -= 1;
if depth == 0 {
return Some(&self.data[self.pos..=close]);
}
}
_ => {}
}
}
}
}
/// Find the byte offset of the last compaction summary in transcript data.
///
/// Walks the objects backwards looking for a `"user"` entry whose
/// `message.content` string starts with "This session is being continued".
/// Returns the byte offset of that object's opening brace within `data`.
pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
    let marker = b"This session is being continued";
    JsonlBackwardIter::new(data)
        // Quick byte check before parsing.
        .filter(|obj_bytes| contains_bytes(obj_bytes, marker))
        .find(|obj_bytes| {
            let Ok(obj) = serde_json::from_slice::<Value>(obj_bytes) else {
                return false;
            };
            let is_user = obj.get("type").and_then(|v| v.as_str()) == Some("user");
            is_user
                && obj
                    .get("message")
                    .and_then(|m| m.get("content"))
                    .and_then(|c| c.as_str())
                    .is_some_and(|content| content.starts_with("This session is being continued"))
        })
        // The slice points into `data`, so the pointer distance is its offset.
        .map(|obj_bytes| obj_bytes.as_ptr() as usize - data.as_ptr() as usize)
}
/// Find the byte offset of the last compaction in a transcript file.
/// Returns None if the file can't be opened or has no compaction.
pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
if path.is_empty() { return None; }
let file = fs::File::open(path).ok()?;
let meta = file.metadata().ok()?;
if meta.len() == 0 { return None; }
let mmap = unsafe { Mmap::map(&file).ok()? };
find_last_compaction(&mmap).map(|off| off as u64)
}
/// Memory-map a transcript file for reading.
///
/// Returns the mapping together with the open file handle, or `None` when
/// the file cannot be opened, its metadata cannot be read, it is empty, or
/// mapping fails.
pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
    let file = fs::File::open(path).ok()?;
    if file.metadata().ok()?.len() == 0 {
        return None;
    }
    // SAFETY: NOTE(review) — mapping a file is only sound while nothing
    // truncates or rewrites it underneath us; this presumably relies on
    // transcripts being append-only. Confirm against the writers.
    let mmap = unsafe { Mmap::map(&file) }.ok()?;
    Some((mmap, file))
}
/// Report whether `needle` occurs as a contiguous subsequence of `haystack`.
///
/// An empty `needle` is considered to be contained in every haystack.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics when asked for zero-sized windows, so the
    // empty needle has to be answered before calling it.
    needle.is_empty() || haystack.windows(needle.len()).any(|window| window == needle)
}
/// Reverse iterator over the user/assistant messages of a transcript file.
///
/// Yields (role, text, timestamp) tuples newest-first; the caller decides
/// when to stop (byte budget, count, etc).
pub struct TailMessages {
    _file: fs::File,
    mmap: Mmap,
    // Exclusive upper bound of the bytes not yet scanned.
    pos: usize,
}

impl TailMessages {
    /// Map `path` and start at its end. `None` if `mmap_transcript` fails.
    pub fn open(path: &str) -> Option<Self> {
        mmap_transcript(path).map(|(mmap, file)| {
            let pos = mmap.len();
            Self { _file: file, mmap, pos }
        })
    }
}
impl Iterator for TailMessages {
type Item = (String, String, String);
fn next(&mut self) -> Option<Self::Item> {
loop {
// Find closing }, skipping } inside strings
let close = {
let mut in_string = false;
loop {
let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
self.pos = p;
let ch = self.mmap[p];
if in_string {
if ch == b'"' {
let mut bs = 0;
while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
bs += 1;
}
if bs % 2 == 0 { in_string = false; }
}
continue;
}
match ch {
b'}' => break p,
b'"' => in_string = true,
_ => {}
}
}
};
// Track brace depth to find matching {
let mut depth: usize = 1;
let mut in_string = false;
let open = loop {
let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
self.pos = p;
let ch = self.mmap[p];
if in_string {
if ch == b'"' {
let mut bs = 0;
while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
bs += 1;
}
if bs % 2 == 0 { in_string = false; }
}
continue;
}
match ch {
b'"' => { in_string = true; }
b'}' => { depth += 1; }
b'{' => {
depth -= 1;
if depth == 0 { break p; }
}
_ => {}
}
};
let obj_bytes = &self.mmap[open..=close];
// The "type" field is near the start of top-level objects.
// Only check the first 200 bytes to avoid scanning megabyte objects.
let prefix = &obj_bytes[..obj_bytes.len().min(200)];
let is_user = memchr::memmem::find(prefix, b"\"type\":\"user\"").is_some();
let is_assistant = !is_user
&& memchr::memmem::find(prefix, b"\"type\":\"assistant\"").is_some();
if !is_user && !is_assistant { continue; }
let obj: Value = match serde_json::from_slice(obj_bytes) {
Ok(v) => v,
Err(_) => continue,
};
let msg_type = if is_user { "user" } else { "assistant" };
let msg = obj.get("message").unwrap_or(&obj);
let text = match msg.get("content") {
Some(Value::String(s)) => s.clone(),
Some(Value::Array(arr)) => {
arr.iter()
.filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
.filter_map(|b| b.get("text").and_then(|v| v.as_str()))
.collect::<Vec<_>>()
.join(" ")
}
_ => continue,
};
if text.is_empty() { continue; }
let timestamp = obj.get("timestamp")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
return Some((msg_type.to_string(), text, timestamp));
}
}
}
/// Get the timestamp of the compaction message at a given byte offset.
/// Returns a human-readable datetime string, or None if unavailable.
pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
let (mmap, _file) = mmap_transcript(path)?;
let start = offset as usize;
if start >= mmap.len() { return None; }
// Find the end of this JSONL line
let end = mmap[start..].iter().position(|&b| b == b'\n')
.map(|p| start + p)
.unwrap_or(mmap.len());
let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
// Claude Code transcript entries have a "timestamp" field (ISO 8601)
if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
// Fallback: try "createdAt" or similar fields
for field in &["createdAt", "created_at", "time"] {
if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
}
None
}
/// Detect whether a compaction has occurred since the last check.
///
/// The offset of the transcript's last compaction is compared with the
/// value stored in `state_dir/compaction-{session_id}`. Returns true when
/// a compaction exists and its offset differs from the stored one (or
/// nothing usable is stored). Whenever a compaction is found, its offset
/// is written back to that file; write failures are ignored.
pub fn detect_new_compaction(
    state_dir: &Path,
    session_id: &str,
    transcript_path: &str,
) -> bool {
    let current = find_last_compaction_in_file(transcript_path);
    let save_path = state_dir.join(format!("compaction-{}", session_id));

    // Without a compaction there is nothing new and nothing to record.
    let Some(current) = current else {
        return false;
    };

    let previous = fs::read_to_string(&save_path)
        .ok()
        .and_then(|saved| saved.trim().parse::<u64>().ok());

    // Best-effort persistence of the current offset.
    let _ = fs::write(&save_path, current.to_string());

    previous != Some(current)
}

View file

@ -40,6 +40,9 @@ pub mod hippocampus;
// Autonomous agents
pub mod subconscious;
// Conversation transcript abstraction and compatibility sources
pub mod conversation;
// Unified configuration
pub mod config;
pub mod config_writer;
@ -88,7 +91,8 @@ pub mod channel_capnp {
pub use hippocampus::{
store, graph, lookups, query,
spectral, neuro, counters,
transcript, memory,
memory,
};
pub use conversation as transcript;
use hippocampus::query::engine as search;
use hippocampus::query::parser as query_parser;

View file

@ -3,7 +3,7 @@ use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::{Path, PathBuf};
use crate::agent::context::AstNode;
use crate::hippocampus::transcript::JsonlBackwardIter;
use crate::conversation::JsonlBackwardIter;
use memmap2::Mmap;
pub struct ConversationLog {
@ -78,6 +78,6 @@ pub struct TailNodes {
impl TailNodes {
pub fn iter(&self) -> impl Iterator<Item = AstNode> + '_ {
JsonlBackwardIter::new(&self.mmap)
.filter_map(|bytes| serde_json::from_slice::<AstNode>(bytes).ok())
.filter_map(|(_, bytes)| serde_json::from_slice::<AstNode>(bytes).ok())
}
}

View file

@ -390,7 +390,7 @@ fn resolve_conversation(budget: Option<usize>) -> String {
if !transcript.exists() { return String::new(); }
let Some(iter) = crate::transcript::TailMessages::open(&transcript.path) else {
let Some(iter) = crate::conversation::TailMessages::open(&transcript.path) else {
return String::new();
};
@ -401,10 +401,14 @@ fn resolve_conversation(budget: Option<usize>) -> String {
let mut total_bytes = 0;
let mut oldest_ts = String::new();
for (role, content, ts) in iter {
for message in iter {
if total_bytes >= max_bytes { break; }
let name = if role == "user" { &app.user_name } else { &app.assistant_name };
let formatted = if !ts.is_empty() {
let content = message.text;
let name = match message.role {
crate::conversation::TranscriptRole::User => &app.user_name,
crate::conversation::TranscriptRole::Assistant => &app.assistant_name,
};
let formatted = if let Some(ts) = message.timestamp {
oldest_ts = ts[..ts.floor_char_boundary(ts.len().min(19))].to_string();
format!("**{}** {}: {}", name, &oldest_ts, content)
} else {