2026-03-09 17:06:32 -04:00
|
|
|
// Transcript JSONL parsing utilities.
|
|
|
|
|
//
|
|
|
|
|
// Provides mmap-based backward scanning of Claude Code transcript files
|
|
|
|
|
// and compaction detection. Used by memory-search (hook mode) and
|
|
|
|
|
// parse-claude-conversation (debug tool).
|
|
|
|
|
|
|
|
|
|
use memmap2::Mmap;
|
|
|
|
|
use serde_json::Value;
|
|
|
|
|
use std::fs;
|
|
|
|
|
use std::path::Path;
|
|
|
|
|
|
|
|
|
|
/// Reverse iterator over the top-level JSON objects in a byte buffer.
///
/// Every item is the byte slice running from an outermost `{` through its
/// matching `}`. Brace depth is tracked while walking backward, and braces
/// that sit inside JSON string literals are ignored. Objects are produced
/// last-to-first (newest first).
pub struct JsonlBackwardIter<'a> {
    /// Buffer being scanned.
    data: &'a [u8],
    /// Scan cursor; the next scan only looks at bytes before this index.
    pos: usize,
}

impl<'a> JsonlBackwardIter<'a> {
    /// Create an iterator positioned at the end of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Self {
            data,
            pos: data.len(),
        }
    }
}

impl<'a> Iterator for JsonlBackwardIter<'a> {
    type Item = &'a [u8];

    /// Yield the next object toward the front of the buffer.
    ///
    /// Returns `None` once the scan reaches the start of the buffer without
    /// completing an object; after that every call returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        let buf = self.data;

        // Phase 1: the nearest '}' before the cursor closes the next object.
        let close = match buf[..self.pos].iter().rposition(|&b| b == b'}') {
            Some(idx) => idx,
            None => {
                self.pos = 0;
                return None;
            }
        };

        // Phase 2: keep walking backward until the brace count balances.
        let mut cursor = close;
        let mut unmatched: usize = 1;
        let mut inside_string = false;

        while cursor > 0 {
            cursor -= 1;
            let byte = buf[cursor];

            if inside_string {
                if byte == b'"' {
                    // A quote preceded by an odd number of backslashes is an
                    // escaped quote and does not delimit the string.
                    let escapes = buf[..cursor]
                        .iter()
                        .rev()
                        .take_while(|&&b| b == b'\\')
                        .count();
                    if escapes % 2 == 0 {
                        inside_string = false;
                    }
                }
                continue;
            }

            match byte {
                b'"' => inside_string = true,
                b'}' => unmatched += 1,
                b'{' => {
                    unmatched -= 1;
                    if unmatched == 0 {
                        self.pos = cursor;
                        return Some(&buf[cursor..=close]);
                    }
                }
                _ => {}
            }
        }

        // Ran off the front of the buffer without finding the opening brace.
        self.pos = 0;
        None
    }
}
|
|
|
|
|
|
|
|
|
|
/// Find the byte offset of the last compaction summary in mmap'd transcript data.
|
|
|
|
|
///
|
|
|
|
|
/// Scans backward for a user-type message whose content starts with
|
|
|
|
|
/// "This session is being continued". Returns the byte offset of the
|
|
|
|
|
/// JSON object's opening brace.
|
|
|
|
|
pub fn find_last_compaction(data: &[u8]) -> Option<usize> {
|
|
|
|
|
let marker = b"This session is being continued";
|
|
|
|
|
|
|
|
|
|
for obj_bytes in JsonlBackwardIter::new(data) {
|
|
|
|
|
// Quick byte check before parsing
|
|
|
|
|
if !contains_bytes(obj_bytes, marker) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let obj: Value = match serde_json::from_slice(obj_bytes) {
|
|
|
|
|
Ok(v) => v,
|
|
|
|
|
Err(_) => continue,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if obj.get("type").and_then(|v| v.as_str()) != Some("user") {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(content) = obj.get("message")
|
|
|
|
|
.and_then(|m| m.get("content"))
|
|
|
|
|
.and_then(|c| c.as_str())
|
2026-03-21 19:42:38 -04:00
|
|
|
&& content.starts_with("This session is being continued") {
|
2026-03-09 17:06:32 -04:00
|
|
|
let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize;
|
|
|
|
|
return Some(offset);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Find the byte offset of the last compaction in a transcript file.
|
|
|
|
|
/// Returns None if the file can't be opened or has no compaction.
|
|
|
|
|
pub fn find_last_compaction_in_file(path: &str) -> Option<u64> {
|
|
|
|
|
if path.is_empty() { return None; }
|
|
|
|
|
|
|
|
|
|
let file = fs::File::open(path).ok()?;
|
|
|
|
|
let meta = file.metadata().ok()?;
|
|
|
|
|
if meta.len() == 0 { return None; }
|
|
|
|
|
|
|
|
|
|
let mmap = unsafe { Mmap::map(&file).ok()? };
|
|
|
|
|
find_last_compaction(&mmap).map(|off| off as u64)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
|
|
|
|
|
pub fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
|
|
|
|
|
let file = fs::File::open(path).ok()?;
|
|
|
|
|
let meta = file.metadata().ok()?;
|
|
|
|
|
if meta.len() == 0 { return None; }
|
|
|
|
|
let mmap = unsafe { Mmap::map(&file).ok()? };
|
|
|
|
|
Some((mmap, file))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Report whether `needle` occurs as a contiguous subsequence of `haystack`.
///
/// An empty `needle` is contained in every haystack. It is handled
/// explicitly because `slice::windows` panics on a window size of zero.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    needle.is_empty() || haystack.windows(needle.len()).any(|w| w == needle)
}
|
|
|
|
|
|
2026-03-22 03:02:11 -04:00
|
|
|
/// Reverse-scan a transcript file, collecting user/assistant messages
|
|
|
|
|
/// until `max_tokens` tokens (~4 chars each) are accumulated. Stops at
|
|
|
|
|
/// the last compaction boundary. Returns messages in chronological order.
|
|
|
|
|
pub fn tail_messages(path: &str, max_tokens: usize) -> Vec<(String, String, String)> {
|
|
|
|
|
let (mmap, _file) = match mmap_transcript(path) {
|
|
|
|
|
Some(v) => v,
|
|
|
|
|
None => return Vec::new(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let compaction_marker = b"This session is being continued";
|
|
|
|
|
let mut messages: Vec<(String, String, String)> = Vec::new();
|
|
|
|
|
let mut token_count = 0;
|
|
|
|
|
|
|
|
|
|
for obj_bytes in JsonlBackwardIter::new(&mmap) {
|
|
|
|
|
if token_count >= max_tokens { break; }
|
|
|
|
|
|
|
|
|
|
// Stop at compaction boundary
|
|
|
|
|
if contains_bytes(obj_bytes, compaction_marker) {
|
|
|
|
|
let obj: Value = match serde_json::from_slice(obj_bytes) {
|
|
|
|
|
Ok(v) => v,
|
|
|
|
|
Err(_) => continue,
|
|
|
|
|
};
|
|
|
|
|
if obj.get("type").and_then(|v| v.as_str()) == Some("user") {
|
|
|
|
|
if let Some(c) = obj.get("message")
|
|
|
|
|
.and_then(|m| m.get("content"))
|
|
|
|
|
.and_then(|c| c.as_str())
|
|
|
|
|
&& c.starts_with("This session is being continued") {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let obj: Value = match serde_json::from_slice(obj_bytes) {
|
|
|
|
|
Ok(v) => v,
|
|
|
|
|
Err(_) => continue,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
|
|
|
|
if msg_type != "user" && msg_type != "assistant" { continue; }
|
|
|
|
|
|
|
|
|
|
let msg = obj.get("message").unwrap_or(&obj);
|
|
|
|
|
let text = match msg.get("content") {
|
|
|
|
|
Some(Value::String(s)) => s.clone(),
|
|
|
|
|
Some(Value::Array(arr)) => {
|
|
|
|
|
arr.iter()
|
|
|
|
|
.filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
|
|
|
|
|
.filter_map(|b| b.get("text").and_then(|v| v.as_str()))
|
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
|
.join(" ")
|
|
|
|
|
}
|
|
|
|
|
_ => continue,
|
|
|
|
|
};
|
|
|
|
|
if text.is_empty() { continue; }
|
|
|
|
|
|
|
|
|
|
let timestamp = obj.get("timestamp")
|
|
|
|
|
.and_then(|v| v.as_str())
|
|
|
|
|
.unwrap_or("")
|
|
|
|
|
.to_string();
|
|
|
|
|
|
|
|
|
|
token_count += text.len() / 4;
|
|
|
|
|
messages.push((msg_type.to_string(), text, timestamp));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
messages.reverse();
|
|
|
|
|
messages
|
|
|
|
|
}
|
|
|
|
|
|
Agent identity, parallel scheduling, memory-search fixes, stemmer optimization
- Agent identity injection: prepend core-personality to all agent prompts
so agents dream as me, not as generic graph workers. Include instructions
to walk the graph and connect new nodes to core concepts.
- Parallel agent scheduling: sequential within type, parallel across types.
Different agent types (linker, organize, replay) run concurrently.
- Linker prompt: graph walking instead of keyword search for connections.
"Explore the local topology and walk the graph until you find the best
connections."
- memory-search fixes: format_results no longer truncates to 5 results,
pipeline default raised to 50, returned file cleared on compaction,
--seen and --seen-full merged, compaction timestamp in --seen output,
max_entries=3 per prompt for steady memory drip.
- Stemmer optimization: strip_suffix now works in-place on a single String
buffer instead of allocating 18 new Strings per word. Note for future:
reversed-suffix trie for O(suffix_len) instead of O(n_rules).
- Transcript: add compaction_timestamp() for --seen display.
- Agent budget configurable (default 4000 from config).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 12:49:10 -04:00
|
|
|
/// Get the timestamp of the compaction message at a given byte offset.
|
|
|
|
|
/// Returns a human-readable datetime string, or None if unavailable.
|
|
|
|
|
pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
|
|
|
|
|
let (mmap, _file) = mmap_transcript(path)?;
|
|
|
|
|
let start = offset as usize;
|
|
|
|
|
if start >= mmap.len() { return None; }
|
|
|
|
|
|
|
|
|
|
// Find the end of this JSONL line
|
|
|
|
|
let end = mmap[start..].iter().position(|&b| b == b'\n')
|
|
|
|
|
.map(|p| start + p)
|
|
|
|
|
.unwrap_or(mmap.len());
|
|
|
|
|
|
|
|
|
|
let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
|
|
|
|
|
|
|
|
|
|
// Claude Code transcript entries have a "timestamp" field (ISO 8601)
|
|
|
|
|
if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
|
|
|
|
|
return Some(ts.to_string());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Fallback: try "createdAt" or similar fields
|
|
|
|
|
for field in &["createdAt", "created_at", "time"] {
|
|
|
|
|
if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
|
|
|
|
|
return Some(ts.to_string());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-09 17:06:32 -04:00
|
|
|
/// Detect whether a compaction has occurred since the last check.
|
|
|
|
|
///
|
|
|
|
|
/// Compares the current compaction offset against a saved value in
|
|
|
|
|
/// `state_dir/compaction-{session_id}`. Returns true if a new
|
|
|
|
|
/// compaction was found. Updates the saved offset.
|
|
|
|
|
pub fn detect_new_compaction(
|
|
|
|
|
state_dir: &Path,
|
|
|
|
|
session_id: &str,
|
|
|
|
|
transcript_path: &str,
|
|
|
|
|
) -> bool {
|
|
|
|
|
let offset = find_last_compaction_in_file(transcript_path);
|
|
|
|
|
|
|
|
|
|
let save_path = state_dir.join(format!("compaction-{}", session_id));
|
|
|
|
|
let saved: Option<u64> = fs::read_to_string(&save_path)
|
|
|
|
|
.ok()
|
|
|
|
|
.and_then(|s| s.trim().parse().ok());
|
|
|
|
|
|
|
|
|
|
let is_new = match (offset, saved) {
|
|
|
|
|
(Some(cur), Some(prev)) => cur != prev,
|
|
|
|
|
(Some(_), None) => true,
|
|
|
|
|
_ => false,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Save current offset
|
|
|
|
|
if let Some(off) = offset {
|
|
|
|
|
fs::write(&save_path, off.to_string()).ok();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
is_new
|
|
|
|
|
}
|