transcript: extract JSONL backward scanner and compaction detection into library
Move JsonlBackwardIter and find_last_compaction() from parse-claude-conversation into a shared transcript module. Both memory-search and parse-claude-conversation now use the same robust compaction detection: mmap-based backward scan, JSON parsing to verify user-type message, content prefix check. Replaces memory-search's old detect_compaction() which did a forward scan with raw string matching on "continued from a previous conversation" — that could false-positive on the string appearing in assistant output or tool results. Add parse-claude-conversation as a new binary for debugging what's in the context window post-compaction. Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
0e17ab00b0
commit
c2f245740c
4 changed files with 548 additions and 9 deletions
176
poc-memory/src/transcript.rs
Normal file
176
poc-memory/src/transcript.rs
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
// Transcript JSONL parsing utilities.
|
||||
//
|
||||
// Provides mmap-based backward scanning of Claude Code transcript files
|
||||
// and compaction detection. Used by memory-search (hook mode) and
|
||||
// parse-claude-conversation (debug tool).
|
||||
|
||||
use memmap2::Mmap;
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Reverse iterator over the top-level JSON objects stored in a byte buffer.
///
/// Each item is the byte slice spanning one object, from its outermost `{`
/// to the matching `}`. Objects are produced from the end of the buffer
/// toward the start, so the most recently appended record comes first.
///
/// Matching is done by counting braces while walking backward; braces that
/// occur inside JSON string literals are ignored. The bytes are not parsed
/// or validated as JSON. If the start of the buffer is reached before the
/// current object's opening brace is found, iteration ends.
#[derive(Debug, Clone)]
pub struct JsonlBackwardIter<'a> {
    /// Buffer being scanned.
    data: &'a [u8],
    /// Scan cursor: every byte at an index >= `pos` has already been consumed.
    pos: usize,
}

impl<'a> JsonlBackwardIter<'a> {
    /// Creates an iterator positioned at the end of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Self { data, pos: data.len() }
    }

    /// Reports whether the `"` at index `quote` is escaped, i.e. directly
    /// preceded by an odd number of backslashes.
    fn is_escaped_quote(&self, quote: usize) -> bool {
        let backslashes = self.data[..quote]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        backslashes % 2 == 1
    }
}

impl<'a> Iterator for JsonlBackwardIter<'a> {
    type Item = &'a [u8];

    /// Returns the next object toward the start of the buffer, or `None`
    /// once no further complete object can be found.
    fn next(&mut self) -> Option<Self::Item> {
        // The nearest `}` below the cursor is taken as the end of the next object.
        let close = match self.data[..self.pos].iter().rposition(|&b| b == b'}') {
            Some(idx) => idx,
            None => {
                // Nothing left to find; park the cursor so later calls stay cheap.
                self.pos = 0;
                return None;
            }
        };
        self.pos = close;

        // Walk backward until the brace opened by `close` is balanced.
        let mut depth: usize = 1;
        let mut in_string = false;

        while self.pos > 0 {
            self.pos -= 1;
            let ch = self.data[self.pos];

            if in_string {
                // Going backward, an unescaped quote is the string's opening quote.
                if ch == b'"' && !self.is_escaped_quote(self.pos) {
                    in_string = false;
                }
                continue;
            }

            match ch {
                // Outside a string, a quote seen from the right is a closing quote.
                b'"' => in_string = true,
                b'}' => depth += 1,
                b'{' => {
                    // `depth` is at least 1 here, so this cannot underflow.
                    depth -= 1;
                    if depth == 0 {
                        return Some(&self.data[self.pos..=close]);
                    }
                }
                _ => {}
            }
        }

        // Reached the start of the buffer with the object still unbalanced.
        None
    }
}

// `next` only returns `None` with `pos == 0`, after which it keeps returning `None`.
impl std::iter::FusedIterator for JsonlBackwardIter<'_> {}
|
||||
|
||||
/// Find the byte offset of the last compaction summary in mmap'd transcript data.
|
||||
///
|
||||
/// Scans backward for a user-type message whose content starts with
|
||||
/// "This session is being continued". Returns the byte offset of the
|
||||
/// JSON object's opening brace.
|
||||
pub fn find_last_compaction(data: &[u8]) -> Option<usize> {
|
||||
let marker = b"This session is being continued";
|
||||
|
||||
for obj_bytes in JsonlBackwardIter::new(data) {
|
||||
// Quick byte check before parsing
|
||||
if !contains_bytes(obj_bytes, marker) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let obj: Value = match serde_json::from_slice(obj_bytes) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if obj.get("type").and_then(|v| v.as_str()) != Some("user") {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(content) = obj.get("message")
|
||||
.and_then(|m| m.get("content"))
|
||||
.and_then(|c| c.as_str())
|
||||
{
|
||||
if content.starts_with("This session is being continued") {
|
||||
let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize;
|
||||
return Some(offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Find the byte offset of the last compaction in a transcript file.
|
||||
/// Returns None if the file can't be opened or has no compaction.
|
||||
pub fn find_last_compaction_in_file(path: &str) -> Option<u64> {
|
||||
if path.is_empty() { return None; }
|
||||
|
||||
let file = fs::File::open(path).ok()?;
|
||||
let meta = file.metadata().ok()?;
|
||||
if meta.len() == 0 { return None; }
|
||||
|
||||
let mmap = unsafe { Mmap::map(&file).ok()? };
|
||||
find_last_compaction(&mmap).map(|off| off as u64)
|
||||
}
|
||||
|
||||
/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
|
||||
pub fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
|
||||
let file = fs::File::open(path).ok()?;
|
||||
let meta = file.metadata().ok()?;
|
||||
if meta.len() == 0 { return None; }
|
||||
let mmap = unsafe { Mmap::map(&file).ok()? };
|
||||
Some((mmap, file))
|
||||
}
|
||||
|
||||
/// Reports whether `needle` occurs as a contiguous run of bytes in `haystack`.
///
/// An empty `needle` is contained in every haystack, including an empty one.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics on a window size of zero, so the empty needle
    // has to be answered before it is reached.
    needle.is_empty() || haystack.windows(needle.len()).any(|window| window == needle)
}
|
||||
|
||||
/// Detect whether a compaction has occurred since the last check.
|
||||
///
|
||||
/// Compares the current compaction offset against a saved value in
|
||||
/// `state_dir/compaction-{session_id}`. Returns true if a new
|
||||
/// compaction was found. Updates the saved offset.
|
||||
pub fn detect_new_compaction(
|
||||
state_dir: &Path,
|
||||
session_id: &str,
|
||||
transcript_path: &str,
|
||||
) -> bool {
|
||||
let offset = find_last_compaction_in_file(transcript_path);
|
||||
|
||||
let save_path = state_dir.join(format!("compaction-{}", session_id));
|
||||
let saved: Option<u64> = fs::read_to_string(&save_path)
|
||||
.ok()
|
||||
.and_then(|s| s.trim().parse().ok());
|
||||
|
||||
let is_new = match (offset, saved) {
|
||||
(Some(cur), Some(prev)) => cur != prev,
|
||||
(Some(_), None) => true,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
// Save current offset
|
||||
if let Some(off) = offset {
|
||||
fs::write(&save_path, off.to_string()).ok();
|
||||
}
|
||||
|
||||
is_new
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue