transcript: extract JSONL backward scanner and compaction detection into library

Move JsonlBackwardIter and find_last_compaction() from
parse-claude-conversation into a shared transcript module. Both
memory-search and parse-claude-conversation now use the same robust
compaction detection: mmap-based backward scan, JSON parsing to
verify user-type message, content prefix check.

Replaces memory-search's old detect_compaction() which did a forward
scan with raw string matching on "continued from a previous
conversation" — that could false-positive on the string appearing
in assistant output or tool results.

Add parse-claude-conversation as a new binary for debugging what's
in the context window post-compaction.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-09 17:06:32 -04:00
parent 0e17ab00b0
commit c2f245740c
4 changed files with 548 additions and 9 deletions

View file

@ -31,6 +31,10 @@ struct Args {
#[arg(long)]
seen: bool,
/// Show full seen set (list all keys)
#[arg(long)]
seen_full: bool,
/// Max results to return
#[arg(long, default_value = "5")]
max_results: usize,
@ -50,7 +54,7 @@ fn main() {
let args = Args::parse();
if args.seen {
if args.seen || args.seen_full {
show_seen();
return;
}
@ -87,8 +91,11 @@ fn main() {
let state_dir = PathBuf::from("/tmp/claude-memory-search");
fs::create_dir_all(&state_dir).ok();
// Detect post-compaction reload
let is_compaction = prompt.contains("continued from a previous conversation");
// Detect post-compaction reload via mmap backward scan
let transcript_path = json["transcript_path"].as_str().unwrap_or("");
let is_compaction = poc_memory::transcript::detect_new_compaction(
&state_dir, session_id, transcript_path,
);
// First prompt or post-compaction: load full context
let cookie_path = state_dir.join(format!("cookie-{}", session_id));
@ -155,7 +162,6 @@ fn main() {
};
// Search for node keys in last ~150k tokens of transcript
let transcript_path = json["transcript_path"].as_str().unwrap_or("");
if debug { println!("[memory-search] transcript: {}", transcript_path); }
let terms = extract_weighted_terms(transcript_path, 150_000, &store);
@ -363,7 +369,7 @@ fn extract_key_from_line(line: &str) -> Option<String> {
let rest = &line[after_bracket + 2..];
let key_end = rest.find(" (c").unwrap_or(rest.len());
let key = rest[..key_end].trim();
if key.is_empty() || !key.contains('.') {
if key.is_empty() {
None
} else {
Some(key.to_string())
@ -374,13 +380,19 @@ fn generate_cookie() -> String {
uuid::Uuid::new_v4().as_simple().to_string()[..12].to_string()
}
/// Parse a seen-file line: "TIMESTAMP\tKEY" or legacy "KEY"
fn parse_seen_line(line: &str) -> &str {
line.split_once('\t').map(|(_, key)| key).unwrap_or(line)
}
fn load_seen(dir: &Path, session_id: &str) -> HashSet<String> {
let path = dir.join(format!("seen-{}", session_id));
if path.exists() {
fs::read_to_string(path)
.unwrap_or_default()
.lines()
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
.map(|s| parse_seen_line(s).to_string())
.collect()
} else {
HashSet::new()
@ -390,7 +402,8 @@ fn load_seen(dir: &Path, session_id: &str) -> HashSet<String> {
fn mark_seen(dir: &Path, session_id: &str, key: &str) {
let path = dir.join(format!("seen-{}", session_id));
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
writeln!(f, "{}", key).ok();
let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
writeln!(f, "{}\t{}", ts, key).ok();
}
}
@ -454,8 +467,29 @@ fn show_seen() {
}
}
let seen = load_seen(&state_dir, session_id);
println!("\nSeen set ({} total, {} pre-seeded):", seen.len(), seen.len() - returned.len());
// Read seen file in insertion order (append-only file)
let seen_path = state_dir.join(format!("seen-{}", session_id));
let seen_lines: Vec<String> = fs::read_to_string(&seen_path)
.unwrap_or_default()
.lines()
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
.collect();
let returned_set: HashSet<_> = returned.iter().cloned().collect();
println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), seen_lines.len() - returned.len());
if Args::parse().seen_full {
for line in &seen_lines {
let key = parse_seen_line(line);
let marker = if returned_set.contains(key) { "" } else { " " };
// Show timestamp if present, otherwise just key
if let Some((ts, k)) = line.split_once('\t') {
println!(" {} {}{}", ts, marker, k);
} else {
println!(" (no ts) {}{}", marker, line);
}
}
}
}
fn cleanup_stale_files(dir: &Path, max_age: Duration) {