// Transcript JSONL parsing utilities. // // Provides mmap-based backward scanning of Claude Code transcript files // and compaction detection. Used by memory-search (hook mode) and // parse-claude-conversation (debug tool). use memchr::memrchr3; use memmap2::Mmap; use serde_json::Value; use std::fs; use std::path::Path; /// Scan backwards through mmap'd bytes, yielding byte slices of complete /// top-level JSON objects (outermost { to matching }). /// /// Uses memrchr3 (SIMD) to jump between structurally significant bytes /// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth, /// skipping braces inside JSON strings. Returns objects in reverse order /// (newest first). pub struct JsonlBackwardIter<'a> { data: &'a [u8], pos: usize, } impl<'a> JsonlBackwardIter<'a> { pub fn new(data: &'a [u8]) -> Self { Self { data, pos: data.len() } } } impl<'a> Iterator for JsonlBackwardIter<'a> { type Item = &'a [u8]; fn next(&mut self) -> Option { // Find the closing } of the next object let close = loop { let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?; self.pos = p; if self.data[p] == b'}' { break p; } // Skip past any { or " that aren't our closing brace }; // Track brace depth to find matching { let mut depth: usize = 1; let mut in_string = false; loop { let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?; self.pos = p; let ch = self.data[p]; if in_string { if ch == b'"' { // Check for escaped quote (count preceding backslashes) let mut bs = 0; while p > bs + 1 && self.data[p - 1 - bs] == b'\\' { bs += 1; } if bs % 2 == 0 { in_string = false; } } // { and } inside strings don't affect depth continue; } match ch { b'"' => { in_string = true; } b'}' => { depth += 1; } b'{' => { depth -= 1; if depth == 0 { return Some(&self.data[self.pos..=close]); } } _ => {} } } } } /// Find the byte offset of the last compaction summary in mmap'd transcript data. 
///
/// Scans backward for a user-type message whose content starts with
/// "This session is being continued". Returns the byte offset of the
/// JSON object's opening brace.
pub fn find_last_compaction(data: &[u8]) -> Option<usize> {
    const MARKER: &str = "This session is being continued";

    for obj_bytes in JsonlBackwardIter::new(data) {
        // Quick byte check before parsing
        if !contains_bytes(obj_bytes, MARKER.as_bytes()) {
            continue;
        }

        // Objects that fail to parse are skipped rather than aborting the scan.
        let Ok(obj) = serde_json::from_slice::<Value>(obj_bytes) else {
            continue;
        };

        if obj.get("type").and_then(Value::as_str) != Some("user") {
            continue;
        }

        let is_summary = obj
            .get("message")
            .and_then(|m| m.get("content"))
            .and_then(Value::as_str)
            .is_some_and(|content| content.starts_with(MARKER));

        if is_summary {
            // `obj_bytes` is a sub-slice of `data`, so the pointer difference
            // is the object's byte offset within `data`.
            let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize;
            return Some(offset);
        }
    }

    None
}

/// Find the byte offset of the last compaction in a transcript file.
/// Returns None if the file can't be opened or has no compaction.
pub fn find_last_compaction_in_file(path: &str) -> Option<u64> {
    if path.is_empty() {
        return None;
    }

    let (mmap, _file) = mmap_transcript(path)?;
    // usize -> u64 is a widening conversion here.
    find_last_compaction(&mmap).map(|off| off as u64)
}

/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
///
/// Returns None if the file cannot be opened, its metadata cannot be read,
/// it is empty, or the mapping fails.
pub fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
    let file = fs::File::open(path).ok()?;
    let meta = file.metadata().ok()?;
    if meta.len() == 0 {
        return None;
    }

    // SAFETY: the mapping is only read. NOTE(review): this assumes the
    // transcript is not truncated by another process while mapped — confirm.
    let mmap = unsafe { Mmap::map(&file) }.ok()?;
    Some((mmap, file))
}

/// Return true if `needle` occurs anywhere in `haystack`.
///
/// An empty needle is considered to be contained in every haystack.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `windows(0)` panics, so the empty needle is handled up front.
    if needle.is_empty() {
        return true;
    }
    haystack.windows(needle.len()).any(|w| w == needle)
}

/// Reverse-scan a transcript file, collecting user/assistant messages
/// until at least `max_bytes` bytes of message text are accumulated. Stops at
/// the last compaction boundary. Returns messages in chronological order.
pub fn tail_messages(path: &str, max_bytes: usize) -> Vec<(String, String, String)> { let (mmap, _file) = match mmap_transcript(path) { Some(v) => v, None => return Vec::new(), }; let mut messages: Vec<(String, String, String)> = Vec::new(); let mut total_bytes = 0; for obj_bytes in JsonlBackwardIter::new(&mmap) { if total_bytes >= max_bytes { break; } // Quick byte check: skip objects that aren't user/assistant // (avoids parsing large tool_result / system objects) if !contains_bytes(obj_bytes, b"\"user\"") && !contains_bytes(obj_bytes, b"\"assistant\"") { continue; } let obj: Value = match serde_json::from_slice(obj_bytes) { Ok(v) => v, Err(_) => continue, }; let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or(""); if msg_type != "user" && msg_type != "assistant" { continue; } let msg = obj.get("message").unwrap_or(&obj); let text = match msg.get("content") { Some(Value::String(s)) => s.clone(), Some(Value::Array(arr)) => { arr.iter() .filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text")) .filter_map(|b| b.get("text").and_then(|v| v.as_str())) .collect::>() .join(" ") } _ => continue, }; if text.is_empty() { continue; } // Stop at compaction boundary if msg_type == "user" && text.starts_with("This session is being continued") { break; } let timestamp = obj.get("timestamp") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); total_bytes += text.len(); messages.push((msg_type.to_string(), text, timestamp)); } messages.reverse(); messages } /// Get the timestamp of the compaction message at a given byte offset. /// Returns a human-readable datetime string, or None if unavailable. 
pub fn compaction_timestamp(path: &str, offset: u64) -> Option { let (mmap, _file) = mmap_transcript(path)?; let start = offset as usize; if start >= mmap.len() { return None; } // Find the end of this JSONL line let end = mmap[start..].iter().position(|&b| b == b'\n') .map(|p| start + p) .unwrap_or(mmap.len()); let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?; // Claude Code transcript entries have a "timestamp" field (ISO 8601) if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) { return Some(ts.to_string()); } // Fallback: try "createdAt" or similar fields for field in &["createdAt", "created_at", "time"] { if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) { return Some(ts.to_string()); } } None } /// Detect whether a compaction has occurred since the last check. /// /// Compares the current compaction offset against a saved value in /// `state_dir/compaction-{session_id}`. Returns true if a new /// compaction was found. Updates the saved offset. pub fn detect_new_compaction( state_dir: &Path, session_id: &str, transcript_path: &str, ) -> bool { let offset = find_last_compaction_in_file(transcript_path); let save_path = state_dir.join(format!("compaction-{}", session_id)); let saved: Option = fs::read_to_string(&save_path) .ok() .and_then(|s| s.trim().parse().ok()); let is_new = match (offset, saved) { (Some(cur), Some(prev)) => cur != prev, (Some(_), None) => true, _ => false, }; // Save current offset if let Some(off) = offset { fs::write(&save_path, off.to_string()).ok(); } is_new }