// Transcript JSONL parsing utilities. // // Provides mmap-based backward scanning of Claude Code transcript files // and compaction detection. Used by memory-search (hook mode) and // parse-claude-conversation (debug tool). use memchr::memrchr3; use memmap2::Mmap; use serde_json::Value; use std::fs; use std::path::Path; /// Scan backwards through mmap'd bytes, yielding byte slices of complete /// top-level JSON objects (outermost { to matching }). /// /// Uses memrchr3 (SIMD) to jump between structurally significant bytes /// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth, /// skipping braces inside JSON strings. Returns objects in reverse order /// (newest first). pub struct JsonlBackwardIter<'a> { data: &'a [u8], pos: usize, } impl<'a> JsonlBackwardIter<'a> { pub fn new(data: &'a [u8]) -> Self { Self { data, pos: data.len() } } } impl<'a> Iterator for JsonlBackwardIter<'a> { type Item = &'a [u8]; fn next(&mut self) -> Option { // Find the closing } of the next object let close = loop { let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?; self.pos = p; if self.data[p] == b'}' { break p; } // Skip past any { or " that aren't our closing brace }; // Track brace depth to find matching { let mut depth: usize = 1; let mut in_string = false; loop { let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?; self.pos = p; let ch = self.data[p]; if in_string { if ch == b'"' { // Check for escaped quote (count preceding backslashes) let mut bs = 0; while p > bs + 1 && self.data[p - 1 - bs] == b'\\' { bs += 1; } if bs % 2 == 0 { in_string = false; } } // { and } inside strings don't affect depth continue; } match ch { b'"' => { in_string = true; } b'}' => { depth += 1; } b'{' => { depth -= 1; if depth == 0 { return Some(&self.data[self.pos..=close]); } } _ => {} } } } } /// Find the byte offset of the last compaction summary in mmap'd transcript data. /// /// Scans backward for a user-type message whose content starts with /// "This session is being continued". Returns the byte offset of the /// JSON object's opening brace. pub fn find_last_compaction(data: &[u8]) -> Option { let marker = b"This session is being continued"; for obj_bytes in JsonlBackwardIter::new(data) { // Quick byte check before parsing if !contains_bytes(obj_bytes, marker) { continue; } let obj: Value = match serde_json::from_slice(obj_bytes) { Ok(v) => v, Err(_) => continue, }; if obj.get("type").and_then(|v| v.as_str()) != Some("user") { continue; } if let Some(content) = obj.get("message") .and_then(|m| m.get("content")) .and_then(|c| c.as_str()) && content.starts_with("This session is being continued") { let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize; return Some(offset); } } None } /// Find the byte offset of the last compaction in a transcript file. /// Returns None if the file can't be opened or has no compaction. pub fn find_last_compaction_in_file(path: &str) -> Option { if path.is_empty() { return None; } let file = fs::File::open(path).ok()?; let meta = file.metadata().ok()?; if meta.len() == 0 { return None; } let mmap = unsafe { Mmap::map(&file).ok()? }; find_last_compaction(&mmap).map(|off| off as u64) } /// Mmap a transcript file. Returns (Mmap, File) to keep both alive. pub fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> { let file = fs::File::open(path).ok()?; let meta = file.metadata().ok()?; if meta.len() == 0 { return None; } let mmap = unsafe { Mmap::map(&file).ok()? }; Some((mmap, file)) } fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool { haystack.windows(needle.len()).any(|w| w == needle) } /// Reverse iterator over user/assistant messages in a transcript file. /// Yields (role, text, timestamp) tuples newest-first. The caller decides /// when to stop (byte budget, count, etc). pub struct TailMessages { _file: fs::File, mmap: Mmap, pos: usize, } impl TailMessages { pub fn open(path: &str) -> Option { let (mmap, file) = mmap_transcript(path)?; let pos = mmap.len(); Some(Self { _file: file, mmap, pos }) } } impl Iterator for TailMessages { type Item = (String, String, String); fn next(&mut self) -> Option { loop { // Find closing } let close = loop { let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?; self.pos = p; if self.mmap[p] == b'}' { break p; } }; // Track brace depth to find matching { let mut depth: usize = 1; let mut in_string = false; let open = loop { let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?; self.pos = p; let ch = self.mmap[p]; if in_string { if ch == b'"' { let mut bs = 0; while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' { bs += 1; } if bs % 2 == 0 { in_string = false; } } continue; } match ch { b'"' => { in_string = true; } b'}' => { depth += 1; } b'{' => { depth -= 1; if depth == 0 { break p; } } _ => {} } }; let obj_bytes = &self.mmap[open..=close]; // Quick byte check: skip objects that aren't user/assistant // messages. Avoids serde-parsing megabyte tool_result objects. let is_user = memchr::memmem::find(obj_bytes, b"\"type\":\"user\"").is_some(); let is_assistant = !is_user && memchr::memmem::find(obj_bytes, b"\"type\":\"assistant\"").is_some(); if !is_user && !is_assistant { continue; } let obj: Value = match serde_json::from_slice(obj_bytes) { Ok(v) => v, Err(_) => continue, }; let msg_type = if is_user { "user" } else { "assistant" }; let msg = obj.get("message").unwrap_or(&obj); let text = match msg.get("content") { Some(Value::String(s)) => s.clone(), Some(Value::Array(arr)) => { arr.iter() .filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text")) .filter_map(|b| b.get("text").and_then(|v| v.as_str())) .collect::>() .join(" ") } _ => continue, }; if text.is_empty() { continue; } let timestamp = obj.get("timestamp") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); return Some((msg_type.to_string(), text, timestamp)); } } } /// Get the timestamp of the compaction message at a given byte offset. /// Returns a human-readable datetime string, or None if unavailable. pub fn compaction_timestamp(path: &str, offset: u64) -> Option { let (mmap, _file) = mmap_transcript(path)?; let start = offset as usize; if start >= mmap.len() { return None; } // Find the end of this JSONL line let end = mmap[start..].iter().position(|&b| b == b'\n') .map(|p| start + p) .unwrap_or(mmap.len()); let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?; // Claude Code transcript entries have a "timestamp" field (ISO 8601) if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) { return Some(ts.to_string()); } // Fallback: try "createdAt" or similar fields for field in &["createdAt", "created_at", "time"] { if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) { return Some(ts.to_string()); } } None } /// Detect whether a compaction has occurred since the last check. /// /// Compares the current compaction offset against a saved value in /// `state_dir/compaction-{session_id}`. Returns true if a new /// compaction was found. Updates the saved offset. pub fn detect_new_compaction( state_dir: &Path, session_id: &str, transcript_path: &str, ) -> bool { let offset = find_last_compaction_in_file(transcript_path); let save_path = state_dir.join(format!("compaction-{}", session_id)); let saved: Option = fs::read_to_string(&save_path) .ok() .and_then(|s| s.trim().parse().ok()); let is_new = match (offset, saved) { (Some(cur), Some(prev)) => cur != prev, (Some(_), None) => true, _ => false, }; // Save current offset if let Some(off) = offset { fs::write(&save_path, off.to_string()).ok(); } is_new }