diff --git a/poc-memory/Cargo.toml b/poc-memory/Cargo.toml index 14fde9e..c25b782 100644 --- a/poc-memory/Cargo.toml +++ b/poc-memory/Cargo.toml @@ -16,6 +16,7 @@ clap = { version = "4", features = ["derive"] } libc = "0.2" faer = "0.24.0" rkyv = { version = "0.7", features = ["validation", "std"] } +memchr = "2" memmap2 = "0.9" rayon = "1" peg = "0.8" diff --git a/poc-memory/src/agents/defs.rs b/poc-memory/src/agents/defs.rs index d0f961d..6745fc7 100644 --- a/poc-memory/src/agents/defs.rs +++ b/poc-memory/src/agents/defs.rs @@ -456,7 +456,7 @@ fn resolve_conversation() -> String { let Some(path) = transcript else { return String::new() }; let path_str = path.to_string_lossy(); - let messages = crate::transcript::tail_messages(&path_str, 25_000); + let messages = crate::transcript::tail_messages(&path_str, 200_000); if messages.is_empty() { return String::new(); } let cfg = crate::config::get(); diff --git a/poc-memory/src/transcript.rs b/poc-memory/src/transcript.rs index 3855768..aee1015 100644 --- a/poc-memory/src/transcript.rs +++ b/poc-memory/src/transcript.rs @@ -4,6 +4,7 @@ // and compaction detection. Used by memory-search (hook mode) and // parse-claude-conversation (debug tool). +use memchr::memrchr3; use memmap2::Mmap; use serde_json::Value; use std::fs; @@ -12,8 +13,10 @@ use std::path::Path; /// Scan backwards through mmap'd bytes, yielding byte slices of complete /// top-level JSON objects (outermost { to matching }). /// -/// Tracks brace depth, skipping braces inside JSON strings. Returns -/// objects in reverse order (newest first). +/// Uses memrchr3 (SIMD) to jump between structurally significant bytes +/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth, +/// skipping braces inside JSON strings. Returns objects in reverse order +/// (newest first). 
pub struct JsonlBackwardIter<'a> { data: &'a [u8], pos: usize, @@ -29,17 +32,14 @@ impl<'a> Iterator for JsonlBackwardIter<'a> { type Item = &'a [u8]; fn next(&mut self) -> Option { - if self.pos == 0 { - return None; - } - - // Find the closing } of the next object (scanning backward) + // Find the closing } of the next object let close = loop { - if self.pos == 0 { return None; } - self.pos -= 1; - if self.data[self.pos] == b'}' { - break self.pos; + let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?; + self.pos = p; + if self.data[p] == b'}' { + break p; } + // Skip past any { or " that aren't our closing brace }; // Track brace depth to find matching { @@ -47,22 +47,22 @@ impl<'a> Iterator for JsonlBackwardIter<'a> { let mut in_string = false; loop { - if self.pos == 0 { - return None; - } - self.pos -= 1; - let ch = self.data[self.pos]; + let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?; + self.pos = p; + let ch = self.data[p]; if in_string { if ch == b'"' { + // Check for escaped quote (count preceding backslashes) let mut bs = 0; - while self.pos > bs && self.data[self.pos - 1 - bs] == b'\\' { + while p > bs + 1 && self.data[p - 1 - bs] == b'\\' { bs += 1; } if bs % 2 == 0 { in_string = false; } } + // { and } inside strings don't affect depth continue; } @@ -145,18 +145,17 @@ fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool { /// Reverse-scan a transcript file, collecting user/assistant messages -/// until `max_tokens` tokens (~4 chars each) are accumulated. Stops at +/// until `max_bytes` bytes of message text are accumulated. Stops at /// the last compaction boundary. Returns messages in chronological order. 
-pub fn tail_messages(path: &str, max_tokens: usize) -> Vec<(String, String, String)> { +pub fn tail_messages(path: &str, max_bytes: usize) -> Vec<(String, String, String)> { let (mmap, _file) = match mmap_transcript(path) { Some(v) => v, None => return Vec::new(), }; - let compaction_marker = b"This session is being continued"; let mut messages: Vec<(String, String, String)> = Vec::new(); - let mut token_count = 0; + let mut total_bytes = 0; for obj_bytes in JsonlBackwardIter::new(&mmap) { - if token_count >= max_tokens { break; } + if total_bytes >= max_bytes { break; } // Quick byte check: skip objects that aren't user/assistant // (avoids parsing large tool_result / system objects) @@ -197,7 +196,7 @@ pub fn tail_messages(path: &str, max_tokens: usize) -> Vec<(String, String, Stri .unwrap_or("") .to_string(); - token_count += text.len() / 4; + total_bytes += text.len(); messages.push((msg_type.to_string(), text, timestamp)); }