use memchr::memrchr3; /// Scan backwards through mmap'd bytes, yielding byte slices of complete /// top-level JSON objects (outermost { to matching }). /// /// Uses memrchr3 (SIMD) to jump between structurally significant bytes /// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth, /// skipping braces inside JSON strings. Returns objects in reverse order /// (newest first). pub struct JsonlBackwardIter<'a> { data: &'a [u8], pos: usize, } impl<'a> JsonlBackwardIter<'a> { pub fn new(data: &'a [u8]) -> Self { Self { data, pos: data.len() } } } impl<'a> Iterator for JsonlBackwardIter<'a> { type Item = (usize, &'a [u8]); fn next(&mut self) -> Option { next_json_object(self.data, &mut self.pos) } } fn is_unescaped_quote(data: &[u8], p: usize) -> bool { let mut bs = 0; while p > bs && data[p - 1 - bs] == b'\\' { bs += 1; } bs % 2 == 0 } fn next_json_object<'a>(data: &'a [u8], pos: &mut usize) -> Option<(usize, &'a [u8])> { // Find the closing } of the next object, skipping } inside strings. let close = { let mut in_string = false; loop { let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?; *pos = p; let ch = data[p]; if in_string { if ch == b'"' && is_unescaped_quote(data, p) { in_string = false; } continue; } match ch { b'}' => break p, b'"' => in_string = true, _ => {} } } }; // Track brace depth to find matching {. let mut depth: usize = 1; let mut in_string = false; loop { let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?; *pos = p; let ch = data[p]; if in_string { if ch == b'"' && is_unescaped_quote(data, p) { in_string = false; } continue; } match ch { b'"' => { in_string = true; } b'}' => { depth += 1; } b'{' => { depth -= 1; if depth == 0 { return Some((*pos, &data[*pos..=close])); } } _ => {} } } } #[cfg(test)] mod tests { use super::*; #[test] fn handles_nested_json_and_quoted_braces() { let data = br#"{"n":1,"s":"literal } brace"} {"n":2,"nested":{"s":"escaped quote: \" and { brace"}} trailing garbage "#; let objs: Vec<_> = JsonlBackwardIter::new(data) .map(|(_, bytes)| std::str::from_utf8(bytes).unwrap().to_string()) .collect(); assert_eq!(objs.len(), 2); assert!(objs[0].contains(r#""n":2"#)); assert!(objs[1].contains(r#""n":1"#)); } }