add tail_messages() for fast reverse transcript scanning

Reverse-scans the mmap'd transcript using JsonlBackwardIter,
collecting user/assistant messages up to a token budget, stopping
at the compaction boundary. Returns messages in chronological order.

resolve_conversation() now uses this instead of parsing the entire
file through extract_conversation + split_on_compaction.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Kent Overstreet 2026-03-22 03:02:11 -04:00
parent a03bf390a8
commit e39096b787
2 changed files with 71 additions and 20 deletions

View file

@ -142,6 +142,73 @@ fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
haystack.windows(needle.len()).any(|w| w == needle)
}
/// Collect the most recent user/assistant messages from a JSONL transcript.
///
/// The transcript at `path` is memory-mapped and walked from the end towards
/// the beginning. For every record whose `type` is `"user"` or `"assistant"`,
/// the text of its `content` is extracted and the record is kept.
///
/// Scanning stops when either:
/// * the running estimate reaches `max_tokens` (one token is estimated as
///   four bytes of text; the budget is tested before each record, so the
///   record that crosses the budget is still included), or
/// * a `"user"` record is found whose `message.content` is a string starting
///   with the compaction marker. That record itself is not included.
///
/// Records that are not valid JSON, have another `type`, or yield no text are
/// skipped without counting against the budget.
///
/// Returns `(type, text, timestamp)` tuples in the reverse of scan order
/// (i.e. oldest first, assuming the file is written chronologically).
/// `timestamp` is the empty string when the record has no string `timestamp`.
/// An empty vector is returned when the transcript cannot be mapped.
pub fn tail_messages(path: &str, max_tokens: usize) -> Vec<(String, String, String)> {
    /// Text that the compaction-boundary user message starts with.
    const COMPACTION_PREFIX: &str = "This session is being continued";

    /// Flatten a message's `content` into plain text.
    ///
    /// A string is returned as-is; an array contributes the `text` of each
    /// element whose `type` is `"text"`, joined by single spaces. Any other
    /// shape (or a missing `content`) yields `None`.
    fn content_text(msg: &Value) -> Option<String> {
        match msg.get("content")? {
            Value::String(s) => Some(s.clone()),
            Value::Array(blocks) => Some(
                blocks
                    .iter()
                    .filter(|b| b.get("type").and_then(Value::as_str) == Some("text"))
                    .filter_map(|b| b.get("text").and_then(Value::as_str))
                    .collect::<Vec<_>>()
                    .join(" "),
            ),
            _ => None,
        }
    }

    // `_file` is bound (not discarded) so it stays alive as long as the map.
    let Some((mmap, _file)) = mmap_transcript(path) else {
        return Vec::new();
    };

    let mut messages: Vec<(String, String, String)> = Vec::new();
    let mut token_count: usize = 0;

    for obj_bytes in JsonlBackwardIter::new(&mmap) {
        if token_count >= max_tokens {
            break;
        }

        // Parse each record exactly once; unparseable records are ignored.
        let Ok(obj) = serde_json::from_slice::<Value>(obj_bytes) else {
            continue;
        };

        let msg_type = obj.get("type").and_then(Value::as_str).unwrap_or("");
        if msg_type != "user" && msg_type != "assistant" {
            continue;
        }

        // Stop at the compaction boundary. Only the nested `message.content`
        // string form is treated as a boundary.
        let at_boundary = msg_type == "user"
            && obj
                .get("message")
                .and_then(|m| m.get("content"))
                .and_then(Value::as_str)
                .is_some_and(|c| c.starts_with(COMPACTION_PREFIX));
        if at_boundary {
            break;
        }

        // Content normally lives under `message`; fall back to the record
        // itself when that key is absent.
        let msg = obj.get("message").unwrap_or(&obj);
        let Some(text) = content_text(msg) else {
            continue;
        };
        if text.is_empty() {
            continue;
        }

        let timestamp = obj
            .get("timestamp")
            .and_then(Value::as_str)
            .unwrap_or("")
            .to_string();

        // Rough estimate: four bytes of text per token.
        token_count += text.len() / 4;
        messages.push((msg_type.to_string(), text, timestamp));
    }

    // Collected newest-first; flip to oldest-first for the caller.
    messages.reverse();
    messages
}
/// Get the timestamp of the compaction message at a given byte offset.
/// Returns a human-readable datetime string, or None if unavailable.
pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {