329 lines
11 KiB
Rust
329 lines
11 KiB
Rust
|
|
// parse-claude-conversation: debug tool for inspecting what's in the context window
|
||
|
|
//
|
||
|
|
// Two-layer design:
|
||
|
|
// 1. extract_context_items() — walks JSONL from last compaction, yields
|
||
|
|
// structured records representing what's in the context window
|
||
|
|
// 2. format_as_context() — renders those records as they appear to Claude
|
||
|
|
//
|
||
|
|
// The transcript is mmap'd and scanned backwards from EOF using brace-depth
|
||
|
|
// tracking to find complete JSON objects, avoiding a full forward scan of
|
||
|
|
// what can be a 500MB+ file.
|
||
|
|
//
|
||
|
|
// Usage:
|
||
|
|
// parse-claude-conversation [TRANSCRIPT_PATH]
|
||
|
|
// parse-claude-conversation --last # use the last stashed session
|
||
|
|
|
||
|
|
use clap::Parser;
|
||
|
|
use memmap2::Mmap;
|
||
|
|
use poc_memory::transcript::{JsonlBackwardIter, find_last_compaction};
|
||
|
|
use serde_json::Value;
|
||
|
|
use std::fs;
|
||
|
|
|
||
|
|
#[derive(Parser)]
|
||
|
|
#[command(name = "parse-claude-conversation")]
|
||
|
|
struct Args {
|
||
|
|
/// Transcript JSONL path (or --last to use stashed session)
|
||
|
|
path: Option<String>,
|
||
|
|
|
||
|
|
/// Use the last stashed session from memory-search
|
||
|
|
#[arg(long)]
|
||
|
|
last: bool,
|
||
|
|
|
||
|
|
/// Dump raw JSONL objects. Optional integer: number of extra objects
|
||
|
|
/// to include before the compaction boundary.
|
||
|
|
#[arg(long, num_args = 0..=1, default_missing_value = "0")]
|
||
|
|
raw: Option<usize>,
|
||
|
|
}
|
||
|
|
|
||
|
|
// --- Context extraction ---
|
||
|
|
|
||
|
|
/// A single item in the context window, as Claude sees it.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so extracted items can be
/// printed with `{:?}`, copied, and compared in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ContextItem {
    /// Plain text from a user message (outside any `<system-reminder>` tag).
    UserText(String),
    /// The trimmed body of a `<system-reminder>` section.
    SystemReminder(String),
    /// Text emitted by the assistant.
    AssistantText(String),
    /// Marker for an assistant "thinking" block; the content is not kept.
    AssistantThinking,
    /// A tool invocation: the tool name and its input serialized as JSON text.
    ToolUse { name: String, input: String },
    /// Text returned by a tool (outside any `<system-reminder>` tag).
    ToolResult(String),
}
|
||
|
|
|
||
|
|
/// Extract context items from the transcript, starting from the last compaction.
|
||
|
|
fn extract_context_items(data: &[u8]) -> Vec<ContextItem> {
|
||
|
|
let start = find_last_compaction(data).unwrap_or(0);
|
||
|
|
let region = &data[start..];
|
||
|
|
|
||
|
|
let mut items = Vec::new();
|
||
|
|
|
||
|
|
// Forward scan through JSONL lines from compaction onward
|
||
|
|
for line in region.split(|&b| b == b'\n') {
|
||
|
|
if line.is_empty() { continue; }
|
||
|
|
|
||
|
|
let obj: Value = match serde_json::from_slice(line) {
|
||
|
|
Ok(v) => v,
|
||
|
|
Err(_) => continue,
|
||
|
|
};
|
||
|
|
|
||
|
|
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||
|
|
|
||
|
|
match msg_type {
|
||
|
|
"user" => {
|
||
|
|
if let Some(content) = obj.get("message").and_then(|m| m.get("content")) {
|
||
|
|
extract_user_content(content, &mut items);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
"assistant" => {
|
||
|
|
if let Some(content) = obj.get("message").and_then(|m| m.get("content")) {
|
||
|
|
extract_assistant_content(content, &mut items);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
_ => {}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
items
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Parse user message content into context items.
|
||
|
|
fn extract_user_content(content: &Value, items: &mut Vec<ContextItem>) {
|
||
|
|
match content {
|
||
|
|
Value::String(s) => {
|
||
|
|
split_system_reminders(s, items, false);
|
||
|
|
}
|
||
|
|
Value::Array(arr) => {
|
||
|
|
for block in arr {
|
||
|
|
let btype = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||
|
|
match btype {
|
||
|
|
"text" => {
|
||
|
|
if let Some(t) = block.get("text").and_then(|v| v.as_str()) {
|
||
|
|
split_system_reminders(t, items, false);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
"tool_result" => {
|
||
|
|
let result_text = extract_tool_result_text(block);
|
||
|
|
if !result_text.is_empty() {
|
||
|
|
split_system_reminders(&result_text, items, true);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
_ => {}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
_ => {}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Extract text from a tool_result block (content can be string or array).
|
||
|
|
fn extract_tool_result_text(block: &Value) -> String {
|
||
|
|
match block.get("content") {
|
||
|
|
Some(Value::String(s)) => s.clone(),
|
||
|
|
Some(Value::Array(arr)) => {
|
||
|
|
arr.iter()
|
||
|
|
.filter_map(|b| b.get("text").and_then(|v| v.as_str()))
|
||
|
|
.collect::<Vec<_>>()
|
||
|
|
.join("\n")
|
||
|
|
}
|
||
|
|
_ => String::new(),
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Split text on <system-reminder> tags. Non-reminder text emits UserText
|
||
|
|
/// or ToolResult depending on `is_tool_result`.
|
||
|
|
fn split_system_reminders(text: &str, items: &mut Vec<ContextItem>, is_tool_result: bool) {
|
||
|
|
let mut remaining = text;
|
||
|
|
|
||
|
|
loop {
|
||
|
|
if let Some(start) = remaining.find("<system-reminder>") {
|
||
|
|
let before = remaining[..start].trim();
|
||
|
|
if !before.is_empty() {
|
||
|
|
if is_tool_result {
|
||
|
|
items.push(ContextItem::ToolResult(before.to_string()));
|
||
|
|
} else {
|
||
|
|
items.push(ContextItem::UserText(before.to_string()));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
let after_open = &remaining[start + "<system-reminder>".len()..];
|
||
|
|
if let Some(end) = after_open.find("</system-reminder>") {
|
||
|
|
let reminder = after_open[..end].trim();
|
||
|
|
if !reminder.is_empty() {
|
||
|
|
items.push(ContextItem::SystemReminder(reminder.to_string()));
|
||
|
|
}
|
||
|
|
remaining = &after_open[end + "</system-reminder>".len()..];
|
||
|
|
} else {
|
||
|
|
let reminder = after_open.trim();
|
||
|
|
if !reminder.is_empty() {
|
||
|
|
items.push(ContextItem::SystemReminder(reminder.to_string()));
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
let trimmed = remaining.trim();
|
||
|
|
if !trimmed.is_empty() {
|
||
|
|
if is_tool_result {
|
||
|
|
items.push(ContextItem::ToolResult(trimmed.to_string()));
|
||
|
|
} else {
|
||
|
|
items.push(ContextItem::UserText(trimmed.to_string()));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Parse assistant message content into context items.
|
||
|
|
fn extract_assistant_content(content: &Value, items: &mut Vec<ContextItem>) {
|
||
|
|
match content {
|
||
|
|
Value::String(s) => {
|
||
|
|
let trimmed = s.trim();
|
||
|
|
if !trimmed.is_empty() {
|
||
|
|
items.push(ContextItem::AssistantText(trimmed.to_string()));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
Value::Array(arr) => {
|
||
|
|
for block in arr {
|
||
|
|
let btype = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||
|
|
match btype {
|
||
|
|
"text" => {
|
||
|
|
if let Some(t) = block.get("text").and_then(|v| v.as_str()) {
|
||
|
|
let trimmed = t.trim();
|
||
|
|
if !trimmed.is_empty() {
|
||
|
|
items.push(ContextItem::AssistantText(trimmed.to_string()));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
"tool_use" => {
|
||
|
|
let name = block.get("name")
|
||
|
|
.and_then(|v| v.as_str())
|
||
|
|
.unwrap_or("?")
|
||
|
|
.to_string();
|
||
|
|
let input = block.get("input")
|
||
|
|
.map(|v| v.to_string())
|
||
|
|
.unwrap_or_default();
|
||
|
|
items.push(ContextItem::ToolUse { name, input });
|
||
|
|
}
|
||
|
|
"thinking" => {
|
||
|
|
items.push(ContextItem::AssistantThinking);
|
||
|
|
}
|
||
|
|
_ => {}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
_ => {}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// --- Formatting layer ---
|
||
|
|
|
||
|
|
/// Shorten `s` to at most `max` bytes for display.
///
/// Strings of `max` bytes or fewer are returned unchanged. Longer strings are
/// cut and suffixed with `...(N total)`, where `N` is the full byte length.
/// If `max` lands inside a multi-byte UTF-8 character, the cut moves back to
/// the previous character boundary so the slice never panics.
fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }

    // Offset 0 is always a boundary, so this loop terminates.
    let mut cut = max;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...({} total)", &s[..cut], s.len())
}
|
||
|
|
|
||
|
|
fn format_as_context(items: &[ContextItem]) {
|
||
|
|
for item in items {
|
||
|
|
match item {
|
||
|
|
ContextItem::UserText(text) => {
|
||
|
|
println!("USER: {}", truncate(text, 300));
|
||
|
|
println!();
|
||
|
|
}
|
||
|
|
ContextItem::SystemReminder(text) => {
|
||
|
|
println!("<system-reminder>");
|
||
|
|
println!("{}", truncate(text, 500));
|
||
|
|
println!("</system-reminder>");
|
||
|
|
println!();
|
||
|
|
}
|
||
|
|
ContextItem::AssistantText(text) => {
|
||
|
|
println!("ASSISTANT: {}", truncate(text, 300));
|
||
|
|
println!();
|
||
|
|
}
|
||
|
|
ContextItem::AssistantThinking => {
|
||
|
|
println!("[thinking]");
|
||
|
|
println!();
|
||
|
|
}
|
||
|
|
ContextItem::ToolUse { name, input } => {
|
||
|
|
println!("TOOL_USE: {} {}", name, truncate(input, 200));
|
||
|
|
println!();
|
||
|
|
}
|
||
|
|
ContextItem::ToolResult(text) => {
|
||
|
|
println!("TOOL_RESULT: {}", truncate(text, 300));
|
||
|
|
println!();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
fn main() {
|
||
|
|
let args = Args::parse();
|
||
|
|
|
||
|
|
let path = if args.last {
|
||
|
|
let stash = fs::read_to_string("/tmp/claude-memory-search/last-input.json")
|
||
|
|
.expect("No stashed input");
|
||
|
|
let json: Value = serde_json::from_str(&stash).expect("Bad JSON");
|
||
|
|
json["transcript_path"]
|
||
|
|
.as_str()
|
||
|
|
.expect("No transcript_path")
|
||
|
|
.to_string()
|
||
|
|
} else if let Some(p) = args.path {
|
||
|
|
p
|
||
|
|
} else {
|
||
|
|
eprintln!("error: provide a transcript path or --last");
|
||
|
|
std::process::exit(1);
|
||
|
|
};
|
||
|
|
|
||
|
|
let file = fs::File::open(&path).expect("Can't open transcript");
|
||
|
|
let mmap = unsafe { Mmap::map(&file).expect("Failed to mmap") };
|
||
|
|
|
||
|
|
eprintln!(
|
||
|
|
"Transcript: {} ({:.1} MB)",
|
||
|
|
&path,
|
||
|
|
mmap.len() as f64 / 1_000_000.0
|
||
|
|
);
|
||
|
|
|
||
|
|
let compaction_offset = find_last_compaction(&mmap).unwrap_or(0);
|
||
|
|
eprintln!("Compaction at byte offset: {}", compaction_offset);
|
||
|
|
|
||
|
|
if let Some(extra) = args.raw {
|
||
|
|
use std::io::Write;
|
||
|
|
|
||
|
|
// Collect `extra` JSON objects before the compaction boundary
|
||
|
|
let mut before = Vec::new();
|
||
|
|
if extra > 0 && compaction_offset > 0 {
|
||
|
|
for obj_bytes in JsonlBackwardIter::new(&mmap[..compaction_offset]) {
|
||
|
|
if let Ok(obj) = serde_json::from_slice::<Value>(obj_bytes) {
|
||
|
|
let t = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||
|
|
if t == "file-history-snapshot" { continue; }
|
||
|
|
}
|
||
|
|
before.push(obj_bytes.to_vec());
|
||
|
|
if before.len() >= extra {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
before.reverse();
|
||
|
|
}
|
||
|
|
|
||
|
|
for obj in &before {
|
||
|
|
std::io::stdout().write_all(obj).ok();
|
||
|
|
println!();
|
||
|
|
}
|
||
|
|
|
||
|
|
// Then dump everything from compaction onward
|
||
|
|
let region = &mmap[compaction_offset..];
|
||
|
|
for line in region.split(|&b| b == b'\n') {
|
||
|
|
if line.is_empty() { continue; }
|
||
|
|
if let Ok(obj) = serde_json::from_slice::<Value>(line) {
|
||
|
|
let t = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||
|
|
if t == "file-history-snapshot" { continue; }
|
||
|
|
std::io::stdout().write_all(line).ok();
|
||
|
|
println!();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
let items = extract_context_items(&mmap);
|
||
|
|
eprintln!("Context items: {}", items.len());
|
||
|
|
format_as_context(&items);
|
||
|
|
}
|
||
|
|
}
|