From 81fec99767cfa5767f1597544ed25e4cbe6f38b8 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Tue, 17 Mar 2026 18:00:58 -0400
Subject: [PATCH] history: show DELETED marker on tombstone entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cmd_history was silently hiding the deleted flag, making it impossible
to tell from the output that a node had been deleted. This masked the
kernel-patterns deletion — looked like the node existed in the log
but wouldn't load.

Also adds merge-logs and diag-key diagnostic binaries, and makes
Node::to_capnp public for use by external tools.

Co-Authored-By: Kent Overstreet
---
 poc-memory/Cargo.toml            |   8 ++
 poc-memory/src/bin/diag-key.rs   |  68 +++++++++
 poc-memory/src/bin/merge-logs.rs | 232 +++++++++++++++++++++++++++++++
 poc-memory/src/cli/node.rs       |   9 +-
 poc-memory/src/store/types.rs    |   2 +-
 5 files changed, 314 insertions(+), 5 deletions(-)
 create mode 100644 poc-memory/src/bin/diag-key.rs
 create mode 100644 poc-memory/src/bin/merge-logs.rs

diff --git a/poc-memory/Cargo.toml b/poc-memory/Cargo.toml
index d840475..df00dd0 100644
--- a/poc-memory/Cargo.toml
+++ b/poc-memory/Cargo.toml
@@ -45,3 +45,11 @@ path = "src/bin/memory-search.rs"
 [[bin]]
 name = "poc-hook"
 path = "src/bin/poc-hook.rs"
+
+[[bin]]
+name = "merge-logs"
+path = "src/bin/merge-logs.rs"
+
+[[bin]]
+name = "diag-key"
+path = "src/bin/diag-key.rs"
diff --git a/poc-memory/src/bin/diag-key.rs b/poc-memory/src/bin/diag-key.rs
new file mode 100644
index 0000000..446dfb8
--- /dev/null
+++ b/poc-memory/src/bin/diag-key.rs
@@ -0,0 +1,68 @@
+// Diagnostic: dump all entries matching a key pattern from a capnp log
+use std::io::{BufRead, BufReader};
+use std::fs;
+use capnp::{message, serialize};
+use poc_memory::memory_capnp;
+use poc_memory::store::Node;
+
+fn main() -> Result<(), String> {
+    let args: Vec<String> = std::env::args().collect();
+    if args.len() != 3 {
+        eprintln!("usage: diag-key <nodes.capnp> <key-pattern>");
+        std::process::exit(1);
+    }
+    let path = &args[1];
+    let pattern = &args[2];
+    // Keys that extend the pattern with "-suffix" are excluded below;
+    // build that probe once rather than once per node.
+    let dashed = format!("{}-", pattern);
+
+    let file = fs::File::open(path)
+        .map_err(|e| format!("open {}: {}", path, e))?;
+    let mut reader = BufReader::new(file);
+    let mut entry_num = 0u64;
+    let mut matches = 0u64;
+
+    loop {
+        // An empty buffer is the only clean end-of-log; a message that
+        // fails to decode is reported instead of being treated as EOF.
+        let at_eof = reader.fill_buf()
+            .map_err(|e| format!("read {}: {}", path, e))?
+            .is_empty();
+        if at_eof {
+            break;
+        }
+        let msg = match serialize::read_message(&mut reader, message::ReaderOptions::new()) {
+            Ok(msg) => msg,
+            Err(e) => {
+                eprintln!("warning: {}: stopped after entry #{}: {}", path, entry_num, e);
+                break;
+            }
+        };
+        let log = msg.get_root::<memory_capnp::node_log::Reader>()
+            .map_err(|e| format!("read log from {}: {}", path, e))?;
+        let nodes = log.get_nodes()
+            .map_err(|e| format!("get nodes from {}: {}", path, e))?;
+        for node_reader in nodes {
+            entry_num += 1;
+            let node = Node::from_capnp_migrate(node_reader)?;
+
+            // Exact substring match, but exclude keys with trailing chars
+            // (e.g. "kernel-patterns-foo") unless pattern itself has the dash
+            if node.key == *pattern || (node.key.contains(pattern) && !node.key.contains(&dashed)) {
+                matches += 1;
+                println!("Entry #{}: key={:?} (len={})", entry_num, node.key, node.key.len());
+                println!(" key bytes: {:02x?}", node.key.as_bytes());
+                println!(" uuid: {:02x?}", node.uuid);
+                println!(" version: {}", node.version);
+                println!(" deleted: {}", node.deleted);
+                println!(" timestamp: {}", node.timestamp);
+                println!(" content len: {}", node.content.len());
+                println!(" provenance: {}", node.provenance);
+                println!();
+            }
+        }
+    }
+    eprintln!("Scanned {} entries, {} matches for {:?}", entry_num, matches, pattern);
+    Ok(())
+}
diff --git a/poc-memory/src/bin/merge-logs.rs b/poc-memory/src/bin/merge-logs.rs
new file mode 100644
index 0000000..e872ff8
--- /dev/null
+++ b/poc-memory/src/bin/merge-logs.rs
@@ -0,0 +1,232 @@
+// merge-logs: Recover historical entries from a checkpoint log and merge
+// with the current log into a NEW output file.
+//
+// This tool was written to recover history destroyed by rewrite_store()
+// (see persist.rs comment). It reads two capnp node logs, finds entries
+// in the old log that don't exist in the current log (by uuid+version),
+// and writes a merged log containing both.
+//
+// SAFETY: This tool never modifies either input file. The merged output
+// goes to a new directory specified by the user.
+//
+// Usage:
+//   merge-logs <old_log> <current_log> <output_dir>
+//
+// Example:
+//   merge-logs ~/.claude/memory/checkpoints/nodes.capnp \
+//              ~/.claude/memory/nodes.capnp \
+//              /tmp/merged-store
+
+use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::io::{BufRead, BufReader, BufWriter, Write};
+use std::path::Path;
+
+use capnp::message;
+use capnp::serialize;
+
+use poc_memory::memory_capnp;
+use poc_memory::store::Node;
+
+/// Read all node entries from a capnp log file, preserving order.
+///
+/// A message that fails to decode ends the read early: the entries decoded
+/// so far are still returned, but a warning is printed so a torn or corrupt
+/// log is not mistaken for a clean end of file.
+fn read_all_entries(path: &Path) -> Result<Vec<Node>, String> {
+    let file = fs::File::open(path)
+        .map_err(|e| format!("open {}: {}", path.display(), e))?;
+    let mut reader = BufReader::new(file);
+    let mut entries: Vec<Node> = Vec::new();
+
+    loop {
+        // An empty buffer is the only clean end-of-log.
+        let at_eof = reader.fill_buf()
+            .map_err(|e| format!("read {}: {}", path.display(), e))?
+            .is_empty();
+        if at_eof {
+            break;
+        }
+        let msg = match serialize::read_message(&mut reader, message::ReaderOptions::new()) {
+            Ok(msg) => msg,
+            Err(e) => {
+                eprintln!("warning: {}: stopped after {} entries: {}",
+                          path.display(), entries.len(), e);
+                break;
+            }
+        };
+        let log = msg.get_root::<memory_capnp::node_log::Reader>()
+            .map_err(|e| format!("read log from {}: {}", path.display(), e))?;
+        for node_reader in log.get_nodes()
+                .map_err(|e| format!("get nodes from {}: {}", path.display(), e))? {
+            let node = Node::from_capnp_migrate(node_reader)?;
+            entries.push(node);
+        }
+    }
+
+    Ok(entries)
+}
+
+/// Write node entries to a new capnp log file in chunks.
+///
+/// Fails if `path` already exists, and flushes explicitly so that buffered
+/// write errors are reported rather than dropped with the `BufWriter`.
+fn write_entries(path: &Path, entries: &[Node]) -> Result<(), String> {
+    let file = fs::OpenOptions::new()
+        .write(true)
+        .create_new(true)
+        .open(path)
+        .map_err(|e| format!("create {}: {}", path.display(), e))?;
+    let mut writer = BufWriter::new(file);
+
+    for chunk in entries.chunks(100) {
+        let mut msg = message::Builder::new_default();
+        {
+            let log = msg.init_root::<memory_capnp::node_log::Builder>();
+            let mut list = log.init_nodes(chunk.len() as u32);
+            for (i, node) in chunk.iter().enumerate() {
+                node.to_capnp(list.reborrow().get(i as u32));
+            }
+        }
+        serialize::write_message(&mut writer, &msg)
+            .map_err(|e| format!("write: {}", e))?;
+    }
+
+    writer.flush()
+        .map_err(|e| format!("flush {}: {}", path.display(), e))?;
+    Ok(())
+}
+
+fn main() -> Result<(), String> {
+    let args: Vec<String> = std::env::args().collect();
+    if args.len() != 4 {
+        eprintln!("Usage: merge-logs <old_log> <current_log> <output_dir>");
+        eprintln!();
+        eprintln!("Merges historical entries from old_log with current_log,");
+        eprintln!("writing the result to output_dir/nodes.capnp.");
+        eprintln!("Neither input file is modified.");
+        std::process::exit(1);
+    }
+
+    let old_path = Path::new(&args[1]);
+    let current_path = Path::new(&args[2]);
+    let output_dir = Path::new(&args[3]);
+
+    // Validate inputs exist
+    if !old_path.exists() {
+        return Err(format!("old log not found: {}", old_path.display()));
+    }
+    if !current_path.exists() {
+        return Err(format!("current log not found: {}", current_path.display()));
+    }
+
+    // Create output directory (must not already contain nodes.capnp)
+    fs::create_dir_all(output_dir)
+        .map_err(|e| format!("create output dir: {}", e))?;
+    let output_path = output_dir.join("nodes.capnp");
+    if output_path.exists() {
+        return Err(format!("output already exists: {} — refusing to overwrite",
+                           output_path.display()));
+    }
+
+    eprintln!("Reading old log: {} ...", old_path.display());
+    let old_entries = read_all_entries(old_path)?;
+    eprintln!(" {} entries", old_entries.len());
+
+    eprintln!("Reading current log: {} ...", current_path.display());
+    let current_entries = read_all_entries(current_path)?;
+    eprintln!(" {} entries", current_entries.len());
+
+    // Build set of (uuid, version) pairs from current log
+    let current_set: HashSet<([u8; 16], u32)> = current_entries.iter()
+        .map(|n| (n.uuid, n.version))
+        .collect();
+
+    // Find entries in old log not present in current log
+    let recovered: Vec<&Node> = old_entries.iter()
+        .filter(|n| !current_set.contains(&(n.uuid, n.version)))
+        .collect();
+
+    eprintln!();
+    eprintln!("Current log has {} unique (uuid, version) pairs", current_set.len());
+    eprintln!("Old log entries already in current: {}", old_entries.len() - recovered.len());
+    eprintln!("Old log entries to recover: {}", recovered.len());
+
+    // Count unique keys being recovered
+    let recovered_keys: HashSet<&str> = recovered.iter()
+        .map(|n| n.key.as_str())
+        .collect();
+    eprintln!("Unique keys with recovered history: {}", recovered_keys.len());
+
+    // Show some stats about what we're recovering
+    let mut version_counts: HashMap<&str, Vec<u32>> = HashMap::new();
+    for node in &recovered {
+        version_counts.entry(&node.key)
+            .or_default()
+            .push(node.version);
+    }
+    let mut keys_by_versions: Vec<_> = version_counts.iter()
+        .map(|(k, v)| (*k, v.len()))
+        .collect();
+    // Most versions first; ties broken by key so the report is stable
+    // across runs (HashMap iteration order is not).
+    keys_by_versions.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
+    eprintln!();
+    eprintln!("Top 20 keys by recovered versions:");
+    for (key, count) in keys_by_versions.iter().take(20) {
+        eprintln!(" {:4} versions {}", count, key);
+    }
+
+    // Build merged log: recovered entries (preserving order), then current entries
+    let mut merged: Vec<Node> = Vec::with_capacity(recovered.len() + current_entries.len());
+    merged.extend(recovered.into_iter().cloned());
+    merged.extend(current_entries);
+
+    eprintln!();
+    eprintln!("Writing merged log: {} ({} entries) ...",
+              output_path.display(), merged.len());
+    write_entries(&output_path, &merged)?;
+
+    let output_size = fs::metadata(&output_path).map(|m| m.len()).unwrap_or(0);
+    eprintln!("Done. Output: {} ({:.1} MB)", output_path.display(),
+              output_size as f64 / 1_048_576.0);
+
+    // Verify: replay the merged log and check node count
+    eprintln!();
+    eprintln!("Verifying merged log...");
+    let verify_entries = read_all_entries(&output_path)?;
+    eprintln!(" Read back {} entries (expected {})",
+              verify_entries.len(), merged.len());
+
+    // Replay to get final state
+    let mut final_nodes: HashMap<String, Node> = HashMap::new();
+    for node in &verify_entries {
+        // An entry wins when no version of its key has been seen yet, or
+        // when its version is at least as new as the one already held.
+        let supersedes = final_nodes.get(&node.key)
+            .map(|n| node.version >= n.version)
+            .unwrap_or(true);
+        if supersedes {
+            if node.deleted {
+                final_nodes.remove(&node.key);
+            } else {
+                final_nodes.insert(node.key.clone(), node.clone());
+            }
+        }
+    }
+    eprintln!(" Replay produces {} live nodes", final_nodes.len());
+
+    if verify_entries.len() != merged.len() {
+        return Err(format!("Verification failed: wrote {} but read back {}",
+                           merged.len(), verify_entries.len()));
+    }
+
+    eprintln!();
+    eprintln!("Merge complete. To use the merged log:");
+    eprintln!(" 1. Back up ~/.claude/memory/nodes.capnp");
+    eprintln!(" 2. cp {} ~/.claude/memory/nodes.capnp", output_path.display());
+    eprintln!(" 3. rm ~/.claude/memory/state.bin ~/.claude/memory/snapshot.rkyv");
+    eprintln!(" 4. poc-memory admin fsck");
+
+    Ok(())
+}
diff --git a/poc-memory/src/cli/node.rs b/poc-memory/src/cli/node.rs
index fa3a046..5d8f046 100644
--- a/poc-memory/src/cli/node.rs
+++ b/poc-memory/src/cli/node.rs
@@ -256,16 +256,17 @@ pub fn cmd_history(key: &[String], full: bool) -> Result<(), String> {
         } else {
             format!("(raw:{})", node.timestamp)
         };
+        let deleted_marker = if node.deleted { " DELETED" } else { "" };
         let content_len = node.content.len();
         if full {
-            eprintln!("=== v{} {} {} w={:.3} {}b ===",
-                node.version, ts, node.provenance, node.weight, content_len);
+            eprintln!("=== v{} {} {}{} w={:.3} {}b ===",
+                node.version, ts, node.provenance, deleted_marker, node.weight, content_len);
             eprintln!("{}", node.content);
         } else {
             let preview = crate::util::first_n_chars(&node.content, 120);
             let preview = preview.replace('\n', "\\n");
-            eprintln!(" v{:<3} {} {:24} w={:.3} {}b",
-                node.version, ts, node.provenance, node.weight, content_len);
+            eprintln!(" v{:<3} {} {:24} w={:.3} {}b{}",
+                node.version, ts, node.provenance, node.weight, content_len, deleted_marker);
             eprintln!(" {}", preview);
         }
     }
diff --git a/poc-memory/src/store/types.rs b/poc-memory/src/store/types.rs
index 6fb4a37..72716d8 100644
--- a/poc-memory/src/store/types.rs
+++ b/poc-memory/src/store/types.rs
@@ -74,7 +74,7 @@ macro_rules! capnp_message {
                 }
             }
 
-            pub(crate) fn to_capnp(&self, mut b: $builder) {
+            pub fn to_capnp(&self, mut b: $builder) {
                 paste::paste! {
                     $(b.[<set_ $tf>](&self.$tf);)*
                     $(b.[<set_ $uf>](&self.$uf);)*