// merge-logs: Recover historical entries from a checkpoint log and merge // with the current log into a NEW output file. // // This tool was written to recover history destroyed by rewrite_store() // (see persist.rs comment). It reads two capnp node logs, finds entries // in the old log that don't exist in the current log (by uuid+version), // and writes a merged log containing both. // // SAFETY: This tool never modifies either input file. The merged output // goes to a new directory specified by the user. // // Usage: // merge-logs // // Example: // merge-logs ~/.claude/memory/checkpoints/nodes.capnp \ // ~/.claude/memory/nodes.capnp \ // /tmp/merged-store use std::collections::{HashMap, HashSet}; use std::fs; use std::io::{BufReader, BufWriter}; use std::path::Path; use capnp::message; use capnp::serialize; use poc_memory::memory_capnp; use poc_memory::store::Node; /// Read all node entries from a capnp log file, preserving order. fn read_all_entries(path: &Path) -> Result, String> { let file = fs::File::open(path) .map_err(|e| format!("open {}: {}", path.display(), e))?; let mut reader = BufReader::new(file); let mut entries = Vec::new(); while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) { let log = msg.get_root::() .map_err(|e| format!("read log from {}: {}", path.display(), e))?; for node_reader in log.get_nodes() .map_err(|e| format!("get nodes from {}: {}", path.display(), e))? { let node = Node::from_capnp_migrate(node_reader)?; entries.push(node); } } Ok(entries) } /// Write node entries to a new capnp log file in chunks. fn write_entries(path: &Path, entries: &[Node]) -> Result<(), String> { let file = fs::File::create(path) .map_err(|e| format!("create {}: {}", path.display(), e))?; let mut writer = BufWriter::new(file); for chunk in entries.chunks(100) { let mut msg = message::Builder::new_default(); { let log = msg.init_root::(); let mut list = log.init_nodes(chunk.len() as u32); for (i, node) in chunk.iter().enumerate() { node.to_capnp(list.reborrow().get(i as u32)); } } serialize::write_message(&mut writer, &msg) .map_err(|e| format!("write: {}", e))?; } Ok(()) } fn main() -> Result<(), String> { let args: Vec = std::env::args().collect(); if args.len() != 4 { eprintln!("Usage: merge-logs "); eprintln!(); eprintln!("Merges historical entries from old_log with current_log,"); eprintln!("writing the result to output_dir/nodes.capnp."); eprintln!("Neither input file is modified."); std::process::exit(1); } let old_path = Path::new(&args[1]); let current_path = Path::new(&args[2]); let output_dir = Path::new(&args[3]); // Validate inputs exist if !old_path.exists() { return Err(format!("old log not found: {}", old_path.display())); } if !current_path.exists() { return Err(format!("current log not found: {}", current_path.display())); } // Create output directory (must not already contain nodes.capnp) fs::create_dir_all(output_dir) .map_err(|e| format!("create output dir: {}", e))?; let output_path = output_dir.join("nodes.capnp"); if output_path.exists() { return Err(format!("output already exists: {} — refusing to overwrite", output_path.display())); } eprintln!("Reading old log: {} ...", old_path.display()); let old_entries = read_all_entries(old_path)?; eprintln!(" {} entries", old_entries.len()); eprintln!("Reading current log: {} ...", current_path.display()); let current_entries = read_all_entries(current_path)?; eprintln!(" {} entries", current_entries.len()); // Build set of (uuid, version) pairs from current log let current_set: HashSet<([u8; 16], u32)> = current_entries.iter() .map(|n| (n.uuid, n.version)) .collect(); // Find entries in old log not present in current log let recovered: Vec<&Node> = old_entries.iter() .filter(|n| !current_set.contains(&(n.uuid, n.version))) .collect(); eprintln!(); eprintln!("Current log has {} unique (uuid, version) pairs", current_set.len()); eprintln!("Old log entries already in current: {}", old_entries.len() - recovered.len()); eprintln!("Old log entries to recover: {}", recovered.len()); // Count unique keys being recovered let recovered_keys: HashSet<&str> = recovered.iter() .map(|n| n.key.as_str()) .collect(); eprintln!("Unique keys with recovered history: {}", recovered_keys.len()); // Show some stats about what we're recovering let mut version_counts: HashMap<&str, Vec> = HashMap::new(); for node in &recovered { version_counts.entry(&node.key) .or_default() .push(node.version); } let mut keys_by_versions: Vec<_> = version_counts.iter() .map(|(k, v)| (*k, v.len())) .collect(); keys_by_versions.sort_by(|a, b| b.1.cmp(&a.1)); eprintln!(); eprintln!("Top 20 keys by recovered versions:"); for (key, count) in keys_by_versions.iter().take(20) { eprintln!(" {:4} versions {}", count, key); } // Build merged log: recovered entries (preserving order), then current entries let mut merged: Vec = Vec::with_capacity(recovered.len() + current_entries.len()); for node in recovered { merged.push(node.clone()); } for node in current_entries { merged.push(node); } eprintln!(); eprintln!("Writing merged log: {} ({} entries) ...", output_path.display(), merged.len()); write_entries(&output_path, &merged)?; let output_size = fs::metadata(&output_path).map(|m| m.len()).unwrap_or(0); eprintln!("Done. Output: {} ({:.1} MB)", output_path.display(), output_size as f64 / 1_048_576.0); // Verify: replay the merged log and check node count eprintln!(); eprintln!("Verifying merged log..."); let verify_entries = read_all_entries(&output_path)?; eprintln!(" Read back {} entries (expected {})", verify_entries.len(), merged.len()); // Replay to get final state let mut final_nodes: HashMap = HashMap::new(); for node in &verify_entries { let dominated = final_nodes.get(&node.key) .map(|n| node.version >= n.version) .unwrap_or(true); if dominated { if node.deleted { final_nodes.remove(&node.key); } else { final_nodes.insert(node.key.clone(), node.clone()); } } } eprintln!(" Replay produces {} live nodes", final_nodes.len()); if verify_entries.len() != merged.len() { return Err(format!("Verification failed: wrote {} but read back {}", merged.len(), verify_entries.len())); } eprintln!(); eprintln!("Merge complete. To use the merged log:"); eprintln!(" 1. Back up ~/.claude/memory/nodes.capnp"); eprintln!(" 2. cp {} ~/.claude/memory/nodes.capnp", output_path.display()); eprintln!(" 3. rm ~/.claude/memory/state.bin ~/.claude/memory/snapshot.rkyv"); eprintln!(" 4. poc-memory admin fsck"); Ok(()) }