history: show DELETED marker on tombstone entries
cmd_history was silently hiding the deleted flag, making it impossible to tell from the output that a node had been deleted. This masked the kernel-patterns deletion: the node appeared to exist in the log but wouldn't load. Also adds the merge-logs and diag-key diagnostic binaries, and makes Node::to_capnp public for use by external tools. Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
9775d468b2
commit
81fec99767
5 changed files with 264 additions and 5 deletions
|
|
@ -45,3 +45,11 @@ path = "src/bin/memory-search.rs"
|
||||||
[[bin]]
|
[[bin]]
|
||||||
name = "poc-hook"
|
name = "poc-hook"
|
||||||
path = "src/bin/poc-hook.rs"
|
path = "src/bin/poc-hook.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "merge-logs"
|
||||||
|
path = "src/bin/merge-logs.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "diag-key"
|
||||||
|
path = "src/bin/diag-key.rs"
|
||||||
|
|
|
||||||
45
poc-memory/src/bin/diag-key.rs
Normal file
45
poc-memory/src/bin/diag-key.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
// Diagnostic: dump all entries matching a key pattern from a capnp log
|
||||||
|
use std::io::BufReader;
|
||||||
|
use std::fs;
|
||||||
|
use capnp::{message, serialize};
|
||||||
|
use poc_memory::memory_capnp;
|
||||||
|
use poc_memory::store::Node;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let args: Vec<String> = std::env::args().collect();
|
||||||
|
if args.len() != 3 {
|
||||||
|
eprintln!("usage: diag-key <nodes.capnp> <key-substring>");
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
let path = &args[1];
|
||||||
|
let pattern = &args[2];
|
||||||
|
|
||||||
|
let file = fs::File::open(path).unwrap();
|
||||||
|
let mut reader = BufReader::new(file);
|
||||||
|
let mut entry_num = 0u64;
|
||||||
|
let mut matches = 0u64;
|
||||||
|
|
||||||
|
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
|
||||||
|
let log = msg.get_root::<memory_capnp::node_log::Reader>().unwrap();
|
||||||
|
for node_reader in log.get_nodes().unwrap() {
|
||||||
|
entry_num += 1;
|
||||||
|
let node = Node::from_capnp_migrate(node_reader).unwrap();
|
||||||
|
|
||||||
|
// Exact substring match, but exclude keys with trailing chars
|
||||||
|
// (e.g. "kernel-patterns-foo") unless pattern itself has the dash
|
||||||
|
if node.key == *pattern || (node.key.contains(pattern) && !node.key.contains(&format!("{}-", pattern))) {
|
||||||
|
matches += 1;
|
||||||
|
println!("Entry #{}: key={:?} (len={})", entry_num, node.key, node.key.len());
|
||||||
|
println!(" key bytes: {:02x?}", node.key.as_bytes());
|
||||||
|
println!(" uuid: {:02x?}", node.uuid);
|
||||||
|
println!(" version: {}", node.version);
|
||||||
|
println!(" deleted: {}", node.deleted);
|
||||||
|
println!(" timestamp: {}", node.timestamp);
|
||||||
|
println!(" content len: {}", node.content.len());
|
||||||
|
println!(" provenance: {}", node.provenance);
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eprintln!("Scanned {} entries, {} matches for {:?}", entry_num, matches, pattern);
|
||||||
|
}
|
||||||
205
poc-memory/src/bin/merge-logs.rs
Normal file
205
poc-memory/src/bin/merge-logs.rs
Normal file
|
|
@ -0,0 +1,205 @@
|
||||||
|
// merge-logs: Recover historical entries from a checkpoint log and merge
|
||||||
|
// with the current log into a NEW output file.
|
||||||
|
//
|
||||||
|
// This tool was written to recover history destroyed by rewrite_store()
|
||||||
|
// (see persist.rs comment). It reads two capnp node logs, finds entries
|
||||||
|
// in the old log that don't exist in the current log (by uuid+version),
|
||||||
|
// and writes a merged log containing both.
|
||||||
|
//
|
||||||
|
// SAFETY: This tool never modifies either input file. The merged output
|
||||||
|
// goes to a new directory specified by the user.
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
// merge-logs <old_log> <current_log> <output_dir>
|
||||||
|
//
|
||||||
|
// Example:
|
||||||
|
// merge-logs ~/.claude/memory/checkpoints/nodes.capnp \
|
||||||
|
// ~/.claude/memory/nodes.capnp \
|
||||||
|
// /tmp/merged-store
|
||||||
|
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::fs;
|
||||||
|
use std::io::{BufReader, BufWriter};
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use capnp::message;
|
||||||
|
use capnp::serialize;
|
||||||
|
|
||||||
|
use poc_memory::memory_capnp;
|
||||||
|
use poc_memory::store::Node;
|
||||||
|
|
||||||
|
/// Read all node entries from a capnp log file, preserving order.
|
||||||
|
fn read_all_entries(path: &Path) -> Result<Vec<Node>, String> {
|
||||||
|
let file = fs::File::open(path)
|
||||||
|
.map_err(|e| format!("open {}: {}", path.display(), e))?;
|
||||||
|
let mut reader = BufReader::new(file);
|
||||||
|
let mut entries = Vec::new();
|
||||||
|
|
||||||
|
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
|
||||||
|
let log = msg.get_root::<memory_capnp::node_log::Reader>()
|
||||||
|
.map_err(|e| format!("read log from {}: {}", path.display(), e))?;
|
||||||
|
for node_reader in log.get_nodes()
|
||||||
|
.map_err(|e| format!("get nodes from {}: {}", path.display(), e))? {
|
||||||
|
let node = Node::from_capnp_migrate(node_reader)?;
|
||||||
|
entries.push(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(entries)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write node entries to a new capnp log file in chunks.
|
||||||
|
fn write_entries(path: &Path, entries: &[Node]) -> Result<(), String> {
|
||||||
|
let file = fs::File::create(path)
|
||||||
|
.map_err(|e| format!("create {}: {}", path.display(), e))?;
|
||||||
|
let mut writer = BufWriter::new(file);
|
||||||
|
|
||||||
|
for chunk in entries.chunks(100) {
|
||||||
|
let mut msg = message::Builder::new_default();
|
||||||
|
{
|
||||||
|
let log = msg.init_root::<memory_capnp::node_log::Builder>();
|
||||||
|
let mut list = log.init_nodes(chunk.len() as u32);
|
||||||
|
for (i, node) in chunk.iter().enumerate() {
|
||||||
|
node.to_capnp(list.reborrow().get(i as u32));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
serialize::write_message(&mut writer, &msg)
|
||||||
|
.map_err(|e| format!("write: {}", e))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<(), String> {
|
||||||
|
let args: Vec<String> = std::env::args().collect();
|
||||||
|
if args.len() != 4 {
|
||||||
|
eprintln!("Usage: merge-logs <old_log> <current_log> <output_dir>");
|
||||||
|
eprintln!();
|
||||||
|
eprintln!("Merges historical entries from old_log with current_log,");
|
||||||
|
eprintln!("writing the result to output_dir/nodes.capnp.");
|
||||||
|
eprintln!("Neither input file is modified.");
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
let old_path = Path::new(&args[1]);
|
||||||
|
let current_path = Path::new(&args[2]);
|
||||||
|
let output_dir = Path::new(&args[3]);
|
||||||
|
|
||||||
|
// Validate inputs exist
|
||||||
|
if !old_path.exists() {
|
||||||
|
return Err(format!("old log not found: {}", old_path.display()));
|
||||||
|
}
|
||||||
|
if !current_path.exists() {
|
||||||
|
return Err(format!("current log not found: {}", current_path.display()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create output directory (must not already contain nodes.capnp)
|
||||||
|
fs::create_dir_all(output_dir)
|
||||||
|
.map_err(|e| format!("create output dir: {}", e))?;
|
||||||
|
let output_path = output_dir.join("nodes.capnp");
|
||||||
|
if output_path.exists() {
|
||||||
|
return Err(format!("output already exists: {} — refusing to overwrite",
|
||||||
|
output_path.display()));
|
||||||
|
}
|
||||||
|
|
||||||
|
eprintln!("Reading old log: {} ...", old_path.display());
|
||||||
|
let old_entries = read_all_entries(old_path)?;
|
||||||
|
eprintln!(" {} entries", old_entries.len());
|
||||||
|
|
||||||
|
eprintln!("Reading current log: {} ...", current_path.display());
|
||||||
|
let current_entries = read_all_entries(current_path)?;
|
||||||
|
eprintln!(" {} entries", current_entries.len());
|
||||||
|
|
||||||
|
// Build set of (uuid, version) pairs from current log
|
||||||
|
let current_set: HashSet<([u8; 16], u32)> = current_entries.iter()
|
||||||
|
.map(|n| (n.uuid, n.version))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Find entries in old log not present in current log
|
||||||
|
let recovered: Vec<&Node> = old_entries.iter()
|
||||||
|
.filter(|n| !current_set.contains(&(n.uuid, n.version)))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
eprintln!();
|
||||||
|
eprintln!("Current log has {} unique (uuid, version) pairs", current_set.len());
|
||||||
|
eprintln!("Old log entries already in current: {}", old_entries.len() - recovered.len());
|
||||||
|
eprintln!("Old log entries to recover: {}", recovered.len());
|
||||||
|
|
||||||
|
// Count unique keys being recovered
|
||||||
|
let recovered_keys: HashSet<&str> = recovered.iter()
|
||||||
|
.map(|n| n.key.as_str())
|
||||||
|
.collect();
|
||||||
|
eprintln!("Unique keys with recovered history: {}", recovered_keys.len());
|
||||||
|
|
||||||
|
// Show some stats about what we're recovering
|
||||||
|
let mut version_counts: HashMap<&str, Vec<u32>> = HashMap::new();
|
||||||
|
for node in &recovered {
|
||||||
|
version_counts.entry(&node.key)
|
||||||
|
.or_default()
|
||||||
|
.push(node.version);
|
||||||
|
}
|
||||||
|
let mut keys_by_versions: Vec<_> = version_counts.iter()
|
||||||
|
.map(|(k, v)| (*k, v.len()))
|
||||||
|
.collect();
|
||||||
|
keys_by_versions.sort_by(|a, b| b.1.cmp(&a.1));
|
||||||
|
eprintln!();
|
||||||
|
eprintln!("Top 20 keys by recovered versions:");
|
||||||
|
for (key, count) in keys_by_versions.iter().take(20) {
|
||||||
|
eprintln!(" {:4} versions {}", count, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build merged log: recovered entries (preserving order), then current entries
|
||||||
|
let mut merged: Vec<Node> = Vec::with_capacity(recovered.len() + current_entries.len());
|
||||||
|
for node in recovered {
|
||||||
|
merged.push(node.clone());
|
||||||
|
}
|
||||||
|
for node in current_entries {
|
||||||
|
merged.push(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
eprintln!();
|
||||||
|
eprintln!("Writing merged log: {} ({} entries) ...",
|
||||||
|
output_path.display(), merged.len());
|
||||||
|
write_entries(&output_path, &merged)?;
|
||||||
|
|
||||||
|
let output_size = fs::metadata(&output_path).map(|m| m.len()).unwrap_or(0);
|
||||||
|
eprintln!("Done. Output: {} ({:.1} MB)", output_path.display(),
|
||||||
|
output_size as f64 / 1_048_576.0);
|
||||||
|
|
||||||
|
// Verify: replay the merged log and check node count
|
||||||
|
eprintln!();
|
||||||
|
eprintln!("Verifying merged log...");
|
||||||
|
let verify_entries = read_all_entries(&output_path)?;
|
||||||
|
eprintln!(" Read back {} entries (expected {})",
|
||||||
|
verify_entries.len(), merged.len());
|
||||||
|
|
||||||
|
// Replay to get final state
|
||||||
|
let mut final_nodes: HashMap<String, Node> = HashMap::new();
|
||||||
|
for node in &verify_entries {
|
||||||
|
let dominated = final_nodes.get(&node.key)
|
||||||
|
.map(|n| node.version >= n.version)
|
||||||
|
.unwrap_or(true);
|
||||||
|
if dominated {
|
||||||
|
if node.deleted {
|
||||||
|
final_nodes.remove(&node.key);
|
||||||
|
} else {
|
||||||
|
final_nodes.insert(node.key.clone(), node.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eprintln!(" Replay produces {} live nodes", final_nodes.len());
|
||||||
|
|
||||||
|
if verify_entries.len() != merged.len() {
|
||||||
|
return Err(format!("Verification failed: wrote {} but read back {}",
|
||||||
|
merged.len(), verify_entries.len()));
|
||||||
|
}
|
||||||
|
|
||||||
|
eprintln!();
|
||||||
|
eprintln!("Merge complete. To use the merged log:");
|
||||||
|
eprintln!(" 1. Back up ~/.claude/memory/nodes.capnp");
|
||||||
|
eprintln!(" 2. cp {} ~/.claude/memory/nodes.capnp", output_path.display());
|
||||||
|
eprintln!(" 3. rm ~/.claude/memory/state.bin ~/.claude/memory/snapshot.rkyv");
|
||||||
|
eprintln!(" 4. poc-memory admin fsck");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
@ -256,16 +256,17 @@ pub fn cmd_history(key: &[String], full: bool) -> Result<(), String> {
|
||||||
} else {
|
} else {
|
||||||
format!("(raw:{})", node.timestamp)
|
format!("(raw:{})", node.timestamp)
|
||||||
};
|
};
|
||||||
|
let deleted_marker = if node.deleted { " DELETED" } else { "" };
|
||||||
let content_len = node.content.len();
|
let content_len = node.content.len();
|
||||||
if full {
|
if full {
|
||||||
eprintln!("=== v{} {} {} w={:.3} {}b ===",
|
eprintln!("=== v{} {} {}{} w={:.3} {}b ===",
|
||||||
node.version, ts, node.provenance, node.weight, content_len);
|
node.version, ts, node.provenance, deleted_marker, node.weight, content_len);
|
||||||
eprintln!("{}", node.content);
|
eprintln!("{}", node.content);
|
||||||
} else {
|
} else {
|
||||||
let preview = crate::util::first_n_chars(&node.content, 120);
|
let preview = crate::util::first_n_chars(&node.content, 120);
|
||||||
let preview = preview.replace('\n', "\\n");
|
let preview = preview.replace('\n', "\\n");
|
||||||
eprintln!(" v{:<3} {} {:24} w={:.3} {}b",
|
eprintln!(" v{:<3} {} {:24} w={:.3} {}b{}",
|
||||||
node.version, ts, node.provenance, node.weight, content_len);
|
node.version, ts, node.provenance, node.weight, content_len, deleted_marker);
|
||||||
eprintln!(" {}", preview);
|
eprintln!(" {}", preview);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ macro_rules! capnp_message {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn to_capnp(&self, mut b: $builder) {
|
pub fn to_capnp(&self, mut b: $builder) {
|
||||||
paste::paste! {
|
paste::paste! {
|
||||||
$(b.[<set_ $tf>](&self.$tf);)*
|
$(b.[<set_ $tf>](&self.$tf);)*
|
||||||
$(b.[<set_ $uf>](&self.$uf);)*
|
$(b.[<set_ $uf>](&self.$uf);)*
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue