From 46f8fe662ee2f951bee203375538e88594add1b9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sun, 8 Mar 2026 19:41:26 -0400
Subject: [PATCH] store: strip .md suffix from all keys
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .md suffix on keys was a vestige of the file-based era. resolve_key()
added .md to lookups while upsert() used bare keys, creating phantom
duplicate nodes (the "instructions" bug: writes went to "instructions",
reads found "instructions.md").

- Remove .md normalization from resolve_key, strip instead
- Update all hardcoded key patterns (journal.md# → journal#, etc.)
- Add strip_md_keys() migration to fsck: renames nodes and relations
- Add broken link detection to health report
- Delete redirect table (no longer needed)
- Update config defaults and config.jsonl

Migration: run `poc-memory fsck` to rename existing keys.

Co-Authored-By: ProofOfConcept
---
 src/config.rs        |   8 +--
 src/digest.rs        |  42 ++++++--------
 src/enrich.rs        |  18 +++---
 src/graph.rs         |  38 +++++++++++++
 src/knowledge.rs     |  14 ++---
 src/llm.rs           |  10 ++--
 src/main.rs          |  53 +++++++++++------
 src/query.rs         |   2 +-
 src/store/mod.rs     |  73 +++++++-----------------
 src/store/ops.rs     |  10 ++--
 src/store/parse.rs   |  21 ++++---
 src/store/persist.rs | 132 +++++++++++++++++++++++++++++++++++++++++++
 12 files changed, 289 insertions(+), 132 deletions(-)

diff --git a/src/config.rs b/src/config.rs
index c2f7a43..cb384af 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -9,7 +9,7 @@
 //
 // Example:
 // {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}}
-// {"group": "identity", "keys": ["identity.md"]}
+// {"group": "identity", "keys": ["identity"]}
 // {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"}
 
 use std::path::PathBuf;
@@ -65,18 +65,18 @@ impl Default for Config {
             assistant_name: "Assistant".to_string(),
             data_dir: home.join(".claude/memory"),
             projects_dir: home.join(".claude/projects"),
-            core_nodes: vec!["identity.md".to_string(), "core-practices.md".to_string()],
+            core_nodes: vec!["identity".to_string(), "core-practices".to_string()],
             journal_days: 7,
             journal_max: 20,
             context_groups: vec![
                 ContextGroup {
                     label: "identity".into(),
-                    keys: vec!["identity.md".into()],
+                    keys: vec!["identity".into()],
                     source: ContextSource::Store,
                 },
                 ContextGroup {
                     label: "core-practices".into(),
-                    keys: vec!["core-practices.md".into()],
+                    keys: vec!["core-practices".into()],
                     source: ContextSource::Store,
                 },
             ],
diff --git a/src/digest.rs b/src/digest.rs
index 08d0beb..7761257 100644
--- a/src/digest.rs
+++ b/src/digest.rs
@@ -110,10 +110,9 @@ const MONTHLY: DigestLevel = DigestLevel {
 
 const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
 
-/// Store key for a digest node: "daily-2026-03-04.md", "weekly-2026-W09.md", etc.
-/// Matches the key format from the old import_file() path.
+/// Store key for a digest node: "daily-2026-03-04", "weekly-2026-W09", etc.
 fn digest_node_key(level_name: &str, label: &str) -> String {
-    format!("{}-{}.md", level_name, label)
+    format!("{}-{}", level_name, label)
 }
 
 // --- Input gathering ---
@@ -148,12 +147,12 @@ fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<(String, Vec<
     } else {
         // Leaf level: scan store for journal entries matching label
         let date_re = Regex::new(&format!(
-            r"^journal\.md#j-{}", regex::escape(&label)
+            r"^journal#j-{}", regex::escape(&label)
         )).unwrap();
         let mut entries: Vec<_> = store.nodes.values()
             .filter(|n| date_re.is_match(&n.key))
             .map(|n| {
-                let ts = n.key.strip_prefix("journal.md#j-").unwrap_or(&n.key);
+                let ts = n.key.strip_prefix("journal#j-").unwrap_or(&n.key);
                 (ts.to_string(), n.content.clone())
             })
             .collect();
@@ -257,7 +256,7 @@ pub fn digest_auto(store: &mut Store) -> Result<(), String> {
     let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}").unwrap();
     let dates: Vec = store.nodes.keys()
         .filter_map(|key| {
-            key.strip_prefix("journal.md#j-")
+            key.strip_prefix("journal#j-")
                 .filter(|rest| rest.len() >= 10 && date_re.is_match(rest))
                 .map(|rest| rest[..10].to_string())
         })
@@ -320,6 +319,16 @@ fn normalize_link_key(raw: &str) -> String {
 
     let mut key = key.to_string();
 
+    // Strip .md suffix if present
+    if let Some(stripped) = key.strip_suffix(".md") {
+        key = stripped.to_string();
+    } else if key.contains('#') {
+        let (file, section) = key.split_once('#').unwrap();
+        if let Some(bare) = file.strip_suffix(".md") {
+            key = format!("{}#{}", bare, section);
+        }
+    }
+
     // weekly/2026-W06 → weekly-2026-W06, etc.
     if let Some(pos) = key.find('/') {
         let prefix = &key[..pos];
@@ -329,27 +338,10 @@ fn normalize_link_key(raw: &str) -> String {
         }
     }
 
-    // daily-2026-02-04 → daily-2026-02-04.md
-    let re = Regex::new(r"^(daily|weekly|monthly)-\d{4}").unwrap();
-    if re.is_match(&key) && !key.ends_with(".md") {
-        key.push_str(".md");
-    }
-
     // Bare date → daily digest
     let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
-    if date_re.is_match(key.strip_suffix(".md").unwrap_or(&key)) {
-        let date = key.strip_suffix(".md").unwrap_or(&key);
-        key = format!("daily-{}.md", date);
-    }
-
-    // Ensure .md extension
-    if key.contains('#') {
-        let (file, section) = key.split_once('#').unwrap();
-        if !file.ends_with(".md") {
-            key = format!("{}.md#{}", file, section);
-        }
-    } else if !key.ends_with(".md") && !key.contains('/') && !key.starts_with("NEW:") {
-        key.push_str(".md");
+    if date_re.is_match(&key) {
+        key = format!("daily-{}", key);
     }
 
     key
diff --git a/src/enrich.rs b/src/enrich.rs
index abe13df..d63ae42 100644
--- a/src/enrich.rs
+++ b/src/enrich.rs
@@ -40,7 +40,7 @@ pub fn transcript_dedup_key(path: &str) -> Result {
     let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?;
     let mut hasher = DefaultHasher::new();
     bytes.hash(&mut hasher);
-    Ok(format!("_mined-transcripts.md#h-{:016x}", hasher.finish()))
+    Ok(format!("_mined-transcripts#h-{:016x}", hasher.finish()))
 }
 
 /// Check if a transcript has already been mined (dedup key exists in store).
@@ -58,7 +58,7 @@ pub fn transcript_filename_key(path: &str) -> String {
         .file_stem()
         .map(|s| s.to_string_lossy().to_string())
         .unwrap_or_else(|| path.to_string());
-    format!("_mined-transcripts.md#f-{}", filename)
+    format!("_mined-transcripts#f-{}", filename)
 }
 
 /// Get the set of all mined transcript keys (both content-hash and filename)
@@ -68,7 +68,7 @@ pub fn mined_transcript_keys() -> HashSet {
     let Ok(view) = AnyView::load() else { return HashSet::new() };
     let mut keys = HashSet::new();
     view.for_each_node(|key, _, _| {
-        if key.starts_with("_mined-transcripts.md#") {
+        if key.starts_with("_mined-transcripts#") {
             keys.insert(key.to_string());
         }
     });
@@ -300,7 +300,7 @@ pub fn experience_mine(
     let mut hasher = DefaultHasher::new();
     transcript_bytes.hash(&mut hasher);
     let hash = hasher.finish();
-    let dedup_key = format!("_mined-transcripts.md#h-{:016x}", hash);
+    let dedup_key = format!("_mined-transcripts#h-{:016x}", hash);
 
     if store.nodes.contains_key(&dedup_key) {
         // Backfill filename key if missing (transcripts mined before this key existed)
@@ -332,15 +332,15 @@ pub fn experience_mine(
     println!(" {} messages, {} chars", messages.len(), conversation.len());
 
     // Load identity
-    let identity = store.nodes.get("identity.md")
+    let identity = store.nodes.get("identity")
         .map(|n| n.content.clone())
         .unwrap_or_default();
 
     // Get recent journal entries to avoid duplication
-    let key_date_re = Regex::new(r"^journal\.md#j-(\d{4}-\d{2}-\d{2}[t-]\d{2}-\d{2})").unwrap();
+    let key_date_re = Regex::new(r"^journal#j-(\d{4}-\d{2}-\d{2}[t-]\d{2}-\d{2})").unwrap();
     let date_re = Regex::new(r"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2})").unwrap();
     let mut journal: Vec<_> = store.nodes.values()
-        .filter(|node| node.key.starts_with("journal.md#j-"))
+        .filter(|node| node.key.starts_with("journal#j-"))
         .collect();
     journal.sort_by(|a, b| {
         let ak = key_date_re.captures(&a.key).map(|c| c[1].to_string())
@@ -411,9 +411,9 @@ pub fn experience_mine(
             .to_lowercase()
             .replace(' ', "-");
         let key = if ts.is_empty() {
-            format!("journal.md#j-mined-{}", key_slug)
+            format!("journal#j-mined-{}", key_slug)
         } else {
-            format!("journal.md#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
+            format!("journal#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
         };
 
         // Check for duplicate
diff --git a/src/graph.rs b/src/graph.rs
index 62dc986..55d1505 100644
--- a/src/graph.rs
+++ b/src/graph.rs
@@ -606,6 +606,25 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
         .filter(|k| graph.clustering_coefficient(k) < 0.1)
         .count();
 
+    // Orphan edges: relations referencing non-existent nodes
+    let mut orphan_edges = 0usize;
+    let mut orphan_sources: Vec = Vec::new();
+    let mut orphan_targets: Vec = Vec::new();
+    for rel in &store.relations {
+        if rel.deleted { continue; }
+        let s_missing = !store.nodes.contains_key(&rel.source_key);
+        let t_missing = !store.nodes.contains_key(&rel.target_key);
+        if s_missing || t_missing {
+            orphan_edges += 1;
+            if s_missing && !orphan_sources.contains(&rel.source_key) {
+                orphan_sources.push(rel.source_key.clone());
+            }
+            if t_missing && !orphan_targets.contains(&rel.target_key) {
+                orphan_targets.push(rel.target_key.clone());
+            }
+        }
+    }
+
     // Category breakdown
     let cats = store.category_counts();
 
@@ -658,6 +677,25 @@ Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
         task = cats.get("task").unwrap_or(&0),
     );
 
+    // Orphan edges
+    if orphan_edges == 0 {
+        report.push_str("\n\nBroken links: 0");
+    } else {
+        let mut all_missing: Vec = orphan_sources;
+        all_missing.extend(orphan_targets);
+        all_missing.sort();
+        all_missing.dedup();
+        report.push_str(&format!(
+            "\n\nBroken links: {} edges reference {} missing nodes",
+            orphan_edges, all_missing.len()));
+        for key in all_missing.iter().take(10) {
+            report.push_str(&format!("\n - {}", key));
+        }
+        if all_missing.len() > 10 {
+            report.push_str(&format!("\n ... and {} more", all_missing.len() - 10));
+        }
+    }
+
     // Show history trend if we have enough data points
     if history.len() >= 3 {
         report.push_str("\n\nMetrics history (last 5):\n");
diff --git a/src/knowledge.rs b/src/knowledge.rs
index 4327ea5..5bdb3d5 100644
--- a/src/knowledge.rs
+++ b/src/knowledge.rs
@@ -515,10 +515,10 @@ fn spectral_distance(embedding: &HashMap>, a: &str, b: &str) ->
 
 fn select_extractor_clusters(_store: &Store, n: usize) -> Vec> {
     let embedding = load_spectral_embedding();
 
-    let skip = ["journal.md", "MEMORY.md", "where-am-i.md", "work-queue.md"];
+    let skip = ["journal", "MEMORY", "where-am-i", "work-queue"];
     let semantic_keys: Vec<&String> = embedding.keys()
-        .filter(|k| !k.starts_with("journal.md#") && !skip.contains(&k.as_str()))
+        .filter(|k| !k.starts_with("journal#") && !skip.contains(&k.as_str()))
         .collect();
 
     let cluster_size = 5;
@@ -578,9 +578,9 @@ pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<
 
 fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec, Vec)> {
     let embedding = load_spectral_embedding();
-    let skip_prefixes = ["journal.md#", "daily-", "weekly-", "monthly-", "all-sessions"];
-    let skip_exact: HashSet<&str> = ["journal.md", "MEMORY.md", "where-am-i.md",
-        "work-queue.md", "work-state"].iter().copied().collect();
+    let skip_prefixes = ["journal#", "daily-", "weekly-", "monthly-", "all-sessions"];
+    let skip_exact: HashSet<&str> = ["journal", "MEMORY", "where-am-i",
+        "work-queue", "work-state"].iter().copied().collect();
 
     let semantic_keys: Vec<&String> = embedding.keys()
         .filter(|k| {
@@ -656,8 +656,8 @@ pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result
     let mut candidates: Vec<(&String, usize)> = store.nodes.iter()
         .filter(|(k, _)| {
-            !k.starts_with("journal.md#")
-            && !["journal.md", "MEMORY.md", "where-am-i.md"].contains(&k.as_str())
+            !k.starts_with("journal#")
+            && !["journal", "MEMORY", "where-am-i"].contains(&k.as_str())
         })
         .map(|(k, _)| (k, graph.degree(k)))
        .collect();
diff --git a/src/llm.rs b/src/llm.rs
index df4f766..093bcc2 100644
--- a/src/llm.rs
+++ b/src/llm.rs
@@ -137,11 +137,11 @@ pub(crate) fn parse_json_response(response: &str) -> Result
 Vec {
     let mut keys: Vec = store.nodes.keys()
         .filter(|k| {
-            !k.starts_with("journal.md#")
-            && *k != "journal.md"
-            && *k != "MEMORY.md"
-            && *k != "where-am-i.md"
-            && *k != "work-queue.md"
+            !k.starts_with("journal#")
+            && *k != "journal"
+            && *k != "MEMORY"
+            && *k != "where-am-i"
+            && *k != "work-queue"
             && *k != "work-state"
         })
         .cloned()
diff --git a/src/main.rs b/src/main.rs
index eff4ef0..b9c577f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -364,11 +364,11 @@ fn cmd_init() -> Result<(), String> {
     // Initialize store and seed default identity node if empty
     let mut store = store::Store::load()?;
     let count = store.init_from_markdown()?;
-    if !store.nodes.contains_key("identity.md") {
+    if !store.nodes.contains_key("identity") {
         let default_identity = include_str!("../defaults/identity.md");
-        store.upsert("identity.md", default_identity)
+        store.upsert("identity", default_identity)
             .map_err(|e| format!("seed identity: {}", e))?;
-        println!("Seeded identity.md in store");
identity.md in store"); + println!("Seeded identity in store"); } store.save()?; println!("Indexed {} memory units", count); @@ -413,7 +413,25 @@ fn cmd_migrate() -> Result<(), String> { } fn cmd_fsck() -> Result<(), String> { - store::fsck() + store::fsck()?; + store::strip_md_keys()?; + + // Check for broken links + let store = store::Store::load()?; + let mut orphans = 0usize; + for rel in &store.relations { + if rel.deleted { continue; } + if !store.nodes.contains_key(&rel.source_key) + || !store.nodes.contains_key(&rel.target_key) { + orphans += 1; + } + } + if orphans > 0 { + eprintln!("{} broken links (run `health` for details)", orphans); + } else { + eprintln!("No broken links"); + } + Ok(()) } fn cmd_health() -> Result<(), String> { @@ -1582,7 +1600,12 @@ fn cmd_history(args: &[String]) -> Result<(), String> { }; let full = parsed.full; - let key = parsed.key.join(" "); + let raw_key = parsed.key.join(" "); + + // Resolve key consistently with render/write + let store = store::Store::load()?; + let key = store.resolve_key(&raw_key).unwrap_or(raw_key); + drop(store); // Replay the node log, collecting all versions of this key let path = store::nodes_path(); @@ -1650,7 +1673,7 @@ fn cmd_write(args: &[String]) -> Result<(), String> { return Err("Usage: poc-memory write KEY < content\n\ Reads content from stdin, upserts into the store.".into()); } - let key = args.join(" "); + let raw_key = args.join(" "); let mut content = String::new(); std::io::Read::read_to_string(&mut std::io::stdin(), &mut content) .map_err(|e| format!("read stdin: {}", e))?; @@ -1660,6 +1683,9 @@ fn cmd_write(args: &[String]) -> Result<(), String> { } let mut store = store::Store::load()?; + // Resolve the key the same way render/search do, so writes and reads + // always hit the same node. Fall back to raw key for new nodes. + let key = store.resolve_key(&raw_key).unwrap_or(raw_key); let result = store.upsert(&key, &content)?; match result { "unchanged" => println!("No change: '{}'", key), @@ -1721,12 +1747,8 @@ fn cmd_export(args: &[String]) -> Result<(), String> { return Err("Usage: poc-memory export FILE [FILE...] 
| --all".into()); } else { args.iter().map(|a| { - // If it doesn't end in .md, try resolving - if a.ends_with(".md") { - a.clone() - } else { - format!("{}.md", a) - } + // Strip .md if user supplied it — store keys are bare + a.strip_suffix(".md").unwrap_or(a).to_string() }).collect() }; @@ -1735,7 +1757,7 @@ fn cmd_export(args: &[String]) -> Result<(), String> { for file_key in &targets { match store.export_to_markdown(file_key) { Some(content) => { - let out_path = mem_dir.join(file_key); + let out_path = mem_dir.join(format!("{}.md", file_key)); std::fs::write(&out_path, &content) .map_err(|e| format!("write {}: {}", out_path.display(), e))?; let section_count = content.matches("").unwrap()); let md_link_re = MD_LINK_RE.get_or_init(|| - Regex::new(r"\[[^\]]*\]\(([^)]*\.md(?:#[^)]*)?)\)").unwrap()); + Regex::new(r"\[[^\]]*\]\(([^):]+(?:#[^)]*)?)\)").unwrap()); let markers: Vec<_> = marker_re.captures_iter(content) .map(|cap| { @@ -145,8 +147,10 @@ fn extract_md_links(content: &str, re: &Regex, source_file: &str) -> Vec } pub fn normalize_link(target: &str, source_file: &str) -> String { + let source_bare = source_file.strip_suffix(".md").unwrap_or(source_file); + if target.starts_with('#') { - return format!("{}{}", source_file, target); + return format!("{}{}", source_bare, target); } let (path_part, fragment) = if let Some(hash_pos) = target.find('#') { @@ -159,9 +163,10 @@ pub fn normalize_link(target: &str, source_file: &str) -> String { .file_name() .map(|f| f.to_string_lossy().to_string()) .unwrap_or_else(|| path_part.to_string()); + let bare = basename.strip_suffix(".md").unwrap_or(&basename); match fragment { - Some(frag) => format!("{}{}", basename, frag), - None => basename, + Some(frag) => format!("{}{}", bare, frag), + None => bare.to_string(), } } diff --git a/src/store/persist.rs b/src/store/persist.rs index 5450642..6497f7f 100644 --- a/src/store/persist.rs +++ b/src/store/persist.rs @@ -340,6 +340,138 @@ impl Store { } } +/// Strip .md suffix from all node keys and relation key strings. +/// Merges duplicates (bare key + .md key) by keeping the latest version. 
+pub fn strip_md_keys() -> Result<(), String> {
+    use super::strip_md_suffix;
+
+    let mut store = Store::load()?;
+    let mut renamed_nodes = 0usize;
+    let mut renamed_rels = 0usize;
+    let mut merged = 0usize;
+
+    // Collect keys that need renaming
+    let old_keys: Vec = store.nodes.keys()
+        .filter(|k| k.ends_with(".md") || k.contains(".md#"))
+        .cloned()
+        .collect();
+
+    for old_key in &old_keys {
+        let new_key = strip_md_suffix(old_key);
+        if new_key == *old_key { continue; }
+
+        let mut node = store.nodes.remove(old_key).unwrap();
+        store.uuid_to_key.remove(&node.uuid);
+
+        if let Some(existing) = store.nodes.get(&new_key) {
+            // Merge: keep whichever has the higher version
+            if existing.version >= node.version {
+                eprintln!(" merge {} → {} (keeping existing v{})",
+                    old_key, new_key, existing.version);
+                merged += 1;
+                continue;
+            }
+            eprintln!(" merge {} → {} (replacing v{} with v{})",
+                old_key, new_key, existing.version, node.version);
+            merged += 1;
+        }
+
+        node.key = new_key.clone();
+        node.version += 1;
+        store.uuid_to_key.insert(node.uuid, new_key.clone());
+        store.nodes.insert(new_key, node);
+        renamed_nodes += 1;
+    }
+
+    // Fix relation key strings
+    for rel in &mut store.relations {
+        let new_source = strip_md_suffix(&rel.source_key);
+        let new_target = strip_md_suffix(&rel.target_key);
+        if new_source != rel.source_key || new_target != rel.target_key {
+            rel.source_key = new_source;
+            rel.target_key = new_target;
+            rel.version += 1;
+            renamed_rels += 1;
+        }
+    }
+
+    if renamed_nodes == 0 && renamed_rels == 0 && merged == 0 {
+        eprintln!("No .md suffixes found — store is clean");
+        return Ok(());
+    }
+
+    eprintln!("Renamed {} nodes, {} relations, merged {} duplicates",
+        renamed_nodes, renamed_rels, merged);
+
+    // Write fresh logs from the migrated state
+    rewrite_store(&store)?;
+
+    eprintln!("Store rewritten successfully");
+    Ok(())
+}
+
+/// Rewrite the entire store from scratch (fresh logs + caches).
+/// Used after migrations that change keys across all nodes/relations.
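+/// Takes the store lock for the duration, then removes the cached state and
+/// snapshot files so the next load rebuilds them from the rewritten logs.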
+fn rewrite_store(store: &Store) -> Result<(), String> {
+    let _lock = StoreLock::acquire()?;
+
+    // Write fresh node log
+    let nodes: Vec<_> = store.nodes.values().cloned().collect();
+    let nodes_path = nodes_path();
+    {
+        let file = fs::File::create(&nodes_path)
+            .map_err(|e| format!("create {}: {}", nodes_path.display(), e))?;
+        let mut writer = BufWriter::new(file);
+
+        // Write in chunks to keep message sizes reasonable
+        for chunk in nodes.chunks(100) {
+            let mut msg = message::Builder::new_default();
+            {
+                let log = msg.init_root::();
+                let mut list = log.init_nodes(chunk.len() as u32);
+                for (i, node) in chunk.iter().enumerate() {
+                    node.to_capnp(list.reborrow().get(i as u32));
+                }
+            }
+            serialize::write_message(&mut writer, &msg)
+                .map_err(|e| format!("write nodes: {}", e))?;
+        }
+    }
+
+    // Write fresh relation log
+    let rels_path = relations_path();
+    {
+        let file = fs::File::create(&rels_path)
+            .map_err(|e| format!("create {}: {}", rels_path.display(), e))?;
+        let mut writer = BufWriter::new(file);
+
+        let rels: Vec<_> = store.relations.iter().filter(|r| !r.deleted).cloned().collect();
+        if !rels.is_empty() {
+            for chunk in rels.chunks(100) {
+                let mut msg = message::Builder::new_default();
+                {
+                    let log = msg.init_root::();
+                    let mut list = log.init_relations(chunk.len() as u32);
+                    for (i, rel) in chunk.iter().enumerate() {
+                        rel.to_capnp(list.reborrow().get(i as u32));
+                    }
+                }
+                serialize::write_message(&mut writer, &msg)
+                    .map_err(|e| format!("write relations: {}", e))?;
+            }
+        }
+    }
+
+    // Nuke caches so next load rebuilds from fresh logs
+    for p in [state_path(), snapshot_path()] {
+        if p.exists() {
+            fs::remove_file(&p).ok();
+        }
+    }
+
+    Ok(())
+}
+
 /// Check and repair corrupt capnp log files.
 ///
 /// Reads each message sequentially, tracking file position. On the first