store: strip .md suffix from all keys

The .md suffix on keys was a vestige of the file-based era. resolve_key()
added .md to lookups while upsert() used bare keys, creating phantom duplicate
nodes (the instructions bug: writes went to "instructions", reads
found "instructions.md").

- resolve_key no longer appends .md to keys; it strips the suffix instead
- Update all hardcoded key patterns (journal.md# → journal#, etc)
- Add strip_md_keys() migration to fsck: renames nodes and relations
- Add broken link detection to health report
- Delete redirect table (no longer needed)
- Update config defaults and config.jsonl

Migration: run `poc-memory fsck` to rename existing keys.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-08 19:41:26 -04:00
parent 77fc533631
commit 46f8fe662e
12 changed files with 289 additions and 132 deletions

View file

@ -24,13 +24,15 @@ pub struct MemoryUnit {
}
/// Classifies a memory file name (or bare key) into its [`NodeType`].
///
/// A trailing `.md` is ignored, so `"journal.md"` and `"journal"` classify
/// identically. The rules, checked in order against the suffix-stripped name:
///
/// * starts with `daily-`  -> `NodeType::EpisodicDaily`
/// * starts with `weekly-` -> `NodeType::EpisodicWeekly`
/// * exactly `journal`     -> `NodeType::EpisodicSession`
/// * anything else         -> `NodeType::Semantic`
pub fn classify_filename(filename: &str) -> NodeType {
    // Keys are stored without the `.md` suffix, but callers may still pass a
    // real file name; strip it once so both spellings take the same branch.
    let bare = filename.strip_suffix(".md").unwrap_or(filename);
    if bare.starts_with("daily-") {
        NodeType::EpisodicDaily
    } else if bare.starts_with("weekly-") {
        NodeType::EpisodicWeekly
    } else if bare == "journal" {
        NodeType::EpisodicSession
    } else {
        NodeType::Semantic
    }
}
pub fn parse_units(filename: &str, content: &str) -> Vec<MemoryUnit> {
pub fn parse_units(raw_filename: &str, content: &str) -> Vec<MemoryUnit> {
let filename = raw_filename.strip_suffix(".md").unwrap_or(raw_filename);
static MARKER_RE: OnceLock<Regex> = OnceLock::new();
static SOURCE_RE: OnceLock<Regex> = OnceLock::new();
static MD_LINK_RE: OnceLock<Regex> = OnceLock::new();
@ -40,7 +42,7 @@ pub fn parse_units(filename: &str, content: &str) -> Vec<MemoryUnit> {
let source_re = SOURCE_RE.get_or_init(||
Regex::new(r"<!--\s*source:\s*(.+?)\s*-->").unwrap());
let md_link_re = MD_LINK_RE.get_or_init(||
Regex::new(r"\[[^\]]*\]\(([^)]*\.md(?:#[^)]*)?)\)").unwrap());
Regex::new(r"\[[^\]]*\]\(([^):]+(?:#[^)]*)?)\)").unwrap());
let markers: Vec<_> = marker_re.captures_iter(content)
.map(|cap| {
@ -145,8 +147,10 @@ fn extract_md_links(content: &str, re: &Regex, source_file: &str) -> Vec<String>
}
pub fn normalize_link(target: &str, source_file: &str) -> String {
let source_bare = source_file.strip_suffix(".md").unwrap_or(source_file);
if target.starts_with('#') {
return format!("{}{}", source_file, target);
return format!("{}{}", source_bare, target);
}
let (path_part, fragment) = if let Some(hash_pos) = target.find('#') {
@ -159,9 +163,10 @@ pub fn normalize_link(target: &str, source_file: &str) -> String {
.file_name()
.map(|f| f.to_string_lossy().to_string())
.unwrap_or_else(|| path_part.to_string());
let bare = basename.strip_suffix(".md").unwrap_or(&basename);
match fragment {
Some(frag) => format!("{}{}", basename, frag),
None => basename,
Some(frag) => format!("{}{}", bare, frag),
None => bare.to_string(),
}
}