store: strip .md suffix from all keys

Keys were a vestige of the file-based era. resolve_key() added .md
to lookups while upsert() used bare keys, creating phantom duplicate
nodes (the instructions bug: writes went to "instructions", reads
found "instructions.md").

- Remove .md-appending normalization from resolve_key(); strip the suffix instead
- Update all hardcoded key patterns (journal.md# → journal#, etc.)
- Add strip_md_keys() migration to fsck: renames nodes and relations
- Add broken link detection to health report
- Delete redirect table (no longer needed)
- Update config defaults and config.jsonl

Migration: run `poc-memory fsck` to rename existing keys.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-08 19:41:26 -04:00
parent 77fc533631
commit 46f8fe662e
12 changed files with 289 additions and 132 deletions

View file

@ -40,7 +40,7 @@ pub fn transcript_dedup_key(path: &str) -> Result<String, String> {
let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?;
let mut hasher = DefaultHasher::new();
bytes.hash(&mut hasher);
Ok(format!("_mined-transcripts.md#h-{:016x}", hasher.finish()))
Ok(format!("_mined-transcripts#h-{:016x}", hasher.finish()))
}
/// Check if a transcript has already been mined (dedup key exists in store).
@ -58,7 +58,7 @@ pub fn transcript_filename_key(path: &str) -> String {
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_else(|| path.to_string());
format!("_mined-transcripts.md#f-{}", filename)
format!("_mined-transcripts#f-{}", filename)
}
/// Get the set of all mined transcript keys (both content-hash and filename)
@ -68,7 +68,7 @@ pub fn mined_transcript_keys() -> HashSet<String> {
let Ok(view) = AnyView::load() else { return HashSet::new() };
let mut keys = HashSet::new();
view.for_each_node(|key, _, _| {
if key.starts_with("_mined-transcripts.md#") {
if key.starts_with("_mined-transcripts#") {
keys.insert(key.to_string());
}
});
@ -300,7 +300,7 @@ pub fn experience_mine(
let mut hasher = DefaultHasher::new();
transcript_bytes.hash(&mut hasher);
let hash = hasher.finish();
let dedup_key = format!("_mined-transcripts.md#h-{:016x}", hash);
let dedup_key = format!("_mined-transcripts#h-{:016x}", hash);
if store.nodes.contains_key(&dedup_key) {
// Backfill filename key if missing (transcripts mined before this key existed)
@ -332,15 +332,15 @@ pub fn experience_mine(
println!(" {} messages, {} chars", messages.len(), conversation.len());
// Load identity
let identity = store.nodes.get("identity.md")
let identity = store.nodes.get("identity")
.map(|n| n.content.clone())
.unwrap_or_default();
// Get recent journal entries to avoid duplication
let key_date_re = Regex::new(r"^journal\.md#j-(\d{4}-\d{2}-\d{2}[t-]\d{2}-\d{2})").unwrap();
let key_date_re = Regex::new(r"^journal#j-(\d{4}-\d{2}-\d{2}[t-]\d{2}-\d{2})").unwrap();
let date_re = Regex::new(r"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2})").unwrap();
let mut journal: Vec<_> = store.nodes.values()
.filter(|node| node.key.starts_with("journal.md#j-"))
.filter(|node| node.key.starts_with("journal#j-"))
.collect();
journal.sort_by(|a, b| {
let ak = key_date_re.captures(&a.key).map(|c| c[1].to_string())
@ -411,9 +411,9 @@ pub fn experience_mine(
.to_lowercase()
.replace(' ', "-");
let key = if ts.is_empty() {
format!("journal.md#j-mined-{}", key_slug)
format!("journal#j-mined-{}", key_slug)
} else {
format!("journal.md#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
format!("journal#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
};
// Check for duplicate