// Append-only Cap'n Proto storage + derived KV cache
//
// Two log files are source of truth:
//   nodes.capnp     - ContentNode messages
//   relations.capnp - Relation messages
//
// The Store struct is the derived cache: latest version per UUID,
// rebuilt from logs when stale. Three-tier load strategy:
//   1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
//   2. bincode cache (state.bin) — ~10ms
//   3. capnp log replay — ~40ms
// Staleness: log file sizes embedded in cache headers.
//
// Module layout:
//   types.rs   — Node, Relation, enums, capnp macros, path helpers
//   parse.rs   — markdown → MemoryUnit parsing
//   view.rs    — zero-copy read-only access (StoreView, MmapView)
//   persist.rs — load, save, replay, append, snapshot (all disk IO)
//   ops.rs     — mutations (upsert, delete, decay, cap_degree, etc.)
//   mod.rs     — re-exports, key resolution, ingestion, rendering

mod types;
mod parse;
mod view;
mod persist;
mod ops;

// Re-export everything callers need
pub use types::*;
pub use parse::{MemoryUnit, parse_units};
pub use view::{StoreView, AnyView};
pub use persist::fsck;
pub use persist::strip_md_keys;

use crate::graph::{self, Graph};

use std::fs;
use std::io::Write as IoWrite;
use std::path::Path;

use parse::classify_filename;

/// Strip .md suffix from a key, handling both bare keys and section keys.
/// "journal.md#j-2026" → "journal#j-2026", "identity.md" → "identity", "identity" → "identity" pub fn strip_md_suffix(key: &str) -> String { if let Some((file, section)) = key.split_once('#') { let bare = file.strip_suffix(".md").unwrap_or(file); format!("{}#{}", bare, section) } else { key.strip_suffix(".md").unwrap_or(key).to_string() } } impl Store { pub fn build_graph(&self) -> Graph { graph::build_graph(self) } pub fn resolve_key(&self, target: &str) -> Result { // Strip .md suffix if present — keys no longer use it let bare = strip_md_suffix(target); if self.nodes.contains_key(&bare) { return Ok(bare); } let matches: Vec<_> = self.nodes.keys() .filter(|k| k.to_lowercase().contains(&target.to_lowercase())) .cloned().collect(); match matches.len() { 0 => Err(format!("No entry for '{}'. Run 'init'?", target)), 1 => Ok(matches[0].clone()), n if n <= 10 => { let list = matches.join("\n "); Err(format!("Ambiguous '{}'. Matches:\n {}", target, list)) } n => Err(format!("Too many matches for '{}' ({}). Be more specific.", target, n)), } } /// Resolve a link target to (key, uuid). fn resolve_node_uuid(&self, target: &str) -> Option<(String, [u8; 16])> { let bare = strip_md_suffix(target); let n = self.nodes.get(&bare)?; Some((bare, n.uuid)) } /// Append retrieval event to retrieval.log without needing a Store instance. 
pub fn log_retrieval_static(query: &str, results: &[String]) { let path = memory_dir().join("retrieval.log"); let line = format!("[{}] q=\"{}\" hits={}\n", today(), query, results.len()); if let Ok(mut f) = fs::OpenOptions::new() .create(true).append(true).open(&path) { let _ = f.write_all(line.as_bytes()); } } /// Scan markdown files and index all memory units pub fn init_from_markdown(&mut self) -> Result { let dir = memory_dir(); let mut count = 0; if dir.exists() { // Build edge set for O(1) dedup during ingestion let mut edge_set = self.build_edge_set(); count = self.scan_dir_for_init(&dir, &mut edge_set)?; } Ok(count) } /// Build a HashSet of existing (source, target) UUID pairs for O(1) dedup. fn build_edge_set(&self) -> std::collections::HashSet<([u8; 16], [u8; 16])> { let mut set = std::collections::HashSet::with_capacity(self.relations.len() * 2); for r in &self.relations { set.insert((r.source, r.target)); set.insert((r.target, r.source)); } set } fn scan_dir_for_init( &mut self, dir: &Path, edge_set: &mut std::collections::HashSet<([u8; 16], [u8; 16])>, ) -> Result { let mut count = 0; let entries = fs::read_dir(dir) .map_err(|e| format!("read dir {}: {}", dir.display(), e))?; for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { count += self.scan_dir_for_init(&path, edge_set)?; continue; } let Some(ext) = path.extension() else { continue }; if ext != "md" { continue } let filename = path.file_name().unwrap().to_string_lossy().to_string(); let content = fs::read_to_string(&path) .map_err(|e| format!("read {}: {}", path.display(), e))?; let units = parse_units(&filename, &content); let (new_count, _) = self.ingest_units(&units, &filename)?; count += new_count; // Create relations from links let mut new_relations = Vec::new(); for unit in &units { let source_uuid = match self.nodes.get(&unit.key) { Some(n) => n.uuid, None => continue, }; for link in unit.marker_links.iter().chain(unit.md_links.iter()) { let Some((key, uuid)) = 
self.resolve_node_uuid(link) else { continue }; if !edge_set.contains(&(source_uuid, uuid)) { edge_set.insert((source_uuid, uuid)); edge_set.insert((uuid, source_uuid)); new_relations.push(new_relation( source_uuid, uuid, RelationType::Link, 1.0, &unit.key, &key, )); } } for cause in &unit.causes { let Some((key, uuid)) = self.resolve_node_uuid(cause) else { continue }; if !edge_set.contains(&(uuid, source_uuid)) { edge_set.insert((uuid, source_uuid)); new_relations.push(new_relation( uuid, source_uuid, RelationType::Causal, 1.0, &key, &unit.key, )); } } } if !new_relations.is_empty() { self.append_relations(&new_relations)?; self.relations.extend(new_relations); } } Ok(count) } /// Process parsed memory units: diff against existing nodes, persist changes. fn ingest_units(&mut self, units: &[MemoryUnit], filename: &str) -> Result<(usize, usize), String> { let node_type = classify_filename(filename); let mut new_nodes = Vec::new(); let mut updated_nodes = Vec::new(); for (pos, unit) in units.iter().enumerate() { if let Some(existing) = self.nodes.get(&unit.key) { if existing.content != unit.content || existing.position != pos as u32 { let mut node = existing.clone(); node.content = unit.content.clone(); node.position = pos as u32; node.version += 1; if let Some(ref s) = unit.state { node.state_tag = s.clone(); } if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); } updated_nodes.push(node); } } else { let mut node = new_node(&unit.key, &unit.content); node.node_type = node_type; node.position = pos as u32; if let Some(ref s) = unit.state { node.state_tag = s.clone(); } if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); } new_nodes.push(node); } } if !new_nodes.is_empty() { self.append_nodes(&new_nodes)?; for node in &new_nodes { self.uuid_to_key.insert(node.uuid, node.key.clone()); self.nodes.insert(node.key.clone(), node.clone()); } } if !updated_nodes.is_empty() { self.append_nodes(&updated_nodes)?; for node in &updated_nodes { 
self.nodes.insert(node.key.clone(), node.clone()); } } Ok((new_nodes.len(), updated_nodes.len())) } /// Import a markdown file into the store, parsing it into nodes. pub fn import_file(&mut self, path: &Path) -> Result<(usize, usize), String> { let filename = path.file_name().unwrap().to_string_lossy().to_string(); let content = fs::read_to_string(path) .map_err(|e| format!("read {}: {}", path.display(), e))?; let units = parse_units(&filename, &content); self.ingest_units(&units, &filename) } /// Gather all sections for a file key, sorted by position. pub fn file_sections(&self, file_key: &str) -> Option> { let prefix = format!("{}#", file_key); let mut sections: Vec<_> = self.nodes.values() .filter(|n| n.key == file_key || n.key.starts_with(&prefix)) .collect(); if sections.is_empty() { return None; } sections.sort_by_key(|n| n.position); Some(sections) } /// Render a file key as plain content (no mem markers). pub fn render_file(&self, file_key: &str) -> Option { let sections = self.file_sections(file_key)?; let mut output = String::new(); for node in §ions { output.push_str(&node.content); if !node.content.ends_with('\n') { output.push('\n'); } output.push('\n'); } Some(output.trim_end().to_string()) } /// Render a file key back to markdown with reconstituted mem markers. 
pub fn export_to_markdown(&self, file_key: &str) -> Option { let sections = self.file_sections(file_key)?; let mut output = String::new(); for node in §ions { if node.key.contains('#') { let section_id = node.key.rsplit_once('#').map_or("", |(_, s)| s); let links: Vec<_> = self.relations.iter() .filter(|r| r.source_key == node.key && !r.deleted && r.rel_type != RelationType::Causal) .map(|r| r.target_key.clone()) .collect(); let causes: Vec<_> = self.relations.iter() .filter(|r| r.target_key == node.key && !r.deleted && r.rel_type == RelationType::Causal) .map(|r| r.source_key.clone()) .collect(); let mut marker_parts = vec![format!("id={}", section_id)]; if !links.is_empty() { marker_parts.push(format!("links={}", links.join(","))); } if !causes.is_empty() { marker_parts.push(format!("causes={}", causes.join(","))); } output.push_str(&format!("\n", marker_parts.join(" "))); } output.push_str(&node.content); if !node.content.ends_with('\n') { output.push('\n'); } output.push('\n'); } Some(output.trim_end().to_string()) } /// Find the episodic node that best matches the given entry text. pub fn find_journal_node(&self, entry_text: &str) -> Option { if entry_text.is_empty() { return None; } let words: Vec<&str> = entry_text.split_whitespace() .filter(|w| w.len() > 5) .take(5) .collect(); let mut best_key = None; let mut best_score = 0; for (key, node) in &self.nodes { if node.node_type != NodeType::EpisodicSession { continue; } let content_lower = node.content.to_lowercase(); let score: usize = words.iter() .filter(|w| content_lower.contains(&w.to_lowercase())) .count(); if score > best_score { best_score = score; best_key = Some(key.clone()); } } best_key } }