consciousness/poc-memory/src/store/mod.rs

// Append-only Cap'n Proto storage + derived KV cache
//
// Two log files are source of truth:
//   nodes.capnp     - ContentNode messages
//   relations.capnp - Relation messages
//
// The Store struct is the derived cache: latest version per UUID,
// rebuilt from logs when stale. Three-tier load strategy:
//   1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
//   2. bincode cache (state.bin) — ~10ms
//   3. capnp log replay — ~40ms
// Staleness: log file sizes embedded in cache headers.
//
// Module layout:
//   types.rs   — Node, Relation, enums, capnp macros, path helpers
//   parse.rs   — markdown → MemoryUnit parsing
//   view.rs    — zero-copy read-only access (StoreView, MmapView)
//   persist.rs — load, save, replay, append, snapshot (all disk IO)
//   ops.rs     — mutations (upsert, delete, decay, cap_degree, etc.)
//   mod.rs     — re-exports, key resolution, ingestion, rendering

mod types;
mod parse;
mod view;
mod persist;
mod ops;

// Re-export everything callers need
pub use types::*;
pub use parse::{MemoryUnit, parse_units};
pub use view::{StoreView, AnyView};
pub use persist::fsck;
pub use persist::strip_md_keys;

use crate::graph::{self, Graph};

use std::fs;
use std::io::Write as IoWrite;
use std::path::Path;

use parse::classify_filename;

/// Strip .md suffix from a key, handling both bare keys and section keys.
/// "journal.md#j-2026" → "journal#j-2026", "identity.md" → "identity", "identity" → "identity"
pub fn strip_md_suffix(key: &str) -> String {
    if let Some((file, section)) = key.split_once('#') {
        let bare = file.strip_suffix(".md").unwrap_or(file);
        format!("{}#{}", bare, section)
    } else {
        key.strip_suffix(".md").unwrap_or(key).to_string()
    }
}

impl Store {
    pub fn build_graph(&self) -> Graph {
        graph::build_graph(self)
    }

    pub fn resolve_key(&self, target: &str) -> Result<String, String> {
        // Strip .md suffix if present — keys no longer use it
        let bare = strip_md_suffix(target);

        if self.nodes.contains_key(&bare) {
            return Ok(bare);
        }

        let matches: Vec<_> = self.nodes.keys()
            .filter(|k| k.to_lowercase().contains(&target.to_lowercase()))
            .cloned().collect();

        match matches.len() {
            0 => Err(format!("No entry for '{}'. Run 'init'?", target)),
            1 => Ok(matches[0].clone()),
            n if n <= 10 => {
                let list = matches.join("\n  ");
                Err(format!("Ambiguous '{}'. Matches:\n  {}", target, list))
            }
            n => Err(format!("Too many matches for '{}' ({}). Be more specific.", target, n)),
        }
    }

    /// Resolve a link target to (key, uuid).
    fn resolve_node_uuid(&self, target: &str) -> Option<(String, [u8; 16])> {
        let bare = strip_md_suffix(target);
        let n = self.nodes.get(&bare)?;
        Some((bare, n.uuid))
    }

    /// Append retrieval event to retrieval.log without needing a Store instance.
    pub fn log_retrieval_static(query: &str, results: &[String]) {
        let path = memory_dir().join("retrieval.log");
        let line = format!("[{}] q=\"{}\" hits={}\n", today(), query, results.len());
        if let Ok(mut f) = fs::OpenOptions::new()
            .create(true).append(true).open(&path) {
            let _ = f.write_all(line.as_bytes());
        }
    }

    /// Scan markdown files and index all memory units
    pub fn init_from_markdown(&mut self) -> Result<usize, String> {
        let dir = memory_dir();
        let mut count = 0;
        if dir.exists() {
            // Build edge set for O(1) dedup during ingestion
            let mut edge_set = self.build_edge_set();
            count = self.scan_dir_for_init(&dir, &mut edge_set)?;
        }
        Ok(count)
    }

    /// Build a HashSet of existing (source, target) UUID pairs for O(1) dedup.
    fn build_edge_set(&self) -> std::collections::HashSet<([u8; 16], [u8; 16])> {
        let mut set = std::collections::HashSet::with_capacity(self.relations.len() * 2);
        for r in &self.relations {
            set.insert((r.source, r.target));
            set.insert((r.target, r.source));
        }
        set
    }

    fn scan_dir_for_init(
        &mut self,
        dir: &Path,
        edge_set: &mut std::collections::HashSet<([u8; 16], [u8; 16])>,
    ) -> Result<usize, String> {
        let mut count = 0;
        let entries = fs::read_dir(dir)
            .map_err(|e| format!("read dir {}: {}", dir.display(), e))?;

        for entry in entries.flatten() {
            let path = entry.path();
            if path.is_dir() {
                count += self.scan_dir_for_init(&path, edge_set)?;
                continue;
            }
            let Some(ext) = path.extension() else { continue };
            if ext != "md" { continue }

            let filename = path.file_name().unwrap().to_string_lossy().to_string();
            let content = fs::read_to_string(&path)
                .map_err(|e| format!("read {}: {}", path.display(), e))?;

            let units = parse_units(&filename, &content);
            let (new_count, _) = self.ingest_units(&units, &filename)?;
            count += new_count;

            // Create relations from links
            let mut new_relations = Vec::new();
            for unit in &units {
                let source_uuid = match self.nodes.get(&unit.key) {
                    Some(n) => n.uuid,
                    None => continue,
                };

                for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
                    let Some((key, uuid)) = self.resolve_node_uuid(link) else { continue };
                    if !edge_set.contains(&(source_uuid, uuid)) {
                        edge_set.insert((source_uuid, uuid));
                        edge_set.insert((uuid, source_uuid));
                        new_relations.push(new_relation(
                            source_uuid, uuid, RelationType::Link, 1.0,
                            &unit.key, &key,
                        ));
                    }
                }

                for cause in &unit.causes {
                    let Some((key, uuid)) = self.resolve_node_uuid(cause) else { continue };
                    if !edge_set.contains(&(uuid, source_uuid)) {
                        edge_set.insert((uuid, source_uuid));
                        new_relations.push(new_relation(
                            uuid, source_uuid, RelationType::Causal, 1.0,
                            &key, &unit.key,
                        ));
                    }
                }
            }

            if !new_relations.is_empty() {
                self.append_relations(&new_relations)?;
                self.relations.extend(new_relations);
            }
        }
        Ok(count)
    }

    /// Process parsed memory units: diff against existing nodes, persist changes.
    fn ingest_units(&mut self, units: &[MemoryUnit], filename: &str) -> Result<(usize, usize), String> {
        let node_type = classify_filename(filename);
        let mut new_nodes = Vec::new();
        let mut updated_nodes = Vec::new();

        for (pos, unit) in units.iter().enumerate() {
            if let Some(existing) = self.nodes.get(&unit.key) {
                if existing.content != unit.content || existing.position != pos as u32 {
                    let mut node = existing.clone();
                    node.content = unit.content.clone();
                    node.position = pos as u32;
                    node.version += 1;
                    if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
                    if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
                    updated_nodes.push(node);
                }
            } else {
                let mut node = new_node(&unit.key, &unit.content);
                node.node_type = node_type;
                node.position = pos as u32;
                if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
                if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
                new_nodes.push(node);
            }
        }

        if !new_nodes.is_empty() {
            self.append_nodes(&new_nodes)?;
            for node in &new_nodes {
                self.uuid_to_key.insert(node.uuid, node.key.clone());
                self.nodes.insert(node.key.clone(), node.clone());
            }
        }
        if !updated_nodes.is_empty() {
            self.append_nodes(&updated_nodes)?;
            for node in &updated_nodes {
                self.nodes.insert(node.key.clone(), node.clone());
            }
        }

        Ok((new_nodes.len(), updated_nodes.len()))
    }

    /// Import a markdown file into the store, parsing it into nodes.
    pub fn import_file(&mut self, path: &Path) -> Result<(usize, usize), String> {
        let filename = path.file_name().unwrap().to_string_lossy().to_string();
        let content = fs::read_to_string(path)
            .map_err(|e| format!("read {}: {}", path.display(), e))?;
        let units = parse_units(&filename, &content);
        self.ingest_units(&units, &filename)
    }

    /// Gather all sections for a file key, sorted by position.
    pub fn file_sections(&self, file_key: &str) -> Option<Vec<&Node>> {
        let prefix = format!("{}#", file_key);
        let mut sections: Vec<_> = self.nodes.values()
            .filter(|n| n.key == file_key || n.key.starts_with(&prefix))
            .collect();
        if sections.is_empty() {
            return None;
        }
        sections.sort_by_key(|n| n.position);
        Some(sections)
    }

    /// Render a file key as plain content (no mem markers).
    pub fn render_file(&self, file_key: &str) -> Option<String> {
        let sections = self.file_sections(file_key)?;
        let mut output = String::new();
        for node in &sections {
            output.push_str(&node.content);
            if !node.content.ends_with('\n') {
                output.push('\n');
            }
            output.push('\n');
        }
        Some(output.trim_end().to_string())
    }

    /// Render a file key back to markdown with reconstituted mem markers.
    pub fn export_to_markdown(&self, file_key: &str) -> Option<String> {
        let sections = self.file_sections(file_key)?;

        let mut output = String::new();
        for node in &sections {
            if node.key.contains('#') {
                let section_id = node.key.rsplit_once('#').map_or("", |(_, s)| s);

                let links: Vec<_> = self.relations.iter()
                    .filter(|r| r.source_key == node.key && !r.deleted
                            && r.rel_type != RelationType::Causal)
                    .map(|r| r.target_key.clone())
                    .collect();
                let causes: Vec<_> = self.relations.iter()
                    .filter(|r| r.target_key == node.key && !r.deleted
                            && r.rel_type == RelationType::Causal)
                    .map(|r| r.source_key.clone())
                    .collect();

                let mut marker_parts = vec![format!("id={}", section_id)];
                if !links.is_empty() {
                    marker_parts.push(format!("links={}", links.join(",")));
                }
                if !causes.is_empty() {
                    marker_parts.push(format!("causes={}", causes.join(",")));
                }

                output.push_str(&format!("<!-- mem: {} -->\n", marker_parts.join(" ")));
            }
            output.push_str(&node.content);
            if !node.content.ends_with('\n') {
                output.push('\n');
            }
            output.push('\n');
        }

        Some(output.trim_end().to_string())
    }

    /// Find the episodic node that best matches the given entry text.
    pub fn find_journal_node(&self, entry_text: &str) -> Option<String> {
        if entry_text.is_empty() {
            return None;
        }

        let words: Vec<&str> = entry_text.split_whitespace()
            .filter(|w| w.len() > 5)
            .take(5)
            .collect();

        let mut best_key = None;
        let mut best_score = 0;

        for (key, node) in &self.nodes {
            if node.node_type != NodeType::EpisodicSession {
                continue;
            }
            let content_lower = node.content.to_lowercase();
            let score: usize = words.iter()
                .filter(|w| content_lower.contains(&w.to_lowercase()))
                .count();
            if score > best_score {
                best_score = score;
                best_key = Some(key.clone());
            }
        }

        best_key
    }
}