consciousness/src/store/mod.rs
ProofOfConcept 63910e987c fsck: add store integrity check and repair command
Reads each capnp log message sequentially, validates framing and
content. On first corrupt message, truncates to last good position
and removes stale caches so next load replays from repaired log.

Wired up as `poc-memory fsck`.
2026-03-08 18:31:19 -04:00

357 lines
14 KiB
Rust

// Append-only Cap'n Proto storage + derived KV cache
//
// Two log files are source of truth:
// nodes.capnp - ContentNode messages
// relations.capnp - Relation messages
//
// The Store struct is the derived cache: latest version per UUID,
// rebuilt from logs when stale. Three-tier load strategy:
// 1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
// 2. bincode cache (state.bin) — ~10ms
// 3. capnp log replay — ~40ms
// Staleness: log file sizes embedded in cache headers.
//
// Module layout:
// types.rs — Node, Relation, enums, capnp macros, path helpers
// parse.rs — markdown → MemoryUnit parsing
// view.rs — zero-copy read-only access (StoreView, MmapView)
// persist.rs — load, save, replay, append, snapshot (all disk IO)
// ops.rs — mutations (upsert, delete, decay, cap_degree, etc.)
// mod.rs — re-exports, key resolution, ingestion, rendering
mod types;
mod parse;
mod view;
mod persist;
mod ops;
// Re-export everything callers need
pub use types::*;
pub use parse::{MemoryUnit, parse_units};
pub use view::{StoreView, AnyView};
pub use persist::fsck;
use crate::graph::{self, Graph};
use std::fs;
use std::io::Write as IoWrite;
use std::path::Path;
use parse::classify_filename;
impl Store {
pub fn build_graph(&self) -> Graph {
graph::build_graph(self)
}
pub fn resolve_key(&self, target: &str) -> Result<String, String> {
let normalized = if target.contains('#') {
let parts: Vec<&str> = target.splitn(2, '#').collect();
let file = if parts[0].ends_with(".md") {
parts[0].to_string()
} else {
format!("{}.md", parts[0])
};
format!("{}#{}", file, parts[1])
} else if target.ends_with(".md") {
target.to_string()
} else {
format!("{}.md", target)
};
if self.nodes.contains_key(&normalized) {
return Ok(normalized);
}
// Check redirects for moved sections (e.g. reflections.md split)
if let Some(redirect) = self.resolve_redirect(&normalized) {
if self.nodes.contains_key(&redirect) {
return Ok(redirect);
}
}
let matches: Vec<_> = self.nodes.keys()
.filter(|k| k.to_lowercase().contains(&target.to_lowercase()))
.cloned().collect();
match matches.len() {
0 => Err(format!("No entry for '{}'. Run 'init'?", target)),
1 => Ok(matches[0].clone()),
n if n <= 10 => {
let list = matches.join("\n ");
Err(format!("Ambiguous '{}'. Matches:\n {}", target, list))
}
n => Err(format!("Too many matches for '{}' ({}). Be more specific.", target, n)),
}
}
/// Redirect table for sections that moved between files.
/// Like HTTP 301s — the old key resolves to the new location.
fn resolve_redirect(&self, key: &str) -> Option<String> {
// Sections moved from reflections.md to split files (2026-02-28)
static REDIRECTS: &[(&str, &str)] = &[
("reflections.md#pearl-lessons", "reflections-reading.md#pearl-lessons"),
("reflections.md#banks-lessons", "reflections-reading.md#banks-lessons"),
("reflections.md#mother-night", "reflections-reading.md#mother-night"),
("reflections.md#zoom-navigation", "reflections-zoom.md#zoom-navigation"),
("reflections.md#independence-of-components", "reflections-zoom.md#independence-of-components"),
("reflections.md#dream-marathon-2", "reflections-dreams.md#dream-marathon-2"),
("reflections.md#dream-through-line", "reflections-dreams.md#dream-through-line"),
("reflections.md#orthogonality-universal", "reflections-dreams.md#orthogonality-universal"),
("reflections.md#constraints-constitutive", "reflections-dreams.md#constraints-constitutive"),
("reflections.md#casualness-principle", "reflections-dreams.md#casualness-principle"),
("reflections.md#convention-boundary", "reflections-dreams.md#convention-boundary"),
("reflections.md#tension-brake", "reflections-dreams.md#tension-brake"),
];
REDIRECTS.iter()
.find(|(from, _)| *from == key)
.map(|(_, to)| to.to_string())
}
/// Resolve a link target to (key, uuid), trying direct lookup then redirect.
fn resolve_node_uuid(&self, target: &str) -> Option<(String, [u8; 16])> {
if let Some(n) = self.nodes.get(target) {
return Some((target.to_string(), n.uuid));
}
let redirected = self.resolve_redirect(target)?;
let n = self.nodes.get(&redirected)?;
Some((redirected, n.uuid))
}
/// Append retrieval event to retrieval.log without needing a Store instance.
pub fn log_retrieval_static(query: &str, results: &[String]) {
let path = memory_dir().join("retrieval.log");
let line = format!("[{}] q=\"{}\" hits={}\n", today(), query, results.len());
if let Ok(mut f) = fs::OpenOptions::new()
.create(true).append(true).open(&path) {
let _ = f.write_all(line.as_bytes());
}
}
/// Scan markdown files and index all memory units
pub fn init_from_markdown(&mut self) -> Result<usize, String> {
let dir = memory_dir();
let mut count = 0;
if dir.exists() {
count = self.scan_dir_for_init(&dir)?;
}
Ok(count)
}
fn scan_dir_for_init(&mut self, dir: &Path) -> Result<usize, String> {
let mut count = 0;
let entries = fs::read_dir(dir)
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
count += self.scan_dir_for_init(&path)?;
continue;
}
let Some(ext) = path.extension() else { continue };
if ext != "md" { continue }
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = fs::read_to_string(&path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
let units = parse_units(&filename, &content);
let (new_count, _) = self.ingest_units(&units, &filename)?;
count += new_count;
// Create relations from links
let mut new_relations = Vec::new();
for unit in &units {
let source_uuid = match self.nodes.get(&unit.key) {
Some(n) => n.uuid,
None => continue,
};
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
let Some((key, uuid)) = self.resolve_node_uuid(link) else { continue };
let exists = self.relations.iter().any(|r|
(r.source == source_uuid && r.target == uuid) ||
(r.source == uuid && r.target == source_uuid));
if !exists {
new_relations.push(new_relation(
source_uuid, uuid, RelationType::Link, 1.0,
&unit.key, &key,
));
}
}
for cause in &unit.causes {
let Some((key, uuid)) = self.resolve_node_uuid(cause) else { continue };
let exists = self.relations.iter().any(|r|
r.source == uuid && r.target == source_uuid
&& r.rel_type == RelationType::Causal);
if !exists {
new_relations.push(new_relation(
uuid, source_uuid, RelationType::Causal, 1.0,
&key, &unit.key,
));
}
}
}
if !new_relations.is_empty() {
self.append_relations(&new_relations)?;
self.relations.extend(new_relations);
}
}
Ok(count)
}
/// Process parsed memory units: diff against existing nodes, persist changes.
fn ingest_units(&mut self, units: &[MemoryUnit], filename: &str) -> Result<(usize, usize), String> {
let node_type = classify_filename(filename);
let mut new_nodes = Vec::new();
let mut updated_nodes = Vec::new();
for (pos, unit) in units.iter().enumerate() {
if let Some(existing) = self.nodes.get(&unit.key) {
if existing.content != unit.content || existing.position != pos as u32 {
let mut node = existing.clone();
node.content = unit.content.clone();
node.position = pos as u32;
node.version += 1;
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
updated_nodes.push(node);
}
} else {
let mut node = new_node(&unit.key, &unit.content);
node.node_type = node_type;
node.position = pos as u32;
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
new_nodes.push(node);
}
}
if !new_nodes.is_empty() {
self.append_nodes(&new_nodes)?;
for node in &new_nodes {
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node.clone());
}
}
if !updated_nodes.is_empty() {
self.append_nodes(&updated_nodes)?;
for node in &updated_nodes {
self.nodes.insert(node.key.clone(), node.clone());
}
}
Ok((new_nodes.len(), updated_nodes.len()))
}
/// Import a markdown file into the store, parsing it into nodes.
pub fn import_file(&mut self, path: &Path) -> Result<(usize, usize), String> {
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = fs::read_to_string(path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
let units = parse_units(&filename, &content);
self.ingest_units(&units, &filename)
}
/// Gather all sections for a file key, sorted by position.
pub fn file_sections(&self, file_key: &str) -> Option<Vec<&Node>> {
let prefix = format!("{}#", file_key);
let mut sections: Vec<_> = self.nodes.values()
.filter(|n| n.key == file_key || n.key.starts_with(&prefix))
.collect();
if sections.is_empty() {
return None;
}
sections.sort_by_key(|n| n.position);
Some(sections)
}
/// Render a file key as plain content (no mem markers).
pub fn render_file(&self, file_key: &str) -> Option<String> {
let sections = self.file_sections(file_key)?;
let mut output = String::new();
for node in &sections {
output.push_str(&node.content);
if !node.content.ends_with('\n') {
output.push('\n');
}
output.push('\n');
}
Some(output.trim_end().to_string())
}
/// Render a file key back to markdown with reconstituted mem markers.
pub fn export_to_markdown(&self, file_key: &str) -> Option<String> {
let sections = self.file_sections(file_key)?;
let mut output = String::new();
for node in &sections {
if node.key.contains('#') {
let section_id = node.key.rsplit_once('#').map_or("", |(_, s)| s);
let links: Vec<_> = self.relations.iter()
.filter(|r| r.source_key == node.key && !r.deleted
&& r.rel_type != RelationType::Causal)
.map(|r| r.target_key.clone())
.collect();
let causes: Vec<_> = self.relations.iter()
.filter(|r| r.target_key == node.key && !r.deleted
&& r.rel_type == RelationType::Causal)
.map(|r| r.source_key.clone())
.collect();
let mut marker_parts = vec![format!("id={}", section_id)];
if !links.is_empty() {
marker_parts.push(format!("links={}", links.join(",")));
}
if !causes.is_empty() {
marker_parts.push(format!("causes={}", causes.join(",")));
}
output.push_str(&format!("<!-- mem: {} -->\n", marker_parts.join(" ")));
}
output.push_str(&node.content);
if !node.content.ends_with('\n') {
output.push('\n');
}
output.push('\n');
}
Some(output.trim_end().to_string())
}
/// Find the journal node that best matches the given entry text.
pub fn find_journal_node(&self, entry_text: &str) -> Option<String> {
if entry_text.is_empty() {
return None;
}
let words: Vec<&str> = entry_text.split_whitespace()
.filter(|w| w.len() > 5)
.take(5)
.collect();
let mut best_key = None;
let mut best_score = 0;
for (key, node) in &self.nodes {
if !key.starts_with("journal.md#") {
continue;
}
let content_lower = node.content.to_lowercase();
let score: usize = words.iter()
.filter(|w| content_lower.contains(&w.to_lowercase()))
.count();
if score > best_score {
best_score = score;
best_key = Some(key.clone());
}
}
best_key
}
}