diff --git a/src/hippocampus/store/capnp.rs b/src/hippocampus/store/capnp.rs index 1f997d3..ddd6531 100644 --- a/src/hippocampus/store/capnp.rs +++ b/src/hippocampus/store/capnp.rs @@ -1,13 +1,19 @@ -// Persistence layer: load, replay, append +// Cap'n Proto serialization and persistence // // capnp logs are the source of truth; redb provides indexed access. +// This module contains: +// - Serialization macros (capnp_enum!, capnp_message!) +// - Load/replay from capnp logs +// - Append to capnp logs +// - fsck (corruption repair) use super::{index, types::*}; use redb::ReadableTableMetadata; use crate::memory_capnp; +use super::Store; -use anyhow::{Context, Result}; +use anyhow::{anyhow, Context, Result}; use capnp::message; use capnp::serialize; @@ -16,6 +22,194 @@ use std::fs; use std::io::{BufReader, Seek}; use std::path::Path; +// --------------------------------------------------------------------------- +// Capnp serialization macros +// +// Declarative mapping between Rust types and capnp generated types. +// Adding a field to the schema means adding it in one place below; +// both read and write are generated from the same declaration. +// --------------------------------------------------------------------------- + +/// Generate to_capnp/from_capnp conversion methods for an enum. +macro_rules! capnp_enum { + ($rust_type:ident, $capnp_type:path, [$($variant:ident),+ $(,)?]) => { + impl $rust_type { + #[allow(clippy::wrong_self_convention, dead_code)] + pub(crate) fn to_capnp(&self) -> $capnp_type { + match self { + $(Self::$variant => <$capnp_type>::$variant,)+ + } + } + pub(crate) fn from_capnp(v: $capnp_type) -> Self { + match v { + $(<$capnp_type>::$variant => Self::$variant,)+ + } + } + } + }; +} + +/// Generate from_capnp/to_capnp methods for a struct with capnp serialization. 
+/// Fields are grouped by serialization kind: +/// text - capnp Text fields (String in Rust) +/// uuid - capnp Data fields ([u8; 16] in Rust) +/// prim - copy types (u32, i64, f32, f64, bool) +/// enm - enums with to_capnp/from_capnp methods +/// skip - Rust-only fields not in capnp (set to Default on read) +macro_rules! capnp_message { + ( + $struct:ident, + reader: $reader:ty, + builder: $builder:ty, + text: [$($tf:ident),* $(,)?], + uuid: [$($uf:ident),* $(,)?], + prim: [$($pf:ident),* $(,)?], + enm: [$($ef:ident: $et:ident),* $(,)?], + skip: [$($sf:ident),* $(,)?] $(,)? + ) => { + impl $struct { + pub fn from_capnp(r: $reader) -> Result { + paste::paste! { + Ok(Self { + $($tf: read_text(r.[]()),)* + $($uf: read_uuid(r.[]()),)* + $($pf: r.[](),)* + $($ef: $et::from_capnp( + r.[]().map_err(|_| anyhow!(concat!("bad ", stringify!($ef))))? + ),)* + $($sf: Default::default(),)* + }) + } + } + + pub fn to_capnp(&self, mut b: $builder) { + paste::paste! { + $(b.[](&self.$tf);)* + $(b.[](&self.$uf);)* + $(b.[](self.$pf);)* + $(b.[](self.$ef.to_capnp());)* + } + } + } + }; +} + +// --------------------------------------------------------------------------- +// Capnp helpers +// --------------------------------------------------------------------------- + +/// Read a capnp text field, returning empty string on any error +fn read_text(result: capnp::Result) -> String { + result.ok() + .and_then(|t| t.to_str().ok()) + .unwrap_or("") + .to_string() +} + +/// Read the first 16 bytes of a capnp data field as [u8; 16]; all zeros on error or if shorter than 16 bytes +fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] { + let mut out = [0u8; 16]; + if let Ok(data) = result + && data.len() >= 16 { + out.copy_from_slice(&data[..16]); + } + out +} + +// --------------------------------------------------------------------------- +// Type-to-capnp mappings +// --------------------------------------------------------------------------- + +capnp_enum!(NodeType, memory_capnp::NodeType, + [EpisodicSession, EpisodicDaily, EpisodicWeekly, 
Semantic, EpisodicMonthly]); + +capnp_enum!(RelationType, memory_capnp::RelationType, + [Link, Causal, Auto]); + +capnp_message!(Node, + reader: memory_capnp::content_node::Reader<'_>, + builder: memory_capnp::content_node::Builder<'_>, + text: [key, content, source_ref, provenance], + uuid: [uuid], + prim: [version, timestamp, weight, emotion, deleted, + retrievals, uses, wrongs, last_replayed, + spaced_repetition_interval, created_at, last_scored], + enm: [node_type: NodeType], + skip: [community_id, clustering_coefficient, degree], +); + +capnp_message!(Relation, + reader: memory_capnp::relation::Reader<'_>, + builder: memory_capnp::relation::Builder<'_>, + text: [source_key, target_key, provenance], + uuid: [uuid, source, target], + prim: [version, timestamp, strength, deleted], + enm: [rel_type: RelationType], + skip: [], +); + +// --------------------------------------------------------------------------- +// Migration helpers (legacy provenance enum → string) +// --------------------------------------------------------------------------- + +/// Convert legacy capnp provenance enum to string label. +fn legacy_provenance_label(p: memory_capnp::Provenance) -> &'static str { + use memory_capnp::Provenance::*; + match p { + Manual => "manual", + Journal => "journal", + Agent => "agent", + Dream => "dream", + Derived => "derived", + AgentExperienceMine => "agent:experience-mine", + AgentKnowledgeObservation => "agent:knowledge-observation", + AgentKnowledgePattern => "agent:knowledge-pattern", + AgentKnowledgeConnector => "agent:knowledge-connector", + AgentKnowledgeChallenger => "agent:knowledge-challenger", + AgentConsolidate => "agent:consolidate", + AgentDigest => "agent:digest", + AgentFactMine => "agent:fact-mine", + AgentDecay => "agent:decay", + } +} + +impl Node { + /// Read from capnp with migration: if the new provenance text field + /// is empty (old record), fall back to the deprecated provenanceOld enum. 
+ pub fn from_capnp_migrate(r: memory_capnp::content_node::Reader<'_>) -> Result { + let mut node = Self::from_capnp(r)?; + if node.provenance.is_empty() + && let Ok(old) = r.get_provenance_old() { + node.provenance = legacy_provenance_label(old).to_string(); + } + // Sanitize timestamps: old capnp records have raw offsets instead + // of unix epoch. Anything past year 2100 (~4102444800) is bogus. + const MAX_SANE_EPOCH: i64 = 4_102_444_800; + if node.timestamp > MAX_SANE_EPOCH || node.timestamp < 0 { + node.timestamp = node.created_at; + } + if node.created_at > MAX_SANE_EPOCH || node.created_at < 0 { + node.created_at = node.timestamp.min(MAX_SANE_EPOCH); + } + Ok(node) + } +} + +impl Relation { + pub fn from_capnp_migrate(r: memory_capnp::relation::Reader<'_>) -> Result { + let mut rel = Self::from_capnp(r)?; + if rel.provenance.is_empty() + && let Ok(old) = r.get_provenance_old() { + rel.provenance = legacy_provenance_label(old).to_string(); + } + Ok(rel) + } +} + +// --------------------------------------------------------------------------- +// Store persistence methods +// --------------------------------------------------------------------------- + impl Store { /// Load store by replaying capnp logs, then open/verify redb indices. pub fn load() -> Result { diff --git a/src/hippocampus/store/mod.rs b/src/hippocampus/store/mod.rs index 9812c54..29af846 100644 --- a/src/hippocampus/store/mod.rs +++ b/src/hippocampus/store/mod.rs @@ -7,9 +7,9 @@ // redb provides indexed access; Store struct holds in-memory state. // // Module layout: -// types.rs — Node, Relation, enums, capnp macros, path helpers +// types.rs — Node, Relation, enums, path/time helpers +// capnp.rs — serialization macros, log IO (load, replay, append, fsck) // index.rs — redb index operations -// capnp.rs — capnp log IO (load, replay, append, fsck) // ops.rs — mutations (upsert, delete, rename, etc.) 
// view.rs — StoreView trait for read-only access @@ -23,7 +23,7 @@ mod view; pub use types::{ memory_dir, nodes_path, now_epoch, epoch_to_local, format_date, format_datetime, format_datetime_space, compact_timestamp, today, - Node, Relation, NodeType, RelationType, Store, + Node, Relation, NodeType, RelationType, new_node, new_relation, }; pub use view::StoreView; @@ -32,6 +32,7 @@ pub use ops::current_provenance; use crate::graph::{self, Graph}; +use std::collections::HashMap; use anyhow::{bail, Result}; /// Strip .md suffix from a key, handling both bare keys and section keys. @@ -45,6 +46,31 @@ pub fn strip_md_suffix(key: &str) -> String { } } +// The full in-memory store +pub struct Store { + pub nodes: HashMap, // key → latest node + pub uuid_to_key: HashMap<[u8; 16], String>, // uuid → key (rebuilt from nodes) + pub relations: Vec, // all active relations + /// Log sizes at load time — used for staleness detection. + pub(crate) loaded_nodes_size: u64, + pub(crate) loaded_rels_size: u64, + /// redb index database + pub(crate) db: Option, +} + +impl Default for Store { + fn default() -> Self { + Store { + nodes: HashMap::new(), + uuid_to_key: HashMap::new(), + relations: Vec::new(), + loaded_nodes_size: 0, + loaded_rels_size: 0, + db: None, + } + } +} + impl Store { pub fn build_graph(&self) -> Graph { graph::build_graph(self) diff --git a/src/hippocampus/store/ops.rs b/src/hippocampus/store/ops.rs index cd3ee97..d7def92 100644 --- a/src/hippocampus/store/ops.rs +++ b/src/hippocampus/store/ops.rs @@ -2,7 +2,7 @@ // // CRUD (upsert, delete), maintenance (decay, cap_degree), and graph metrics. 
-use super::{index, types::*}; +use super::{index, types::*, Store}; use anyhow::{anyhow, bail, Result}; use std::collections::{HashMap, HashSet}; diff --git a/src/hippocampus/store/types.rs b/src/hippocampus/store/types.rs index 4db374b..c679f6d 100644 --- a/src/hippocampus/store/types.rs +++ b/src/hippocampus/store/types.rs @@ -1,90 +1,14 @@ // Core types for the memory store // -// Node, Relation, enums, Params, and supporting types. Also contains -// the capnp serialization macros that generate bidirectional conversion. +// Node, Relation, enums, path helpers, time helpers (Store lives in mod.rs). +// capnp serialization is in capnp.rs. -use crate::memory_capnp; - -use anyhow::{anyhow, Result}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use std::collections::HashMap; use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; -// --------------------------------------------------------------------------- -// Capnp serialization macros -// -// Declarative mapping between Rust types and capnp generated types. -// Adding a field to the schema means adding it in one place below; -// both read and write are generated from the same declaration. -// --------------------------------------------------------------------------- - -/// Generate to_capnp/from_capnp conversion methods for an enum. -macro_rules! capnp_enum { - ($rust_type:ident, $capnp_type:path, [$($variant:ident),+ $(,)?]) => { - impl $rust_type { - #[allow(clippy::wrong_self_convention, dead_code)] - pub(crate) fn to_capnp(&self) -> $capnp_type { - match self { - $(Self::$variant => <$capnp_type>::$variant,)+ - } - } - pub(crate) fn from_capnp(v: $capnp_type) -> Self { - match v { - $(<$capnp_type>::$variant => Self::$variant,)+ - } - } - } - }; -} - -/// Generate from_capnp/to_capnp methods for a struct with capnp serialization. 
-/// Fields are grouped by serialization kind: -/// text - capnp Text fields (String in Rust) -/// uuid - capnp Data fields ([u8; 16] in Rust) -/// prim - copy types (u32, f32, f64, bool) -/// enm - enums with to_capnp/from_capnp methods -/// skip - Rust-only fields not in capnp (set to Default on read) -macro_rules! capnp_message { - ( - $struct:ident, - reader: $reader:ty, - builder: $builder:ty, - text: [$($tf:ident),* $(,)?], - uuid: [$($uf:ident),* $(,)?], - prim: [$($pf:ident),* $(,)?], - enm: [$($ef:ident: $et:ident),* $(,)?], - skip: [$($sf:ident),* $(,)?] $(,)? - ) => { - impl $struct { - pub fn from_capnp(r: $reader) -> Result { - paste::paste! { - Ok(Self { - $($tf: read_text(r.[]()),)* - $($uf: read_uuid(r.[]()),)* - $($pf: r.[](),)* - $($ef: $et::from_capnp( - r.[]().map_err(|_| anyhow!(concat!("bad ", stringify!($ef))))? - ),)* - $($sf: Default::default(),)* - }) - } - } - - pub fn to_capnp(&self, mut b: $builder) { - paste::paste! { - $(b.[](&self.$tf);)* - $(b.[](&self.$uf);)* - $(b.[](self.$pf);)* - $(b.[](self.$ef.to_capnp());)* - } - } - } - }; -} - pub fn memory_dir() -> PathBuf { crate::config::get().data_dir.clone() } @@ -226,133 +150,6 @@ pub enum RelationType { Auto, } -capnp_enum!(NodeType, memory_capnp::NodeType, - [EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic, EpisodicMonthly]); - -capnp_enum!(RelationType, memory_capnp::RelationType, - [Link, Causal, Auto]); - -capnp_message!(Node, - reader: memory_capnp::content_node::Reader<'_>, - builder: memory_capnp::content_node::Builder<'_>, - text: [key, content, source_ref, provenance], - uuid: [uuid], - prim: [version, timestamp, weight, emotion, deleted, - retrievals, uses, wrongs, last_replayed, - spaced_repetition_interval, created_at, last_scored], - enm: [node_type: NodeType], - skip: [community_id, clustering_coefficient, degree], -); - -/// Convert legacy capnp provenance enum to string label. 
-fn legacy_provenance_label(p: memory_capnp::Provenance) -> &'static str { - use memory_capnp::Provenance::*; - match p { - Manual => "manual", - Journal => "journal", - Agent => "agent", - Dream => "dream", - Derived => "derived", - AgentExperienceMine => "agent:experience-mine", - AgentKnowledgeObservation => "agent:knowledge-observation", - AgentKnowledgePattern => "agent:knowledge-pattern", - AgentKnowledgeConnector => "agent:knowledge-connector", - AgentKnowledgeChallenger => "agent:knowledge-challenger", - AgentConsolidate => "agent:consolidate", - AgentDigest => "agent:digest", - AgentFactMine => "agent:fact-mine", - AgentDecay => "agent:decay", - } -} - -impl Node { - /// Read from capnp with migration: if the new provenance text field - /// is empty (old record), fall back to the deprecated provenanceOld enum. - pub fn from_capnp_migrate(r: memory_capnp::content_node::Reader<'_>) -> Result { - let mut node = Self::from_capnp(r)?; - if node.provenance.is_empty() - && let Ok(old) = r.get_provenance_old() { - node.provenance = legacy_provenance_label(old).to_string(); - } - // Sanitize timestamps: old capnp records have raw offsets instead - // of unix epoch. Anything past year 2100 (~4102444800) is bogus. 
- const MAX_SANE_EPOCH: i64 = 4_102_444_800; - if node.timestamp > MAX_SANE_EPOCH || node.timestamp < 0 { - node.timestamp = node.created_at; - } - if node.created_at > MAX_SANE_EPOCH || node.created_at < 0 { - node.created_at = node.timestamp.min(MAX_SANE_EPOCH); - } - Ok(node) - } -} - -capnp_message!(Relation, - reader: memory_capnp::relation::Reader<'_>, - builder: memory_capnp::relation::Builder<'_>, - text: [source_key, target_key, provenance], - uuid: [uuid, source, target], - prim: [version, timestamp, strength, deleted], - enm: [rel_type: RelationType], - skip: [], -); - -impl Relation { - pub fn from_capnp_migrate(r: memory_capnp::relation::Reader<'_>) -> Result { - let mut rel = Self::from_capnp(r)?; - if rel.provenance.is_empty() - && let Ok(old) = r.get_provenance_old() { - rel.provenance = legacy_provenance_label(old).to_string(); - } - Ok(rel) - } -} - -// The full in-memory store -pub struct Store { - pub nodes: HashMap, // key → latest node - pub uuid_to_key: HashMap<[u8; 16], String>, // uuid → key (rebuilt from nodes) - pub relations: Vec, // all active relations - /// Log sizes at load time — used for staleness detection. 
- pub(crate) loaded_nodes_size: u64, - pub(crate) loaded_rels_size: u64, - /// redb index database - pub(crate) db: Option, -} - -impl Default for Store { - fn default() -> Self { - Store { - nodes: HashMap::new(), - uuid_to_key: HashMap::new(), - relations: Vec::new(), - loaded_nodes_size: 0, - loaded_rels_size: 0, - db: None, - } - } -} - -// Cap'n Proto serialization helpers - -/// Read a capnp text field, returning empty string on any error -pub(crate) fn read_text(result: capnp::Result) -> String { - result.ok() - .and_then(|t| t.to_str().ok()) - .unwrap_or("") - .to_string() -} - -/// Read a capnp data field as [u8; 16], zero-padded -pub(crate) fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] { - let mut out = [0u8; 16]; - if let Ok(data) = result - && data.len() >= 16 { - out.copy_from_slice(&data[..16]); - } - out -} - /// Create a new node with defaults pub fn new_node(key: &str, content: &str) -> Node { Node { diff --git a/src/hippocampus/store/view.rs b/src/hippocampus/store/view.rs index d51a389..dedeae9 100644 --- a/src/hippocampus/store/view.rs +++ b/src/hippocampus/store/view.rs @@ -1,6 +1,7 @@ // Read-only access abstraction for the memory store use super::types::*; +use super::Store; // --------------------------------------------------------------------------- // StoreView: read-only access trait for search and graph code.