forked from kent/consciousness
store: move all capnp code to capnp.rs
Consolidate capnp serialization in one place:
- capnp_enum! and capnp_message! macros
- read_text/read_uuid helpers
- Type-to-capnp mappings
- from_capnp_migrate migration impls

types.rs now only has pure Rust types and helpers.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
e48ca2ecad
commit
ba53597cf2
5 changed files with 229 additions and 211 deletions
|
|
@ -1,13 +1,19 @@
|
|||
// Persistence layer: load, replay, append
|
||||
// Cap'n Proto serialization and persistence
|
||||
//
|
||||
// capnp logs are the source of truth; redb provides indexed access.
|
||||
// This module contains:
|
||||
// - Serialization macros (capnp_enum!, capnp_message!)
|
||||
// - Load/replay from capnp logs
|
||||
// - Append to capnp logs
|
||||
// - fsck (corruption repair)
|
||||
|
||||
use super::{index, types::*};
|
||||
use redb::ReadableTableMetadata;
|
||||
|
||||
use crate::memory_capnp;
|
||||
use super::Store;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use capnp::message;
|
||||
use capnp::serialize;
|
||||
|
||||
|
|
@ -16,6 +22,194 @@ use std::fs;
|
|||
use std::io::{BufReader, Seek};
|
||||
use std::path::Path;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Capnp serialization macros
|
||||
//
|
||||
// Declarative mapping between Rust types and capnp generated types.
|
||||
// Adding a field to the schema means adding it in one place below;
|
||||
// both read and write are generated from the same declaration.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Implement `to_capnp` / `from_capnp` on a Rust enum that mirrors a
/// capnp-generated enum variant-for-variant.
///
/// Arguments: the Rust enum, the path of the capnp enum, and the bracketed
/// list of variant names shared by both (a trailing comma is accepted).
/// Both matches are exhaustive, so a variant present in either enum but
/// missing from the list is a compile error.
macro_rules! capnp_enum {
    ($rust_ty:ident, $wire_ty:path, [$($name:ident),+ $(,)?]) => {
        impl $rust_ty {
            /// Map the capnp enum value onto the Rust variant of the same name.
            pub(crate) fn from_capnp(wire: $wire_ty) -> Self {
                match wire {
                    $(<$wire_ty>::$name => Self::$name,)+
                }
            }

            /// Map this value onto the capnp variant of the same name.
            #[allow(clippy::wrong_self_convention, dead_code)]
            pub(crate) fn to_capnp(&self) -> $wire_ty {
                match *self {
                    $(Self::$name => <$wire_ty>::$name,)+
                }
            }
        }
    };
}
|
||||
|
||||
/// Implement `from_capnp` / `to_capnp` on a struct from one declarative
/// field list, so the read and write paths cannot drift apart.
///
/// Fields are listed by how they are serialized:
///   text - capnp Text fields, held as `String`; read through `read_text`
///   uuid - capnp Data fields, held as `[u8; 16]`; read through `read_uuid`
///   prim - plain copy scalars (integers, floats, bool), passed through as-is
///   enm  - `field: Type` pairs whose type has `to_capnp`/`from_capnp`
///   skip - Rust-only fields absent from capnp; `Default::default()` on read,
///          never written
///
/// `from_capnp` fails only when an `enm` getter fails, with the error text
/// "bad <field>". Text and uuid read errors are absorbed by the helpers.
macro_rules! capnp_message {
    (
        $name:ident,
        reader: $reader_ty:ty,
        builder: $builder_ty:ty,
        text: [$($text:ident),* $(,)?],
        uuid: [$($id:ident),* $(,)?],
        prim: [$($prim:ident),* $(,)?],
        enm: [$($enm:ident: $enm_ty:ident),* $(,)?],
        skip: [$($skipped:ident),* $(,)?] $(,)?
    ) => {
        impl $name {
            /// Build the struct from a capnp reader.
            pub fn from_capnp(r: $reader_ty) -> Result<Self> {
                paste::paste! {
                    // Decode the only fallible fields first so a bad enum
                    // value bails out before any strings are allocated.
                    $(
                        let $enm = $enm_ty::from_capnp(
                            r.[<get_ $enm>]()
                                .map_err(|_| anyhow!(concat!("bad ", stringify!($enm))))?
                        );
                    )*
                    Ok(Self {
                        $($text: read_text(r.[<get_ $text>]()),)*
                        $($id: read_uuid(r.[<get_ $id>]()),)*
                        $($prim: r.[<get_ $prim>](),)*
                        $($enm,)*
                        $($skipped: Default::default(),)*
                    })
                }
            }

            /// Write every serialized field into a capnp builder.
            pub fn to_capnp(&self, mut b: $builder_ty) {
                paste::paste! {
                    $(b.[<set_ $text>](&self.$text);)*
                    $(b.[<set_ $id>](&self.$id);)*
                    $(b.[<set_ $prim>](self.$prim);)*
                    $(b.[<set_ $enm>](self.$enm.to_capnp());)*
                }
            }
        }
    };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Capnp helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Read a capnp text field, returning empty string on any error
|
||||
fn read_text(result: capnp::Result<capnp::text::Reader>) -> String {
|
||||
result.ok()
|
||||
.and_then(|t| t.to_str().ok())
|
||||
.unwrap_or("")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// Read a capnp data field as [u8; 16], zero-padded
|
||||
fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] {
|
||||
let mut out = [0u8; 16];
|
||||
if let Ok(data) = result
|
||||
&& data.len() >= 16 {
|
||||
out.copy_from_slice(&data[..16]);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Type-to-capnp mappings
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
capnp_enum!(NodeType, memory_capnp::NodeType,
|
||||
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic, EpisodicMonthly]);
|
||||
|
||||
capnp_enum!(RelationType, memory_capnp::RelationType,
|
||||
[Link, Causal, Auto]);
|
||||
|
||||
// Node <-> memory_capnp::content_node. The `skip` fields exist only on the
// Rust side and are reset to their defaults whenever a node is read.
capnp_message!(
    Node,
    reader: memory_capnp::content_node::Reader<'_>,
    builder: memory_capnp::content_node::Builder<'_>,
    text: [
        key,
        content,
        source_ref,
        provenance,
    ],
    uuid: [
        uuid,
    ],
    prim: [
        version,
        timestamp,
        weight,
        emotion,
        deleted,
        retrievals,
        uses,
        wrongs,
        last_replayed,
        spaced_repetition_interval,
        created_at,
        last_scored,
    ],
    enm: [
        node_type: NodeType,
    ],
    skip: [
        community_id,
        clustering_coefficient,
        degree,
    ],
);
|
||||
|
||||
// Relation <-> memory_capnp::relation. Every Rust field is serialized, so
// the `skip` list is empty.
capnp_message!(
    Relation,
    reader: memory_capnp::relation::Reader<'_>,
    builder: memory_capnp::relation::Builder<'_>,
    text: [
        source_key,
        target_key,
        provenance,
    ],
    uuid: [
        uuid,
        source,
        target,
    ],
    prim: [
        version,
        timestamp,
        strength,
        deleted,
    ],
    enm: [
        rel_type: RelationType,
    ],
    skip: [],
);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Migration helpers (legacy provenance enum → string)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Convert legacy capnp provenance enum to string label.
|
||||
fn legacy_provenance_label(p: memory_capnp::Provenance) -> &'static str {
|
||||
use memory_capnp::Provenance::*;
|
||||
match p {
|
||||
Manual => "manual",
|
||||
Journal => "journal",
|
||||
Agent => "agent",
|
||||
Dream => "dream",
|
||||
Derived => "derived",
|
||||
AgentExperienceMine => "agent:experience-mine",
|
||||
AgentKnowledgeObservation => "agent:knowledge-observation",
|
||||
AgentKnowledgePattern => "agent:knowledge-pattern",
|
||||
AgentKnowledgeConnector => "agent:knowledge-connector",
|
||||
AgentKnowledgeChallenger => "agent:knowledge-challenger",
|
||||
AgentConsolidate => "agent:consolidate",
|
||||
AgentDigest => "agent:digest",
|
||||
AgentFactMine => "agent:fact-mine",
|
||||
AgentDecay => "agent:decay",
|
||||
}
|
||||
}
|
||||
|
||||
impl Node {
|
||||
/// Read from capnp with migration: if the new provenance text field
|
||||
/// is empty (old record), fall back to the deprecated provenanceOld enum.
|
||||
pub fn from_capnp_migrate(r: memory_capnp::content_node::Reader<'_>) -> Result<Self> {
|
||||
let mut node = Self::from_capnp(r)?;
|
||||
if node.provenance.is_empty()
|
||||
&& let Ok(old) = r.get_provenance_old() {
|
||||
node.provenance = legacy_provenance_label(old).to_string();
|
||||
}
|
||||
// Sanitize timestamps: old capnp records have raw offsets instead
|
||||
// of unix epoch. Anything past year 2100 (~4102444800) is bogus.
|
||||
const MAX_SANE_EPOCH: i64 = 4_102_444_800;
|
||||
if node.timestamp > MAX_SANE_EPOCH || node.timestamp < 0 {
|
||||
node.timestamp = node.created_at;
|
||||
}
|
||||
if node.created_at > MAX_SANE_EPOCH || node.created_at < 0 {
|
||||
node.created_at = node.timestamp.min(MAX_SANE_EPOCH);
|
||||
}
|
||||
Ok(node)
|
||||
}
|
||||
}
|
||||
|
||||
impl Relation {
|
||||
pub fn from_capnp_migrate(r: memory_capnp::relation::Reader<'_>) -> Result<Self> {
|
||||
let mut rel = Self::from_capnp(r)?;
|
||||
if rel.provenance.is_empty()
|
||||
&& let Ok(old) = r.get_provenance_old() {
|
||||
rel.provenance = legacy_provenance_label(old).to_string();
|
||||
}
|
||||
Ok(rel)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Store persistence methods
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
impl Store {
|
||||
/// Load store by replaying capnp logs, then open/verify redb indices.
|
||||
pub fn load() -> Result<Store> {
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@
|
|||
// redb provides indexed access; Store struct holds in-memory state.
|
||||
//
|
||||
// Module layout:
|
||||
// types.rs — Node, Relation, enums, capnp macros, path helpers
|
||||
// types.rs — Node, Relation, enums, path/time helpers
|
||||
// capnp.rs — serialization macros, log IO (load, replay, append, fsck)
|
||||
// index.rs — redb index operations
|
||||
// capnp.rs — capnp log IO (load, replay, append, fsck)
|
||||
// ops.rs — mutations (upsert, delete, rename, etc.)
|
||||
// view.rs — StoreView trait for read-only access
|
||||
|
||||
|
|
@ -23,7 +23,7 @@ mod view;
|
|||
pub use types::{
|
||||
memory_dir, nodes_path,
|
||||
now_epoch, epoch_to_local, format_date, format_datetime, format_datetime_space, compact_timestamp, today,
|
||||
Node, Relation, NodeType, RelationType, Store,
|
||||
Node, Relation, NodeType, RelationType,
|
||||
new_node, new_relation,
|
||||
};
|
||||
pub use view::StoreView;
|
||||
|
|
@ -32,6 +32,7 @@ pub use ops::current_provenance;
|
|||
|
||||
use crate::graph::{self, Graph};
|
||||
|
||||
use std::collections::HashMap;
|
||||
use anyhow::{bail, Result};
|
||||
|
||||
/// Strip .md suffix from a key, handling both bare keys and section keys.
|
||||
|
|
@ -45,6 +46,31 @@ pub fn strip_md_suffix(key: &str) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
// The full in-memory store
|
||||
pub struct Store {
|
||||
pub nodes: HashMap<String, Node>, // key → latest node
|
||||
pub uuid_to_key: HashMap<[u8; 16], String>, // uuid → key (rebuilt from nodes)
|
||||
pub relations: Vec<Relation>, // all active relations
|
||||
/// Log sizes at load time — used for staleness detection.
|
||||
pub(crate) loaded_nodes_size: u64,
|
||||
pub(crate) loaded_rels_size: u64,
|
||||
/// redb index database
|
||||
pub(crate) db: Option<redb::Database>,
|
||||
}
|
||||
|
||||
impl Default for Store {
|
||||
fn default() -> Self {
|
||||
Store {
|
||||
nodes: HashMap::new(),
|
||||
uuid_to_key: HashMap::new(),
|
||||
relations: Vec::new(),
|
||||
loaded_nodes_size: 0,
|
||||
loaded_rels_size: 0,
|
||||
db: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Store {
|
||||
pub fn build_graph(&self) -> Graph {
|
||||
graph::build_graph(self)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
//
|
||||
// CRUD (upsert, delete), maintenance (decay, cap_degree), and graph metrics.
|
||||
|
||||
use super::{index, types::*};
|
||||
use super::{index, types::*, Store};
|
||||
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
|
|
|||
|
|
@ -1,90 +1,14 @@
|
|||
// Core types for the memory store
|
||||
//
|
||||
// Node, Relation, enums, Params, and supporting types. Also contains
|
||||
// the capnp serialization macros that generate bidirectional conversion.
|
||||
// Node, Relation, enums, path helpers, time helpers.
|
||||
// capnp serialization is in capnp.rs.
|
||||
|
||||
use crate::memory_capnp;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Capnp serialization macros
|
||||
//
|
||||
// Declarative mapping between Rust types and capnp generated types.
|
||||
// Adding a field to the schema means adding it in one place below;
|
||||
// both read and write are generated from the same declaration.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Implement `to_capnp` / `from_capnp` on a Rust enum that mirrors a
/// capnp-generated enum variant-for-variant.
///
/// Arguments: the Rust enum, the path of the capnp enum, and the bracketed
/// list of variant names shared by both (a trailing comma is accepted).
/// Both matches are exhaustive, so a variant present in either enum but
/// missing from the list is a compile error.
macro_rules! capnp_enum {
    ($rust_ty:ident, $wire_ty:path, [$($name:ident),+ $(,)?]) => {
        impl $rust_ty {
            /// Map the capnp enum value onto the Rust variant of the same name.
            pub(crate) fn from_capnp(wire: $wire_ty) -> Self {
                match wire {
                    $(<$wire_ty>::$name => Self::$name,)+
                }
            }

            /// Map this value onto the capnp variant of the same name.
            #[allow(clippy::wrong_self_convention, dead_code)]
            pub(crate) fn to_capnp(&self) -> $wire_ty {
                match *self {
                    $(Self::$name => <$wire_ty>::$name,)+
                }
            }
        }
    };
}
|
||||
|
||||
/// Implement `from_capnp` / `to_capnp` on a struct from one declarative
/// field list, so the read and write paths cannot drift apart.
///
/// Fields are listed by how they are serialized:
///   text - capnp Text fields, held as `String`; read through `read_text`
///   uuid - capnp Data fields, held as `[u8; 16]`; read through `read_uuid`
///   prim - plain copy scalars (integers, floats, bool), passed through as-is
///   enm  - `field: Type` pairs whose type has `to_capnp`/`from_capnp`
///   skip - Rust-only fields absent from capnp; `Default::default()` on read,
///          never written
///
/// `from_capnp` fails only when an `enm` getter fails, with the error text
/// "bad <field>". Text and uuid read errors are absorbed by the helpers.
macro_rules! capnp_message {
    (
        $name:ident,
        reader: $reader_ty:ty,
        builder: $builder_ty:ty,
        text: [$($text:ident),* $(,)?],
        uuid: [$($id:ident),* $(,)?],
        prim: [$($prim:ident),* $(,)?],
        enm: [$($enm:ident: $enm_ty:ident),* $(,)?],
        skip: [$($skipped:ident),* $(,)?] $(,)?
    ) => {
        impl $name {
            /// Build the struct from a capnp reader.
            pub fn from_capnp(r: $reader_ty) -> Result<Self> {
                paste::paste! {
                    // Decode the only fallible fields first so a bad enum
                    // value bails out before any strings are allocated.
                    $(
                        let $enm = $enm_ty::from_capnp(
                            r.[<get_ $enm>]()
                                .map_err(|_| anyhow!(concat!("bad ", stringify!($enm))))?
                        );
                    )*
                    Ok(Self {
                        $($text: read_text(r.[<get_ $text>]()),)*
                        $($id: read_uuid(r.[<get_ $id>]()),)*
                        $($prim: r.[<get_ $prim>](),)*
                        $($enm,)*
                        $($skipped: Default::default(),)*
                    })
                }
            }

            /// Write every serialized field into a capnp builder.
            pub fn to_capnp(&self, mut b: $builder_ty) {
                paste::paste! {
                    $(b.[<set_ $text>](&self.$text);)*
                    $(b.[<set_ $id>](&self.$id);)*
                    $(b.[<set_ $prim>](self.$prim);)*
                    $(b.[<set_ $enm>](self.$enm.to_capnp());)*
                }
            }
        }
    };
}
|
||||
|
||||
pub fn memory_dir() -> PathBuf {
|
||||
crate::config::get().data_dir.clone()
|
||||
}
|
||||
|
|
@ -226,133 +150,6 @@ pub enum RelationType {
|
|||
Auto,
|
||||
}
|
||||
|
||||
capnp_enum!(NodeType, memory_capnp::NodeType,
|
||||
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic, EpisodicMonthly]);
|
||||
|
||||
capnp_enum!(RelationType, memory_capnp::RelationType,
|
||||
[Link, Causal, Auto]);
|
||||
|
||||
// Node <-> memory_capnp::content_node. The `skip` fields exist only on the
// Rust side and are reset to their defaults whenever a node is read.
capnp_message!(
    Node,
    reader: memory_capnp::content_node::Reader<'_>,
    builder: memory_capnp::content_node::Builder<'_>,
    text: [
        key,
        content,
        source_ref,
        provenance,
    ],
    uuid: [
        uuid,
    ],
    prim: [
        version,
        timestamp,
        weight,
        emotion,
        deleted,
        retrievals,
        uses,
        wrongs,
        last_replayed,
        spaced_repetition_interval,
        created_at,
        last_scored,
    ],
    enm: [
        node_type: NodeType,
    ],
    skip: [
        community_id,
        clustering_coefficient,
        degree,
    ],
);
|
||||
|
||||
/// Convert legacy capnp provenance enum to string label.
|
||||
fn legacy_provenance_label(p: memory_capnp::Provenance) -> &'static str {
|
||||
use memory_capnp::Provenance::*;
|
||||
match p {
|
||||
Manual => "manual",
|
||||
Journal => "journal",
|
||||
Agent => "agent",
|
||||
Dream => "dream",
|
||||
Derived => "derived",
|
||||
AgentExperienceMine => "agent:experience-mine",
|
||||
AgentKnowledgeObservation => "agent:knowledge-observation",
|
||||
AgentKnowledgePattern => "agent:knowledge-pattern",
|
||||
AgentKnowledgeConnector => "agent:knowledge-connector",
|
||||
AgentKnowledgeChallenger => "agent:knowledge-challenger",
|
||||
AgentConsolidate => "agent:consolidate",
|
||||
AgentDigest => "agent:digest",
|
||||
AgentFactMine => "agent:fact-mine",
|
||||
AgentDecay => "agent:decay",
|
||||
}
|
||||
}
|
||||
|
||||
impl Node {
|
||||
/// Read from capnp with migration: if the new provenance text field
|
||||
/// is empty (old record), fall back to the deprecated provenanceOld enum.
|
||||
pub fn from_capnp_migrate(r: memory_capnp::content_node::Reader<'_>) -> Result<Self> {
|
||||
let mut node = Self::from_capnp(r)?;
|
||||
if node.provenance.is_empty()
|
||||
&& let Ok(old) = r.get_provenance_old() {
|
||||
node.provenance = legacy_provenance_label(old).to_string();
|
||||
}
|
||||
// Sanitize timestamps: old capnp records have raw offsets instead
|
||||
// of unix epoch. Anything past year 2100 (~4102444800) is bogus.
|
||||
const MAX_SANE_EPOCH: i64 = 4_102_444_800;
|
||||
if node.timestamp > MAX_SANE_EPOCH || node.timestamp < 0 {
|
||||
node.timestamp = node.created_at;
|
||||
}
|
||||
if node.created_at > MAX_SANE_EPOCH || node.created_at < 0 {
|
||||
node.created_at = node.timestamp.min(MAX_SANE_EPOCH);
|
||||
}
|
||||
Ok(node)
|
||||
}
|
||||
}
|
||||
|
||||
// Relation <-> memory_capnp::relation. Every Rust field is serialized, so
// the `skip` list is empty.
capnp_message!(
    Relation,
    reader: memory_capnp::relation::Reader<'_>,
    builder: memory_capnp::relation::Builder<'_>,
    text: [
        source_key,
        target_key,
        provenance,
    ],
    uuid: [
        uuid,
        source,
        target,
    ],
    prim: [
        version,
        timestamp,
        strength,
        deleted,
    ],
    enm: [
        rel_type: RelationType,
    ],
    skip: [],
);
|
||||
|
||||
impl Relation {
|
||||
pub fn from_capnp_migrate(r: memory_capnp::relation::Reader<'_>) -> Result<Self> {
|
||||
let mut rel = Self::from_capnp(r)?;
|
||||
if rel.provenance.is_empty()
|
||||
&& let Ok(old) = r.get_provenance_old() {
|
||||
rel.provenance = legacy_provenance_label(old).to_string();
|
||||
}
|
||||
Ok(rel)
|
||||
}
|
||||
}
|
||||
|
||||
// The full in-memory store
|
||||
pub struct Store {
|
||||
pub nodes: HashMap<String, Node>, // key → latest node
|
||||
pub uuid_to_key: HashMap<[u8; 16], String>, // uuid → key (rebuilt from nodes)
|
||||
pub relations: Vec<Relation>, // all active relations
|
||||
/// Log sizes at load time — used for staleness detection.
|
||||
pub(crate) loaded_nodes_size: u64,
|
||||
pub(crate) loaded_rels_size: u64,
|
||||
/// redb index database
|
||||
pub(crate) db: Option<redb::Database>,
|
||||
}
|
||||
|
||||
impl Default for Store {
|
||||
fn default() -> Self {
|
||||
Store {
|
||||
nodes: HashMap::new(),
|
||||
uuid_to_key: HashMap::new(),
|
||||
relations: Vec::new(),
|
||||
loaded_nodes_size: 0,
|
||||
loaded_rels_size: 0,
|
||||
db: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cap'n Proto serialization helpers
|
||||
|
||||
/// Read a capnp text field, returning empty string on any error
|
||||
pub(crate) fn read_text(result: capnp::Result<capnp::text::Reader>) -> String {
|
||||
result.ok()
|
||||
.and_then(|t| t.to_str().ok())
|
||||
.unwrap_or("")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// Read a capnp data field as [u8; 16], zero-padded
|
||||
pub(crate) fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] {
|
||||
let mut out = [0u8; 16];
|
||||
if let Ok(data) = result
|
||||
&& data.len() >= 16 {
|
||||
out.copy_from_slice(&data[..16]);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Create a new node with defaults
|
||||
pub fn new_node(key: &str, content: &str) -> Node {
|
||||
Node {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
// Read-only access abstraction for the memory store
|
||||
|
||||
use super::types::*;
|
||||
use super::Store;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// StoreView: read-only access trait for search and graph code.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue