store: move all capnp code to capnp.rs

Consolidate capnp serialization in one place:
- capnp_enum! and capnp_message! macros
- read_text/read_uuid helpers
- Type-to-capnp mappings
- from_capnp_migrate migration impls

types.rs now only has pure Rust types and helpers.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-13 19:17:31 -04:00
parent e48ca2ecad
commit ba53597cf2
5 changed files with 229 additions and 211 deletions

View file

@ -1,13 +1,19 @@
// Persistence layer: load, replay, append
// Cap'n Proto serialization and persistence
//
// capnp logs are the source of truth; redb provides indexed access.
// This module contains:
// - Serialization macros (capnp_enum!, capnp_message!)
// - Load/replay from capnp logs
// - Append to capnp logs
// - fsck (corruption repair)
use super::{index, types::*};
use redb::ReadableTableMetadata;
use crate::memory_capnp;
use super::Store;
use anyhow::{Context, Result};
use anyhow::{anyhow, Context, Result};
use capnp::message;
use capnp::serialize;
@ -16,6 +22,194 @@ use std::fs;
use std::io::{BufReader, Seek};
use std::path::Path;
// ---------------------------------------------------------------------------
// Capnp serialization macros
//
// Declarative mapping between Rust types and capnp generated types.
// Adding a field to the schema means adding it in one place below;
// both read and write are generated from the same declaration.
// ---------------------------------------------------------------------------
/// Implement bidirectional conversion between a Rust enum and its
/// capnp-generated counterpart.
///
/// Both enums must expose every listed variant under the same name; each
/// variant is mapped to the identically named variant on the other side.
macro_rules! capnp_enum {
    ($local:ident, $wire:path, [$($name:ident),+ $(,)?]) => {
        impl $local {
            /// Convert this value to the capnp-generated enum.
            #[allow(clippy::wrong_self_convention, dead_code)]
            pub(crate) fn to_capnp(&self) -> $wire {
                match *self {
                    $($local::$name => <$wire>::$name,)+
                }
            }

            /// Convert a capnp-generated enum value to the Rust enum.
            pub(crate) fn from_capnp(wire: $wire) -> Self {
                match wire {
                    $(<$wire>::$name => $local::$name,)+
                }
            }
        }
    };
}
/// Implement `from_capnp` / `to_capnp` on a struct from one declarative
/// field list. Every field is listed under the group that decides how it
/// is read and written:
///   text - read through `read_text`, written by reference
///   uuid - read through `read_uuid`, written by reference
///   prim - getter result stored as-is, field passed to the setter by value
///   enm  - `field: Type`, converted with `Type::from_capnp` / `to_capnp`
///   skip - never written; set to `Default::default()` on read
macro_rules! capnp_message {
    (
        $target:ident,
        reader: $rd:ty,
        builder: $bd:ty,
        text: [$($txt:ident),* $(,)?],
        uuid: [$($id:ident),* $(,)?],
        prim: [$($scalar:ident),* $(,)?],
        enm: [$($tag:ident: $tag_ty:ident),* $(,)?],
        skip: [$($local_only:ident),* $(,)?] $(,)?
    ) => {
        impl $target {
            /// Build the struct from a capnp reader.
            ///
            /// Only the `enm` getters can make this fail; the error
            /// message is `"bad <field>"`.
            pub fn from_capnp(r: $rd) -> Result<Self> {
                paste::paste! {
                    Ok(Self {
                        $($txt: read_text(r.[<get_ $txt>]()),)*
                        $($id: read_uuid(r.[<get_ $id>]()),)*
                        $($scalar: r.[<get_ $scalar>](),)*
                        $($tag: $tag_ty::from_capnp(
                            r.[<get_ $tag>]()
                                .map_err(|_| anyhow!(concat!("bad ", stringify!($tag))))?
                        ),)*
                        $($local_only: Default::default(),)*
                    })
                }
            }

            /// Write every non-`skip` field into a capnp builder.
            pub fn to_capnp(&self, mut b: $bd) {
                paste::paste! {
                    $(b.[<set_ $txt>](&self.$txt);)*
                    $(b.[<set_ $id>](&self.$id);)*
                    $(b.[<set_ $scalar>](self.$scalar);)*
                    $(b.[<set_ $tag>](self.$tag.to_capnp());)*
                }
            }
        }
    };
}
// ---------------------------------------------------------------------------
// Capnp helpers
// ---------------------------------------------------------------------------
/// Turn the result of a capnp text getter into an owned `String`.
///
/// A getter error or text that is not valid UTF-8 both yield an empty string.
fn read_text(result: capnp::Result<capnp::text::Reader>) -> String {
    match result {
        Ok(text) => text.to_str().map(str::to_owned).unwrap_or_default(),
        Err(_) => String::new(),
    }
}
/// Read a capnp data field as [u8; 16], zero-padded
fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] {
let mut out = [0u8; 16];
if let Ok(data) = result
&& data.len() >= 16 {
out.copy_from_slice(&data[..16]);
}
out
}
// ---------------------------------------------------------------------------
// Type-to-capnp mappings
// ---------------------------------------------------------------------------
// Enum mappings: capnp_enum! pairs variants by name, so every name listed
// here must exist on both the Rust enum and the capnp-generated enum.
capnp_enum!(NodeType, memory_capnp::NodeType,
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic, EpisodicMonthly]);
capnp_enum!(RelationType, memory_capnp::RelationType,
[Link, Causal, Auto]);
// Node <-> content_node. The `skip` fields are not written by to_capnp and
// come back as Default::default() from from_capnp.
capnp_message!(Node,
reader: memory_capnp::content_node::Reader<'_>,
builder: memory_capnp::content_node::Builder<'_>,
text: [key, content, source_ref, provenance],
uuid: [uuid],
prim: [version, timestamp, weight, emotion, deleted,
retrievals, uses, wrongs, last_replayed,
spaced_repetition_interval, created_at, last_scored],
enm: [node_type: NodeType],
skip: [community_id, clustering_coefficient, degree],
);
// Relation <-> relation. Every Relation field is serialized (empty `skip`).
capnp_message!(Relation,
reader: memory_capnp::relation::Reader<'_>,
builder: memory_capnp::relation::Builder<'_>,
text: [source_key, target_key, provenance],
uuid: [uuid, source, target],
prim: [version, timestamp, strength, deleted],
enm: [rel_type: RelationType],
skip: [],
);
// ---------------------------------------------------------------------------
// Migration helpers (legacy provenance enum → string)
// ---------------------------------------------------------------------------
/// Convert legacy capnp provenance enum to string label.
fn legacy_provenance_label(p: memory_capnp::Provenance) -> &'static str {
use memory_capnp::Provenance::*;
match p {
Manual => "manual",
Journal => "journal",
Agent => "agent",
Dream => "dream",
Derived => "derived",
AgentExperienceMine => "agent:experience-mine",
AgentKnowledgeObservation => "agent:knowledge-observation",
AgentKnowledgePattern => "agent:knowledge-pattern",
AgentKnowledgeConnector => "agent:knowledge-connector",
AgentKnowledgeChallenger => "agent:knowledge-challenger",
AgentConsolidate => "agent:consolidate",
AgentDigest => "agent:digest",
AgentFactMine => "agent:fact-mine",
AgentDecay => "agent:decay",
}
}
impl Node {
    /// Read from capnp with migration of legacy records.
    ///
    /// On top of `Node::from_capnp` this:
    /// - fills an empty `provenance` (old record) from the deprecated
    ///   `provenanceOld` enum, when that field can be read;
    /// - sanitizes `timestamp` and `created_at` so both end up inside
    ///   `0..=MAX_SANE_EPOCH`.
    ///
    /// # Errors
    /// Propagates any error returned by `Node::from_capnp`.
    pub fn from_capnp_migrate(r: memory_capnp::content_node::Reader<'_>) -> Result<Self> {
        let mut node = Self::from_capnp(r)?;
        if node.provenance.is_empty()
            && let Ok(old) = r.get_provenance_old()
        {
            node.provenance = legacy_provenance_label(old).to_string();
        }

        // Sanitize timestamps: old capnp records have raw offsets instead
        // of unix epoch. Anything past year 2100 (~4102444800) is bogus.
        const MAX_SANE_EPOCH: i64 = 4_102_444_800;
        let is_sane = |t: i64| (0..=MAX_SANE_EPOCH).contains(&t);

        // Classify both fields before touching either one, so a bogus value
        // is never copied over the other field and then left in place.
        match (is_sane(node.timestamp), is_sane(node.created_at)) {
            (true, true) => {}
            (false, true) => node.timestamp = node.created_at,
            (true, false) => node.created_at = node.timestamp,
            (false, false) => {
                // No trustworthy value is available: clamp into the sane
                // range instead of propagating an out-of-range epoch.
                let fallback = node.created_at.clamp(0, MAX_SANE_EPOCH);
                node.timestamp = fallback;
                node.created_at = fallback;
            }
        }
        Ok(node)
    }
}
impl Relation {
    /// Read from capnp with migration: when the `provenance` text field is
    /// empty and the deprecated `provenanceOld` enum can be read, use the
    /// enum's string label as the provenance.
    pub fn from_capnp_migrate(r: memory_capnp::relation::Reader<'_>) -> Result<Self> {
        let mut relation = Self::from_capnp(r)?;
        let lacks_provenance = relation.provenance.is_empty();
        if lacks_provenance
            && let Ok(legacy) = r.get_provenance_old()
        {
            relation.provenance = legacy_provenance_label(legacy).to_owned();
        }
        Ok(relation)
    }
}
// ---------------------------------------------------------------------------
// Store persistence methods
// ---------------------------------------------------------------------------
impl Store {
/// Load store by replaying capnp logs, then open/verify redb indices.
pub fn load() -> Result<Store> {

View file

@ -7,9 +7,9 @@
// redb provides indexed access; Store struct holds in-memory state.
//
// Module layout:
// types.rs — Node, Relation, enums, capnp macros, path helpers
// types.rs — Node, Relation, enums, path/time helpers
// capnp.rs — serialization macros, log IO (load, replay, append, fsck)
// index.rs — redb index operations
// capnp.rs — capnp log IO (load, replay, append, fsck)
// ops.rs — mutations (upsert, delete, rename, etc.)
// view.rs — StoreView trait for read-only access
@ -23,7 +23,7 @@ mod view;
pub use types::{
memory_dir, nodes_path,
now_epoch, epoch_to_local, format_date, format_datetime, format_datetime_space, compact_timestamp, today,
Node, Relation, NodeType, RelationType, Store,
Node, Relation, NodeType, RelationType,
new_node, new_relation,
};
pub use view::StoreView;
@ -32,6 +32,7 @@ pub use ops::current_provenance;
use crate::graph::{self, Graph};
use std::collections::HashMap;
use anyhow::{bail, Result};
/// Strip .md suffix from a key, handling both bare keys and section keys.
@ -45,6 +46,31 @@ pub fn strip_md_suffix(key: &str) -> String {
}
}
/// The full in-memory store.
pub struct Store {
/// key → latest node
pub nodes: HashMap<String, Node>,
/// uuid → key (rebuilt from nodes)
pub uuid_to_key: HashMap<[u8; 16], String>,
/// all active relations
pub relations: Vec<Relation>,
/// Log sizes at load time — used for staleness detection.
pub(crate) loaded_nodes_size: u64,
pub(crate) loaded_rels_size: u64,
/// redb index database (`None` in a `Default`-constructed store)
pub(crate) db: Option<redb::Database>,
}
impl Default for Store {
fn default() -> Self {
Store {
nodes: HashMap::new(),
uuid_to_key: HashMap::new(),
relations: Vec::new(),
loaded_nodes_size: 0,
loaded_rels_size: 0,
db: None,
}
}
}
impl Store {
pub fn build_graph(&self) -> Graph {
graph::build_graph(self)

View file

@ -2,7 +2,7 @@
//
// CRUD (upsert, delete), maintenance (decay, cap_degree), and graph metrics.
use super::{index, types::*};
use super::{index, types::*, Store};
use anyhow::{anyhow, bail, Result};
use std::collections::{HashMap, HashSet};

View file

@ -1,90 +1,14 @@
// Core types for the memory store
//
// Node, Relation, enums, Params, and supporting types. Also contains
// the capnp serialization macros that generate bidirectional conversion.
// Node, Relation, enums, path helpers, time helpers.
// capnp serialization is in capnp.rs.
use crate::memory_capnp;
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::{SystemTime, UNIX_EPOCH};
// ---------------------------------------------------------------------------
// Capnp serialization macros
//
// Declarative mapping between Rust types and capnp generated types.
// Adding a field to the schema means adding it in one place below;
// both read and write are generated from the same declaration.
// ---------------------------------------------------------------------------
/// Implement bidirectional conversion between a Rust enum and its
/// capnp-generated counterpart.
///
/// Both enums must expose every listed variant under the same name; each
/// variant is mapped to the identically named variant on the other side.
macro_rules! capnp_enum {
    ($local:ident, $wire:path, [$($name:ident),+ $(,)?]) => {
        impl $local {
            /// Convert this value to the capnp-generated enum.
            #[allow(clippy::wrong_self_convention, dead_code)]
            pub(crate) fn to_capnp(&self) -> $wire {
                match *self {
                    $($local::$name => <$wire>::$name,)+
                }
            }

            /// Convert a capnp-generated enum value to the Rust enum.
            pub(crate) fn from_capnp(wire: $wire) -> Self {
                match wire {
                    $(<$wire>::$name => $local::$name,)+
                }
            }
        }
    };
}
/// Implement `from_capnp` / `to_capnp` on a struct from one declarative
/// field list. Every field is listed under the group that decides how it
/// is read and written:
///   text - read through `read_text`, written by reference
///   uuid - read through `read_uuid`, written by reference
///   prim - getter result stored as-is, field passed to the setter by value
///   enm  - `field: Type`, converted with `Type::from_capnp` / `to_capnp`
///   skip - never written; set to `Default::default()` on read
macro_rules! capnp_message {
    (
        $target:ident,
        reader: $rd:ty,
        builder: $bd:ty,
        text: [$($txt:ident),* $(,)?],
        uuid: [$($id:ident),* $(,)?],
        prim: [$($scalar:ident),* $(,)?],
        enm: [$($tag:ident: $tag_ty:ident),* $(,)?],
        skip: [$($local_only:ident),* $(,)?] $(,)?
    ) => {
        impl $target {
            /// Build the struct from a capnp reader.
            ///
            /// Only the `enm` getters can make this fail; the error
            /// message is `"bad <field>"`.
            pub fn from_capnp(r: $rd) -> Result<Self> {
                paste::paste! {
                    Ok(Self {
                        $($txt: read_text(r.[<get_ $txt>]()),)*
                        $($id: read_uuid(r.[<get_ $id>]()),)*
                        $($scalar: r.[<get_ $scalar>](),)*
                        $($tag: $tag_ty::from_capnp(
                            r.[<get_ $tag>]()
                                .map_err(|_| anyhow!(concat!("bad ", stringify!($tag))))?
                        ),)*
                        $($local_only: Default::default(),)*
                    })
                }
            }

            /// Write every non-`skip` field into a capnp builder.
            pub fn to_capnp(&self, mut b: $bd) {
                paste::paste! {
                    $(b.[<set_ $txt>](&self.$txt);)*
                    $(b.[<set_ $id>](&self.$id);)*
                    $(b.[<set_ $scalar>](self.$scalar);)*
                    $(b.[<set_ $tag>](self.$tag.to_capnp());)*
                }
            }
        }
    };
}
pub fn memory_dir() -> PathBuf {
crate::config::get().data_dir.clone()
}
@ -226,133 +150,6 @@ pub enum RelationType {
Auto,
}
// Enum mappings: capnp_enum! pairs variants by name, so every name listed
// here must exist on both the Rust enum and the capnp-generated enum.
capnp_enum!(NodeType, memory_capnp::NodeType,
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic, EpisodicMonthly]);
capnp_enum!(RelationType, memory_capnp::RelationType,
[Link, Causal, Auto]);
// Node <-> content_node. The `skip` fields are not written by to_capnp and
// come back as Default::default() from from_capnp.
capnp_message!(Node,
reader: memory_capnp::content_node::Reader<'_>,
builder: memory_capnp::content_node::Builder<'_>,
text: [key, content, source_ref, provenance],
uuid: [uuid],
prim: [version, timestamp, weight, emotion, deleted,
retrievals, uses, wrongs, last_replayed,
spaced_repetition_interval, created_at, last_scored],
enm: [node_type: NodeType],
skip: [community_id, clustering_coefficient, degree],
);
/// Convert legacy capnp provenance enum to string label.
fn legacy_provenance_label(p: memory_capnp::Provenance) -> &'static str {
use memory_capnp::Provenance::*;
match p {
Manual => "manual",
Journal => "journal",
Agent => "agent",
Dream => "dream",
Derived => "derived",
AgentExperienceMine => "agent:experience-mine",
AgentKnowledgeObservation => "agent:knowledge-observation",
AgentKnowledgePattern => "agent:knowledge-pattern",
AgentKnowledgeConnector => "agent:knowledge-connector",
AgentKnowledgeChallenger => "agent:knowledge-challenger",
AgentConsolidate => "agent:consolidate",
AgentDigest => "agent:digest",
AgentFactMine => "agent:fact-mine",
AgentDecay => "agent:decay",
}
}
impl Node {
    /// Read from capnp with migration of legacy records.
    ///
    /// On top of `Node::from_capnp` this:
    /// - fills an empty `provenance` (old record) from the deprecated
    ///   `provenanceOld` enum, when that field can be read;
    /// - sanitizes `timestamp` and `created_at` so both end up inside
    ///   `0..=MAX_SANE_EPOCH`.
    ///
    /// # Errors
    /// Propagates any error returned by `Node::from_capnp`.
    pub fn from_capnp_migrate(r: memory_capnp::content_node::Reader<'_>) -> Result<Self> {
        let mut node = Self::from_capnp(r)?;
        if node.provenance.is_empty()
            && let Ok(old) = r.get_provenance_old()
        {
            node.provenance = legacy_provenance_label(old).to_string();
        }

        // Sanitize timestamps: old capnp records have raw offsets instead
        // of unix epoch. Anything past year 2100 (~4102444800) is bogus.
        const MAX_SANE_EPOCH: i64 = 4_102_444_800;
        let is_sane = |t: i64| (0..=MAX_SANE_EPOCH).contains(&t);

        // Classify both fields before touching either one, so a bogus value
        // is never copied over the other field and then left in place.
        match (is_sane(node.timestamp), is_sane(node.created_at)) {
            (true, true) => {}
            (false, true) => node.timestamp = node.created_at,
            (true, false) => node.created_at = node.timestamp,
            (false, false) => {
                // No trustworthy value is available: clamp into the sane
                // range instead of propagating an out-of-range epoch.
                let fallback = node.created_at.clamp(0, MAX_SANE_EPOCH);
                node.timestamp = fallback;
                node.created_at = fallback;
            }
        }
        Ok(node)
    }
}
// Relation <-> relation. Every Relation field is serialized (empty `skip`).
capnp_message!(Relation,
reader: memory_capnp::relation::Reader<'_>,
builder: memory_capnp::relation::Builder<'_>,
text: [source_key, target_key, provenance],
uuid: [uuid, source, target],
prim: [version, timestamp, strength, deleted],
enm: [rel_type: RelationType],
skip: [],
);
impl Relation {
    /// Read from capnp with migration: when the `provenance` text field is
    /// empty and the deprecated `provenanceOld` enum can be read, use the
    /// enum's string label as the provenance.
    pub fn from_capnp_migrate(r: memory_capnp::relation::Reader<'_>) -> Result<Self> {
        let mut relation = Self::from_capnp(r)?;
        let lacks_provenance = relation.provenance.is_empty();
        if lacks_provenance
            && let Ok(legacy) = r.get_provenance_old()
        {
            relation.provenance = legacy_provenance_label(legacy).to_owned();
        }
        Ok(relation)
    }
}
/// The full in-memory store.
pub struct Store {
/// key → latest node
pub nodes: HashMap<String, Node>,
/// uuid → key (rebuilt from nodes)
pub uuid_to_key: HashMap<[u8; 16], String>,
/// all active relations
pub relations: Vec<Relation>,
/// Log sizes at load time — used for staleness detection.
pub(crate) loaded_nodes_size: u64,
pub(crate) loaded_rels_size: u64,
/// redb index database (`None` in a `Default`-constructed store)
pub(crate) db: Option<redb::Database>,
}
impl Default for Store {
fn default() -> Self {
Store {
nodes: HashMap::new(),
uuid_to_key: HashMap::new(),
relations: Vec::new(),
loaded_nodes_size: 0,
loaded_rels_size: 0,
db: None,
}
}
}
// Cap'n Proto serialization helpers
/// Turn the result of a capnp text getter into an owned `String`.
///
/// A getter error or text that is not valid UTF-8 both yield an empty string.
pub(crate) fn read_text(result: capnp::Result<capnp::text::Reader>) -> String {
    match result {
        Ok(text) => text.to_str().map(str::to_owned).unwrap_or_default(),
        Err(_) => String::new(),
    }
}
/// Read a capnp data field as [u8; 16], zero-padded
pub(crate) fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] {
let mut out = [0u8; 16];
if let Ok(data) = result
&& data.len() >= 16 {
out.copy_from_slice(&data[..16]);
}
out
}
/// Create a new node with defaults
pub fn new_node(key: &str, content: &str) -> Node {
Node {

View file

@ -1,6 +1,7 @@
// Read-only access abstraction for the memory store
use super::types::*;
use super::Store;
// ---------------------------------------------------------------------------
// StoreView: read-only access trait for search and graph code.