Replace rkyv/bincode caching with redb indices

Remove three-tier loading (rkyv snapshot, bincode cache, capnp replay)
in favor of direct capnp log replay + redb for indexed access.

- Remove all rkyv derives from types (Node, Relation, enums, etc.)
- Remove Snapshot struct, RKYV_MAGIC, CACHE_MAGIC constants
- Remove load_snapshot_mmap(), save(), save_snapshot()
- Remove MmapView, AnyView from view.rs (keep StoreView trait)
- Simplify Store::load() to just replay capnp logs
- Add db.rs with redb schema: nodes, uuid_to_key, visits, transcript_progress
- Simplify cmd_fsck to just check capnp integrity + graph health

capnp logs remain source of truth; redb indices will be rebuilt on demand.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-13 18:30:58 -04:00
parent 1d88293ccf
commit 2caccf875d
8 changed files with 201 additions and 636 deletions

View file

@ -1,11 +1,6 @@
// Persistence layer: load, save, replay, append, snapshot
// Persistence layer: load, replay, append
//
// Three-tier loading strategy:
// 1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
// 2. bincode cache (state.bin) — ~10ms
// 3. capnp log replay — ~40ms
//
// Logs are append-only; cache staleness uses log file sizes, not mtimes.
// capnp logs are the source of truth; redb provides indexed access.
use super::types::*;
@ -21,62 +16,11 @@ use std::io::{BufReader, Seek};
use std::path::Path;
impl Store {
/// Load store from state.bin cache if fresh, otherwise rebuild from capnp logs.
///
/// Staleness check uses log file sizes (not mtimes). Since logs are
/// append-only, any write grows the file, invalidating the cache.
/// This avoids the mtime race that caused data loss with concurrent
/// writers (dream loop, link audit, journal enrichment).
/// Load store by replaying capnp logs.
pub fn load() -> Result<Store> {
// 1. Try rkyv mmap snapshot (~4ms with deserialize, <1ms zero-copy)
match Self::load_snapshot_mmap() {
Ok(Some(mut store)) => {
// rkyv snapshot doesn't include visits — replay from log
let visits_p = visits_path();
if visits_p.exists() {
store.replay_visits(&visits_p).ok();
}
let tp_p = transcript_progress_path();
if tp_p.exists() {
store.replay_transcript_progress(&tp_p).ok();
}
return Ok(store);
},
Ok(None) => {},
Err(e) => eprintln!("rkyv snapshot: {}", e),
}
// 2. Try bincode state.bin cache (~10ms)
let nodes_p = nodes_path();
let rels_p = relations_path();
let state_p = state_path();
let nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
if let Ok(data) = fs::read(&state_p)
&& data.len() >= CACHE_HEADER_LEN && data[..4] == CACHE_MAGIC {
let cached_nodes = u64::from_le_bytes(data[4..12].try_into().unwrap());
let cached_rels = u64::from_le_bytes(data[12..20].try_into().unwrap());
if cached_nodes == nodes_size && cached_rels == rels_size
&& let Ok(mut store) = bincode::deserialize::<Store>(&data[CACHE_HEADER_LEN..]) {
// Rebuild uuid_to_key (skipped by serde)
for (key, node) in &store.nodes {
store.uuid_to_key.insert(node.uuid, key.clone());
}
store.loaded_nodes_size = nodes_size;
store.loaded_rels_size = rels_size;
// Bootstrap: write rkyv snapshot if missing
if !snapshot_path().exists()
&& let Err(e) = store.save_snapshot(cached_nodes, cached_rels) {
eprintln!("rkyv bootstrap: {}", e);
}
return Ok(store);
}
}
// Stale or no cache — rebuild from capnp logs
let mut store = Store::default();
if nodes_p.exists() {
@ -94,7 +38,7 @@ impl Store {
store.replay_transcript_progress(&tp_p)?;
}
// Record log sizes after replay — this is the state we reflect
// Record log sizes after replay
store.loaded_nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
store.loaded_rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
@ -104,31 +48,6 @@ impl Store {
store.nodes.contains_key(&r.target_key)
);
store.save()?;
Ok(store)
}
/// Load store directly from capnp logs, bypassing all caches.
/// Used by fsck to verify cache consistency.
pub fn load_from_logs() -> Result<Store> {
let nodes_p = nodes_path();
let rels_p = relations_path();
let mut store = Store::default();
if nodes_p.exists() {
store.replay_nodes(&nodes_p)?;
}
if rels_p.exists() {
store.replay_relations(&rels_p)?;
}
let visits_p = visits_path();
if visits_p.exists() {
store.replay_visits(&visits_p)?;
}
let tp_p = transcript_progress_path();
if tp_p.exists() {
store.replay_transcript_progress(&tp_p)?;
}
Ok(store)
}
@ -588,135 +507,10 @@ impl Store {
.unwrap_or(0)
}
/// Save the derived cache with log size header for staleness detection.
/// Uses atomic write (tmp + rename) to prevent partial reads.
/// Placeholder - indices will be updated on write with redb.
pub fn save(&self) -> Result<()> {
let _lock = StoreLock::acquire()?;
let path = state_path();
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).ok();
}
// Use log sizes from load time, not current filesystem sizes.
// If another writer appended since we loaded, our recorded size
// will be smaller than the actual log → next reader detects stale
// cache and replays the (correct, append-only) log.
let nodes_size = self.loaded_nodes_size;
let rels_size = self.loaded_rels_size;
let bincode_data = bincode::serialize(self)
.with_context(|| format!("bincode serialize"))?;
let mut data = Vec::with_capacity(CACHE_HEADER_LEN + bincode_data.len());
data.extend_from_slice(&CACHE_MAGIC);
data.extend_from_slice(&nodes_size.to_le_bytes());
data.extend_from_slice(&rels_size.to_le_bytes());
data.extend_from_slice(&bincode_data);
// Atomic write: tmp file + rename
let tmp_path = path.with_extension("bin.tmp");
fs::write(&tmp_path, &data)
.with_context(|| format!("write {}", tmp_path.display()))?;
fs::rename(&tmp_path, &path)
.with_context(|| format!("rename {}{}", tmp_path.display(), path.display()))?;
// Also write rkyv snapshot (mmap-friendly)
if let Err(e) = self.save_snapshot(nodes_size, rels_size) {
eprintln!("rkyv snapshot save: {}", e);
}
Ok(())
}
/// Serialize store as rkyv snapshot with staleness header.
/// Assumes StoreLock is already held by caller.
fn save_snapshot(&self, nodes_size: u64, rels_size: u64) -> Result<()> {
let snap = Snapshot {
nodes: self.nodes.clone(),
relations: self.relations.iter().filter(|r| !r.deleted).cloned().collect(),
gaps: self.gaps.clone(),
params: self.params,
};
let rkyv_data = rkyv::to_bytes::<_, 256>(&snap)
.with_context(|| format!("rkyv serialize"))?;
let mut data = Vec::with_capacity(RKYV_HEADER_LEN + rkyv_data.len());
data.extend_from_slice(&RKYV_MAGIC);
data.extend_from_slice(&1u32.to_le_bytes()); // format version
data.extend_from_slice(&nodes_size.to_le_bytes());
data.extend_from_slice(&rels_size.to_le_bytes());
data.extend_from_slice(&(rkyv_data.len() as u64).to_le_bytes());
data.extend_from_slice(&rkyv_data);
let path = snapshot_path();
let tmp_path = path.with_extension("rkyv.tmp");
fs::write(&tmp_path, &data)
.with_context(|| format!("write {}", tmp_path.display()))?;
fs::rename(&tmp_path, &path)
.with_context(|| format!("rename"))?;
Ok(())
}
/// Try loading store from mmap'd rkyv snapshot.
/// Returns None if snapshot is missing or stale (log sizes don't match).
fn load_snapshot_mmap() -> Result<Option<Store>> {
let path = snapshot_path();
if !path.exists() { return Ok(None); }
let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);
let file = fs::File::open(&path)
.with_context(|| format!("open {}", path.display()))?;
let mmap = unsafe { memmap2::Mmap::map(&file) }
.with_context(|| format!("mmap {}", path.display()))?;
if mmap.len() < RKYV_HEADER_LEN { return Ok(None); }
if mmap[..4] != RKYV_MAGIC { return Ok(None); }
// [4..8] = version, skip for now
let cached_nodes = u64::from_le_bytes(mmap[8..16].try_into().unwrap());
let cached_rels = u64::from_le_bytes(mmap[16..24].try_into().unwrap());
let data_len = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize;
if cached_nodes != nodes_size || cached_rels != rels_size {
return Ok(None); // stale
}
if mmap.len() < RKYV_HEADER_LEN + data_len {
return Ok(None); // truncated
}
let rkyv_data = &mmap[RKYV_HEADER_LEN..RKYV_HEADER_LEN + data_len];
// SAFETY: we wrote this file ourselves via save_snapshot().
// Skip full validation (check_archived_root) — the staleness header
// already confirms this snapshot matches the current log state.
let archived = unsafe { rkyv::archived_root::<Snapshot>(rkyv_data) };
let snap: Snapshot = <ArchivedSnapshot as rkyv::Deserialize<Snapshot, rkyv::Infallible>>
::deserialize(archived, &mut rkyv::Infallible).unwrap();
let mut store = Store {
nodes: snap.nodes,
relations: snap.relations,
gaps: snap.gaps,
params: snap.params,
..Default::default()
};
// Rebuild uuid_to_key (not serialized)
for (key, node) in &store.nodes {
store.uuid_to_key.insert(node.uuid, key.clone());
}
store.loaded_nodes_size = nodes_size;
store.loaded_rels_size = rels_size;
Ok(Some(store))
}
}
/// Check and repair corrupt capnp log files.
@ -802,14 +596,6 @@ pub fn fsck() -> Result<()> {
}
if any_corrupt {
// Nuke caches so next load replays from the repaired logs
for p in [state_path(), snapshot_path()] {
if p.exists() {
fs::remove_file(&p)
.with_context(|| format!("remove {}", p.display()))?;
eprintln!("removed stale cache: {}", p.display());
}
}
eprintln!("repair complete — run `poc-memory status` to verify");
} else {
eprintln!("store is clean");