store: redb indexes offsets into capnp log, not full nodes

Restructure store module with clearer file names:
- persist.rs → capnp.rs (capnp log IO)
- db.rs → index.rs (redb index operations)

redb now stores key → offset mapping, not serialized nodes.
Mutations record the offset after appending to capnp log.
rebuild_index scans capnp log to reconstruct the index.

The HashMap still exists for now; the next step is to switch
lookups over to the index and remove the HashMap.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-13 19:10:08 -04:00
parent 9309de68fc
commit f413a853d8
5 changed files with 225 additions and 149 deletions

View file

@ -2,7 +2,7 @@
//
// capnp logs are the source of truth; redb provides indexed access.
use super::{db, types::*};
use super::{index, types::*};
use redb::ReadableTableMetadata;
use crate::memory_capnp;
@ -52,7 +52,7 @@ impl Store {
fn open_or_rebuild_db(&self, path: &Path) -> Result<redb::Database> {
// Try opening existing database
if path.exists() {
match db::open_db(path) {
match index::open_db(path) {
Ok(database) => {
if self.db_is_healthy(&database)? {
return Ok(database);
@ -65,8 +65,8 @@ impl Store {
}
}
// Rebuild from in-memory state
db::rebuild_from_store(path, self)
// Rebuild index from capnp log
rebuild_index(path, &nodes_path())
}
/// Check if redb indices match in-memory state.
@ -76,7 +76,7 @@ impl Store {
let txn = database.begin_read()?;
// Quick check: node count should match
let nodes_table = txn.open_table(db::NODES)?;
let nodes_table = txn.open_table(index::NODES)?;
let db_count = nodes_table.len()?;
if db_count != self.nodes.len() as u64 {
@ -218,13 +218,15 @@ impl Store {
/// Append nodes to the log file.
/// Serializes to a Vec first, then does a single write() syscall
/// so the append is atomic with O_APPEND even without flock.
pub fn append_nodes(&mut self, nodes: &[Node]) -> Result<()> {
/// Returns the offset where the message was written.
pub fn append_nodes(&mut self, nodes: &[Node]) -> Result<u64> {
let _lock = StoreLock::acquire()?;
self.append_nodes_unlocked(nodes)
}
/// Append nodes without acquiring the lock. Caller must hold StoreLock.
pub(crate) fn append_nodes_unlocked(&mut self, nodes: &[Node]) -> Result<()> {
/// Returns the offset where the message was written.
pub(crate) fn append_nodes_unlocked(&mut self, nodes: &[Node]) -> Result<u64> {
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::node_log::Builder>();
@ -241,12 +243,16 @@ impl Store {
let file = fs::OpenOptions::new()
.create(true).append(true).open(&path)
.with_context(|| format!("open {}", path.display()))?;
// Get offset before writing
let offset = file.metadata().map(|m| m.len()).unwrap_or(0);
use std::io::Write;
(&file).write_all(&buf)
.with_context(|| format!("write nodes"))?;
self.loaded_nodes_size = file.metadata().map(|m| m.len()).unwrap_or(0);
Ok(())
Ok(offset)
}
/// Replay only new entries appended to the node log since we last loaded.
@ -418,3 +424,88 @@ pub fn fsck() -> Result<()> {
Ok(())
}
/// Rebuild the redb index from the capnp log.
///
/// Scans the log from the start, recording the byte offset of every
/// message, and keeps the latest version seen for each node key.
/// Keys whose latest version is a tombstone (`deleted`) are left out
/// of the index. Returns the freshly built database.
fn rebuild_index(db_path: &Path, capnp_path: &Path) -> Result<redb::Database> {
    // Start from a clean database so stale entries cannot survive.
    if db_path.exists() {
        fs::remove_file(db_path)
            .with_context(|| format!("remove old db {}", db_path.display()))?;
    }
    let database = index::open_db(db_path)?;
    // No log yet: an empty index is the correct result.
    if !capnp_path.exists() {
        return Ok(database);
    }
    // Latest (offset, uuid, version, deleted) seen per key.
    let mut latest: HashMap<String, (u64, [u8; 16], u32, bool)> = HashMap::new();
    let file = fs::File::open(capnp_path)
        .with_context(|| format!("open {}", capnp_path.display()))?;
    let mut reader = BufReader::new(file);
    loop {
        // Offset of the message about to be read — this is the value
        // the index stores so readers can seek straight to a node.
        let offset = reader.stream_position()?;
        let msg = match serialize::read_message(&mut reader, message::ReaderOptions::new()) {
            Ok(m) => m,
            // Read failure means EOF or a truncated tail; stop scanning.
            Err(_) => break,
        };
        let log = match msg.get_root::<memory_capnp::node_log::Reader>() {
            Ok(l) => l,
            // Skip malformed messages instead of aborting the rebuild.
            Err(_) => continue,
        };
        let nodes = match log.get_nodes() {
            Ok(n) => n,
            Err(_) => continue,
        };
        for node_reader in nodes {
            let key = node_reader.get_key().ok()
                .and_then(|t| t.to_str().ok())
                .unwrap_or("")
                .to_string();
            if key.is_empty() { continue; }
            let version = node_reader.get_version();
            let deleted = node_reader.get_deleted();
            let mut uuid = [0u8; 16];
            if let Ok(data) = node_reader.get_uuid() {
                if data.len() >= 16 {
                    uuid.copy_from_slice(&data[..16]);
                }
            }
            // `>=` so that of two entries with equal version, the later
            // log entry wins — replays converge on the last write.
            // (Was misleadingly named `dominated`; the condition is true
            // when the NEW entry supersedes the stored one.)
            let supersedes = latest.get(&key)
                .map(|&(_, _, v, _)| version >= v)
                .unwrap_or(true);
            if supersedes {
                latest.insert(key, (offset, uuid, version, deleted));
            }
        }
    }
    // Write index entries for keys whose latest version is live.
    {
        let txn = database.begin_write()?;
        {
            let mut nodes_table = txn.open_table(index::NODES)?;
            let mut uuid_table = txn.open_table(index::UUID_TO_KEY)?;
            for (key, (offset, uuid, _, deleted)) in latest {
                if !deleted {
                    nodes_table.insert(key.as_str(), offset)?;
                    uuid_table.insert(uuid.as_slice(), key.as_str())?;
                }
            }
        }
        txn.commit()?;
    }
    Ok(database)
}

View file

@ -1,119 +0,0 @@
// redb index tables
//
// capnp logs are source of truth; redb provides indexed access.
// Tables:
// nodes: key → Node (JSON serialized)
// uuid_to_key: [u8;16] → key
//
// Relations stay in-memory for now (frequently iterated in full).
use super::types::*;
use anyhow::{Context, Result};
use redb::{Database, ReadableDatabase, TableDefinition};
use std::path::Path;
// Table definitions
pub const NODES: TableDefinition<&str, &[u8]> = TableDefinition::new("nodes");
pub const UUID_TO_KEY: TableDefinition<&[u8], &str> = TableDefinition::new("uuid_to_key");
/// Open or create the redb database, ensuring all tables exist.
pub fn open_db(path: &Path) -> Result<Database> {
    let database = Database::create(path)
        .with_context(|| format!("create redb {}", path.display()))?;
    // Opening each table inside a single write transaction creates
    // any that are missing, so later opens never fail on a fresh file.
    let setup = database.begin_write()?;
    {
        let _ = setup.open_table(NODES)?;
        let _ = setup.open_table(UUID_TO_KEY)?;
    }
    setup.commit()?;
    Ok(database)
}
/// Rebuild redb indices from in-memory Store (loaded from capnp logs).
/// Deletes and recreates the database for a clean rebuild.
pub fn rebuild_from_store(path: &Path, store: &Store) -> Result<Database> {
    // Drop any stale database file first so the rebuild starts clean.
    if path.exists() {
        std::fs::remove_file(path)
            .with_context(|| format!("remove old db {}", path.display()))?;
    }
    let database = open_db(path)?;
    let txn = database.begin_write()?;
    {
        let mut nodes_table = txn.open_table(NODES)?;
        let mut uuid_table = txn.open_table(UUID_TO_KEY)?;
        for (key, node) in store.nodes.iter() {
            let bytes = serde_json::to_vec(node)
                .with_context(|| format!("serialize node {}", key))?;
            nodes_table.insert(key.as_str(), bytes.as_slice())?;
            uuid_table.insert(node.uuid.as_slice(), key.as_str())?;
        }
    }
    txn.commit()?;
    Ok(database)
}
/// Get a node by key from redb.
pub fn get_node(db: &Database, key: &str) -> Result<Option<Node>> {
    let txn = db.begin_read()?;
    let table = txn.open_table(NODES)?;
    // Guard clause: absent key is a plain None, not an error.
    let Some(data) = table.get(key)? else {
        return Ok(None);
    };
    let node: Node = serde_json::from_slice(data.value())
        .with_context(|| format!("deserialize node {}", key))?;
    Ok(Some(node))
}
/// Get key by uuid from redb.
pub fn get_key_by_uuid(db: &Database, uuid: &[u8; 16]) -> Result<Option<String>> {
    let txn = db.begin_read()?;
    let table = txn.open_table(UUID_TO_KEY)?;
    // Map the access guard straight to an owned String.
    Ok(table.get(uuid.as_slice())?.map(|k| k.value().to_string()))
}
/// Insert or update a node in redb.
pub fn upsert_node(db: &Database, node: &Node) -> Result<()> {
    // Serialize before opening the write transaction; a serialization
    // failure then never leaves a transaction dangling.
    let payload = serde_json::to_vec(node)
        .with_context(|| format!("serialize node {}", node.key))?;
    let txn = db.begin_write()?;
    {
        let mut nodes_table = txn.open_table(NODES)?;
        let mut uuid_table = txn.open_table(UUID_TO_KEY)?;
        nodes_table.insert(node.key.as_str(), payload.as_slice())?;
        uuid_table.insert(node.uuid.as_slice(), node.key.as_str())?;
    }
    txn.commit()?;
    Ok(())
}
/// Delete a node from the redb indices: removes both the key entry and
/// the uuid → key mapping. (Earlier wording claimed the uuid mapping
/// was kept; the code below removes it.)
pub fn delete_node(db: &Database, key: &str, uuid: &[u8; 16]) -> Result<()> {
let txn = db.begin_write()?;
{
let mut nodes_table = txn.open_table(NODES)?;
let mut uuid_table = txn.open_table(UUID_TO_KEY)?;
nodes_table.remove(key)?;
uuid_table.remove(uuid.as_slice())?;
}
txn.commit()?;
Ok(())
}

View file

@ -0,0 +1,104 @@
// redb index tables
//
// capnp logs are source of truth; redb provides indexed access.
// Tables:
// nodes: key → offset in capnp log (u64)
// uuid_to_key: [u8;16] → key
//
// To read a node: lookup offset in redb, seek in capnp file, deserialize.
use anyhow::{Context, Result};
use redb::{Database, ReadableDatabase, ReadableTable, ReadableTableMetadata, TableDefinition};
use std::path::Path;
// Table definitions - nodes maps key to byte offset in capnp log
pub const NODES: TableDefinition<&str, u64> = TableDefinition::new("nodes");
pub const UUID_TO_KEY: TableDefinition<&[u8], &str> = TableDefinition::new("uuid_to_key");
/// Open or create the redb database, ensuring all tables exist.
pub fn open_db(path: &Path) -> Result<Database> {
    let database = Database::create(path)
        .with_context(|| format!("create redb {}", path.display()))?;
    // Touching each table inside one write transaction creates any
    // that are missing, so a fresh file comes up fully initialized.
    let txn = database.begin_write()?;
    {
        let _ = txn.open_table(NODES)?;
        let _ = txn.open_table(UUID_TO_KEY)?;
    }
    txn.commit()?;
    Ok(database)
}
/// Record a node's location in the index.
pub fn index_node(db: &Database, key: &str, offset: u64, uuid: &[u8; 16]) -> Result<()> {
    let txn = db.begin_write()?;
    {
        let mut by_key = txn.open_table(NODES)?;
        let mut by_uuid = txn.open_table(UUID_TO_KEY)?;
        by_key.insert(key, offset)?;
        by_uuid.insert(uuid.as_slice(), key)?;
    }
    txn.commit()?;
    Ok(())
}
/// Get offset for a node by key.
pub fn get_offset(db: &Database, key: &str) -> Result<Option<u64>> {
    let txn = db.begin_read()?;
    let table = txn.open_table(NODES)?;
    let entry = table.get(key)?;
    Ok(entry.map(|guard| guard.value()))
}
/// Check if a key exists in the index.
pub fn contains_key(db: &Database, key: &str) -> Result<bool> {
let txn = db.begin_read()?;
let table = txn.open_table(NODES)?;
Ok(table.get(key)?.is_some())
}
/// Get key by uuid from redb.
pub fn get_key_by_uuid(db: &Database, uuid: &[u8; 16]) -> Result<Option<String>> {
    let txn = db.begin_read()?;
    let table = txn.open_table(UUID_TO_KEY)?;
    // Map the access guard straight to an owned String.
    Ok(table.get(uuid.as_slice())?.map(|k| k.value().to_string()))
}
/// Remove a node from the index.
pub fn remove_node(db: &Database, key: &str, uuid: &[u8; 16]) -> Result<()> {
    let txn = db.begin_write()?;
    {
        let mut by_key = txn.open_table(NODES)?;
        let mut by_uuid = txn.open_table(UUID_TO_KEY)?;
        by_key.remove(key)?;
        by_uuid.remove(uuid.as_slice())?;
    }
    txn.commit()?;
    Ok(())
}
/// Count nodes in the index.
pub fn node_count(db: &Database) -> Result<u64> {
    let txn = db.begin_read()?;
    let nodes = txn.open_table(NODES)?;
    let count = nodes.len()?;
    Ok(count)
}
/// Collect all keys from the index.
///
/// Iterates the NODES table and returns every key, propagating any
/// storage error encountered mid-iteration (fallible collect
/// short-circuits on the first `Err`).
pub fn all_keys(db: &Database) -> Result<Vec<String>> {
    let txn = db.begin_read()?;
    let table = txn.open_table(NODES)?;
    table.iter()?
        .map(|entry| {
            let (key, _) = entry?;
            Ok(key.value().to_string())
        })
        .collect()
}

View file

@ -7,17 +7,17 @@
// redb provides indexed access; Store struct holds in-memory state.
//
// Module layout:
// types.rs — Node, Relation, enums, capnp macros, path helpers
// view.rs — StoreView trait for read-only access
// persist.rs — load, replay, append (capnp IO)
// ops.rs — mutations (upsert, delete, decay, cap_degree, etc.)
// mod.rs — re-exports, key resolution, ingestion, rendering
// types.rs — Node, Relation, enums, capnp macros, path helpers
// index.rs — redb index operations
// capnp.rs — capnp log IO (load, replay, append, fsck)
// ops.rs — mutations (upsert, delete, rename, etc.)
// view.rs — StoreView trait for read-only access
mod types;
mod view;
mod persist;
mod index;
mod capnp;
mod ops;
pub mod db;
mod view;
// Re-export everything callers need
pub use types::{
@ -27,7 +27,7 @@ pub use types::{
new_node, new_relation,
};
pub use view::StoreView;
pub use persist::fsck;
pub use capnp::fsck;
pub use ops::current_provenance;
use crate::graph::{self, Graph};

View file

@ -2,7 +2,7 @@
//
// CRUD (upsert, delete), maintenance (decay, cap_degree), and graph metrics.
use super::{db, types::*};
use super::{index, types::*};
use anyhow::{anyhow, bail, Result};
use std::collections::{HashMap, HashSet};
@ -15,7 +15,7 @@ pub fn current_provenance() -> String {
}
impl Store {
/// Add or update a node (appends to log + updates cache + redb).
/// Add or update a node (appends to log + updates index).
/// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs.
pub fn upsert_node(&mut self, mut node: Node) -> Result<()> {
let _lock = StoreLock::acquire()?;
@ -25,9 +25,9 @@ impl Store {
node.uuid = existing.uuid;
node.version = existing.version + 1;
}
self.append_nodes_unlocked(&[node.clone()])?;
let offset = self.append_nodes_unlocked(&[node.clone()])?;
if let Some(ref database) = self.db {
db::upsert_node(database, &node)?;
index::index_node(database, &node.key, offset, &node.uuid)?;
}
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node);
@ -77,18 +77,18 @@ impl Store {
node.provenance = provenance.to_string();
node.timestamp = now_epoch();
node.version += 1;
self.append_nodes_unlocked(std::slice::from_ref(&node))?;
let offset = self.append_nodes_unlocked(std::slice::from_ref(&node))?;
if let Some(ref database) = self.db {
db::upsert_node(database, &node)?;
index::index_node(database, &node.key, offset, &node.uuid)?;
}
self.nodes.insert(key.to_string(), node);
Ok("updated")
} else {
let mut node = new_node(key, content);
node.provenance = provenance.to_string();
self.append_nodes_unlocked(std::slice::from_ref(&node))?;
let offset = self.append_nodes_unlocked(std::slice::from_ref(&node))?;
if let Some(ref database) = self.db {
db::upsert_node(database, &node)?;
index::index_node(database, &node.key, offset, &node.uuid)?;
}
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(key.to_string(), node);
@ -114,7 +114,7 @@ impl Store {
deleted.timestamp = now_epoch();
self.append_nodes_unlocked(std::slice::from_ref(&deleted))?;
if let Some(ref database) = self.db {
db::delete_node(database, key, &uuid)?;
index::remove_node(database, key, &uuid)?;
}
self.nodes.remove(key);
Ok(())
@ -172,15 +172,15 @@ impl Store {
.collect();
// Persist under single lock
self.append_nodes_unlocked(&[renamed.clone(), tombstone.clone()])?;
let offset = self.append_nodes_unlocked(&[renamed.clone(), tombstone.clone()])?;
if !updated_rels.is_empty() {
self.append_relations_unlocked(&updated_rels)?;
}
// Update redb: delete old key, insert renamed
// Update index: remove old key, add renamed
if let Some(ref database) = self.db {
db::delete_node(database, old_key, &tombstone.uuid)?;
db::upsert_node(database, &renamed)?;
index::remove_node(database, old_key, &tombstone.uuid)?;
index::index_node(database, new_key, offset, &renamed.uuid)?;
}
// Update in-memory cache