store: read nodes via index instead of HashMap

- Add get_node() and contains_key() methods that read via redb index
- Migrate all store/ reads to use index lookup
- Remove HashMap cache updates from mutations (write-through to capnp+index only)
- Remove replay_nodes() - load no longer builds HashMap
- Update db_is_healthy to validate by spot-checking offsets
- Fix set_weight bug: now persists weight changes to capnp

The Store.nodes HashMap still exists for code outside the store/ module,
but store/ itself no longer uses it; a sketch of the new read path follows.
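
For context, a minimal sketch of what the index-backed read path can look like. The helper name index::offset_for_key() is hypothetical (the diff only shows index::contains_key() and the index::NODES table), and read_node_at_offset() is the function added below:

use anyhow::{Context, Result};

// Sketch only, not the committed implementation. Assumes a hypothetical
// index::offset_for_key() helper that maps a node key to its byte offset
// in the capnp node log.
impl Store {
    pub fn get_node(&self, key: &str) -> Result<Option<Node>> {
        let db = self.db.as_ref().context("redb index not open")?;
        match index::offset_for_key(db, key)? {
            // The index stores a byte offset into the capnp log; read the
            // node directly from the log rather than from a HashMap cache.
            Some(offset) => Ok(Some(read_node_at_offset(offset)?)),
            None => Ok(None),
        }
    }
}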

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
commit 7eb86656d4
parent ba53597cf2
Author: Kent Overstreet
Date:   2026-04-13 19:31:28 -04:00
4 changed files with 167 additions and 112 deletions


@@ -206,39 +206,79 @@ impl Relation {
     }
 }
+// ---------------------------------------------------------------------------
+// Direct node access
+// ---------------------------------------------------------------------------
+/// Read a single node at the given offset in the capnp log.
+/// The offset must point to a valid message containing the node.
+pub fn read_node_at_offset(offset: u64) -> Result<Node> {
+    let path = nodes_path();
+    let mut file = fs::File::open(&path)
+        .with_context(|| format!("open {}", path.display()))?;
+    use std::io::{Seek, SeekFrom};
+    file.seek(SeekFrom::Start(offset))?;
+    let mut reader = BufReader::new(file);
+    let msg = serialize::read_message(&mut reader, message::ReaderOptions::new())
+        .with_context(|| format!("read message at offset {}", offset))?;
+    let log = msg.get_root::<memory_capnp::node_log::Reader>()
+        .with_context(|| "read node log")?;
+    let nodes = log.get_nodes()
+        .with_context(|| "get nodes")?;
+    // A message at this offset should have exactly one node (from upsert);
+    // batch operations like rename may write several.
+    if nodes.is_empty() {
+        anyhow::bail!("no nodes in message at offset {}", offset);
+    }
+    // Return the first non-deleted node, or the first one if all are deleted
+    for node_reader in nodes.iter() {
+        let node = Node::from_capnp_migrate(node_reader)?;
+        if !node.deleted {
+            return Ok(node);
+        }
+    }
+    // All nodes in this message are deleted - shouldn't happen if index is correct
+    Node::from_capnp_migrate(nodes.get(0))
+}
 // ---------------------------------------------------------------------------
 // Store persistence methods
 // ---------------------------------------------------------------------------
 impl Store {
-    /// Load store by replaying capnp logs, then open/verify redb indices.
+    /// Load store by opening the redb index and replaying relations.
     pub fn load() -> Result<Store> {
         let nodes_p = nodes_path();
         let rels_p = relations_path();
         let mut store = Store::default();
-        if nodes_p.exists() {
-            store.replay_nodes(&nodes_p)?;
-        }
+        // Open redb index first (rebuilds from capnp if needed)
+        let db_p = db_path();
+        store.db = Some(store.open_or_rebuild_db(&db_p)?);
         // Replay relations
         if rels_p.exists() {
             store.replay_relations(&rels_p)?;
         }
-        // Record log sizes after replay
+        // Record log sizes
         store.loaded_nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
         store.loaded_rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
         // Drop edges referencing deleted/missing nodes
+        let db = store.db.as_ref().unwrap();
         store.relations.retain(|r|
-            store.nodes.contains_key(&r.source_key) &&
-            store.nodes.contains_key(&r.target_key)
+            index::contains_key(db, &r.source_key).unwrap_or(false) &&
+            index::contains_key(db, &r.target_key).unwrap_or(false)
         );
-        // Open redb and verify/rebuild indices
-        let db_p = db_path();
-        store.db = Some(store.open_or_rebuild_db(&db_p)?);
         Ok(store)
     }
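
For reference, index::contains_key() as used in load() above can be a thin wrapper over the redb NODES table. A sketch, assuming NODES maps &str keys to u64 byte offsets (the table name string below is a guess):

use anyhow::Result;
use redb::{Database, ReadableDatabase, ReadableTable, TableDefinition};

// Assumed definition: the diff shows NODES mapping key -> offset (u64);
// the actual table name string may differ.
const NODES: TableDefinition<&str, u64> = TableDefinition::new("nodes");

/// Presence check against the redb index instead of the in-memory HashMap.
pub fn contains_key(db: &Database, key: &str) -> Result<bool> {
    let txn = db.begin_read()?;
    let table = txn.open_table(NODES)?;
    Ok(table.get(key)?.is_some())
}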
@@ -263,80 +303,38 @@ impl Store {
         rebuild_index(path, &nodes_path())
     }
-    /// Check if redb indices match in-memory state.
+    /// Check if the redb index is healthy by verifying that some offsets are valid.
     fn db_is_healthy(&self, database: &redb::Database) -> Result<bool> {
-        use redb::ReadableDatabase;
+        use redb::{ReadableDatabase, ReadableTable};
         let txn = database.begin_read()?;
-        // Quick check: node count should match
         let nodes_table = txn.open_table(index::NODES)?;
-        let db_count = nodes_table.len()?;
-        if db_count != self.nodes.len() as u64 {
-            return Ok(false);
+        // Check that we can read the table and it has entries
+        if nodes_table.len()? == 0 {
+            // Empty database - might be stale or new
+            let capnp_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
+            return Ok(capnp_size == 0); // healthy only if capnp is also empty
         }
-        // Spot check: verify a few random nodes exist with matching keys
-        // (full verification would be too slow)
-        for (i, key) in self.nodes.keys().enumerate() {
-            if i >= 10 { break; } // check first 10
-            if nodes_table.get(key.as_str())?.is_none() {
+        // Spot check: verify a few offsets point to valid messages
+        let mut checked = 0;
+        for entry in nodes_table.iter()? {
+            if checked >= 5 { break; }
+            let (key, offset) = entry?;
+            let offset = offset.value();
+            // Try to read the node at this offset
+            if read_node_at_offset(offset).is_err() {
                 return Ok(false);
             }
+            checked += 1;
+            let _ = key; // silence unused warning
         }
         Ok(true)
     }
-    /// Replay node log, keeping latest version per UUID.
-    /// Tracks all UUIDs seen per key to detect duplicates.
-    fn replay_nodes(&mut self, path: &Path) -> Result<()> {
-        let file = fs::File::open(path)
-            .with_context(|| format!("open {}", path.display()))?;
-        let mut reader = BufReader::new(file);
-        // Track all non-deleted UUIDs per key to detect duplicates
-        let mut key_uuids: HashMap<String, Vec<[u8; 16]>> = HashMap::new();
-        while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
-            let log = msg.get_root::<memory_capnp::node_log::Reader>()
-                .with_context(|| format!("read node log"))?;
-            for node_reader in log.get_nodes()
-                .with_context(|| format!("get nodes"))? {
-                let node = Node::from_capnp_migrate(node_reader)?;
-                let existing_version = self.nodes.get(&node.key)
-                    .map(|n| n.version)
-                    .unwrap_or(0);
-                if node.version >= existing_version {
-                    if node.deleted {
-                        self.nodes.remove(&node.key);
-                        self.uuid_to_key.remove(&node.uuid);
-                        if let Some(uuids) = key_uuids.get_mut(&node.key) {
-                            uuids.retain(|u| *u != node.uuid);
-                        }
-                    } else {
-                        self.uuid_to_key.insert(node.uuid, node.key.clone());
-                        self.nodes.insert(node.key.clone(), node.clone());
-                        let uuids = key_uuids.entry(node.key).or_default();
-                        if !uuids.contains(&node.uuid) {
-                            uuids.push(node.uuid);
-                        }
-                    }
-                }
-            }
-        }
-        // Report duplicate keys
-        for (key, uuids) in &key_uuids {
-            if uuids.len() > 1 {
-                dbglog!("WARNING: key '{}' has {} UUIDs (duplicate nodes)", key, uuids.len());
-            }
-        }
-        Ok(())
-    }
     /// Replay relation log, keeping latest version per UUID
     fn replay_relations(&mut self, path: &Path) -> Result<()> {
         let file = fs::File::open(path)
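
For completeness, the open_or_rebuild_db() helper that load() calls is not shown in these hunks; only its rebuild_index() tail is visible at the top of the second hunk. An assumed sketch of its shape:

impl Store {
    /// Assumed shape, inferred from the rebuild_index(...) tail call and the
    /// db_is_healthy() check above; the real method may handle more cases.
    fn open_or_rebuild_db(&self, path: &std::path::Path) -> anyhow::Result<redb::Database> {
        if let Ok(db) = redb::Database::open(path) {
            // Treat a health-check error the same as "unhealthy".
            if self.db_is_healthy(&db).unwrap_or(false) {
                return Ok(db);
            }
        } // db is dropped here, releasing the file before rebuild
        // Missing, stale, or corrupt index: rebuild it from the capnp node log.
        rebuild_index(path, &nodes_path())
    }
}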