graph: use index for bulk reads, skip capnp deserialization

- Add all_keys() to StoreView, use in build_adjacency instead of
  for_each_node (which was ignoring content/weight anyway)
- Add all_key_uuid_pairs() for single-pass uuid mapping
- Extend KEY_TO_UUID to store [uuid:16][node_type:1][timestamp:8]
- for_each_node_meta now reads from index, no capnp needed
- Add NodeType::from_u8() for unpacking

Graph health: 7s → 2s (3.5x faster)

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-13 22:11:50 -04:00
commit faad14dc95
6 changed files with 103 additions and 40 deletions

View file

@ -21,6 +21,7 @@ use std::path::Path;
// Node tables
// NODES: node key → u64 offset (index_node inserts `key`/`offset` here).
pub const NODES: TableDefinition<&str, u64> = TableDefinition::new("nodes");
// KEY_TO_UUID: key → [uuid:16][node_type:1][timestamp:8] = 25 bytes
// (unpack_node_meta also accepts the older value layout that holds only the 16-byte uuid).
pub const KEY_TO_UUID: TableDefinition<&str, &[u8]> = TableDefinition::new("key_to_uuid");
// UUID_OFFSETS: uuid bytes → u64 offset; a multimap, so one uuid may map to several offsets.
pub const UUID_OFFSETS: MultimapTableDefinition<&[u8], u64> = MultimapTableDefinition::new("uuid_offsets");
// NODES_BY_PROVENANCE: string → string multimap; presumably provenance → node key — TODO confirm against the writer.
pub const NODES_BY_PROVENANCE: MultimapTableDefinition<&str, &str> = MultimapTableDefinition::new("nodes_by_provenance");
@ -54,14 +55,41 @@ pub fn open_db(path: &Path) -> Result<Database> {
Ok(db)
}
/// Serialize node metadata into the fixed 25-byte KEY_TO_UUID value.
///
/// Layout: bytes 0..16 hold `uuid`, byte 16 holds `node_type`, and bytes
/// 17..25 hold `timestamp` as a big-endian `i64`.
fn pack_node_meta(uuid: &[u8; 16], node_type: u8, timestamp: i64) -> [u8; 25] {
    let mut packed = [0u8; 25];
    {
        // Split once so each field is written relative to its own region.
        let (uuid_part, meta_part) = packed.split_at_mut(16);
        uuid_part.copy_from_slice(uuid);
        meta_part[0] = node_type;
        meta_part[1..].copy_from_slice(&timestamp.to_be_bytes());
    }
    packed
}
/// Decode a KEY_TO_UUID value into `(uuid, node_type, timestamp)`.
///
/// Two layouts are understood:
/// - new: `[uuid:16][node_type:1][timestamp:8]` (25 bytes or more; the
///   timestamp is a big-endian `i64` and any bytes past 25 are ignored);
/// - old: anything shorter than 25 bytes is treated as a bare uuid, and
///   `node_type` and `timestamp` both default to 0.
///
/// # Panics
///
/// Panics if `data` is shorter than 16 bytes.
pub fn unpack_node_meta(data: &[u8]) -> ([u8; 16], u8, i64) {
    // `split_at` panics when fewer than 16 bytes are available.
    let (uuid_bytes, meta_bytes) = data.split_at(16);
    let mut uuid = [0u8; 16];
    uuid.copy_from_slice(uuid_bytes);

    // Old format: just the uuid, so fall back to default metadata.
    if meta_bytes.len() < 9 {
        return (uuid, 0, 0);
    }

    let mut ts_bytes = [0u8; 8];
    ts_bytes.copy_from_slice(&meta_bytes[1..9]);
    (uuid, meta_bytes[0], i64::from_be_bytes(ts_bytes))
}
/// Record a node's location in the index.
///
/// Inside the caller's write transaction this opens three tables and writes
/// one entry to each: NODES (`key` → `offset`), KEY_TO_UUID (`key` → a value
/// that begins with the 16 uuid bytes) and the UUID_OFFSETS multimap
/// (`uuid` → `offset`). Any error from opening a table or inserting is
/// returned to the caller through `?`.
pub fn index_node(txn: &WriteTransaction, key: &str, offset: u64, uuid: &[u8; 16]) -> Result<()> {
pub fn index_node(txn: &WriteTransaction, key: &str, offset: u64, uuid: &[u8; 16], node_type: u8, timestamp: i64) -> Result<()> {
let mut nodes_table = txn.open_table(NODES)?;
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
let mut uuid_offsets = txn.open_multimap_table(UUID_OFFSETS)?;
nodes_table.insert(key, offset)?;
key_uuid_table.insert(key, uuid.as_slice())?;
// Store uuid, node type and timestamp together in the packed 25-byte layout.
let packed = pack_node_meta(uuid, node_type, timestamp);
key_uuid_table.insert(key, packed.as_slice())?;
// UUID_OFFSETS is a multimap, so this adds `offset` under the uuid key.
uuid_offsets.insert(uuid.as_slice(), offset)?;
Ok(())
}
@ -85,11 +113,9 @@ pub fn get_uuid_for_key(db: &Database, key: &str) -> Result<Option<[u8; 16]>> {
let txn = db.begin_read()?;
let table = txn.open_table(KEY_TO_UUID)?;
match table.get(key)? {
Some(uuid) => {
let slice = uuid.value();
let mut arr = [0u8; 16];
arr.copy_from_slice(slice);
Ok(Some(arr))
Some(data) => {
let (uuid, _, _) = unpack_node_meta(data.value());
Ok(Some(uuid))
}
None => Ok(None),
}
@ -131,6 +157,19 @@ pub fn all_keys(db: &Database) -> Result<Vec<String>> {
Ok(keys)
}
/// Read every KEY_TO_UUID entry in one pass over the table.
///
/// Returns one `(key, uuid, node_type, timestamp)` tuple per entry, in the
/// order the table iterator yields them. Values are decoded with
/// `unpack_node_meta`. The first error from opening the transaction or
/// table, or from reading an entry, aborts the scan and is returned.
pub fn all_key_uuid_pairs(db: &Database) -> Result<Vec<(String, [u8; 16], u8, i64)>> {
    let read_txn = db.begin_read()?;
    let key_to_uuid = read_txn.open_table(KEY_TO_UUID)?;
    let pairs = key_to_uuid
        .iter()?
        .map(|row| -> Result<(String, [u8; 16], u8, i64)> {
            let (key, packed) = row?;
            let (uuid, node_type, timestamp) = unpack_node_meta(packed.value());
            Ok((key.value().to_string(), uuid, node_type, timestamp))
        })
        .collect::<Result<Vec<_>>>()?;
    Ok(pairs)
}
// ── Relation index operations ──────────────────────────────────────
//
// RELS value format: [other_uuid:16][strength:4][rel_type:1][is_outgoing:1] = 22 bytes