store: add weight to index, index-only key matching
- KEY_TO_UUID now stores weight (30 bytes: uuid+type+ts+deleted+weight)
- UUID_OFFSETS changed to composite key for O(log n) max-offset lookup
- Add NODES_BY_TYPE index for efficient type+date range queries
- Add for_each_key_weight() to StoreView for index-only iteration
- match_seeds uses index-only path when content not needed
- Fix transaction consistency in ops (single txn for related updates)
- rebuild() now records all uuid→offset mappings for version history
- Backwards compatible: old index formats decoded with default weight

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
fc978e2f2e
commit
ba4e01b6f3
9 changed files with 774 additions and 500 deletions
|
|
@ -3,31 +3,35 @@
|
|||
// capnp logs are source of truth; redb provides indexed access.
|
||||
//
|
||||
// Node tables:
|
||||
// NODES: key → offset (current version)
|
||||
// KEY_TO_UUID: key → uuid
|
||||
// UUID_OFFSETS: uuid → offsets (multimap, all versions)
|
||||
// NODES_BY_PROVENANCE: provenance → keys (multimap)
|
||||
// NODES_BY_TYPE: [type_byte][timestamp_be] → key (for range queries by type+date)
|
||||
// KEY_TO_UUID: key → (uuid, node_type, timestamp, deleted)
|
||||
// Keeps entries for deleted nodes to enable index-based restore.
|
||||
// UUID_OFFSETS: [uuid:16][offset:8 BE] → () composite key for O(log n) max-offset lookup
|
||||
// NODES_BY_PROVENANCE: provenance → (timestamp, uuid) (multimap)
|
||||
//
|
||||
// Relation tables:
|
||||
// RELS: node_uuid → (other_uuid, strength, rel_type, is_outgoing) packed (multimap)
|
||||
// Each relation stored twice — once per endpoint with direction bit.
|
||||
//
|
||||
// To get key from uuid: UUID_OFFSETS → read_node_at_offset() → node.key
|
||||
// To get current offset: KEY_TO_UUID[key] → uuid → max(UUID_OFFSETS[uuid][*])
|
||||
// To get key from uuid: read_node_at_offset(max_offset) → node.key
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use redb::{Database, MultimapTableDefinition, ReadableDatabase, ReadableTable, TableDefinition, WriteTransaction};
|
||||
use redb::{Database, MultimapTableDefinition, ReadableDatabase, ReadableTable, ReadableTableMetadata, TableDefinition, WriteTransaction};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use super::types::Node;
|
||||
use super::capnp::read_node_at_offset;
|
||||
|
||||
// Node tables
|
||||
pub const NODES: TableDefinition<&str, u64> = TableDefinition::new("nodes");
|
||||
// KEY_TO_UUID: key → [uuid:16][node_type:1][timestamp:8] = 25 bytes
|
||||
// KEY_TO_UUID: key → [uuid:16][node_type:1][timestamp:8][deleted:1][weight:4] = 30 bytes
|
||||
pub const KEY_TO_UUID: TableDefinition<&str, &[u8]> = TableDefinition::new("key_to_uuid");
|
||||
pub const UUID_OFFSETS: MultimapTableDefinition<&[u8], u64> = MultimapTableDefinition::new("uuid_offsets");
|
||||
// NODES_BY_PROVENANCE: provenance → [timestamp:8 BE][key] (sorted by timestamp desc via negated ts)
|
||||
// UUID_OFFSETS: [uuid:16][offset:8 BE] → () — offset in key for range scans
|
||||
pub const UUID_OFFSETS: TableDefinition<&[u8], ()> = TableDefinition::new("uuid_offsets");
|
||||
// NODES_BY_PROVENANCE: provenance → [negated_timestamp:8][uuid:16] = 24 bytes (sorted by timestamp desc)
|
||||
pub const NODES_BY_PROVENANCE: MultimapTableDefinition<&str, &[u8]> = MultimapTableDefinition::new("nodes_by_provenance");
|
||||
// Composite key: [node_type: u8][timestamp: i64 BE] for range queries
|
||||
pub const NODES_BY_TYPE: TableDefinition<&[u8], &str> = TableDefinition::new("nodes_by_type");
|
||||
// NODES_BY_TYPE: [type:1][neg_timestamp:8] → uuid (for type+date range queries, newest first)
|
||||
pub const NODES_BY_TYPE: TableDefinition<&[u8], &[u8]> = TableDefinition::new("nodes_by_type");
|
||||
|
||||
// Relations table - each relation stored twice (once per endpoint)
|
||||
// Value: (other_uuid: [u8;16], strength: f32, rel_type: u8, is_outgoing: bool)
|
||||
|
|
@ -43,9 +47,8 @@ pub fn open_db(path: &Path) -> Result<Database> {
|
|||
let txn = db.begin_write()?;
|
||||
{
|
||||
// Node tables
|
||||
let _ = txn.open_table(NODES)?;
|
||||
let _ = txn.open_table(KEY_TO_UUID)?;
|
||||
let _ = txn.open_multimap_table(UUID_OFFSETS)?;
|
||||
let _ = txn.open_table(UUID_OFFSETS)?;
|
||||
let _ = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
||||
let _ = txn.open_table(NODES_BY_TYPE)?;
|
||||
// Relations
|
||||
|
|
@ -56,150 +59,297 @@ pub fn open_db(path: &Path) -> Result<Database> {
|
|||
Ok(db)
|
||||
}
|
||||
|
||||
/// Pack node metadata: [uuid:16][node_type:1][timestamp:8] = 25 bytes
|
||||
fn pack_node_meta(uuid: &[u8; 16], node_type: u8, timestamp: i64) -> [u8; 25] {
|
||||
let mut buf = [0u8; 25];
|
||||
/// Pack node metadata: [uuid:16][node_type:1][timestamp:8][deleted:1][weight:4] = 30 bytes
|
||||
fn pack_node_meta(uuid: &[u8; 16], node_type: u8, timestamp: i64, deleted: bool, weight: f32) -> [u8; 30] {
|
||||
let mut buf = [0u8; 30];
|
||||
buf[0..16].copy_from_slice(uuid);
|
||||
buf[16] = node_type;
|
||||
buf[17..25].copy_from_slice(&timestamp.to_be_bytes());
|
||||
buf[25] = if deleted { 1 } else { 0 };
|
||||
buf[26..30].copy_from_slice(&weight.to_be_bytes());
|
||||
buf
|
||||
}
|
||||
|
||||
/// Unpack node metadata. Handles both old (16-byte) and new (25-byte) formats.
|
||||
pub fn unpack_node_meta(data: &[u8]) -> ([u8; 16], u8, i64) {
|
||||
/// Unpack node metadata. Returns (uuid, node_type, timestamp, deleted, weight).
|
||||
/// Handles old formats (16-byte, 25-byte, 26-byte) and new (30-byte).
|
||||
pub fn unpack_node_meta(data: &[u8]) -> ([u8; 16], u8, i64, bool, f32) {
|
||||
let mut uuid = [0u8; 16];
|
||||
uuid.copy_from_slice(&data[0..16]);
|
||||
if data.len() >= 25 {
|
||||
if data.len() >= 30 {
|
||||
let node_type = data[16];
|
||||
let timestamp = i64::from_be_bytes([
|
||||
data[17], data[18], data[19], data[20],
|
||||
data[21], data[22], data[23], data[24],
|
||||
]);
|
||||
(uuid, node_type, timestamp)
|
||||
let deleted = data[25] != 0;
|
||||
let weight = f32::from_be_bytes([data[26], data[27], data[28], data[29]]);
|
||||
(uuid, node_type, timestamp, deleted, weight)
|
||||
} else if data.len() >= 26 {
|
||||
let node_type = data[16];
|
||||
let timestamp = i64::from_be_bytes([
|
||||
data[17], data[18], data[19], data[20],
|
||||
data[21], data[22], data[23], data[24],
|
||||
]);
|
||||
let deleted = data[25] != 0;
|
||||
(uuid, node_type, timestamp, deleted, 0.5) // default weight
|
||||
} else if data.len() >= 25 {
|
||||
let node_type = data[16];
|
||||
let timestamp = i64::from_be_bytes([
|
||||
data[17], data[18], data[19], data[20],
|
||||
data[21], data[22], data[23], data[24],
|
||||
]);
|
||||
(uuid, node_type, timestamp, false, 0.5)
|
||||
} else {
|
||||
// Old format: just uuid, default metadata
|
||||
(uuid, 0, 0)
|
||||
(uuid, 0, 0, false, 0.5)
|
||||
}
|
||||
}
|
||||
|
||||
/// Pack provenance value: [negated_timestamp:8][key] for descending sort
|
||||
fn pack_provenance_value(timestamp: i64, key: &str) -> Vec<u8> {
|
||||
/// Pack provenance value: [negated_timestamp:8][uuid:16] = 24 bytes for descending sort
|
||||
fn pack_provenance_value(timestamp: i64, uuid: &[u8; 16]) -> [u8; 24] {
|
||||
let mut buf = [0u8; 24];
|
||||
let neg_ts = (!timestamp).to_be_bytes(); // negate for descending order
|
||||
let mut buf = Vec::with_capacity(8 + key.len());
|
||||
buf.extend_from_slice(&neg_ts);
|
||||
buf.extend_from_slice(key.as_bytes());
|
||||
buf[0..8].copy_from_slice(&neg_ts);
|
||||
buf[8..24].copy_from_slice(uuid);
|
||||
buf
|
||||
}
|
||||
|
||||
/// Unpack provenance value: returns (timestamp, key)
|
||||
fn unpack_provenance_value(data: &[u8]) -> (i64, String) {
|
||||
/// Unpack provenance value: returns (timestamp, uuid)
|
||||
pub fn unpack_provenance_value(data: &[u8]) -> (i64, [u8; 16]) {
|
||||
let neg_ts = i64::from_be_bytes([data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]]);
|
||||
let timestamp = !neg_ts;
|
||||
let key = String::from_utf8_lossy(&data[8..]).to_string();
|
||||
(timestamp, key)
|
||||
let mut uuid = [0u8; 16];
|
||||
uuid.copy_from_slice(&data[8..24]);
|
||||
(timestamp, uuid)
|
||||
}
|
||||
|
||||
/// Record a node's location in the index.
|
||||
pub fn index_node(txn: &WriteTransaction, key: &str, offset: u64, uuid: &[u8; 16], node_type: u8, timestamp: i64, provenance: &str) -> Result<()> {
|
||||
let mut nodes_table = txn.open_table(NODES)?;
|
||||
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
||||
let mut uuid_offsets = txn.open_multimap_table(UUID_OFFSETS)?;
|
||||
let mut by_provenance = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
||||
/// Builds the 24-byte composite key stored in `UUID_OFFSETS`:
/// `[uuid:16][offset:8 BE]`.
///
/// The offset is written big-endian, so for a fixed UUID prefix the
/// byte-wise order of the keys equals the numeric order of the offsets.
fn pack_uuid_offset(uuid: &[u8; 16], offset: u64) -> [u8; 24] {
    let mut key = [0u8; 24];
    let (uuid_part, offset_part) = key.split_at_mut(16);
    uuid_part.copy_from_slice(uuid);
    offset_part.copy_from_slice(&offset.to_be_bytes());
    key
}
|
||||
|
||||
nodes_table.insert(key, offset)?;
|
||||
let packed = pack_node_meta(uuid, node_type, timestamp);
|
||||
/// Builds the 9-byte `NODES_BY_TYPE` key: `[type:1][!timestamp:8 BE]`.
///
/// The timestamp is bitwise-inverted before being written big-endian, so
/// among non-negative timestamps of the same type a newer timestamp yields
/// a byte-wise smaller key (newest first). Negative timestamps invert to
/// values with a clear top bit and therefore sort before all non-negative
/// ones.
fn pack_type_key(node_type: u8, timestamp: i64) -> [u8; 9] {
    let mut key = [0u8; 9];
    key[0] = node_type;
    key[1..].copy_from_slice(&(!timestamp).to_be_bytes());
    key
}
|
||||
|
||||
/// Splits a `UUID_OFFSETS` composite key back into `(uuid, offset)`.
///
/// Reads the first 16 bytes as the UUID and bytes 16..24 as a big-endian
/// `u64` offset; any bytes past index 23 are ignored.
///
/// # Panics
/// Panics if `key` is shorter than 24 bytes.
fn unpack_uuid_offset_key(key: &[u8]) -> ([u8; 16], u64) {
    let mut uuid = [0u8; 16];
    uuid.copy_from_slice(&key[..16]);

    let mut offset_be = [0u8; 8];
    offset_be.copy_from_slice(&key[16..24]);

    (uuid, u64::from_be_bytes(offset_be))
}
|
||||
|
||||
/// Record a node's location in the index (for live nodes).
|
||||
pub fn index_node(txn: &WriteTransaction, key: &str, offset: u64, uuid: &[u8; 16], node_type: u8, timestamp: i64, provenance: &str, weight: f32) -> Result<()> {
|
||||
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
||||
let mut uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
||||
let mut by_provenance = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
||||
let mut by_type = txn.open_table(NODES_BY_TYPE)?;
|
||||
|
||||
let packed = pack_node_meta(uuid, node_type, timestamp, false, weight);
|
||||
key_uuid_table.insert(key, packed.as_slice())?;
|
||||
uuid_offsets.insert(uuid.as_slice(), offset)?;
|
||||
let prov_val = pack_provenance_value(timestamp, key);
|
||||
let uuid_offset_key = pack_uuid_offset(uuid, offset);
|
||||
uuid_offsets.insert(uuid_offset_key.as_slice(), ())?;
|
||||
let prov_val = pack_provenance_value(timestamp, uuid);
|
||||
by_provenance.insert(provenance, prov_val.as_slice())?;
|
||||
let type_key = pack_type_key(node_type, timestamp);
|
||||
by_type.insert(type_key.as_slice(), uuid.as_slice())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Record a uuid→offset mapping only (for deleted nodes - preserves version history).
|
||||
pub fn record_uuid_offset(txn: &WriteTransaction, uuid: &[u8; 16], offset: u64) -> Result<()> {
|
||||
let mut uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
||||
let uuid_offset_key = pack_uuid_offset(uuid, offset);
|
||||
uuid_offsets.insert(uuid_offset_key.as_slice(), ())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get max offset for a UUID from an already-opened table.
|
||||
/// Uses reverse range scan to find the highest offset (last key in range).
|
||||
fn max_offset_for_uuid_in_table(
|
||||
table: &redb::ReadOnlyTable<&[u8], ()>,
|
||||
uuid: &[u8; 16],
|
||||
) -> Result<Option<u64>> {
|
||||
let start = pack_uuid_offset(uuid, 0);
|
||||
let end = pack_uuid_offset(uuid, u64::MAX);
|
||||
|
||||
// Get last entry in range (highest offset)
|
||||
if let Some(entry) = table.range(start.as_slice()..=end.as_slice())?.next_back() {
|
||||
let (key, _) = entry?;
|
||||
let (_, offset) = unpack_uuid_offset_key(key.value());
|
||||
Ok(Some(offset))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get recent keys for a given provenance, sorted by timestamp descending.
|
||||
/// Resolves UUID → current key by reading node at latest offset.
|
||||
/// Single transaction for all index lookups.
|
||||
pub fn recent_by_provenance(db: &Database, provenance: &str, limit: usize) -> Result<Vec<(String, i64)>> {
|
||||
let txn = db.begin_read()?;
|
||||
let table = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
||||
let prov_table = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
||||
let uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
||||
|
||||
let mut results = Vec::new();
|
||||
for entry in table.get(provenance)? {
|
||||
for entry in prov_table.get(provenance)? {
|
||||
if results.len() >= limit { break; }
|
||||
let (timestamp, key) = unpack_provenance_value(entry?.value());
|
||||
results.push((key, timestamp));
|
||||
let (timestamp, uuid) = unpack_provenance_value(entry?.value());
|
||||
|
||||
if let Some(offset) = max_offset_for_uuid_in_table(&uuid_offsets, &uuid)? {
|
||||
if let Ok(node) = read_node_at_offset(offset) {
|
||||
results.push((node.key, timestamp));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Get offset for a node by key.
|
||||
/// Get UUIDs for nodes of a given type, sorted by timestamp descending (newest first).
|
||||
/// Optionally filter to timestamps >= after_ts.
|
||||
/// Returns up to `limit` UUIDs.
|
||||
pub fn nodes_by_type(db: &Database, node_type: u8, limit: usize, after_ts: Option<i64>) -> Result<Vec<[u8; 16]>> {
|
||||
let txn = db.begin_read()?;
|
||||
let by_type = txn.open_table(NODES_BY_TYPE)?;
|
||||
|
||||
// Range: [type][0x80..] to [type][0xFF..] for positive timestamps (newest first)
|
||||
// !i64::MAX = 0x8000... (far future, smallest), !0 = 0xFFFF... (epoch, largest)
|
||||
let start = pack_type_key(node_type, i64::MAX); // !MAX = 0x8000... = smallest
|
||||
let end = pack_type_key(node_type, 0); // !0 = 0xFFFF... = largest
|
||||
|
||||
let mut results = Vec::new();
|
||||
for entry in by_type.range(start.as_slice()..=end.as_slice())? {
|
||||
if results.len() >= limit { break; }
|
||||
let (key_bytes, uuid_bytes) = entry?;
|
||||
|
||||
// Decode timestamp from key to check after_ts filter
|
||||
let key = key_bytes.value();
|
||||
let neg_ts = i64::from_be_bytes([key[1], key[2], key[3], key[4], key[5], key[6], key[7], key[8]]);
|
||||
let timestamp = !neg_ts;
|
||||
|
||||
if let Some(after) = after_ts {
|
||||
if timestamp < after { continue; }
|
||||
}
|
||||
|
||||
let mut uuid = [0u8; 16];
|
||||
uuid.copy_from_slice(uuid_bytes.value());
|
||||
results.push(uuid);
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Get offset for a node by key (via KEY_TO_UUID → UUID_OFFSETS).
|
||||
/// Single transaction, returns the newest offset.
|
||||
pub fn get_offset(db: &Database, key: &str) -> Result<Option<u64>> {
|
||||
let txn = db.begin_read()?;
|
||||
let table = txn.open_table(NODES)?;
|
||||
Ok(table.get(key)?.map(|v| v.value()))
|
||||
let key_uuid = txn.open_table(KEY_TO_UUID)?;
|
||||
let uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
||||
|
||||
let uuid = match key_uuid.get(key)? {
|
||||
Some(data) => {
|
||||
let (uuid, _, _, deleted, _) = unpack_node_meta(data.value());
|
||||
if deleted { return Ok(None); }
|
||||
uuid
|
||||
}
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
max_offset_for_uuid_in_table(&uuid_offsets, &uuid)
|
||||
}
|
||||
|
||||
/// Check if a key exists in the index.
|
||||
/// Check if a key exists in the index (and is not deleted).
|
||||
pub fn contains_key(db: &Database, key: &str) -> Result<bool> {
|
||||
let txn = db.begin_read()?;
|
||||
let table = txn.open_table(NODES)?;
|
||||
Ok(table.get(key)?.is_some())
|
||||
let table = txn.open_table(KEY_TO_UUID)?;
|
||||
match table.get(key)? {
|
||||
Some(data) => {
|
||||
let (_, _, _, deleted, _) = unpack_node_meta(data.value());
|
||||
Ok(!deleted)
|
||||
}
|
||||
None => Ok(false),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a node's UUID from its key.
|
||||
/// Get a node's UUID from its key (returns UUID even for deleted nodes).
|
||||
pub fn get_uuid_for_key(db: &Database, key: &str) -> Result<Option<[u8; 16]>> {
|
||||
let txn = db.begin_read()?;
|
||||
let table = txn.open_table(KEY_TO_UUID)?;
|
||||
match table.get(key)? {
|
||||
Some(data) => {
|
||||
let (uuid, _, _) = unpack_node_meta(data.value());
|
||||
let (uuid, _, _, _, _) = unpack_node_meta(data.value());
|
||||
Ok(Some(uuid))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get all offsets for a UUID (all versions). Returns newest first.
|
||||
/// Get all offsets for a UUID (all versions). Returns newest (highest) first.
|
||||
pub fn get_offsets_for_uuid(db: &Database, uuid: &[u8; 16]) -> Result<Vec<u64>> {
|
||||
let txn = db.begin_read()?;
|
||||
let table = txn.open_multimap_table(UUID_OFFSETS)?;
|
||||
let table = txn.open_table(UUID_OFFSETS)?;
|
||||
|
||||
// Range scan: [uuid][0x00..] to [uuid][0xFF..]
|
||||
let start = pack_uuid_offset(uuid, 0);
|
||||
let end = pack_uuid_offset(uuid, u64::MAX);
|
||||
|
||||
let mut offsets = Vec::new();
|
||||
for entry in table.get(uuid.as_slice())? {
|
||||
offsets.push(entry?.value());
|
||||
for entry in table.range(start.as_slice()..=end.as_slice())? {
|
||||
let (key, _) = entry?;
|
||||
let (_, offset) = unpack_uuid_offset_key(key.value());
|
||||
offsets.push(offset);
|
||||
}
|
||||
// Sort descending so newest (highest offset) is first
|
||||
offsets.sort_by(|a, b| b.cmp(a));
|
||||
// Already sorted ascending by key; reverse for newest first
|
||||
offsets.reverse();
|
||||
Ok(offsets)
|
||||
}
|
||||
|
||||
/// Remove a node from the index (key mappings only; UUID history preserved).
|
||||
/// Mark a node as deleted in the index (key stays for history; UUID_OFFSETS preserved).
|
||||
pub fn remove_node(txn: &WriteTransaction, key: &str) -> Result<()> {
|
||||
let mut nodes_table = txn.open_table(NODES)?;
|
||||
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
||||
// Note: UUID_OFFSETS is not cleared - preserves version history
|
||||
|
||||
nodes_table.remove(key)?;
|
||||
key_uuid_table.remove(key)?;
|
||||
// Copy out data to avoid borrow conflict
|
||||
let meta = key_uuid_table.get(key)?.map(|data| {
|
||||
unpack_node_meta(data.value())
|
||||
});
|
||||
if let Some((uuid, node_type, timestamp, _, weight)) = meta {
|
||||
let packed = pack_node_meta(&uuid, node_type, timestamp, true, weight);
|
||||
key_uuid_table.insert(key, packed.as_slice())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Collect all keys from the index.
|
||||
/// Collect all keys from the index (excludes deleted nodes).
|
||||
pub fn all_keys(db: &Database) -> Result<Vec<String>> {
|
||||
let txn = db.begin_read()?;
|
||||
let table = txn.open_table(NODES)?;
|
||||
let table = txn.open_table(KEY_TO_UUID)?;
|
||||
let mut keys = Vec::new();
|
||||
for entry in table.iter()? {
|
||||
let (key, _) = entry?;
|
||||
keys.push(key.value().to_string());
|
||||
let (key, data) = entry?;
|
||||
let (_, _, _, deleted, _) = unpack_node_meta(data.value());
|
||||
if !deleted {
|
||||
keys.push(key.value().to_string());
|
||||
}
|
||||
}
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
/// Collect all (key, uuid, node_type, timestamp) in a single table scan.
|
||||
pub fn all_key_uuid_pairs(db: &Database) -> Result<Vec<(String, [u8; 16], u8, i64)>> {
|
||||
/// Collect all (key, uuid, node_type, timestamp, deleted, weight) in a single table scan.
|
||||
pub fn all_key_uuid_pairs(db: &Database) -> Result<Vec<(String, [u8; 16], u8, i64, bool, f32)>> {
|
||||
let txn = db.begin_read()?;
|
||||
let table = txn.open_table(KEY_TO_UUID)?;
|
||||
let mut pairs = Vec::new();
|
||||
for entry in table.iter()? {
|
||||
let (key, data) = entry?;
|
||||
let (uuid, node_type, timestamp) = unpack_node_meta(data.value());
|
||||
pairs.push((key.value().to_string(), uuid, node_type, timestamp));
|
||||
let (uuid, node_type, timestamp, deleted, weight) = unpack_node_meta(data.value());
|
||||
pairs.push((key.value().to_string(), uuid, node_type, timestamp, deleted, weight));
|
||||
}
|
||||
Ok(pairs)
|
||||
}
|
||||
|
|
@ -281,3 +431,234 @@ pub fn edges_for_node(db: &Database, node_uuid: &[u8; 16]) -> Result<Vec<([u8; 1
|
|||
}
|
||||
Ok(edges)
|
||||
}
|
||||
|
||||
// ── Index rebuild ──────────────────────────────────────────────────────
|
||||
|
||||
/// Rebuild the index from a sequence of (offset, Node) pairs.
|
||||
/// Records ALL uuid→offset mappings (for history), but only the latest version per key in KEY_TO_UUID.
|
||||
pub fn rebuild(db: &Database, nodes: Vec<(u64, Node)>) -> Result<()> {
|
||||
// Track latest (offset, node) per key - newest timestamp wins
|
||||
let mut latest: HashMap<String, (u64, Node)> = HashMap::new();
|
||||
// Track ALL uuid→offset mappings for history
|
||||
let mut all_offsets: Vec<([u8; 16], u64)> = Vec::new();
|
||||
|
||||
for (offset, node) in nodes {
|
||||
// Record every offset for history
|
||||
all_offsets.push((node.uuid, offset));
|
||||
|
||||
let dominated = latest.get(&node.key)
|
||||
.map(|(_, existing)| node.timestamp >= existing.timestamp)
|
||||
.unwrap_or(true);
|
||||
if dominated {
|
||||
latest.insert(node.key.clone(), (offset, node));
|
||||
}
|
||||
}
|
||||
|
||||
// Write to index
|
||||
let txn = db.begin_write()?;
|
||||
{
|
||||
// Record all uuid→offset mappings
|
||||
let mut uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
||||
for (uuid, offset) in &all_offsets {
|
||||
let key = pack_uuid_offset(uuid, *offset);
|
||||
uuid_offsets.insert(key.as_slice(), ())?;
|
||||
}
|
||||
drop(uuid_offsets);
|
||||
|
||||
// Record KEY_TO_UUID and NODES_BY_PROVENANCE for latest version of each key
|
||||
for (key, (_offset, node)) in &latest {
|
||||
if !node.deleted {
|
||||
index_node_no_offset(&txn, key, &node.uuid, node.node_type as u8, node.timestamp, &node.provenance, node.weight)?;
|
||||
} else {
|
||||
// For deleted nodes, just mark KEY_TO_UUID as deleted
|
||||
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
||||
let packed = pack_node_meta(&node.uuid, node.node_type as u8, node.timestamp, true, node.weight);
|
||||
key_uuid_table.insert(key.as_str(), packed.as_slice())?;
|
||||
}
|
||||
}
|
||||
}
|
||||
txn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Record a node in KEY_TO_UUID, NODES_BY_PROVENANCE, and NODES_BY_TYPE (but not UUID_OFFSETS - for rebuild use).
|
||||
fn index_node_no_offset(txn: &WriteTransaction, key: &str, uuid: &[u8; 16], node_type: u8, timestamp: i64, provenance: &str, weight: f32) -> Result<()> {
|
||||
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
||||
let mut by_provenance = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
||||
let mut by_type = txn.open_table(NODES_BY_TYPE)?;
|
||||
|
||||
let packed = pack_node_meta(uuid, node_type, timestamp, false, weight);
|
||||
key_uuid_table.insert(key, packed.as_slice())?;
|
||||
let prov_val = pack_provenance_value(timestamp, uuid);
|
||||
by_provenance.insert(provenance, prov_val.as_slice())?;
|
||||
let type_key = pack_type_key(node_type, timestamp);
|
||||
by_type.insert(type_key.as_slice(), uuid.as_slice())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Fsck report — discrepancies found between the capnp logs and the redb index.
#[derive(Debug, Default)]
pub struct FsckReport {
    /// Keys present in the current index but absent from the rebuilt one.
    pub zombies: Vec<String>,
    /// Keys present in the rebuilt index but absent from the current one.
    pub missing: Vec<String>,
    /// Whether the capnp log was repaired during the check.
    pub capnp_repaired: bool,
}

impl FsckReport {
    /// Returns `true` when there are no zombie keys, no missing keys, and
    /// the capnp log was not repaired.
    pub fn is_clean(&self) -> bool {
        !self.capnp_repaired && self.zombies.is_empty() && self.missing.is_empty()
    }
}
|
||||
|
||||
/// Full fsck: verify capnp logs, rebuild index to temp, compare with current.
|
||||
/// Returns a report of discrepancies found.
|
||||
pub fn fsck_full() -> Result<FsckReport> {
|
||||
use std::collections::HashSet;
|
||||
use tempfile::TempDir;
|
||||
use super::capnp::{fsck, iter_nodes};
|
||||
use super::types::{nodes_path, db_path};
|
||||
|
||||
let mut report = FsckReport::default();
|
||||
|
||||
// Step 1: Run capnp log fsck (may truncate corrupt messages)
|
||||
let nodes_size_before = nodes_path().metadata().map(|m| m.len()).unwrap_or(0);
|
||||
fsck()?;
|
||||
let nodes_size_after = nodes_path().metadata().map(|m| m.len()).unwrap_or(0);
|
||||
report.capnp_repaired = nodes_size_after != nodes_size_before;
|
||||
|
||||
// Step 2: Rebuild index to temp file
|
||||
let temp_dir = TempDir::new().context("create temp dir")?;
|
||||
let temp_db_path = temp_dir.path().join("rebuilt.redb");
|
||||
let rebuilt_db = open_db(&temp_db_path)?;
|
||||
rebuild(&rebuilt_db, iter_nodes()?)?;
|
||||
|
||||
// Step 3: Copy current index to temp and open (avoids write lock contention)
|
||||
let current_db_path = db_path();
|
||||
if !current_db_path.exists() {
|
||||
// No current index — all rebuilt keys are "missing"
|
||||
let txn = rebuilt_db.begin_read()?;
|
||||
let table = txn.open_table(KEY_TO_UUID)?;
|
||||
for entry in table.iter()? {
|
||||
let (key, _) = entry?;
|
||||
report.missing.push(key.value().to_string());
|
||||
}
|
||||
return Ok(report);
|
||||
}
|
||||
|
||||
// Copy to temp to avoid lock contention with running daemon
|
||||
let current_copy_path = temp_dir.path().join("current.redb");
|
||||
std::fs::copy(¤t_db_path, ¤t_copy_path)
|
||||
.with_context(|| format!("copy {} to temp", current_db_path.display()))?;
|
||||
|
||||
let current_db = Database::open(¤t_copy_path)
|
||||
.with_context(|| "open current db copy")?;
|
||||
|
||||
// Step 4: Compare KEY_TO_UUID tables
|
||||
let rebuilt_keys: HashSet<String> = {
|
||||
let txn = rebuilt_db.begin_read()?;
|
||||
let table = txn.open_table(KEY_TO_UUID)?;
|
||||
table.iter()?.map(|e| e.map(|(k, _)| k.value().to_string())).collect::<Result<_, _>>()?
|
||||
};
|
||||
|
||||
let current_keys: HashSet<String> = {
|
||||
let txn = current_db.begin_read()?;
|
||||
let table = txn.open_table(KEY_TO_UUID)?;
|
||||
table.iter()?.map(|e| e.map(|(k, _)| k.value().to_string())).collect::<Result<_, _>>()?
|
||||
};
|
||||
|
||||
// Keys in current but not rebuilt = zombies (shouldn't exist)
|
||||
for key in current_keys.difference(&rebuilt_keys) {
|
||||
report.zombies.push(key.clone());
|
||||
}
|
||||
report.zombies.sort();
|
||||
|
||||
// Keys in rebuilt but not current = missing (should exist but don't)
|
||||
for key in rebuilt_keys.difference(¤t_keys) {
|
||||
report.missing.push(key.clone());
|
||||
}
|
||||
report.missing.sort();
|
||||
|
||||
Ok(report)
|
||||
}
|
||||
|
||||
/// Repair the index by rebuilding from capnp logs.
|
||||
pub fn repair_index() -> Result<()> {
|
||||
use super::capnp::iter_nodes;
|
||||
use super::types::db_path;
|
||||
use std::fs;
|
||||
|
||||
let db_p = db_path();
|
||||
if db_p.exists() {
|
||||
fs::remove_file(&db_p).context("remove old index")?;
|
||||
}
|
||||
let db = open_db(&db_p)?;
|
||||
rebuild(&db, iter_nodes()?)?;
|
||||
eprintln!("index rebuilt from capnp log");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if redb index is healthy by verifying some offsets are valid.
|
||||
pub fn is_healthy(db: &Database) -> Result<bool> {
|
||||
use super::types::nodes_path;
|
||||
use std::fs;
|
||||
|
||||
let txn = db.begin_read()?;
|
||||
let key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
||||
|
||||
// Check that we can read the table and it has entries
|
||||
if key_uuid_table.len()? == 0 {
|
||||
let capnp_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
|
||||
return Ok(capnp_size == 0); // healthy only if capnp is also empty
|
||||
}
|
||||
|
||||
// Spot check: verify a few offsets point to valid messages
|
||||
let uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
||||
let mut checked = 0;
|
||||
for entry in key_uuid_table.iter()? {
|
||||
if checked >= 5 { break; }
|
||||
let (_key, data) = entry?;
|
||||
let (uuid, _, _, _, _) = unpack_node_meta(data.value());
|
||||
|
||||
if let Some(offset) = max_offset_for_uuid_in_table(&uuid_offsets, &uuid)? {
|
||||
if read_node_at_offset(offset).is_err() {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
checked += 1;
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// Open redb database, rebuilding if unhealthy.
|
||||
pub fn open_or_rebuild(path: &Path) -> Result<Database> {
|
||||
use super::capnp::iter_nodes;
|
||||
use std::fs;
|
||||
|
||||
// Try opening existing database
|
||||
if path.exists() {
|
||||
match open_db(path) {
|
||||
Ok(database) => {
|
||||
if is_healthy(&database)? {
|
||||
return Ok(database);
|
||||
}
|
||||
eprintln!("redb index stale, rebuilding...");
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("redb open failed ({}), rebuilding...", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Rebuild index from capnp log
|
||||
if path.exists() {
|
||||
fs::remove_file(path).with_context(|| format!("remove old db {}", path.display()))?;
|
||||
}
|
||||
let database = open_db(path)?;
|
||||
rebuild(&database, iter_nodes()?)?;
|
||||
Ok(database)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue