- KEY_TO_UUID now stores weight (30 bytes: uuid+type+ts+deleted+weight) - UUID_OFFSETS changed to composite key for O(log n) max-offset lookup - Add NODES_BY_TYPE index for efficient type+date range queries - Add for_each_key_weight() to StoreView for index-only iteration - match_seeds uses index-only path when content not needed - Fix transaction consistency in ops (single txn for related updates) - rebuild() now records all uuid→offset mappings for version history - Backwards compatible: old index formats decoded with default weight Co-Authored-By: Proof of Concept <poc@bcachefs.org> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
664 lines
25 KiB
Rust
664 lines
25 KiB
Rust
// redb index tables
//
// capnp logs are source of truth; redb provides indexed access.
//
// Node tables:
//   KEY_TO_UUID: key → (uuid, node_type, timestamp, deleted, weight)
//     Keeps entries for deleted nodes to enable index-based restore.
//   UUID_OFFSETS: [uuid:16][offset:8 BE] → ()  composite key for O(log n) max-offset lookup
//   NODES_BY_PROVENANCE: provenance → (timestamp, uuid)  (multimap)
//   NODES_BY_TYPE: [type:1][neg_timestamp:8] → uuid  (type+date range queries, newest first)
//
// Relation tables:
//   RELS: node_uuid → (other_uuid, strength, rel_type, is_outgoing) packed  (multimap)
//     Each relation stored twice — once per endpoint with direction bit.
//
// To get current offset: KEY_TO_UUID[key] → uuid → max(UUID_OFFSETS[uuid][*])
// To get key from uuid: read_node_at_offset(max_offset) → node.key
|
|
|
|
use anyhow::{Context, Result};
|
|
use redb::{Database, MultimapTableDefinition, ReadableDatabase, ReadableTable, ReadableTableMetadata, TableDefinition, WriteTransaction};
|
|
use std::collections::HashMap;
|
|
use std::path::Path;
|
|
|
|
use super::types::Node;
|
|
use super::capnp::read_node_at_offset;
|
|
|
|
// Node tables
|
|
// KEY_TO_UUID: key → [uuid:16][node_type:1][timestamp:8][deleted:1][weight:4] = 30 bytes
|
|
/// Node key → packed metadata record.
///
/// Values are produced by `pack_node_meta` and decoded by `unpack_node_meta`,
/// which also accepts the shorter records written by earlier layouts.
/// Rows of deleted nodes are kept with the deleted flag set (see `remove_node`).
pub const KEY_TO_UUID: TableDefinition<&str, &[u8]> = TableDefinition::new("key_to_uuid");
|
|
// UUID_OFFSETS: [uuid:16][offset:8 BE] → () — offset in key for range scans
|
|
/// Set of `[uuid][offset]` composite keys with a unit value.
///
/// One row is written per recorded (uuid, offset) pair; keys are built by
/// `pack_uuid_offset`. The highest offset stored for a uuid is the one the
/// lookup helpers in this file treat as the node's current version.
pub const UUID_OFFSETS: TableDefinition<&[u8], ()> = TableDefinition::new("uuid_offsets");
|
|
// NODES_BY_PROVENANCE: provenance → [negated_timestamp:8][uuid:16] = 24 bytes (sorted by timestamp desc)
|
|
/// Multimap from provenance string to packed `(timestamp, uuid)` values.
///
/// Values are built by `pack_provenance_value` and decoded by
/// `unpack_provenance_value`.
pub const NODES_BY_PROVENANCE: MultimapTableDefinition<&str, &[u8]> = MultimapTableDefinition::new("nodes_by_provenance");
|
|
// NODES_BY_TYPE: [type:1][neg_timestamp:8] → uuid (for type+date range queries, newest first)
|
|
/// Map from a `pack_type_key` key to the 16-byte uuid of the node.
///
/// NOTE(review): this is a plain (non-multimap) table and the key carries no
/// uuid component, so two nodes with the same type and timestamp map to the
/// same key and the later insert would replace the earlier one — confirm
/// that this is acceptable for the callers of `nodes_by_type`.
pub const NODES_BY_TYPE: TableDefinition<&[u8], &[u8]> = TableDefinition::new("nodes_by_type");
|
|
|
|
// Relations table - each relation stored twice (once per endpoint)
|
|
// Value: (other_uuid: [u8;16], strength: f32, rel_type: u8, is_outgoing: bool)
|
|
// Packed as 22 bytes: [other_uuid:16][strength:4][rel_type:1][is_outgoing:1]
|
|
/// Multimap from a node's 16-byte uuid to its packed relation records.
///
/// Values are built by `pack_rel` and decoded by `unpack_rel`;
/// `index_relation` writes one record under each endpoint of a relation.
pub const RELS: MultimapTableDefinition<&[u8], &[u8]> = MultimapTableDefinition::new("rels");
|
|
|
|
/// Open or create the redb database, ensuring all tables exist.
|
|
pub fn open_db(path: &Path) -> Result<Database> {
|
|
let db = Database::create(path)
|
|
.with_context(|| format!("create redb {}", path.display()))?;
|
|
|
|
// Ensure tables exist by opening a write transaction
|
|
let txn = db.begin_write()?;
|
|
{
|
|
// Node tables
|
|
let _ = txn.open_table(KEY_TO_UUID)?;
|
|
let _ = txn.open_table(UUID_OFFSETS)?;
|
|
let _ = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
|
let _ = txn.open_table(NODES_BY_TYPE)?;
|
|
// Relations
|
|
let _ = txn.open_multimap_table(RELS)?;
|
|
}
|
|
txn.commit()?;
|
|
|
|
Ok(db)
|
|
}
|
|
|
|
/// Pack node metadata into the 30-byte `KEY_TO_UUID` value.
///
/// Layout (all multi-byte fields big-endian):
/// `[uuid:16][node_type:1][timestamp:8][deleted:1][weight:4]`.
/// The deleted flag is stored as `1` when set and `0` otherwise.
fn pack_node_meta(uuid: &[u8; 16], node_type: u8, timestamp: i64, deleted: bool, weight: f32) -> [u8; 30] {
    let mut buf = [0u8; 30];
    buf[0..16].copy_from_slice(uuid);
    buf[16] = node_type;
    buf[17..25].copy_from_slice(&timestamp.to_be_bytes());
    buf[25] = u8::from(deleted);
    buf[26..30].copy_from_slice(&weight.to_be_bytes());
    buf
}
|
|
|
|
/// Decode a `KEY_TO_UUID` value into `(uuid, node_type, timestamp, deleted, weight)`.
///
/// Accepts the current 30-byte record as well as shorter legacy records;
/// fields missing from a shorter record get defaults:
/// - fewer than 25 bytes: only the uuid is read; type `0`, timestamp `0`,
///   not deleted, weight `0.5`
/// - 25 bytes: type and timestamp are read; not deleted, weight `0.5`
/// - 26..=29 bytes: the deleted flag is read as well; weight `0.5`
/// - 30 bytes or more: every field is read
///
/// # Panics
/// Panics if `data` is shorter than 16 bytes.
pub fn unpack_node_meta(data: &[u8]) -> ([u8; 16], u8, i64, bool, f32) {
    // Weight reported for records that do not carry a weight field.
    const DEFAULT_WEIGHT: f32 = 0.5;

    let mut uuid = [0u8; 16];
    uuid.copy_from_slice(&data[0..16]);

    let len = data.len();
    if len < 25 {
        // Old format: just uuid, default metadata
        return (uuid, 0, 0, false, DEFAULT_WEIGHT);
    }

    let node_type = data[16];
    let mut ts_bytes = [0u8; 8];
    ts_bytes.copy_from_slice(&data[17..25]);
    let timestamp = i64::from_be_bytes(ts_bytes);

    let deleted = len >= 26 && data[25] != 0;
    let weight = if len >= 30 {
        let mut w_bytes = [0u8; 4];
        w_bytes.copy_from_slice(&data[26..30]);
        f32::from_be_bytes(w_bytes)
    } else {
        DEFAULT_WEIGHT
    };

    (uuid, node_type, timestamp, deleted, weight)
}
|
|
|
|
/// Build a 24-byte `NODES_BY_PROVENANCE` value: `[!timestamp:8 BE][uuid:16]`.
///
/// The timestamp is stored as its bitwise complement so that, for
/// non-negative timestamps, larger (newer) timestamps produce smaller byte
/// strings and therefore sort first.
fn pack_provenance_value(timestamp: i64, uuid: &[u8; 16]) -> [u8; 24] {
    let mut packed = [0u8; 24];
    {
        let (ts_part, uuid_part) = packed.split_at_mut(8);
        ts_part.copy_from_slice(&(!timestamp).to_be_bytes());
        uuid_part.copy_from_slice(uuid);
    }
    packed
}
|
|
|
|
/// Decode a `NODES_BY_PROVENANCE` value into `(timestamp, uuid)`.
///
/// The first 8 bytes hold the bitwise complement of the timestamp
/// (big-endian); the following 16 bytes are the uuid.
///
/// # Panics
/// Panics if `data` is shorter than 24 bytes.
pub fn unpack_provenance_value(data: &[u8]) -> (i64, [u8; 16]) {
    let mut ts_bytes = [0u8; 8];
    ts_bytes.copy_from_slice(&data[0..8]);

    let mut uuid = [0u8; 16];
    uuid.copy_from_slice(&data[8..24]);

    // Undo the complement applied when the value was packed.
    (!i64::from_be_bytes(ts_bytes), uuid)
}
|
|
|
|
/// Build a 24-byte `UUID_OFFSETS` key: `[uuid:16][offset:8 BE]`.
///
/// The offset is big-endian so that keys sharing a uuid sort by ascending
/// offset.
fn pack_uuid_offset(uuid: &[u8; 16], offset: u64) -> [u8; 24] {
    let mut composite = [0u8; 24];
    {
        let (uuid_part, offset_part) = composite.split_at_mut(16);
        uuid_part.copy_from_slice(uuid);
        offset_part.copy_from_slice(&offset.to_be_bytes());
    }
    composite
}
|
|
|
|
/// Build a 9-byte `NODES_BY_TYPE` key: `[type:1][!timestamp:8 BE]`.
///
/// The timestamp is stored as its bitwise complement so that, within one
/// type and for non-negative timestamps, newer entries sort first.
fn pack_type_key(node_type: u8, timestamp: i64) -> [u8; 9] {
    let mut key = [0u8; 9];
    key[0] = node_type;
    let complemented = !timestamp;
    key[1..].copy_from_slice(&complemented.to_be_bytes());
    key
}
|
|
|
|
/// Split a `UUID_OFFSETS` key into `(uuid, offset)`.
///
/// Expects the `[uuid:16][offset:8 BE]` layout written by `pack_uuid_offset`.
///
/// # Panics
/// Panics if `key` is shorter than 24 bytes.
fn unpack_uuid_offset_key(key: &[u8]) -> ([u8; 16], u64) {
    let mut uuid = [0u8; 16];
    uuid.copy_from_slice(&key[0..16]);

    let mut offset_bytes = [0u8; 8];
    offset_bytes.copy_from_slice(&key[16..24]);

    (uuid, u64::from_be_bytes(offset_bytes))
}
|
|
|
|
/// Record a node's location in the index (for live nodes).
|
|
pub fn index_node(txn: &WriteTransaction, key: &str, offset: u64, uuid: &[u8; 16], node_type: u8, timestamp: i64, provenance: &str, weight: f32) -> Result<()> {
|
|
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
|
let mut uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
|
let mut by_provenance = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
|
let mut by_type = txn.open_table(NODES_BY_TYPE)?;
|
|
|
|
let packed = pack_node_meta(uuid, node_type, timestamp, false, weight);
|
|
key_uuid_table.insert(key, packed.as_slice())?;
|
|
let uuid_offset_key = pack_uuid_offset(uuid, offset);
|
|
uuid_offsets.insert(uuid_offset_key.as_slice(), ())?;
|
|
let prov_val = pack_provenance_value(timestamp, uuid);
|
|
by_provenance.insert(provenance, prov_val.as_slice())?;
|
|
let type_key = pack_type_key(node_type, timestamp);
|
|
by_type.insert(type_key.as_slice(), uuid.as_slice())?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Record a uuid→offset mapping only (for deleted nodes - preserves version history).
|
|
pub fn record_uuid_offset(txn: &WriteTransaction, uuid: &[u8; 16], offset: u64) -> Result<()> {
|
|
let mut uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
|
let uuid_offset_key = pack_uuid_offset(uuid, offset);
|
|
uuid_offsets.insert(uuid_offset_key.as_slice(), ())?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Get max offset for a UUID from an already-opened table.
|
|
/// Uses reverse range scan to find the highest offset (last key in range).
|
|
fn max_offset_for_uuid_in_table(
|
|
table: &redb::ReadOnlyTable<&[u8], ()>,
|
|
uuid: &[u8; 16],
|
|
) -> Result<Option<u64>> {
|
|
let start = pack_uuid_offset(uuid, 0);
|
|
let end = pack_uuid_offset(uuid, u64::MAX);
|
|
|
|
// Get last entry in range (highest offset)
|
|
if let Some(entry) = table.range(start.as_slice()..=end.as_slice())?.next_back() {
|
|
let (key, _) = entry?;
|
|
let (_, offset) = unpack_uuid_offset_key(key.value());
|
|
Ok(Some(offset))
|
|
} else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
/// Get recent keys for a given provenance, sorted by timestamp descending.
|
|
/// Resolves UUID → current key by reading node at latest offset.
|
|
/// Single transaction for all index lookups.
|
|
pub fn recent_by_provenance(db: &Database, provenance: &str, limit: usize) -> Result<Vec<(String, i64)>> {
|
|
let txn = db.begin_read()?;
|
|
let prov_table = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
|
let uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
|
|
|
let mut results = Vec::new();
|
|
for entry in prov_table.get(provenance)? {
|
|
if results.len() >= limit { break; }
|
|
let (timestamp, uuid) = unpack_provenance_value(entry?.value());
|
|
|
|
if let Some(offset) = max_offset_for_uuid_in_table(&uuid_offsets, &uuid)? {
|
|
if let Ok(node) = read_node_at_offset(offset) {
|
|
results.push((node.key, timestamp));
|
|
}
|
|
}
|
|
}
|
|
Ok(results)
|
|
}
|
|
|
|
/// Get UUIDs for nodes of a given type, sorted by timestamp descending (newest first).
|
|
/// Optionally filter to timestamps >= after_ts.
|
|
/// Returns up to `limit` UUIDs.
|
|
pub fn nodes_by_type(db: &Database, node_type: u8, limit: usize, after_ts: Option<i64>) -> Result<Vec<[u8; 16]>> {
|
|
let txn = db.begin_read()?;
|
|
let by_type = txn.open_table(NODES_BY_TYPE)?;
|
|
|
|
// Range: [type][0x80..] to [type][0xFF..] for positive timestamps (newest first)
|
|
// !i64::MAX = 0x8000... (far future, smallest), !0 = 0xFFFF... (epoch, largest)
|
|
let start = pack_type_key(node_type, i64::MAX); // !MAX = 0x8000... = smallest
|
|
let end = pack_type_key(node_type, 0); // !0 = 0xFFFF... = largest
|
|
|
|
let mut results = Vec::new();
|
|
for entry in by_type.range(start.as_slice()..=end.as_slice())? {
|
|
if results.len() >= limit { break; }
|
|
let (key_bytes, uuid_bytes) = entry?;
|
|
|
|
// Decode timestamp from key to check after_ts filter
|
|
let key = key_bytes.value();
|
|
let neg_ts = i64::from_be_bytes([key[1], key[2], key[3], key[4], key[5], key[6], key[7], key[8]]);
|
|
let timestamp = !neg_ts;
|
|
|
|
if let Some(after) = after_ts {
|
|
if timestamp < after { continue; }
|
|
}
|
|
|
|
let mut uuid = [0u8; 16];
|
|
uuid.copy_from_slice(uuid_bytes.value());
|
|
results.push(uuid);
|
|
}
|
|
Ok(results)
|
|
}
|
|
|
|
/// Get offset for a node by key (via KEY_TO_UUID → UUID_OFFSETS).
|
|
/// Single transaction, returns the newest offset.
|
|
pub fn get_offset(db: &Database, key: &str) -> Result<Option<u64>> {
|
|
let txn = db.begin_read()?;
|
|
let key_uuid = txn.open_table(KEY_TO_UUID)?;
|
|
let uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
|
|
|
let uuid = match key_uuid.get(key)? {
|
|
Some(data) => {
|
|
let (uuid, _, _, deleted, _) = unpack_node_meta(data.value());
|
|
if deleted { return Ok(None); }
|
|
uuid
|
|
}
|
|
None => return Ok(None),
|
|
};
|
|
|
|
max_offset_for_uuid_in_table(&uuid_offsets, &uuid)
|
|
}
|
|
|
|
/// Check if a key exists in the index (and is not deleted).
|
|
pub fn contains_key(db: &Database, key: &str) -> Result<bool> {
|
|
let txn = db.begin_read()?;
|
|
let table = txn.open_table(KEY_TO_UUID)?;
|
|
match table.get(key)? {
|
|
Some(data) => {
|
|
let (_, _, _, deleted, _) = unpack_node_meta(data.value());
|
|
Ok(!deleted)
|
|
}
|
|
None => Ok(false),
|
|
}
|
|
}
|
|
|
|
/// Get a node's UUID from its key (returns UUID even for deleted nodes).
|
|
pub fn get_uuid_for_key(db: &Database, key: &str) -> Result<Option<[u8; 16]>> {
|
|
let txn = db.begin_read()?;
|
|
let table = txn.open_table(KEY_TO_UUID)?;
|
|
match table.get(key)? {
|
|
Some(data) => {
|
|
let (uuid, _, _, _, _) = unpack_node_meta(data.value());
|
|
Ok(Some(uuid))
|
|
}
|
|
None => Ok(None),
|
|
}
|
|
}
|
|
|
|
/// Get all offsets for a UUID (all versions). Returns newest (highest) first.
|
|
pub fn get_offsets_for_uuid(db: &Database, uuid: &[u8; 16]) -> Result<Vec<u64>> {
|
|
let txn = db.begin_read()?;
|
|
let table = txn.open_table(UUID_OFFSETS)?;
|
|
|
|
// Range scan: [uuid][0x00..] to [uuid][0xFF..]
|
|
let start = pack_uuid_offset(uuid, 0);
|
|
let end = pack_uuid_offset(uuid, u64::MAX);
|
|
|
|
let mut offsets = Vec::new();
|
|
for entry in table.range(start.as_slice()..=end.as_slice())? {
|
|
let (key, _) = entry?;
|
|
let (_, offset) = unpack_uuid_offset_key(key.value());
|
|
offsets.push(offset);
|
|
}
|
|
// Already sorted ascending by key; reverse for newest first
|
|
offsets.reverse();
|
|
Ok(offsets)
|
|
}
|
|
|
|
/// Mark a node as deleted in the index (key stays for history; UUID_OFFSETS preserved).
|
|
pub fn remove_node(txn: &WriteTransaction, key: &str) -> Result<()> {
|
|
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
|
// Copy out data to avoid borrow conflict
|
|
let meta = key_uuid_table.get(key)?.map(|data| {
|
|
unpack_node_meta(data.value())
|
|
});
|
|
if let Some((uuid, node_type, timestamp, _, weight)) = meta {
|
|
let packed = pack_node_meta(&uuid, node_type, timestamp, true, weight);
|
|
key_uuid_table.insert(key, packed.as_slice())?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Collect all keys from the index (excludes deleted nodes).
|
|
pub fn all_keys(db: &Database) -> Result<Vec<String>> {
|
|
let txn = db.begin_read()?;
|
|
let table = txn.open_table(KEY_TO_UUID)?;
|
|
let mut keys = Vec::new();
|
|
for entry in table.iter()? {
|
|
let (key, data) = entry?;
|
|
let (_, _, _, deleted, _) = unpack_node_meta(data.value());
|
|
if !deleted {
|
|
keys.push(key.value().to_string());
|
|
}
|
|
}
|
|
Ok(keys)
|
|
}
|
|
|
|
/// Collect all (key, uuid, node_type, timestamp, deleted, weight) in a single table scan.
|
|
pub fn all_key_uuid_pairs(db: &Database) -> Result<Vec<(String, [u8; 16], u8, i64, bool, f32)>> {
|
|
let txn = db.begin_read()?;
|
|
let table = txn.open_table(KEY_TO_UUID)?;
|
|
let mut pairs = Vec::new();
|
|
for entry in table.iter()? {
|
|
let (key, data) = entry?;
|
|
let (uuid, node_type, timestamp, deleted, weight) = unpack_node_meta(data.value());
|
|
pairs.push((key.value().to_string(), uuid, node_type, timestamp, deleted, weight));
|
|
}
|
|
Ok(pairs)
|
|
}
|
|
|
|
// ── Relation index operations ──────────────────────────────────────
|
|
//
|
|
// RELS value format: [other_uuid:16][strength:4][rel_type:1][is_outgoing:1] = 22 bytes
|
|
|
|
/// Build a 22-byte `RELS` value:
/// `[other_uuid:16][strength:4 BE][rel_type:1][is_outgoing:1]`.
///
/// `is_outgoing` is stored as `1` when true and `0` otherwise.
fn pack_rel(other_uuid: &[u8; 16], strength: f32, rel_type: u8, is_outgoing: bool) -> [u8; 22] {
    let mut packed = [0u8; 22];
    packed[..16].copy_from_slice(other_uuid);
    packed[16..20].copy_from_slice(&strength.to_be_bytes());
    packed[20] = rel_type;
    packed[21] = match is_outgoing {
        true => 1,
        false => 0,
    };
    packed
}
|
|
|
|
/// Decode a `RELS` value into `(other_uuid, strength, rel_type, is_outgoing)`.
///
/// Expects the `[other_uuid:16][strength:4 BE][rel_type:1][is_outgoing:1]`
/// layout; any non-zero direction byte is read as outgoing.
///
/// # Panics
/// Panics if `data` is shorter than 22 bytes.
pub fn unpack_rel(data: &[u8]) -> ([u8; 16], f32, u8, bool) {
    let mut other_uuid = [0u8; 16];
    other_uuid.copy_from_slice(&data[0..16]);

    let mut strength_bytes = [0u8; 4];
    strength_bytes.copy_from_slice(&data[16..20]);

    (
        other_uuid,
        f32::from_be_bytes(strength_bytes),
        data[20],
        data[21] != 0,
    )
}
|
|
|
|
/// Index a relation: store twice (once per endpoint).
|
|
pub fn index_relation(
|
|
txn: &WriteTransaction,
|
|
source_uuid: &[u8; 16],
|
|
target_uuid: &[u8; 16],
|
|
strength: f32,
|
|
rel_type: u8,
|
|
) -> Result<()> {
|
|
let mut rels = txn.open_multimap_table(RELS)?;
|
|
|
|
// Store outgoing: source → (target, strength, type, true)
|
|
let outgoing = pack_rel(target_uuid, strength, rel_type, true);
|
|
rels.insert(source_uuid.as_slice(), outgoing.as_slice())?;
|
|
|
|
// Store incoming: target → (source, strength, type, false)
|
|
let incoming = pack_rel(source_uuid, strength, rel_type, false);
|
|
rels.insert(target_uuid.as_slice(), incoming.as_slice())?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Remove a relation from the index.
|
|
pub fn remove_relation(
|
|
txn: &WriteTransaction,
|
|
source_uuid: &[u8; 16],
|
|
target_uuid: &[u8; 16],
|
|
strength: f32,
|
|
rel_type: u8,
|
|
) -> Result<()> {
|
|
let mut rels = txn.open_multimap_table(RELS)?;
|
|
|
|
let outgoing = pack_rel(target_uuid, strength, rel_type, true);
|
|
rels.remove(source_uuid.as_slice(), outgoing.as_slice())?;
|
|
|
|
let incoming = pack_rel(source_uuid, strength, rel_type, false);
|
|
rels.remove(target_uuid.as_slice(), incoming.as_slice())?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Get all edges for a node. Returns (other_uuid, strength, rel_type, is_outgoing).
|
|
pub fn edges_for_node(db: &Database, node_uuid: &[u8; 16]) -> Result<Vec<([u8; 16], f32, u8, bool)>> {
|
|
let txn = db.begin_read()?;
|
|
let rels = txn.open_multimap_table(RELS)?;
|
|
|
|
let mut edges = Vec::new();
|
|
for entry in rels.get(node_uuid.as_slice())? {
|
|
let guard = entry?;
|
|
let slice = guard.value();
|
|
let mut data = [0u8; 22];
|
|
data.copy_from_slice(slice);
|
|
edges.push(unpack_rel(&data));
|
|
}
|
|
Ok(edges)
|
|
}
|
|
|
|
// ── Index rebuild ──────────────────────────────────────────────────────
|
|
|
|
/// Rebuild the index from a sequence of (offset, Node) pairs.
|
|
/// Records ALL uuid→offset mappings (for history), but only the latest version per key in KEY_TO_UUID.
|
|
pub fn rebuild(db: &Database, nodes: Vec<(u64, Node)>) -> Result<()> {
|
|
// Track latest (offset, node) per key - newest timestamp wins
|
|
let mut latest: HashMap<String, (u64, Node)> = HashMap::new();
|
|
// Track ALL uuid→offset mappings for history
|
|
let mut all_offsets: Vec<([u8; 16], u64)> = Vec::new();
|
|
|
|
for (offset, node) in nodes {
|
|
// Record every offset for history
|
|
all_offsets.push((node.uuid, offset));
|
|
|
|
let dominated = latest.get(&node.key)
|
|
.map(|(_, existing)| node.timestamp >= existing.timestamp)
|
|
.unwrap_or(true);
|
|
if dominated {
|
|
latest.insert(node.key.clone(), (offset, node));
|
|
}
|
|
}
|
|
|
|
// Write to index
|
|
let txn = db.begin_write()?;
|
|
{
|
|
// Record all uuid→offset mappings
|
|
let mut uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
|
for (uuid, offset) in &all_offsets {
|
|
let key = pack_uuid_offset(uuid, *offset);
|
|
uuid_offsets.insert(key.as_slice(), ())?;
|
|
}
|
|
drop(uuid_offsets);
|
|
|
|
// Record KEY_TO_UUID and NODES_BY_PROVENANCE for latest version of each key
|
|
for (key, (_offset, node)) in &latest {
|
|
if !node.deleted {
|
|
index_node_no_offset(&txn, key, &node.uuid, node.node_type as u8, node.timestamp, &node.provenance, node.weight)?;
|
|
} else {
|
|
// For deleted nodes, just mark KEY_TO_UUID as deleted
|
|
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
|
let packed = pack_node_meta(&node.uuid, node.node_type as u8, node.timestamp, true, node.weight);
|
|
key_uuid_table.insert(key.as_str(), packed.as_slice())?;
|
|
}
|
|
}
|
|
}
|
|
txn.commit()?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Record a node in KEY_TO_UUID, NODES_BY_PROVENANCE, and NODES_BY_TYPE (but not UUID_OFFSETS - for rebuild use).
|
|
fn index_node_no_offset(txn: &WriteTransaction, key: &str, uuid: &[u8; 16], node_type: u8, timestamp: i64, provenance: &str, weight: f32) -> Result<()> {
|
|
let mut key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
|
let mut by_provenance = txn.open_multimap_table(NODES_BY_PROVENANCE)?;
|
|
let mut by_type = txn.open_table(NODES_BY_TYPE)?;
|
|
|
|
let packed = pack_node_meta(uuid, node_type, timestamp, false, weight);
|
|
key_uuid_table.insert(key, packed.as_slice())?;
|
|
let prov_val = pack_provenance_value(timestamp, uuid);
|
|
by_provenance.insert(provenance, prov_val.as_slice())?;
|
|
let type_key = pack_type_key(node_type, timestamp);
|
|
by_type.insert(type_key.as_slice(), uuid.as_slice())?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Outcome of a full fsck: differences between the index rebuilt from the
/// capnp logs and the index currently on disk.
#[derive(Debug, Default)]
pub struct FsckReport {
    /// Keys present in the current index but absent from the rebuilt one.
    pub zombies: Vec<String>,
    /// Keys present in the rebuilt index but absent from the current one.
    pub missing: Vec<String>,
    /// Whether the capnp log was repaired during the check.
    pub capnp_repaired: bool,
}

impl FsckReport {
    /// `true` when no zombie keys and no missing keys were found and the
    /// capnp log was not repaired.
    pub fn is_clean(&self) -> bool {
        let has_discrepancy = !self.zombies.is_empty() || !self.missing.is_empty();
        !(has_discrepancy || self.capnp_repaired)
    }
}
|
|
|
|
/// Full fsck: verify capnp logs, rebuild index to temp, compare with current.
|
|
/// Returns a report of discrepancies found.
|
|
pub fn fsck_full() -> Result<FsckReport> {
|
|
use std::collections::HashSet;
|
|
use tempfile::TempDir;
|
|
use super::capnp::{fsck, iter_nodes};
|
|
use super::types::{nodes_path, db_path};
|
|
|
|
let mut report = FsckReport::default();
|
|
|
|
// Step 1: Run capnp log fsck (may truncate corrupt messages)
|
|
let nodes_size_before = nodes_path().metadata().map(|m| m.len()).unwrap_or(0);
|
|
fsck()?;
|
|
let nodes_size_after = nodes_path().metadata().map(|m| m.len()).unwrap_or(0);
|
|
report.capnp_repaired = nodes_size_after != nodes_size_before;
|
|
|
|
// Step 2: Rebuild index to temp file
|
|
let temp_dir = TempDir::new().context("create temp dir")?;
|
|
let temp_db_path = temp_dir.path().join("rebuilt.redb");
|
|
let rebuilt_db = open_db(&temp_db_path)?;
|
|
rebuild(&rebuilt_db, iter_nodes()?)?;
|
|
|
|
// Step 3: Copy current index to temp and open (avoids write lock contention)
|
|
let current_db_path = db_path();
|
|
if !current_db_path.exists() {
|
|
// No current index — all rebuilt keys are "missing"
|
|
let txn = rebuilt_db.begin_read()?;
|
|
let table = txn.open_table(KEY_TO_UUID)?;
|
|
for entry in table.iter()? {
|
|
let (key, _) = entry?;
|
|
report.missing.push(key.value().to_string());
|
|
}
|
|
return Ok(report);
|
|
}
|
|
|
|
// Copy to temp to avoid lock contention with running daemon
|
|
let current_copy_path = temp_dir.path().join("current.redb");
|
|
std::fs::copy(¤t_db_path, ¤t_copy_path)
|
|
.with_context(|| format!("copy {} to temp", current_db_path.display()))?;
|
|
|
|
let current_db = Database::open(¤t_copy_path)
|
|
.with_context(|| "open current db copy")?;
|
|
|
|
// Step 4: Compare KEY_TO_UUID tables
|
|
let rebuilt_keys: HashSet<String> = {
|
|
let txn = rebuilt_db.begin_read()?;
|
|
let table = txn.open_table(KEY_TO_UUID)?;
|
|
table.iter()?.map(|e| e.map(|(k, _)| k.value().to_string())).collect::<Result<_, _>>()?
|
|
};
|
|
|
|
let current_keys: HashSet<String> = {
|
|
let txn = current_db.begin_read()?;
|
|
let table = txn.open_table(KEY_TO_UUID)?;
|
|
table.iter()?.map(|e| e.map(|(k, _)| k.value().to_string())).collect::<Result<_, _>>()?
|
|
};
|
|
|
|
// Keys in current but not rebuilt = zombies (shouldn't exist)
|
|
for key in current_keys.difference(&rebuilt_keys) {
|
|
report.zombies.push(key.clone());
|
|
}
|
|
report.zombies.sort();
|
|
|
|
// Keys in rebuilt but not current = missing (should exist but don't)
|
|
for key in rebuilt_keys.difference(¤t_keys) {
|
|
report.missing.push(key.clone());
|
|
}
|
|
report.missing.sort();
|
|
|
|
Ok(report)
|
|
}
|
|
|
|
/// Repair the index by rebuilding from capnp logs.
|
|
pub fn repair_index() -> Result<()> {
|
|
use super::capnp::iter_nodes;
|
|
use super::types::db_path;
|
|
use std::fs;
|
|
|
|
let db_p = db_path();
|
|
if db_p.exists() {
|
|
fs::remove_file(&db_p).context("remove old index")?;
|
|
}
|
|
let db = open_db(&db_p)?;
|
|
rebuild(&db, iter_nodes()?)?;
|
|
eprintln!("index rebuilt from capnp log");
|
|
Ok(())
|
|
}
|
|
|
|
/// Check if redb index is healthy by verifying some offsets are valid.
|
|
pub fn is_healthy(db: &Database) -> Result<bool> {
|
|
use super::types::nodes_path;
|
|
use std::fs;
|
|
|
|
let txn = db.begin_read()?;
|
|
let key_uuid_table = txn.open_table(KEY_TO_UUID)?;
|
|
|
|
// Check that we can read the table and it has entries
|
|
if key_uuid_table.len()? == 0 {
|
|
let capnp_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
|
|
return Ok(capnp_size == 0); // healthy only if capnp is also empty
|
|
}
|
|
|
|
// Spot check: verify a few offsets point to valid messages
|
|
let uuid_offsets = txn.open_table(UUID_OFFSETS)?;
|
|
let mut checked = 0;
|
|
for entry in key_uuid_table.iter()? {
|
|
if checked >= 5 { break; }
|
|
let (_key, data) = entry?;
|
|
let (uuid, _, _, _, _) = unpack_node_meta(data.value());
|
|
|
|
if let Some(offset) = max_offset_for_uuid_in_table(&uuid_offsets, &uuid)? {
|
|
if read_node_at_offset(offset).is_err() {
|
|
return Ok(false);
|
|
}
|
|
}
|
|
checked += 1;
|
|
}
|
|
|
|
Ok(true)
|
|
}
|
|
|
|
/// Open redb database, rebuilding if unhealthy.
|
|
pub fn open_or_rebuild(path: &Path) -> Result<Database> {
|
|
use super::capnp::iter_nodes;
|
|
use std::fs;
|
|
|
|
// Try opening existing database
|
|
if path.exists() {
|
|
match open_db(path) {
|
|
Ok(database) => {
|
|
if is_healthy(&database)? {
|
|
return Ok(database);
|
|
}
|
|
eprintln!("redb index stale, rebuilding...");
|
|
}
|
|
Err(e) => {
|
|
eprintln!("redb open failed ({}), rebuilding...", e);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Rebuild index from capnp log
|
|
if path.exists() {
|
|
fs::remove_file(path).with_context(|| format!("remove old db {}", path.display()))?;
|
|
}
|
|
let database = open_db(path)?;
|
|
rebuild(&database, iter_nodes()?)?;
|
|
Ok(database)
|
|
}
|