store: remove Vec<Relation>, dedup uses index iteration

The relations Vec is gone from Store. dedup now iterates edges via
edges_for_uuid() instead of mutating the in-memory Vec: it removes and
re-adds edges through the index directly.

Removed load_relations_vec(), clear_relations(), and reindex_relations() — no longer needed.
Added helper methods: edges_for_uuid, index_relation, remove_relation_from_index.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-13 21:32:48 -04:00
parent c2de14dcab
commit 2548ca059d
5 changed files with 105 additions and 78 deletions

View file

@ -113,7 +113,7 @@ pub async fn cmd_fsck() -> Result<()> {
} }
pub async fn cmd_dedup(apply: bool) -> Result<()> { pub async fn cmd_dedup(apply: bool) -> Result<()> {
use std::collections::{HashMap, HashSet}; use std::collections::HashMap;
let arc = memory::access_local()?; let arc = memory::access_local()?;
let mut store = arc.lock().await; let mut store = arc.lock().await;
@ -210,6 +210,14 @@ pub async fn cmd_dedup(apply: bool) -> Result<()> {
.chain(diverged_groups) .chain(diverged_groups)
.collect(); .collect();
// Build uuid → key map for relation key strings
let mut uuid_to_key: HashMap<[u8; 16], String> = HashMap::new();
for key in store.all_keys()? {
if let Ok(Some(node)) = store.get_node(&key) {
uuid_to_key.insert(node.uuid, key);
}
}
let mut merged = 0usize; let mut merged = 0usize;
let mut edges_redirected = 0usize; let mut edges_redirected = 0usize;
let mut edges_deduped = 0usize; let mut edges_deduped = 0usize;
@ -219,52 +227,89 @@ pub async fn cmd_dedup(apply: bool) -> Result<()> {
copies.sort_by(|a, b| b.1.cmp(&a.1).then(b.0.version.cmp(&a.0.version))); copies.sort_by(|a, b| b.1.cmp(&a.1).then(b.0.version.cmp(&a.0.version)));
let survivor_uuid = copies[0].0.uuid; let survivor_uuid = copies[0].0.uuid;
let survivor_key = uuid_to_key.get(&survivor_uuid).cloned().unwrap_or_default();
let doomed_uuids: Vec<[u8; 16]> = copies[1..].iter().map(|c| c.0.uuid).collect(); let doomed_uuids: Vec<[u8; 16]> = copies[1..].iter().map(|c| c.0.uuid).collect();
// Redirect edges from doomed UUIDs to survivor // Redirect edges from doomed UUIDs to survivor via index iteration
let mut updated_rels = Vec::new(); for doomed_uuid in &doomed_uuids {
for rel in &mut store.relations { let edges = store.edges_for_uuid(doomed_uuid)?;
if rel.deleted { continue; } for (other_uuid, strength, rel_type, is_outgoing) in edges {
let mut changed = false; let other_key = uuid_to_key.get(&other_uuid).cloned().unwrap_or_default();
if doomed_uuids.contains(&rel.source) {
rel.source = survivor_uuid; // Remove old edge from index
changed = true; let (old_src, old_tgt) = if is_outgoing {
} (*doomed_uuid, other_uuid)
if doomed_uuids.contains(&rel.target) { } else {
rel.target = survivor_uuid; (other_uuid, *doomed_uuid)
changed = true; };
} store.remove_relation_from_index(&old_src, &old_tgt, strength, rel_type)?;
if changed {
rel.version += 1; // Add redirected edge
updated_rels.push(rel.clone()); let (new_src, new_tgt, src_key, tgt_key) = if is_outgoing {
(survivor_uuid, other_uuid, survivor_key.clone(), other_key)
} else {
(other_uuid, survivor_uuid, other_key, survivor_key.clone())
};
store.index_relation(&new_src, &new_tgt, strength, rel_type)?;
// Append a tombstone for the old relation, plus the new redirected relation, to the log
let mut tombstone = store::new_relation(
old_src, old_tgt,
store::RelationType::from_u8(rel_type), strength,
&uuid_to_key.get(&old_src).cloned().unwrap_or_default(),
&uuid_to_key.get(&old_tgt).cloned().unwrap_or_default(),
);
tombstone.deleted = true;
tombstone.version = 2;
let mut redirected = store::new_relation(
new_src, new_tgt,
store::RelationType::from_u8(rel_type), strength,
&src_key, &tgt_key,
);
redirected.version = 2;
store.append_relations(&[tombstone, redirected])?;
edges_redirected += 1; edges_redirected += 1;
} }
} }
// Dedup edges: same (source, target, rel_type) → keep highest strength // Dedup edges: same (other_uuid, rel_type) → keep highest strength
let mut seen: HashSet<([u8; 16], [u8; 16], String)> = HashSet::new(); // Group edges by (other, type), sort each group by strength desc, tombstone extras
let mut to_tombstone_rels = Vec::new(); let edges = store.edges_for_uuid(&survivor_uuid)?;
// Sort by strength descending so we keep the strongest let mut by_endpoint: HashMap<([u8; 16], u8), Vec<(f32, bool)>> = HashMap::new();
let mut rels_with_idx: Vec<(usize, &store::Relation)> = store.relations.iter() for (other_uuid, strength, rel_type, is_outgoing) in edges {
.enumerate() by_endpoint.entry((other_uuid, rel_type))
.filter(|(_, r)| !r.deleted && (r.source == survivor_uuid || r.target == survivor_uuid)) .or_default()
.collect(); .push((strength, is_outgoing));
rels_with_idx.sort_by(|a, b| b.1.strength.total_cmp(&a.1.strength)); }
for (idx, rel) in &rels_with_idx { for ((other_uuid, rel_type), mut variants) in by_endpoint {
let edge_key = (rel.source, rel.target, format!("{:?}", rel.rel_type)); if variants.len() <= 1 { continue; }
if !seen.insert(edge_key) { // Sort by strength descending, keep first
to_tombstone_rels.push(*idx); variants.sort_by(|a, b| b.0.total_cmp(&a.0));
let other_key = uuid_to_key.get(&other_uuid).cloned().unwrap_or_default();
for (strength, is_outgoing) in variants.into_iter().skip(1) {
let (src, tgt, src_key, tgt_key) = if is_outgoing {
(survivor_uuid, other_uuid, survivor_key.clone(), other_key.clone())
} else {
(other_uuid, survivor_uuid, other_key.clone(), survivor_key.clone())
};
store.remove_relation_from_index(&src, &tgt, strength, rel_type)?;
let mut tombstone = store::new_relation(
src, tgt,
store::RelationType::from_u8(rel_type), strength,
&src_key, &tgt_key,
);
tombstone.deleted = true;
tombstone.version = 2;
store.append_relations(&[tombstone])?;
edges_deduped += 1; edges_deduped += 1;
} }
} }
for &idx in &to_tombstone_rels {
store.relations[idx].deleted = true;
store.relations[idx].version += 1;
updated_rels.push(store.relations[idx].clone());
}
// Tombstone doomed nodes // Tombstone doomed nodes
let mut tombstones = Vec::new(); let mut tombstones = Vec::new();
for (doomed_node, _) in &copies[1..] { for (doomed_node, _) in &copies[1..] {
@ -275,9 +320,6 @@ pub async fn cmd_dedup(apply: bool) -> Result<()> {
} }
store.append_nodes(&tombstones)?; store.append_nodes(&tombstones)?;
if !updated_rels.is_empty() {
store.append_relations(&updated_rels)?;
}
// Remove doomed nodes from index // Remove doomed nodes from index
for (doomed_node, _) in &copies[1..] { for (doomed_node, _) in &copies[1..] {
@ -287,9 +329,6 @@ pub async fn cmd_dedup(apply: bool) -> Result<()> {
merged += doomed_uuids.len(); merged += doomed_uuids.len();
} }
// Remove tombstoned relations from cache and rebuild index
store.relations.retain(|r| !r.deleted);
store.reindex_relations()?;
store.save()?; store.save()?;
println!("Merged {} duplicates, redirected {} edges, deduped {} duplicate edges", println!("Merged {} duplicates, redirected {} edges, deduped {} duplicate edges",

View file

@ -272,12 +272,7 @@ impl Store {
store.loaded_nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0); store.loaded_nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
store.loaded_rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0); store.loaded_rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
// Drop edges referencing deleted/missing nodes // Orphan edges filtered naturally during for_each_relation (unresolvable UUIDs skipped)
let db = store.db.as_ref().unwrap();
store.relations.retain(|r|
index::contains_key(db, &r.source_key).unwrap_or(false) &&
index::contains_key(db, &r.target_key).unwrap_or(false)
);
Ok(store) Ok(store)
} }
@ -359,13 +354,10 @@ impl Store {
} }
} }
self.relations = by_uuid.into_values() // Index relations directly (no Vec intermediate)
.filter(|r| !r.deleted)
.collect();
// Index relations in redb
if let Some(db) = &self.db { if let Some(db) = &self.db {
for rel in &self.relations { for rel in by_uuid.into_values() {
if rel.deleted { continue; }
index::index_relation(db, &rel.source, &rel.target, rel.strength, rel.rel_type as u8)?; index::index_relation(db, &rel.source, &rel.target, rel.strength, rel.rel_type as u8)?;
} }
} }

View file

@ -209,18 +209,6 @@ pub fn remove_relation(
Ok(()) Ok(())
} }
/// Clear all relations from the index.
///
/// Works inside a single write transaction on `db`: the `RELS` multimap
/// table is deleted, opened again, and the transaction is then committed.
///
/// # Errors
///
/// Propagates any error from beginning the transaction, deleting or
/// opening the table, or committing.
pub fn clear_relations(db: &Database) -> Result<()> {
let txn = db.begin_write()?;
{
// Drop and recreate the table
// NOTE(review): presumably opening the table after deleting it recreates
// it empty — confirm against the redb documentation. The returned handle
// is discarded; the inner scope ends before the commit below.
txn.delete_multimap_table(RELS)?;
let _ = txn.open_multimap_table(RELS)?;
}
txn.commit()?;
Ok(())
}
/// Get all edges for a node. Returns (other_uuid, strength, rel_type, is_outgoing). /// Get all edges for a node. Returns (other_uuid, strength, rel_type, is_outgoing).
pub fn edges_for_node(db: &Database, node_uuid: &[u8; 16]) -> Result<Vec<([u8; 16], f32, u8, bool)>> { pub fn edges_for_node(db: &Database, node_uuid: &[u8; 16]) -> Result<Vec<([u8; 16], f32, u8, bool)>> {
let txn = db.begin_read()?; let txn = db.begin_read()?;

View file

@ -47,7 +47,6 @@ pub fn strip_md_suffix(key: &str) -> String {
// The full in-memory store // The full in-memory store
pub struct Store { pub struct Store {
pub relations: Vec<Relation>, // all active relations
/// Log sizes at load time — used for staleness detection. /// Log sizes at load time — used for staleness detection.
pub(crate) loaded_nodes_size: u64, pub(crate) loaded_nodes_size: u64,
pub(crate) loaded_rels_size: u64, pub(crate) loaded_rels_size: u64,
@ -58,7 +57,6 @@ pub struct Store {
impl Default for Store { impl Default for Store {
fn default() -> Self { fn default() -> Self {
Store { Store {
relations: Vec::new(),
loaded_nodes_size: 0, loaded_nodes_size: 0,
loaded_rels_size: 0, loaded_rels_size: 0,
db: None, db: None,
@ -130,14 +128,25 @@ impl Store {
Ok(()) Ok(())
} }
/// Rebuild relation index from Vec. Call after mutations that modify relations. /// Get all edges for a node by UUID. Returns (other_uuid, strength, rel_type, is_outgoing).
pub fn reindex_relations(&self) -> Result<()> { pub fn edges_for_uuid(&self, uuid: &[u8; 16]) -> Result<Vec<([u8; 16], f32, u8, bool)>> {
if let Some(db) = self.db.as_ref() { let db = self.db.as_ref()
index::clear_relations(db)?; .ok_or_else(|| anyhow::anyhow!("store not loaded"))?;
for rel in &self.relations { index::edges_for_node(db, uuid)
if rel.deleted { continue; }
index::index_relation(db, &rel.source, &rel.target, rel.strength, rel.rel_type as u8)?;
} }
/// Add a relation to the index.
///
/// Forwards `source`, `target`, `strength` and `rel_type` to
/// `index::index_relation` when `self.db` holds a database handle.
/// Only the index is touched here; this method does not append anything
/// to the relations log.
///
/// When `self.db` is `None` nothing is written and `Ok(())` is still
/// returned, so a missing database is not reported as an error.
///
/// # Errors
///
/// Propagates any error returned by `index::index_relation`.
pub fn index_relation(&self, source: &[u8; 16], target: &[u8; 16], strength: f32, rel_type: u8) -> Result<()> {
// No database handle: skip silently rather than fail.
if let Some(db) = self.db.as_ref() {
index::index_relation(db, source, target, strength, rel_type)?;
}
Ok(())
}
/// Remove a relation from the index.
pub fn remove_relation_from_index(&self, source: &[u8; 16], target: &[u8; 16], strength: f32, rel_type: u8) -> Result<()> {
if let Some(db) = self.db.as_ref() {
index::remove_relation(db, source, target, strength, rel_type)?;
} }
Ok(()) Ok(())
} }

View file

@ -28,13 +28,12 @@ impl Store {
Ok(()) Ok(())
} }
/// Add a relation (appends to log + updates cache + indexes) /// Add a relation (appends to log + indexes)
pub fn add_relation(&mut self, rel: Relation) -> Result<()> { pub fn add_relation(&mut self, rel: Relation) -> Result<()> {
self.append_relations(std::slice::from_ref(&rel))?; self.append_relations(std::slice::from_ref(&rel))?;
if let Some(db) = &self.db { if let Some(db) = &self.db {
index::index_relation(db, &rel.source, &rel.target, rel.strength, rel.rel_type as u8)?; index::index_relation(db, &rel.source, &rel.target, rel.strength, rel.rel_type as u8)?;
} }
self.relations.push(rel);
Ok(()) Ok(())
} }