remove hardcoded skip lists, prune orphan edges in fsck

All nodes in the store are memory — none should be excluded from
knowledge extraction, search, or graph algorithms by name. Removed
the MEMORY/where-am-i/work-queue/work-state skip lists entirely.
Deleted where-am-i and work-queue nodes from the store (ephemeral
scratchpads that don't belong). Added orphan edge pruning to fsck
so broken links get cleaned up automatically.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-08 20:07:07 -04:00
parent 70c0276fa0
commit fd5591653d
4 changed files with 23 additions and 26 deletions

View file

@@ -515,11 +515,7 @@ fn spectral_distance(embedding: &HashMap<String, Vec<f64>>, a: &str, b: &str) ->
 fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
     let embedding = load_spectral_embedding();
-    let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
-    let semantic_keys: Vec<&String> = embedding.keys()
-        .filter(|k| !skip.contains(&k.as_str()))
-        .collect();
+    let semantic_keys: Vec<&String> = embedding.keys().collect();
     let cluster_size = 5;
     let mut used = HashSet::new();
@@ -578,12 +574,7 @@ pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<
 fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<String>, Vec<String>)> {
     let embedding = load_spectral_embedding();
-    let skip_exact: HashSet<&str> = ["MEMORY", "where-am-i",
-        "work-queue", "work-state"].iter().copied().collect();
-    let semantic_keys: Vec<&String> = embedding.keys()
-        .filter(|k| !skip_exact.contains(k.as_str()))
-        .collect();
+    let semantic_keys: Vec<&String> = embedding.keys().collect();
     let mut pairs = Vec::new();
     let mut used = HashSet::new();
@@ -651,9 +642,6 @@ pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result
     let topology = get_graph_topology(store, graph);
     let mut candidates: Vec<(&String, usize)> = store.nodes.iter()
-        .filter(|(k, _)| {
-            !["MEMORY", "where-am-i", "work-queue", "work-state"].contains(&k.as_str())
-        })
         .map(|(k, _)| (k, graph.degree(k)))
         .collect();
     candidates.sort_by(|a, b| b.1.cmp(&a.1));

View file

@@ -133,11 +133,9 @@ pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, S
     Err(format!("no valid JSON in response: {preview}..."))
 }
 
-/// Get non-operational keys for prompt context.
+/// Get all keys for prompt context.
 pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
-    let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
     let mut keys: Vec<String> = store.nodes.keys()
-        .filter(|k| !skip.contains(&k.as_str()))
         .cloned()
         .collect();
     keys.sort();

View file

@@ -416,18 +416,31 @@ fn cmd_fsck() -> Result<(), String> {
     store::fsck()?;
     store::strip_md_keys()?;
 
-    // Check for broken links
-    let store = store::Store::load()?;
-    let mut orphans = 0usize;
+    // Prune broken links (relations referencing deleted/missing nodes)
+    let mut store = store::Store::load()?;
+    let mut to_tombstone = Vec::new();
     for rel in &store.relations {
         if rel.deleted { continue; }
         if !store.nodes.contains_key(&rel.source_key)
             || !store.nodes.contains_key(&rel.target_key) {
-            orphans += 1;
+            let mut tombstone = rel.clone();
+            tombstone.deleted = true;
+            tombstone.version += 1;
+            to_tombstone.push(tombstone);
         }
     }
-    if orphans > 0 {
-        eprintln!("{} broken links (run `health` for details)", orphans);
+    if !to_tombstone.is_empty() {
+        let count = to_tombstone.len();
+        store.append_relations(&to_tombstone)?;
+        for t in &to_tombstone {
+            if let Some(r) = store.relations.iter_mut().find(|r|
+                r.source == t.source && r.target == t.target && !r.deleted) {
+                r.deleted = true;
+                r.version = t.version;
+            }
+        }
+        store.save()?;
+        eprintln!("Pruned {} broken links", count);
     } else {
         eprintln!("No broken links");
     }

View file

@@ -145,11 +145,9 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
     // Suggested link targets: text-similar semantic nodes not already neighbors
     let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
         .map(|(k, _)| k.as_str()).collect();
-    let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
     let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
         .filter(|(k, _)| {
-            !skip.contains(&k.as_str())
-            && *k != &item.key
+            *k != &item.key
             && !neighbor_keys.contains(k.as_str())
         })
         .map(|(k, n)| {