remove hardcoded skip lists, prune orphan edges in fsck
All nodes in the store are memory — none should be excluded by name from knowledge extraction, search, or graph algorithms. Removed the MEMORY/where-am-i/work-queue/work-state skip lists entirely. Deleted the where-am-i and work-queue nodes from the store (ephemeral scratchpads that don't belong there). Added orphan-edge pruning to fsck so broken links get cleaned up automatically.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
70c0276fa0
commit
fd5591653d
4 changed files with 23 additions and 26 deletions
|
|
@@ -515,11 +515,7 @@ fn spectral_distance(embedding: &HashMap<String, Vec<f64>>, a: &str, b: &str) ->
|
||||||
|
|
||||||
fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
|
fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
|
||||||
let embedding = load_spectral_embedding();
|
let embedding = load_spectral_embedding();
|
||||||
let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
|
let semantic_keys: Vec<&String> = embedding.keys().collect();
|
||||||
|
|
||||||
let semantic_keys: Vec<&String> = embedding.keys()
|
|
||||||
.filter(|k| !skip.contains(&k.as_str()))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let cluster_size = 5;
|
let cluster_size = 5;
|
||||||
let mut used = HashSet::new();
|
let mut used = HashSet::new();
|
||||||
|
|
@@ -578,12 +574,7 @@ pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<
|
||||||
|
|
||||||
fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<String>, Vec<String>)> {
|
fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<String>, Vec<String>)> {
|
||||||
let embedding = load_spectral_embedding();
|
let embedding = load_spectral_embedding();
|
||||||
let skip_exact: HashSet<&str> = ["MEMORY", "where-am-i",
|
let semantic_keys: Vec<&String> = embedding.keys().collect();
|
||||||
"work-queue", "work-state"].iter().copied().collect();
|
|
||||||
|
|
||||||
let semantic_keys: Vec<&String> = embedding.keys()
|
|
||||||
.filter(|k| !skip_exact.contains(k.as_str()))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let mut pairs = Vec::new();
|
let mut pairs = Vec::new();
|
||||||
let mut used = HashSet::new();
|
let mut used = HashSet::new();
|
||||||
|
|
@@ -651,9 +642,6 @@ pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result
|
||||||
let topology = get_graph_topology(store, graph);
|
let topology = get_graph_topology(store, graph);
|
||||||
|
|
||||||
let mut candidates: Vec<(&String, usize)> = store.nodes.iter()
|
let mut candidates: Vec<(&String, usize)> = store.nodes.iter()
|
||||||
.filter(|(k, _)| {
|
|
||||||
!["MEMORY", "where-am-i", "work-queue", "work-state"].contains(&k.as_str())
|
|
||||||
})
|
|
||||||
.map(|(k, _)| (k, graph.degree(k)))
|
.map(|(k, _)| (k, graph.degree(k)))
|
||||||
.collect();
|
.collect();
|
||||||
candidates.sort_by(|a, b| b.1.cmp(&a.1));
|
candidates.sort_by(|a, b| b.1.cmp(&a.1));
|
||||||
|
|
|
||||||
|
|
@@ -133,11 +133,9 @@ pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, S
|
||||||
Err(format!("no valid JSON in response: {preview}..."))
|
Err(format!("no valid JSON in response: {preview}..."))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get non-operational keys for prompt context.
|
/// Get all keys for prompt context.
|
||||||
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
|
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
|
||||||
let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
|
|
||||||
let mut keys: Vec<String> = store.nodes.keys()
|
let mut keys: Vec<String> = store.nodes.keys()
|
||||||
.filter(|k| !skip.contains(&k.as_str()))
|
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect();
|
.collect();
|
||||||
keys.sort();
|
keys.sort();
|
||||||
|
|
|
||||||
25
src/main.rs
25
src/main.rs
|
|
@@ -416,18 +416,31 @@ fn cmd_fsck() -> Result<(), String> {
|
||||||
store::fsck()?;
|
store::fsck()?;
|
||||||
store::strip_md_keys()?;
|
store::strip_md_keys()?;
|
||||||
|
|
||||||
// Check for broken links
|
// Prune broken links (relations referencing deleted/missing nodes)
|
||||||
let store = store::Store::load()?;
|
let mut store = store::Store::load()?;
|
||||||
let mut orphans = 0usize;
|
let mut to_tombstone = Vec::new();
|
||||||
for rel in &store.relations {
|
for rel in &store.relations {
|
||||||
if rel.deleted { continue; }
|
if rel.deleted { continue; }
|
||||||
if !store.nodes.contains_key(&rel.source_key)
|
if !store.nodes.contains_key(&rel.source_key)
|
||||||
|| !store.nodes.contains_key(&rel.target_key) {
|
|| !store.nodes.contains_key(&rel.target_key) {
|
||||||
orphans += 1;
|
let mut tombstone = rel.clone();
|
||||||
|
tombstone.deleted = true;
|
||||||
|
tombstone.version += 1;
|
||||||
|
to_tombstone.push(tombstone);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if orphans > 0 {
|
if !to_tombstone.is_empty() {
|
||||||
eprintln!("{} broken links (run `health` for details)", orphans);
|
let count = to_tombstone.len();
|
||||||
|
store.append_relations(&to_tombstone)?;
|
||||||
|
for t in &to_tombstone {
|
||||||
|
if let Some(r) = store.relations.iter_mut().find(|r|
|
||||||
|
r.source == t.source && r.target == t.target && !r.deleted) {
|
||||||
|
r.deleted = true;
|
||||||
|
r.version = t.version;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
store.save()?;
|
||||||
|
eprintln!("Pruned {} broken links", count);
|
||||||
} else {
|
} else {
|
||||||
eprintln!("No broken links");
|
eprintln!("No broken links");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -145,11 +145,9 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
|
||||||
// Suggested link targets: text-similar semantic nodes not already neighbors
|
// Suggested link targets: text-similar semantic nodes not already neighbors
|
||||||
let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
|
let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
|
||||||
.map(|(k, _)| k.as_str()).collect();
|
.map(|(k, _)| k.as_str()).collect();
|
||||||
let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
|
|
||||||
let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
|
let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
|
||||||
.filter(|(k, _)| {
|
.filter(|(k, _)| {
|
||||||
!skip.contains(&k.as_str())
|
*k != &item.key
|
||||||
&& *k != &item.key
|
|
||||||
&& !neighbor_keys.contains(k.as_str())
|
&& !neighbor_keys.contains(k.as_str())
|
||||||
})
|
})
|
||||||
.map(|(k, n)| {
|
.map(|(k, n)| {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue