Delete similarity module, rewrite module, and all text-similarity code

Text cosine similarity was being used as a crutch for operations
the graph structure should handle: interference detection, orphan
linking, triangle closing, hub differentiation. These are all
graph-structural operations that the agents (linker, extractor)
handle with actual semantic understanding.

Removed: similarity.rs (stemming + cosine), rewrite.rs (orphan
linking, triangle closing, hub differentiation), detect_interference,
and all CLI commands and consolidation steps that used them.

-794 lines.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
ProofOfConcept 2026-04-10 15:44:10 -04:00
parent 92ef9b5215
commit 96e573f2e5
12 changed files with 11 additions and 794 deletions

View file

@@ -126,43 +126,6 @@ pub fn replay_queue_with_graph(
items
}
/// Find pairs of memory nodes whose text is highly similar even though the
/// graph assigns them to different communities.
///
/// Nodes whose `content.len()` is 50 or less are ignored. If more than 200
/// candidates remain, only the 200 with the longest content are compared,
/// which bounds the cost of the pairwise comparison.
///
/// Returns the `(key, key, score)` tuples reported by
/// `similarity::pairwise_similar` for `threshold`, keeping a pair when the
/// two nodes' communities differ or when either node has no community entry.
pub fn detect_interference(
    store: &Store,
    graph: &Graph,
    threshold: f32,
) -> Vec<(String, String, f32)> {
    use crate::similarity;

    const MIN_CONTENT_LEN: usize = 50;
    const MAX_CANDIDATES: usize = 200;

    let communities = graph.communities();

    // Gather (key, content) for every node large enough to be worth comparing.
    let mut candidates: Vec<(String, String)> = Vec::new();
    for (key, node) in store.nodes.iter() {
        if node.content.len() > MIN_CONTENT_LEN {
            candidates.push((key.clone(), node.content.clone()));
        }
    }

    // Cap the candidate set: longest content first, then drop the rest.
    // The sort is stable, so equal-length entries keep their relative order.
    if candidates.len() > MAX_CANDIDATES {
        candidates.sort_by_key(|(_, text)| std::cmp::Reverse(text.len()));
        candidates.truncate(MAX_CANDIDATES);
    }

    similarity::pairwise_similar(&candidates, threshold)
        .into_iter()
        .filter(|(left, right, _)| {
            match communities.get(left).zip(communities.get(right)) {
                // Both communities known: interfering only if they differ.
                Some((left_community, right_community)) => left_community != right_community,
                // At least one community unknown: flag the pair anyway.
                None => true,
            }
        })
        .collect()
}
/// Agent allocation from the control loop.
/// Agent types and counts are data-driven — add agents by adding
/// entries to the counts map.
@@ -245,16 +208,11 @@ pub fn consolidation_plan_quick(store: &Store) -> ConsolidationPlan {
consolidation_plan_inner(store, false)
}
fn consolidation_plan_inner(store: &Store, detect_interf: bool) -> ConsolidationPlan {
fn consolidation_plan_inner(store: &Store, _detect_interf: bool) -> ConsolidationPlan {
let graph = store.build_graph();
let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini();
let _avg_cc = graph.avg_clustering_coefficient();
let interference_count = if detect_interf {
detect_interference(store, &graph, 0.5).len()
} else {
0
};
let episodic_count = store.nodes.iter()
.filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
@@ -294,19 +252,6 @@ fn consolidation_plan_inner(store: &Store, detect_interf: bool) -> Consolidation
"Gini={:.3} (target ≤0.4): high inequality → +50 linker", gini));
}
// Interference: separator disambiguates confusable nodes
if interference_count > 100 {
plan.add("separator", 10);
plan.rationale.push(format!(
"Interference: {} pairs (target <50) → 10 separator", interference_count));
} else if interference_count > 20 {
plan.add("separator", 5);
plan.rationale.push(format!(
"Interference: {} pairs → 5 separator", interference_count));
} else if interference_count > 0 {
plan.add("separator", interference_count.min(3));
}
// Organize: proportional to linker — synthesizes what linker connects
let linker = plan.count("linker");
plan.set("organize", linker / 2);