forked from kent/consciousness
graph health: fix-categories, cap-degree, link-orphans
Three new tools for structural graph health:

- fix-categories: rule-based recategorization fixing core inflation (225 → 26 core nodes). Only identity.md and kent.md stay core; everything else reclassified to tech/obs/gen by file prefix rules.
- cap-degree: two-phase degree capping. First prunes weakest Auto edges, then prunes Link edges to high-degree targets (they have alternative paths). Brought max degree from 919 → 50.
- link-orphans: connects degree-0/1 nodes to most textually similar connected nodes via cosine similarity. Linked 614 orphans.

Also: community detection now filters edges below strength 0.3, preventing weak auto-links from merging unrelated communities.

Pipeline updated: consolidate-full now runs link-orphans + cap-degree instead of triangle-close (which was counterproductive — densified hub neighborhoods instead of building bridges).

Net effect: Gini 0.754 → 0.546, max degree 919 → 50.
This commit is contained in:
parent
6c7bfb9ec4
commit
94dbca6018
5 changed files with 297 additions and 2 deletions
76
src/neuro.rs
76
src/neuro.rs
|
|
@ -1059,3 +1059,79 @@ pub fn triangle_close(
|
|||
}
|
||||
(hubs_processed, added)
|
||||
}
|
||||
|
||||
/// Link orphan nodes (degree < min_degree) to their most textually similar
|
||||
/// connected nodes. For each orphan, finds top-K nearest neighbors by
|
||||
/// cosine similarity and creates Auto links.
|
||||
/// Returns (orphans_linked, total_links_added).
|
||||
pub fn link_orphans(
|
||||
store: &mut Store,
|
||||
min_degree: usize,
|
||||
links_per_orphan: usize,
|
||||
sim_threshold: f32,
|
||||
) -> (usize, usize) {
|
||||
let graph = store.build_graph();
|
||||
let mut added = 0usize;
|
||||
let mut orphans_linked = 0usize;
|
||||
|
||||
// Separate orphans from connected nodes
|
||||
let orphans: Vec<String> = graph.nodes().iter()
|
||||
.filter(|k| graph.degree(k) < min_degree)
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
// Build candidate pool: connected nodes with their content
|
||||
let candidates: Vec<(String, String)> = graph.nodes().iter()
|
||||
.filter(|k| graph.degree(k) >= min_degree)
|
||||
.filter_map(|k| store.nodes.get(k).map(|n| (k.clone(), n.content.clone())))
|
||||
.collect();
|
||||
|
||||
if candidates.is_empty() { return (0, 0); }
|
||||
|
||||
for orphan_key in &orphans {
|
||||
let orphan_content = match store.nodes.get(orphan_key) {
|
||||
Some(n) => n.content.clone(),
|
||||
None => continue,
|
||||
};
|
||||
if orphan_content.len() < 20 { continue; } // skip near-empty nodes
|
||||
|
||||
// Score against all candidates
|
||||
let mut scores: Vec<(usize, f32)> = candidates.iter()
|
||||
.enumerate()
|
||||
.map(|(i, (_, content))| {
|
||||
(i, similarity::cosine_similarity(&orphan_content, content))
|
||||
})
|
||||
.filter(|(_, s)| *s >= sim_threshold)
|
||||
.collect();
|
||||
|
||||
scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
let to_link = scores.len().min(links_per_orphan);
|
||||
if to_link == 0 { continue; }
|
||||
|
||||
let orphan_uuid = store.nodes.get(orphan_key).unwrap().uuid;
|
||||
|
||||
for &(idx, sim) in scores.iter().take(to_link) {
|
||||
let target_key = &candidates[idx].0;
|
||||
let target_uuid = match store.nodes.get(target_key) {
|
||||
Some(n) => n.uuid,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let rel = Store::new_relation(
|
||||
orphan_uuid, target_uuid,
|
||||
crate::capnp_store::RelationType::Auto,
|
||||
sim * 0.5,
|
||||
orphan_key, target_key,
|
||||
);
|
||||
if store.add_relation(rel).is_ok() {
|
||||
added += 1;
|
||||
}
|
||||
}
|
||||
orphans_linked += 1;
|
||||
}
|
||||
|
||||
if added > 0 {
|
||||
let _ = store.save();
|
||||
}
|
||||
(orphans_linked, added)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue