hub differentiation + refine_target for automatic section targeting
Pattern separation for memory graph: when a file-level node (e.g. identity.md) has section children, redistribute its links to the best-matching section using cosine similarity. - differentiate_hub: analyze hub, propose link redistribution - refine_target: at link creation time, automatically target the most specific section instead of the file-level hub - Applied refine_target in all four link creation paths (digest links, journal enrichment, apply consolidation, link-add command) - Saturated hubs listed in agent topology header with "DO NOT LINK" This prevents hub formation proactively (refine_target) and remediates existing hubs (differentiate command). Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
3afc947b88
commit
4530837057
3 changed files with 334 additions and 7 deletions
256
src/neuro.rs
256
src/neuro.rs
|
|
@ -60,11 +60,16 @@ pub struct ReplayItem {
|
|||
/// Generate the replay queue: nodes ordered by consolidation priority
|
||||
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
|
||||
let graph = store.build_graph();
|
||||
let fits = graph::schema_fit_all(&graph);
|
||||
replay_queue_with_graph(store, count, &graph)
|
||||
}
|
||||
|
||||
/// Generate the replay queue using a pre-built graph (avoids redundant rebuild)
|
||||
pub fn replay_queue_with_graph(store: &Store, count: usize, graph: &Graph) -> Vec<ReplayItem> {
|
||||
let fits = graph::schema_fit_all(graph);
|
||||
|
||||
let mut items: Vec<ReplayItem> = store.nodes.iter()
|
||||
.map(|(key, node)| {
|
||||
let priority = consolidation_priority(store, key, &graph);
|
||||
let priority = consolidation_priority(store, key, graph);
|
||||
let fit = fits.get(key).copied().unwrap_or(0.0);
|
||||
ReplayItem {
|
||||
key: key.clone(),
|
||||
|
|
@ -76,7 +81,7 @@ pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
|
|||
})
|
||||
.collect();
|
||||
|
||||
items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap());
|
||||
items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap_or(std::cmp::Ordering::Equal));
|
||||
items.truncate(count);
|
||||
items
|
||||
}
|
||||
|
|
@ -166,14 +171,39 @@ fn format_topology_header(graph: &Graph) -> String {
|
|||
let n = graph.nodes().len();
|
||||
let e = graph.edge_count();
|
||||
|
||||
// Identify saturated hubs — nodes with degree well above threshold
|
||||
let threshold = hub_threshold(graph);
|
||||
let mut hubs: Vec<_> = graph.nodes().iter()
|
||||
.map(|k| (k.clone(), graph.degree(k)))
|
||||
.filter(|(_, d)| *d >= threshold)
|
||||
.collect();
|
||||
hubs.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
hubs.truncate(15);
|
||||
|
||||
let hub_list = if hubs.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
let lines: Vec<String> = hubs.iter()
|
||||
.map(|(k, d)| format!(" - {} (degree {})", k, d))
|
||||
.collect();
|
||||
format!(
|
||||
"### SATURATED HUBS — DO NOT LINK TO THESE\n\
|
||||
The following nodes are already over-connected. Adding more links\n\
|
||||
to them makes the graph worse (star topology). Find lateral\n\
|
||||
connections between peripheral nodes instead.\n\n{}\n\n\
|
||||
Only link to a hub if it is genuinely the ONLY reasonable target.\n\n",
|
||||
lines.join("\n"))
|
||||
};
|
||||
|
||||
format!(
|
||||
"## Current graph topology\n\
|
||||
Nodes: {} Edges: {} Communities: {}\n\
|
||||
Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
|
||||
Avg clustering coefficient: {:.4}\n\n\
|
||||
{}\
|
||||
Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
|
||||
Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
|
||||
n, e, graph.community_count(), sigma, alpha, gini, avg_cc)
|
||||
n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
|
||||
}
|
||||
|
||||
/// Compute the hub degree threshold (top 5% by degree)
|
||||
|
|
@ -415,13 +445,13 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
|
|||
|
||||
match agent {
|
||||
"replay" => {
|
||||
let items = replay_queue(store, count);
|
||||
let items = replay_queue_with_graph(store, count, &graph);
|
||||
let nodes_section = format_nodes_section(store, &items, &graph);
|
||||
load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
|
||||
}
|
||||
"linker" => {
|
||||
// Filter to episodic entries
|
||||
let mut items = replay_queue(store, count * 2);
|
||||
let mut items = replay_queue_with_graph(store, count * 2, &graph);
|
||||
items.retain(|item| {
|
||||
store.nodes.get(&item.key)
|
||||
.map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession))
|
||||
|
|
@ -444,7 +474,7 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
|
|||
.filter(|(k, _)| k.contains("journal") || k.contains("session"))
|
||||
.map(|(k, n)| (k.clone(), n.timestamp))
|
||||
.collect();
|
||||
episodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
episodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
episodes.truncate(count);
|
||||
|
||||
let episode_keys: Vec<_> = episodes.iter().map(|(k, _)| k.clone()).collect();
|
||||
|
|
@ -700,3 +730,215 @@ pub fn daily_check(store: &Store) -> String {
|
|||
|
||||
out
|
||||
}
|
||||
|
||||
// --- Pattern separation (hub differentiation) ---
|
||||
//
|
||||
// When a node becomes a hub (high degree, low CC), it usually means
|
||||
// the concept is under-differentiated — too many things link to one
|
||||
// broad idea instead of specific sub-concepts.
|
||||
//
|
||||
// The hippocampal fix: pattern separation. Examine the hub's neighbors,
|
||||
// match each to the best-fitting child section, and move the link from
|
||||
// the broad parent to the specific child.
|
||||
//
|
||||
// Two cases:
|
||||
// 1. Hub has existing section children (identity.md → identity.md#voice etc)
|
||||
// → purely structural, no Sonnet needed
|
||||
// 2. Hub has no sections → needs Sonnet to propose a split
|
||||
// (not implemented yet)
|
||||
|
||||
/// Refine a link target: if the target is a file-level node with section
|
||||
/// children, find the best-matching section by cosine similarity against
|
||||
/// the source content. Returns the original key if no sections exist or
|
||||
/// no section matches above threshold.
|
||||
///
|
||||
/// This prevents hub formation at link creation time — every new link
|
||||
/// targets the most specific available node.
|
||||
pub fn refine_target(store: &Store, source_content: &str, target_key: &str) -> String {
|
||||
// Only refine file-level nodes (no # in key)
|
||||
if target_key.contains('#') { return target_key.to_string(); }
|
||||
|
||||
let prefix = format!("{}#", target_key);
|
||||
let sections: Vec<(&str, &str)> = store.nodes.iter()
|
||||
.filter(|(k, _)| k.starts_with(&prefix))
|
||||
.map(|(k, n)| (k.as_str(), n.content.as_str()))
|
||||
.collect();
|
||||
|
||||
if sections.is_empty() { return target_key.to_string(); }
|
||||
|
||||
let mut best_section = "";
|
||||
let mut best_sim = 0.0f32;
|
||||
|
||||
for (section_key, section_content) in §ions {
|
||||
let sim = similarity::cosine_similarity(source_content, section_content);
|
||||
if sim > best_sim {
|
||||
best_sim = sim;
|
||||
best_section = section_key;
|
||||
}
|
||||
}
|
||||
|
||||
// Threshold: only refine if there's a meaningful match
|
||||
if best_sim > 0.05 && !best_section.is_empty() {
|
||||
best_section.to_string()
|
||||
} else {
|
||||
target_key.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// A proposed link move: from hub→neighbor to section→neighbor
pub struct LinkMove {
    /// The peripheral node whose hub link is being redirected.
    pub neighbor_key: String,
    /// The over-connected file-level node currently holding the link.
    pub from_hub: String,
    /// The section child ("hub#heading") the link should target instead.
    pub to_section: String,
    /// Cosine similarity between the neighbor's content and the chosen section.
    pub similarity: f32,
    /// First substantive line of the neighbor's content (truncated to 80 chars),
    /// for display when reviewing proposed moves.
    pub neighbor_snippet: String,
}
|
||||
|
||||
/// Analyze a hub node and propose redistributing its links to child sections.
|
||||
///
|
||||
/// Returns None if the node isn't a hub or has no sections to redistribute to.
|
||||
pub fn differentiate_hub(store: &Store, hub_key: &str) -> Option<Vec<LinkMove>> {
|
||||
let graph = store.build_graph();
|
||||
differentiate_hub_with_graph(store, hub_key, &graph)
|
||||
}
|
||||
|
||||
/// Like differentiate_hub but uses a pre-built graph.
|
||||
pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) -> Option<Vec<LinkMove>> {
|
||||
let degree = graph.degree(hub_key);
|
||||
|
||||
// Only differentiate actual hubs
|
||||
if degree < 20 { return None; }
|
||||
|
||||
// Only works on file-level nodes that have section children
|
||||
if hub_key.contains('#') { return None; }
|
||||
|
||||
let prefix = format!("{}#", hub_key);
|
||||
let sections: Vec<(&str, &str)> = store.nodes.iter()
|
||||
.filter(|(k, _)| k.starts_with(&prefix))
|
||||
.map(|(k, n)| (k.as_str(), n.content.as_str()))
|
||||
.collect();
|
||||
|
||||
if sections.is_empty() { return None; }
|
||||
|
||||
// Get all neighbors of the hub
|
||||
let neighbors = graph.neighbors(hub_key);
|
||||
|
||||
let mut moves = Vec::new();
|
||||
|
||||
for (neighbor_key, _strength) in &neighbors {
|
||||
// Skip section children — they should stay linked to parent
|
||||
if neighbor_key.starts_with(&prefix) { continue; }
|
||||
|
||||
let neighbor_content = match store.nodes.get(neighbor_key.as_str()) {
|
||||
Some(n) => &n.content,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// Find best-matching section by content similarity
|
||||
let mut best_section = "";
|
||||
let mut best_sim = 0.0f32;
|
||||
|
||||
for (section_key, section_content) in §ions {
|
||||
let sim = similarity::cosine_similarity(neighbor_content, section_content);
|
||||
if sim > best_sim {
|
||||
best_sim = sim;
|
||||
best_section = section_key;
|
||||
}
|
||||
}
|
||||
|
||||
// Only propose move if there's a reasonable match
|
||||
if best_sim > 0.05 && !best_section.is_empty() {
|
||||
let snippet = neighbor_content.lines()
|
||||
.find(|l| !l.is_empty() && !l.starts_with("<!--") && !l.starts_with("##"))
|
||||
.unwrap_or("")
|
||||
.chars().take(80).collect::<String>();
|
||||
|
||||
moves.push(LinkMove {
|
||||
neighbor_key: neighbor_key.to_string(),
|
||||
from_hub: hub_key.to_string(),
|
||||
to_section: best_section.to_string(),
|
||||
similarity: best_sim,
|
||||
neighbor_snippet: snippet,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
moves.sort_by(|a, b| b.similarity.partial_cmp(&a.similarity).unwrap_or(std::cmp::Ordering::Equal));
|
||||
Some(moves)
|
||||
}
|
||||
|
||||
/// Apply link moves: soft-delete hub→neighbor, create section→neighbor.
|
||||
pub fn apply_differentiation(
|
||||
store: &mut Store,
|
||||
moves: &[LinkMove],
|
||||
) -> (usize, usize) {
|
||||
let mut applied = 0usize;
|
||||
let mut skipped = 0usize;
|
||||
|
||||
for mv in moves {
|
||||
// Check that section→neighbor doesn't already exist
|
||||
let exists = store.relations.iter().any(|r|
|
||||
((r.source_key == mv.to_section && r.target_key == mv.neighbor_key)
|
||||
|| (r.source_key == mv.neighbor_key && r.target_key == mv.to_section))
|
||||
&& !r.deleted
|
||||
);
|
||||
if exists { skipped += 1; continue; }
|
||||
|
||||
let section_uuid = match store.nodes.get(&mv.to_section) {
|
||||
Some(n) => n.uuid,
|
||||
None => { skipped += 1; continue; }
|
||||
};
|
||||
let neighbor_uuid = match store.nodes.get(&mv.neighbor_key) {
|
||||
Some(n) => n.uuid,
|
||||
None => { skipped += 1; continue; }
|
||||
};
|
||||
|
||||
// Soft-delete old hub→neighbor relation
|
||||
for rel in &mut store.relations {
|
||||
if ((rel.source_key == mv.from_hub && rel.target_key == mv.neighbor_key)
|
||||
|| (rel.source_key == mv.neighbor_key && rel.target_key == mv.from_hub))
|
||||
&& !rel.deleted
|
||||
{
|
||||
rel.deleted = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Create new section→neighbor relation
|
||||
let new_rel = Store::new_relation(
|
||||
section_uuid, neighbor_uuid,
|
||||
crate::capnp_store::RelationType::Auto,
|
||||
0.5,
|
||||
&mv.to_section, &mv.neighbor_key,
|
||||
);
|
||||
if store.add_relation(new_rel).is_ok() {
|
||||
applied += 1;
|
||||
}
|
||||
}
|
||||
|
||||
(applied, skipped)
|
||||
}
|
||||
|
||||
/// Find all file-level hubs that have section children to split into.
|
||||
pub fn find_differentiable_hubs(store: &Store) -> Vec<(String, usize, usize)> {
|
||||
let graph = store.build_graph();
|
||||
let threshold = hub_threshold(&graph);
|
||||
|
||||
let mut hubs = Vec::new();
|
||||
for key in graph.nodes() {
|
||||
let deg = graph.degree(key);
|
||||
if deg < threshold { continue; }
|
||||
if key.contains('#') { continue; }
|
||||
|
||||
let prefix = format!("{}#", key);
|
||||
let section_count = store.nodes.keys()
|
||||
.filter(|k| k.starts_with(&prefix))
|
||||
.count();
|
||||
|
||||
if section_count > 0 {
|
||||
hubs.push((key.clone(), deg, section_count));
|
||||
}
|
||||
}
|
||||
|
||||
hubs.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
hubs
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue