hub differentiation + refine_target for automatic section targeting

Pattern separation for memory graph: when a file-level node (e.g.
identity.md) has section children, redistribute its links to the
best-matching section using cosine similarity.

- differentiate_hub: analyze hub, propose link redistribution
- refine_target: at link creation time, automatically target the
  most specific section instead of the file-level hub
- Applied refine_target in all four link creation paths (digest
  links, journal enrichment, apply consolidation, link-add command)
- Saturated hubs listed in agent topology header with "DO NOT LINK"

This prevents hub formation proactively (refine_target) and
remediates existing hubs (differentiate command).

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-01 00:33:46 -05:00
parent 3afc947b88
commit 4530837057
3 changed files with 334 additions and 7 deletions

View file

@ -9,6 +9,7 @@
// 5. Extracts links and saves agent results
use crate::capnp_store::{self, Store};
use crate::neuro;
use regex::Regex;
use std::fs;
@ -803,6 +804,11 @@ pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, us
}
};
// Refine target to best-matching section if available
let source_content = store.nodes.get(&source)
.map(|n| n.content.as_str()).unwrap_or("");
let target = neuro::refine_target(store, source_content, &target);
if source == target { skipped += 1; continue; }
// Check if link already exists
@ -1070,6 +1076,11 @@ pub fn journal_enrich(
None => { println!(" SKIP {} (no matching journal node)", target); continue; }
};
// Refine target to best-matching section
let source_content = store.nodes.get(&source_key)
.map(|n| n.content.as_str()).unwrap_or("");
let resolved = neuro::refine_target(store, source_content, &resolved);
let source_uuid = match store.nodes.get(&source_key) {
Some(n) => n.uuid,
None => continue,
@ -1301,6 +1312,11 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
Err(e) => { println!(" ? {}{}: {}", src, tgt, e); skipped += 1; continue; }
};
// Refine target to best-matching section
let source_content = store.nodes.get(&source)
.map(|n| n.content.as_str()).unwrap_or("");
let target = neuro::refine_target(store, source_content, &target);
let exists = store.relations.iter().any(|r|
r.source_key == source && r.target_key == target && !r.deleted
);

View file

@ -92,6 +92,7 @@ fn main() {
"digest-links" => cmd_digest_links(&args[2..]),
"journal-enrich" => cmd_journal_enrich(&args[2..]),
"apply-consolidation" => cmd_apply_consolidation(&args[2..]),
"differentiate" => cmd_differentiate(&args[2..]),
"trace" => cmd_trace(&args[2..]),
"list-keys" => cmd_list_keys(),
"list-edges" => cmd_list_edges(),
@ -154,6 +155,8 @@ Commands:
Enrich journal entry with conversation links
apply-consolidation [--apply] [--report FILE]
Extract and apply actions from consolidation reports
differentiate [KEY] [--apply]
Redistribute hub links to section-level children
trace KEY Walk temporal links: semantic episodic conversation
list-keys List all node keys (one per line)
list-edges List all edges (tsv: source target strength type)
@ -438,6 +441,11 @@ fn cmd_link_add(args: &[String]) -> Result<(), String> {
let target = store.resolve_key(&args[1])?;
let reason = if args.len() > 2 { args[2..].join(" ") } else { String::new() };
// Refine target to best-matching section
let source_content = store.nodes.get(&source)
.map(|n| n.content.as_str()).unwrap_or("");
let target = neuro::refine_target(&store, source_content, &target);
// Find UUIDs
let source_uuid = store.nodes.get(&source)
.map(|n| n.uuid)
@ -714,6 +722,67 @@ fn cmd_apply_consolidation(args: &[String]) -> Result<(), String> {
digest::apply_consolidation(&mut store, do_apply, report_file)
}
/// Handler for `differentiate [KEY] [--apply]`.
///
/// Without a KEY, prints every hub returned by
/// `neuro::find_differentiable_hubs`. With a KEY, prints the link moves
/// proposed by `neuro::differentiate_hub`, grouped by destination section
/// (at most five per section are shown). The moves are applied and the
/// store saved only when `--apply` is present.
fn cmd_differentiate(args: &[String]) -> Result<(), String> {
    // One pass over the arguments: `--apply` anywhere enables apply mode,
    // the first argument not starting with "--" is taken as the key.
    let mut apply_requested = false;
    let mut key_arg: Option<&str> = None;
    for arg in args {
        if arg == "--apply" {
            apply_requested = true;
        } else if key_arg.is_none() && !arg.starts_with("--") {
            key_arg = Some(arg.as_str());
        }
    }

    let mut store = capnp_store::Store::load()?;

    let key = match key_arg {
        Some(k) => k,
        None => {
            // Listing mode: show all differentiable hubs and stop.
            let hubs = neuro::find_differentiable_hubs(&store);
            if hubs.is_empty() {
                println!("No file-level hubs with sections found above threshold");
                return Ok(());
            }
            println!("Differentiable hubs (file-level nodes with sections):\n");
            for (hub_key, degree, section_count) in &hubs {
                println!(" {:40} deg={:3} sections={}", hub_key, degree, section_count);
            }
            println!("\nRun: poc-memory differentiate KEY to preview a specific hub");
            return Ok(());
        }
    };

    // Single-hub mode.
    let hub = store.resolve_key(key)?;
    let moves = match neuro::differentiate_hub(&store, &hub) {
        Some(proposed) => proposed,
        None => return Err(format!("'{}' is not a file-level hub with sections", hub)),
    };

    // Bucket the proposed moves by destination section; the BTreeMap keeps
    // the sections in sorted key order for display.
    let mut grouped: std::collections::BTreeMap<String, Vec<&neuro::LinkMove>> =
        std::collections::BTreeMap::new();
    for mv in moves.iter() {
        grouped.entry(mv.to_section.clone()).or_default().push(mv);
    }

    println!("Hub '{}' — {} links to redistribute across {} sections\n",
        hub, moves.len(), grouped.len());
    for (section, bucket) in &grouped {
        println!(" {} ({} links):", section, bucket.len());
        for mv in bucket.iter().take(5) {
            println!(" [{:.3}] {}{}", mv.similarity,
                mv.neighbor_key, mv.neighbor_snippet);
        }
        let hidden = bucket.len().saturating_sub(5);
        if hidden > 0 {
            println!(" ... and {} more", hidden);
        }
    }

    if apply_requested {
        let (applied, skipped) = neuro::apply_differentiation(&mut store, &moves);
        store.save()?;
        println!("\nApplied: {} Skipped: {}", applied, skipped);
    } else {
        println!("\nTo apply: poc-memory differentiate {} --apply", hub);
    }
    Ok(())
}
fn cmd_trace(args: &[String]) -> Result<(), String> {
if args.is_empty() {
return Err("Usage: poc-memory trace KEY".into());

View file

@ -60,11 +60,16 @@ pub struct ReplayItem {
/// Generate the replay queue: nodes ordered by consolidation priority
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
let graph = store.build_graph();
let fits = graph::schema_fit_all(&graph);
replay_queue_with_graph(store, count, &graph)
}
/// Generate the replay queue using a pre-built graph (avoids redundant rebuild)
pub fn replay_queue_with_graph(store: &Store, count: usize, graph: &Graph) -> Vec<ReplayItem> {
let fits = graph::schema_fit_all(graph);
let mut items: Vec<ReplayItem> = store.nodes.iter()
.map(|(key, node)| {
let priority = consolidation_priority(store, key, &graph);
let priority = consolidation_priority(store, key, graph);
let fit = fits.get(key).copied().unwrap_or(0.0);
ReplayItem {
key: key.clone(),
@ -76,7 +81,7 @@ pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
})
.collect();
items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap());
items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap_or(std::cmp::Ordering::Equal));
items.truncate(count);
items
}
@ -166,14 +171,39 @@ fn format_topology_header(graph: &Graph) -> String {
let n = graph.nodes().len();
let e = graph.edge_count();
// Identify saturated hubs — nodes with degree well above threshold
let threshold = hub_threshold(graph);
let mut hubs: Vec<_> = graph.nodes().iter()
.map(|k| (k.clone(), graph.degree(k)))
.filter(|(_, d)| *d >= threshold)
.collect();
hubs.sort_by(|a, b| b.1.cmp(&a.1));
hubs.truncate(15);
let hub_list = if hubs.is_empty() {
String::new()
} else {
let lines: Vec<String> = hubs.iter()
.map(|(k, d)| format!(" - {} (degree {})", k, d))
.collect();
format!(
"### SATURATED HUBS — DO NOT LINK TO THESE\n\
The following nodes are already over-connected. Adding more links\n\
to them makes the graph worse (star topology). Find lateral\n\
connections between peripheral nodes instead.\n\n{}\n\n\
Only link to a hub if it is genuinely the ONLY reasonable target.\n\n",
lines.join("\n"))
};
format!(
"## Current graph topology\n\
Nodes: {} Edges: {} Communities: {}\n\
Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
Avg clustering coefficient: {:.4}\n\n\
{}\
Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
n, e, graph.community_count(), sigma, alpha, gini, avg_cc)
n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
}
/// Compute the hub degree threshold (top 5% by degree)
@ -415,13 +445,13 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
match agent {
"replay" => {
let items = replay_queue(store, count);
let items = replay_queue_with_graph(store, count, &graph);
let nodes_section = format_nodes_section(store, &items, &graph);
load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
}
"linker" => {
// Filter to episodic entries
let mut items = replay_queue(store, count * 2);
let mut items = replay_queue_with_graph(store, count * 2, &graph);
items.retain(|item| {
store.nodes.get(&item.key)
.map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession))
@ -444,7 +474,7 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
.filter(|(k, _)| k.contains("journal") || k.contains("session"))
.map(|(k, n)| (k.clone(), n.timestamp))
.collect();
episodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
episodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
episodes.truncate(count);
let episode_keys: Vec<_> = episodes.iter().map(|(k, _)| k.clone()).collect();
@ -700,3 +730,215 @@ pub fn daily_check(store: &Store) -> String {
out
}
// --- Pattern separation (hub differentiation) ---
//
// When a node becomes a hub (high degree, low CC), it usually means
// the concept is under-differentiated — too many things link to one
// broad idea instead of specific sub-concepts.
//
// The hippocampal fix: pattern separation. Examine the hub's neighbors,
// match each to the best-fitting child section, and move the link from
// the broad parent to the specific child.
//
// Two cases:
// 1. Hub has existing section children (identity.md → identity.md#voice etc)
// → purely structural, no Sonnet needed
// 2. Hub has no sections → needs Sonnet to propose a split
// (not implemented yet)
/// Pick the most specific node to link to.
///
/// When `target_key` names a file-level node (no `#` in the key) and the
/// store holds section nodes whose keys start with `"{target_key}#"`, each
/// section's content is scored against `source_content` with cosine
/// similarity and the highest-scoring section key is returned, provided
/// its score exceeds 0.05.
///
/// In every other case (the key already contains `#`, there are no
/// sections, or no section scores above the threshold) the original
/// `target_key` is returned unchanged.
pub fn refine_target(store: &Store, source_content: &str, target_key: &str) -> String {
    // A key containing '#' is not a file-level node; leave it alone.
    if target_key.contains('#') {
        return target_key.to_string();
    }

    let section_prefix = format!("{}#", target_key);

    // Walk the candidate sections, keeping the first one that reaches the
    // strictly highest similarity. `None` means nothing scored above zero
    // (or there were no sections at all).
    let (winner, winner_sim) = store.nodes.iter()
        .filter(|(key, _)| key.starts_with(&section_prefix))
        .fold((None::<&str>, 0.0f32), |(best, best_sim), (key, node)| {
            let sim = similarity::cosine_similarity(source_content, node.content.as_str());
            if sim > best_sim {
                (Some(key.as_str()), sim)
            } else {
                (best, best_sim)
            }
        });

    // Only redirect the link when the match is meaningful.
    match winner {
        Some(section) if winner_sim > 0.05 => section.to_string(),
        _ => target_key.to_string(),
    }
}
/// A proposed link move: from hub→neighbor to section→neighbor.
///
/// Produced by `differentiate_hub_with_graph` and consumed by
/// `apply_differentiation`. All fields are owned, so a `LinkMove` does not
/// borrow from the store it was computed from.
#[derive(Debug, Clone, PartialEq)]
pub struct LinkMove {
    /// Key of the hub's neighbor whose link is being moved.
    pub neighbor_key: String,
    /// Key of the file-level hub the link currently attaches to.
    pub from_hub: String,
    /// Key of the section node chosen as the new endpoint.
    pub to_section: String,
    /// Cosine similarity between the neighbor's content and the chosen
    /// section's content.
    pub similarity: f32,
    /// Display excerpt: up to 80 characters of the neighbor's first
    /// non-empty line that does not start with `<!--` or `##`.
    pub neighbor_snippet: String,
}
/// Propose redistributing a hub's links onto its child sections.
///
/// Convenience wrapper: builds a fresh graph from `store` and delegates to
/// `differentiate_hub_with_graph`. Yields `None` when that function decides
/// the node is not a hub or has no sections to redistribute to.
pub fn differentiate_hub(store: &Store, hub_key: &str) -> Option<Vec<LinkMove>> {
    differentiate_hub_with_graph(store, hub_key, &store.build_graph())
}
/// Same as `differentiate_hub`, but works on a graph the caller already built.
///
/// Returns `None` when `hub_key` has degree below 20, contains `#` (i.e. is
/// itself a section), or has no section nodes keyed `"{hub_key}#..."` in
/// the store. Otherwise returns one `LinkMove` per neighbor whose content
/// matches some section with cosine similarity above 0.05, ordered by
/// descending similarity. Neighbors that are the hub's own sections, or
/// that have no node in the store, are left out.
pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) -> Option<Vec<LinkMove>> {
    // NOTE(review): this uses a fixed degree floor of 20, whereas
    // find_differentiable_hubs filters with hub_threshold(); the two can
    // disagree about what counts as a hub. Confirm this is intended.
    if graph.degree(hub_key) < 20 || hub_key.contains('#') {
        return None;
    }

    // Candidate destinations: every section node under this file.
    let section_prefix = format!("{}#", hub_key);
    let sections: Vec<(&str, &str)> = store.nodes.iter()
        .filter(|(key, _)| key.starts_with(&section_prefix))
        .map(|(key, node)| (key.as_str(), node.content.as_str()))
        .collect();
    if sections.is_empty() {
        return None;
    }

    let adjacent = graph.neighbors(hub_key);
    let mut proposals: Vec<LinkMove> = Vec::new();

    for (neighbor_key, _strength) in &adjacent {
        // Links between the hub and its own sections stay where they are.
        if neighbor_key.starts_with(&section_prefix) {
            continue;
        }

        if let Some(node) = store.nodes.get(neighbor_key.as_str()) {
            let content = &node.content;

            // First section reaching the strictly highest similarity wins.
            let (winner, winner_sim) = sections.iter()
                .fold((None, 0.0f32), |(best, best_sim), (section_key, section_content)| {
                    let sim = similarity::cosine_similarity(content, section_content);
                    if sim > best_sim {
                        (Some(*section_key), sim)
                    } else {
                        (best, best_sim)
                    }
                });

            // Propose a move only for a reasonable match.
            match winner {
                Some(section) if winner_sim > 0.05 => {
                    // Excerpt for display: first line that is neither blank,
                    // an HTML comment opener, nor a "##" heading.
                    let snippet: String = content.lines()
                        .find(|line| {
                            !(line.is_empty() || line.starts_with("<!--") || line.starts_with("##"))
                        })
                        .unwrap_or("")
                        .chars()
                        .take(80)
                        .collect();

                    proposals.push(LinkMove {
                        neighbor_key: neighbor_key.to_string(),
                        from_hub: hub_key.to_string(),
                        to_section: section.to_string(),
                        similarity: winner_sim,
                        neighbor_snippet: snippet,
                    });
                }
                _ => {}
            }
        }
    }

    // Strongest matches first; incomparable (NaN) pairs are treated as equal.
    proposals.sort_by(|lhs, rhs| {
        rhs.similarity
            .partial_cmp(&lhs.similarity)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    Some(proposals)
}
/// Apply link moves: create section→neighbor, then soft-delete hub→neighbor.
///
/// For each move:
/// - if a live (non-deleted) relation already connects the section and the
///   neighbor in either direction, the move is skipped and the hub link is
///   left untouched;
/// - if either the section or the neighbor node is missing from the store,
///   the move is skipped;
/// - otherwise a new `Auto` relation section→neighbor with strength 0.5 is
///   added, and only once that succeeds are the live hub↔neighbor
///   relations (either direction) marked deleted.
///
/// The replacement is created *before* the old link is removed so that a
/// failing `add_relation` cannot leave the neighbor disconnected; such a
/// failure is counted as skipped.
///
/// Returns `(applied, skipped)`; the two always sum to `moves.len()`.
pub fn apply_differentiation(
    store: &mut Store,
    moves: &[LinkMove],
) -> (usize, usize) {
    let mut applied = 0usize;
    let mut skipped = 0usize;

    for mv in moves {
        // Don't duplicate an existing section↔neighbor link.
        let exists = store.relations.iter().any(|r|
            !r.deleted
            && ((r.source_key == mv.to_section && r.target_key == mv.neighbor_key)
                || (r.source_key == mv.neighbor_key && r.target_key == mv.to_section))
        );
        if exists { skipped += 1; continue; }

        // Both endpoints must exist before anything is modified.
        let section_uuid = match store.nodes.get(&mv.to_section) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };
        let neighbor_uuid = match store.nodes.get(&mv.neighbor_key) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };

        // Create the new section→neighbor relation first.
        let new_rel = Store::new_relation(
            section_uuid, neighbor_uuid,
            crate::capnp_store::RelationType::Auto,
            0.5,
            &mv.to_section, &mv.neighbor_key,
        );
        if store.add_relation(new_rel).is_err() {
            // Keep the hub link: removing it without a replacement would
            // silently drop the connection.
            skipped += 1;
            continue;
        }

        // Replacement is in place — now soft-delete the old hub↔neighbor link(s).
        for rel in &mut store.relations {
            if !rel.deleted
                && ((rel.source_key == mv.from_hub && rel.target_key == mv.neighbor_key)
                    || (rel.source_key == mv.neighbor_key && rel.target_key == mv.from_hub))
            {
                rel.deleted = true;
            }
        }
        applied += 1;
    }

    (applied, skipped)
}
/// List file-level hubs that have section children to redistribute into.
///
/// A node qualifies when its degree is at least `hub_threshold` for the
/// current graph, its key contains no `#`, and at least one store key
/// starts with `"{key}#"`. Each entry is `(key, degree, section_count)`;
/// the list is ordered by descending degree.
pub fn find_differentiable_hubs(store: &Store) -> Vec<(String, usize, usize)> {
    let graph = store.build_graph();
    let min_degree = hub_threshold(&graph);

    let mut found: Vec<(String, usize, usize)> = graph.nodes().iter()
        .filter_map(|key| {
            let degree = graph.degree(key);
            // Below the hub cutoff, or already a section node.
            if degree < min_degree || key.contains('#') {
                return None;
            }

            let section_prefix = format!("{}#", key);
            let section_count = store.nodes.keys()
                .filter(|candidate| candidate.starts_with(&section_prefix))
                .count();

            if section_count > 0 {
                Some((key.clone(), degree, section_count))
            } else {
                None
            }
        })
        .collect();

    // Highest degree first; the sort is stable, so ties keep discovery order.
    found.sort_by_key(|hub| std::cmp::Reverse(hub.1));
    found
}