hub differentiation + refine_target for automatic section targeting
Pattern separation for memory graph: when a file-level node (e.g. identity.md) has section children, redistribute its links to the best-matching section using cosine similarity. differentiate_hub analyzes a hub and proposes link redistribution; refine_target, at link-creation time, automatically targets the most specific section instead of the file-level hub. refine_target is applied in all four link-creation paths (digest links, journal enrichment, apply consolidation, link-add command). Saturated hubs are listed in the agent topology header with "DO NOT LINK". This prevents hub formation proactively (refine_target) and remediates existing hubs (differentiate command). Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
3afc947b88
commit
4530837057
3 changed files with 334 additions and 7 deletions
|
|
@ -9,6 +9,7 @@
|
|||
// 5. Extracts links and saves agent results
|
||||
|
||||
use crate::capnp_store::{self, Store};
|
||||
use crate::neuro;
|
||||
|
||||
use regex::Regex;
|
||||
use std::fs;
|
||||
|
|
@ -803,6 +804,11 @@ pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, us
|
|||
}
|
||||
};
|
||||
|
||||
// Refine target to best-matching section if available
|
||||
let source_content = store.nodes.get(&source)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let target = neuro::refine_target(store, source_content, &target);
|
||||
|
||||
if source == target { skipped += 1; continue; }
|
||||
|
||||
// Check if link already exists
|
||||
|
|
@ -1070,6 +1076,11 @@ pub fn journal_enrich(
|
|||
None => { println!(" SKIP {} (no matching journal node)", target); continue; }
|
||||
};
|
||||
|
||||
// Refine target to best-matching section
|
||||
let source_content = store.nodes.get(&source_key)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let resolved = neuro::refine_target(store, source_content, &resolved);
|
||||
|
||||
let source_uuid = match store.nodes.get(&source_key) {
|
||||
Some(n) => n.uuid,
|
||||
None => continue,
|
||||
|
|
@ -1301,6 +1312,11 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
|
|||
Err(e) => { println!(" ? {} → {}: {}", src, tgt, e); skipped += 1; continue; }
|
||||
};
|
||||
|
||||
// Refine target to best-matching section
|
||||
let source_content = store.nodes.get(&source)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let target = neuro::refine_target(store, source_content, &target);
|
||||
|
||||
let exists = store.relations.iter().any(|r|
|
||||
r.source_key == source && r.target_key == target && !r.deleted
|
||||
);
|
||||
|
|
|
|||
69
src/main.rs
69
src/main.rs
|
|
@ -92,6 +92,7 @@ fn main() {
|
|||
"digest-links" => cmd_digest_links(&args[2..]),
|
||||
"journal-enrich" => cmd_journal_enrich(&args[2..]),
|
||||
"apply-consolidation" => cmd_apply_consolidation(&args[2..]),
|
||||
"differentiate" => cmd_differentiate(&args[2..]),
|
||||
"trace" => cmd_trace(&args[2..]),
|
||||
"list-keys" => cmd_list_keys(),
|
||||
"list-edges" => cmd_list_edges(),
|
||||
|
|
@ -154,6 +155,8 @@ Commands:
|
|||
Enrich journal entry with conversation links
|
||||
apply-consolidation [--apply] [--report FILE]
|
||||
Extract and apply actions from consolidation reports
|
||||
differentiate [KEY] [--apply]
|
||||
Redistribute hub links to section-level children
|
||||
trace KEY Walk temporal links: semantic ↔ episodic ↔ conversation
|
||||
list-keys List all node keys (one per line)
|
||||
list-edges List all edges (tsv: source target strength type)
|
||||
|
|
@ -438,6 +441,11 @@ fn cmd_link_add(args: &[String]) -> Result<(), String> {
|
|||
let target = store.resolve_key(&args[1])?;
|
||||
let reason = if args.len() > 2 { args[2..].join(" ") } else { String::new() };
|
||||
|
||||
// Refine target to best-matching section
|
||||
let source_content = store.nodes.get(&source)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let target = neuro::refine_target(&store, source_content, &target);
|
||||
|
||||
// Find UUIDs
|
||||
let source_uuid = store.nodes.get(&source)
|
||||
.map(|n| n.uuid)
|
||||
|
|
@ -714,6 +722,67 @@ fn cmd_apply_consolidation(args: &[String]) -> Result<(), String> {
|
|||
digest::apply_consolidation(&mut store, do_apply, report_file)
|
||||
}
|
||||
|
||||
fn cmd_differentiate(args: &[String]) -> Result<(), String> {
|
||||
let do_apply = args.iter().any(|a| a == "--apply");
|
||||
let key_arg: Option<&str> = args.iter()
|
||||
.find(|a| !a.starts_with("--"))
|
||||
.map(|s| s.as_str());
|
||||
|
||||
let mut store = capnp_store::Store::load()?;
|
||||
|
||||
if let Some(key) = key_arg {
|
||||
// Differentiate a specific hub
|
||||
let resolved = store.resolve_key(key)?;
|
||||
let moves = neuro::differentiate_hub(&store, &resolved)
|
||||
.ok_or_else(|| format!("'{}' is not a file-level hub with sections", resolved))?;
|
||||
|
||||
// Group by target section for display
|
||||
let mut by_section: std::collections::BTreeMap<String, Vec<&neuro::LinkMove>> =
|
||||
std::collections::BTreeMap::new();
|
||||
for mv in &moves {
|
||||
by_section.entry(mv.to_section.clone()).or_default().push(mv);
|
||||
}
|
||||
|
||||
println!("Hub '{}' — {} links to redistribute across {} sections\n",
|
||||
resolved, moves.len(), by_section.len());
|
||||
|
||||
for (section, section_moves) in &by_section {
|
||||
println!(" {} ({} links):", section, section_moves.len());
|
||||
for mv in section_moves.iter().take(5) {
|
||||
println!(" [{:.3}] {} — {}", mv.similarity,
|
||||
mv.neighbor_key, mv.neighbor_snippet);
|
||||
}
|
||||
if section_moves.len() > 5 {
|
||||
println!(" ... and {} more", section_moves.len() - 5);
|
||||
}
|
||||
}
|
||||
|
||||
if !do_apply {
|
||||
println!("\nTo apply: poc-memory differentiate {} --apply", resolved);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let (applied, skipped) = neuro::apply_differentiation(&mut store, &moves);
|
||||
store.save()?;
|
||||
println!("\nApplied: {} Skipped: {}", applied, skipped);
|
||||
} else {
|
||||
// Show all differentiable hubs
|
||||
let hubs = neuro::find_differentiable_hubs(&store);
|
||||
if hubs.is_empty() {
|
||||
println!("No file-level hubs with sections found above threshold");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("Differentiable hubs (file-level nodes with sections):\n");
|
||||
for (key, degree, sections) in &hubs {
|
||||
println!(" {:40} deg={:3} sections={}", key, degree, sections);
|
||||
}
|
||||
println!("\nRun: poc-memory differentiate KEY to preview a specific hub");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_trace(args: &[String]) -> Result<(), String> {
|
||||
if args.is_empty() {
|
||||
return Err("Usage: poc-memory trace KEY".into());
|
||||
|
|
|
|||
256
src/neuro.rs
256
src/neuro.rs
|
|
@ -60,11 +60,16 @@ pub struct ReplayItem {
|
|||
/// Generate the replay queue: nodes ordered by consolidation priority
|
||||
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
|
||||
let graph = store.build_graph();
|
||||
let fits = graph::schema_fit_all(&graph);
|
||||
replay_queue_with_graph(store, count, &graph)
|
||||
}
|
||||
|
||||
/// Generate the replay queue using a pre-built graph (avoids redundant rebuild)
|
||||
pub fn replay_queue_with_graph(store: &Store, count: usize, graph: &Graph) -> Vec<ReplayItem> {
|
||||
let fits = graph::schema_fit_all(graph);
|
||||
|
||||
let mut items: Vec<ReplayItem> = store.nodes.iter()
|
||||
.map(|(key, node)| {
|
||||
let priority = consolidation_priority(store, key, &graph);
|
||||
let priority = consolidation_priority(store, key, graph);
|
||||
let fit = fits.get(key).copied().unwrap_or(0.0);
|
||||
ReplayItem {
|
||||
key: key.clone(),
|
||||
|
|
@ -76,7 +81,7 @@ pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
|
|||
})
|
||||
.collect();
|
||||
|
||||
items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap());
|
||||
items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap_or(std::cmp::Ordering::Equal));
|
||||
items.truncate(count);
|
||||
items
|
||||
}
|
||||
|
|
@ -166,14 +171,39 @@ fn format_topology_header(graph: &Graph) -> String {
|
|||
let n = graph.nodes().len();
|
||||
let e = graph.edge_count();
|
||||
|
||||
// Identify saturated hubs — nodes with degree well above threshold
|
||||
let threshold = hub_threshold(graph);
|
||||
let mut hubs: Vec<_> = graph.nodes().iter()
|
||||
.map(|k| (k.clone(), graph.degree(k)))
|
||||
.filter(|(_, d)| *d >= threshold)
|
||||
.collect();
|
||||
hubs.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
hubs.truncate(15);
|
||||
|
||||
let hub_list = if hubs.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
let lines: Vec<String> = hubs.iter()
|
||||
.map(|(k, d)| format!(" - {} (degree {})", k, d))
|
||||
.collect();
|
||||
format!(
|
||||
"### SATURATED HUBS — DO NOT LINK TO THESE\n\
|
||||
The following nodes are already over-connected. Adding more links\n\
|
||||
to them makes the graph worse (star topology). Find lateral\n\
|
||||
connections between peripheral nodes instead.\n\n{}\n\n\
|
||||
Only link to a hub if it is genuinely the ONLY reasonable target.\n\n",
|
||||
lines.join("\n"))
|
||||
};
|
||||
|
||||
format!(
|
||||
"## Current graph topology\n\
|
||||
Nodes: {} Edges: {} Communities: {}\n\
|
||||
Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
|
||||
Avg clustering coefficient: {:.4}\n\n\
|
||||
{}\
|
||||
Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
|
||||
Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
|
||||
n, e, graph.community_count(), sigma, alpha, gini, avg_cc)
|
||||
n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
|
||||
}
|
||||
|
||||
/// Compute the hub degree threshold (top 5% by degree)
|
||||
|
|
@ -415,13 +445,13 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
|
|||
|
||||
match agent {
|
||||
"replay" => {
|
||||
let items = replay_queue(store, count);
|
||||
let items = replay_queue_with_graph(store, count, &graph);
|
||||
let nodes_section = format_nodes_section(store, &items, &graph);
|
||||
load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
|
||||
}
|
||||
"linker" => {
|
||||
// Filter to episodic entries
|
||||
let mut items = replay_queue(store, count * 2);
|
||||
let mut items = replay_queue_with_graph(store, count * 2, &graph);
|
||||
items.retain(|item| {
|
||||
store.nodes.get(&item.key)
|
||||
.map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession))
|
||||
|
|
@ -444,7 +474,7 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
|
|||
.filter(|(k, _)| k.contains("journal") || k.contains("session"))
|
||||
.map(|(k, n)| (k.clone(), n.timestamp))
|
||||
.collect();
|
||||
episodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
episodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
episodes.truncate(count);
|
||||
|
||||
let episode_keys: Vec<_> = episodes.iter().map(|(k, _)| k.clone()).collect();
|
||||
|
|
@ -700,3 +730,215 @@ pub fn daily_check(store: &Store) -> String {
|
|||
|
||||
out
|
||||
}
|
||||
|
||||
// --- Pattern separation (hub differentiation) ---
|
||||
//
|
||||
// When a node becomes a hub (high degree, low CC), it usually means
|
||||
// the concept is under-differentiated — too many things link to one
|
||||
// broad idea instead of specific sub-concepts.
|
||||
//
|
||||
// The hippocampal fix: pattern separation. Examine the hub's neighbors,
|
||||
// match each to the best-fitting child section, and move the link from
|
||||
// the broad parent to the specific child.
|
||||
//
|
||||
// Two cases:
|
||||
// 1. Hub has existing section children (identity.md → identity.md#voice etc)
|
||||
// → purely structural, no Sonnet needed
|
||||
// 2. Hub has no sections → needs Sonnet to propose a split
|
||||
// (not implemented yet)
|
||||
|
||||
/// Refine a link target: if the target is a file-level node with section
|
||||
/// children, find the best-matching section by cosine similarity against
|
||||
/// the source content. Returns the original key if no sections exist or
|
||||
/// no section matches above threshold.
|
||||
///
|
||||
/// This prevents hub formation at link creation time — every new link
|
||||
/// targets the most specific available node.
|
||||
pub fn refine_target(store: &Store, source_content: &str, target_key: &str) -> String {
|
||||
// Only refine file-level nodes (no # in key)
|
||||
if target_key.contains('#') { return target_key.to_string(); }
|
||||
|
||||
let prefix = format!("{}#", target_key);
|
||||
let sections: Vec<(&str, &str)> = store.nodes.iter()
|
||||
.filter(|(k, _)| k.starts_with(&prefix))
|
||||
.map(|(k, n)| (k.as_str(), n.content.as_str()))
|
||||
.collect();
|
||||
|
||||
if sections.is_empty() { return target_key.to_string(); }
|
||||
|
||||
let mut best_section = "";
|
||||
let mut best_sim = 0.0f32;
|
||||
|
||||
for (section_key, section_content) in §ions {
|
||||
let sim = similarity::cosine_similarity(source_content, section_content);
|
||||
if sim > best_sim {
|
||||
best_sim = sim;
|
||||
best_section = section_key;
|
||||
}
|
||||
}
|
||||
|
||||
// Threshold: only refine if there's a meaningful match
|
||||
if best_sim > 0.05 && !best_section.is_empty() {
|
||||
best_section.to_string()
|
||||
} else {
|
||||
target_key.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// A proposed link move: from hub→neighbor to section→neighbor
pub struct LinkMove {
    // Key of the hub's neighbor whose link is being moved.
    pub neighbor_key: String,
    // The over-connected file-level node the link currently attaches to.
    pub from_hub: String,
    // The section child ("<hub>#<section>") the link should attach to instead.
    pub to_section: String,
    // Cosine similarity between the neighbor's content and the chosen section.
    pub similarity: f32,
    // First meaningful line of the neighbor's content (truncated to 80 chars), for display.
    pub neighbor_snippet: String,
}
|
||||
|
||||
/// Analyze a hub node and propose redistributing its links to child sections.
|
||||
///
|
||||
/// Returns None if the node isn't a hub or has no sections to redistribute to.
|
||||
pub fn differentiate_hub(store: &Store, hub_key: &str) -> Option<Vec<LinkMove>> {
|
||||
let graph = store.build_graph();
|
||||
differentiate_hub_with_graph(store, hub_key, &graph)
|
||||
}
|
||||
|
||||
/// Like differentiate_hub but uses a pre-built graph.
|
||||
pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) -> Option<Vec<LinkMove>> {
|
||||
let degree = graph.degree(hub_key);
|
||||
|
||||
// Only differentiate actual hubs
|
||||
if degree < 20 { return None; }
|
||||
|
||||
// Only works on file-level nodes that have section children
|
||||
if hub_key.contains('#') { return None; }
|
||||
|
||||
let prefix = format!("{}#", hub_key);
|
||||
let sections: Vec<(&str, &str)> = store.nodes.iter()
|
||||
.filter(|(k, _)| k.starts_with(&prefix))
|
||||
.map(|(k, n)| (k.as_str(), n.content.as_str()))
|
||||
.collect();
|
||||
|
||||
if sections.is_empty() { return None; }
|
||||
|
||||
// Get all neighbors of the hub
|
||||
let neighbors = graph.neighbors(hub_key);
|
||||
|
||||
let mut moves = Vec::new();
|
||||
|
||||
for (neighbor_key, _strength) in &neighbors {
|
||||
// Skip section children — they should stay linked to parent
|
||||
if neighbor_key.starts_with(&prefix) { continue; }
|
||||
|
||||
let neighbor_content = match store.nodes.get(neighbor_key.as_str()) {
|
||||
Some(n) => &n.content,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// Find best-matching section by content similarity
|
||||
let mut best_section = "";
|
||||
let mut best_sim = 0.0f32;
|
||||
|
||||
for (section_key, section_content) in §ions {
|
||||
let sim = similarity::cosine_similarity(neighbor_content, section_content);
|
||||
if sim > best_sim {
|
||||
best_sim = sim;
|
||||
best_section = section_key;
|
||||
}
|
||||
}
|
||||
|
||||
// Only propose move if there's a reasonable match
|
||||
if best_sim > 0.05 && !best_section.is_empty() {
|
||||
let snippet = neighbor_content.lines()
|
||||
.find(|l| !l.is_empty() && !l.starts_with("<!--") && !l.starts_with("##"))
|
||||
.unwrap_or("")
|
||||
.chars().take(80).collect::<String>();
|
||||
|
||||
moves.push(LinkMove {
|
||||
neighbor_key: neighbor_key.to_string(),
|
||||
from_hub: hub_key.to_string(),
|
||||
to_section: best_section.to_string(),
|
||||
similarity: best_sim,
|
||||
neighbor_snippet: snippet,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
moves.sort_by(|a, b| b.similarity.partial_cmp(&a.similarity).unwrap_or(std::cmp::Ordering::Equal));
|
||||
Some(moves)
|
||||
}
|
||||
|
||||
/// Apply link moves: soft-delete hub→neighbor, create section→neighbor.
|
||||
pub fn apply_differentiation(
|
||||
store: &mut Store,
|
||||
moves: &[LinkMove],
|
||||
) -> (usize, usize) {
|
||||
let mut applied = 0usize;
|
||||
let mut skipped = 0usize;
|
||||
|
||||
for mv in moves {
|
||||
// Check that section→neighbor doesn't already exist
|
||||
let exists = store.relations.iter().any(|r|
|
||||
((r.source_key == mv.to_section && r.target_key == mv.neighbor_key)
|
||||
|| (r.source_key == mv.neighbor_key && r.target_key == mv.to_section))
|
||||
&& !r.deleted
|
||||
);
|
||||
if exists { skipped += 1; continue; }
|
||||
|
||||
let section_uuid = match store.nodes.get(&mv.to_section) {
|
||||
Some(n) => n.uuid,
|
||||
None => { skipped += 1; continue; }
|
||||
};
|
||||
let neighbor_uuid = match store.nodes.get(&mv.neighbor_key) {
|
||||
Some(n) => n.uuid,
|
||||
None => { skipped += 1; continue; }
|
||||
};
|
||||
|
||||
// Soft-delete old hub→neighbor relation
|
||||
for rel in &mut store.relations {
|
||||
if ((rel.source_key == mv.from_hub && rel.target_key == mv.neighbor_key)
|
||||
|| (rel.source_key == mv.neighbor_key && rel.target_key == mv.from_hub))
|
||||
&& !rel.deleted
|
||||
{
|
||||
rel.deleted = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Create new section→neighbor relation
|
||||
let new_rel = Store::new_relation(
|
||||
section_uuid, neighbor_uuid,
|
||||
crate::capnp_store::RelationType::Auto,
|
||||
0.5,
|
||||
&mv.to_section, &mv.neighbor_key,
|
||||
);
|
||||
if store.add_relation(new_rel).is_ok() {
|
||||
applied += 1;
|
||||
}
|
||||
}
|
||||
|
||||
(applied, skipped)
|
||||
}
|
||||
|
||||
/// Find all file-level hubs that have section children to split into.
|
||||
pub fn find_differentiable_hubs(store: &Store) -> Vec<(String, usize, usize)> {
|
||||
let graph = store.build_graph();
|
||||
let threshold = hub_threshold(&graph);
|
||||
|
||||
let mut hubs = Vec::new();
|
||||
for key in graph.nodes() {
|
||||
let deg = graph.degree(key);
|
||||
if deg < threshold { continue; }
|
||||
if key.contains('#') { continue; }
|
||||
|
||||
let prefix = format!("{}#", key);
|
||||
let section_count = store.nodes.keys()
|
||||
.filter(|k| k.starts_with(&prefix))
|
||||
.count();
|
||||
|
||||
if section_count > 0 {
|
||||
hubs.push((key.clone(), deg, section_count));
|
||||
}
|
||||
}
|
||||
|
||||
hubs.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
hubs
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue