diff --git a/src/digest.rs b/src/digest.rs index 6e1cd47..d7521b8 100644 --- a/src/digest.rs +++ b/src/digest.rs @@ -9,6 +9,7 @@ // 5. Extracts links and saves agent results use crate::capnp_store::{self, Store}; +use crate::neuro; use regex::Regex; use std::fs; @@ -803,6 +804,11 @@ pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, us } }; + // Refine target to best-matching section if available + let source_content = store.nodes.get(&source) + .map(|n| n.content.as_str()).unwrap_or(""); + let target = neuro::refine_target(store, source_content, &target); + if source == target { skipped += 1; continue; } // Check if link already exists @@ -1070,6 +1076,11 @@ pub fn journal_enrich( None => { println!(" SKIP {} (no matching journal node)", target); continue; } }; + // Refine target to best-matching section + let source_content = store.nodes.get(&source_key) + .map(|n| n.content.as_str()).unwrap_or(""); + let resolved = neuro::refine_target(store, source_content, &resolved); + let source_uuid = match store.nodes.get(&source_key) { Some(n) => n.uuid, None => continue, @@ -1301,6 +1312,11 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio Err(e) => { println!(" ? 
{} → {}: {}", src, tgt, e); skipped += 1; continue; } }; + // Refine target to best-matching section + let source_content = store.nodes.get(&source) + .map(|n| n.content.as_str()).unwrap_or(""); + let target = neuro::refine_target(store, source_content, &target); + let exists = store.relations.iter().any(|r| r.source_key == source && r.target_key == target && !r.deleted ); diff --git a/src/main.rs b/src/main.rs index f87204c..6f11c20 100644 --- a/src/main.rs +++ b/src/main.rs @@ -92,6 +92,7 @@ fn main() { "digest-links" => cmd_digest_links(&args[2..]), "journal-enrich" => cmd_journal_enrich(&args[2..]), "apply-consolidation" => cmd_apply_consolidation(&args[2..]), + "differentiate" => cmd_differentiate(&args[2..]), "trace" => cmd_trace(&args[2..]), "list-keys" => cmd_list_keys(), "list-edges" => cmd_list_edges(), @@ -154,6 +155,8 @@ Commands: Enrich journal entry with conversation links apply-consolidation [--apply] [--report FILE] Extract and apply actions from consolidation reports + differentiate [KEY] [--apply] + Redistribute hub links to section-level children trace KEY Walk temporal links: semantic ↔ episodic ↔ conversation list-keys List all node keys (one per line) list-edges List all edges (tsv: source target strength type) @@ -438,6 +441,11 @@ fn cmd_link_add(args: &[String]) -> Result<(), String> { let target = store.resolve_key(&args[1])?; let reason = if args.len() > 2 { args[2..].join(" ") } else { String::new() }; + // Refine target to best-matching section + let source_content = store.nodes.get(&source) + .map(|n| n.content.as_str()).unwrap_or(""); + let target = neuro::refine_target(&store, source_content, &target); + // Find UUIDs let source_uuid = store.nodes.get(&source) .map(|n| n.uuid) @@ -714,6 +722,67 @@ fn cmd_apply_consolidation(args: &[String]) -> Result<(), String> { digest::apply_consolidation(&mut store, do_apply, report_file) } +fn cmd_differentiate(args: &[String]) -> Result<(), String> { + let do_apply = args.iter().any(|a| a == 
"--apply"); + let key_arg: Option<&str> = args.iter() + .find(|a| !a.starts_with("--")) + .map(|s| s.as_str()); + + let mut store = capnp_store::Store::load()?; + + if let Some(key) = key_arg { + // Differentiate a specific hub + let resolved = store.resolve_key(key)?; + let moves = neuro::differentiate_hub(&store, &resolved) + .ok_or_else(|| format!("'{}' is not a file-level hub with sections", resolved))?; + + // Group by target section for display + let mut by_section: std::collections::BTreeMap<String, Vec<&LinkMove>> = + std::collections::BTreeMap::new(); + for mv in &moves { + by_section.entry(mv.to_section.clone()).or_default().push(mv); + } + + println!("Hub '{}' — {} links to redistribute across {} sections\n", + resolved, moves.len(), by_section.len()); + + for (section, section_moves) in &by_section { + println!(" {} ({} links):", section, section_moves.len()); + for mv in section_moves.iter().take(5) { + println!(" [{:.3}] {} — {}", mv.similarity, + mv.neighbor_key, mv.neighbor_snippet); + } + if section_moves.len() > 5 { + println!(" ... 
and {} more", section_moves.len() - 5); + } + } + + if !do_apply { + println!("\nTo apply: poc-memory differentiate {} --apply", resolved); + return Ok(()); + } + + let (applied, skipped) = neuro::apply_differentiation(&mut store, &moves); + store.save()?; + println!("\nApplied: {} Skipped: {}", applied, skipped); + } else { + // Show all differentiable hubs + let hubs = neuro::find_differentiable_hubs(&store); + if hubs.is_empty() { + println!("No file-level hubs with sections found above threshold"); + return Ok(()); + } + + println!("Differentiable hubs (file-level nodes with sections):\n"); + for (key, degree, sections) in &hubs { + println!(" {:40} deg={:3} sections={}", key, degree, sections); + } + println!("\nRun: poc-memory differentiate KEY to preview a specific hub"); + } + + Ok(()) +} + fn cmd_trace(args: &[String]) -> Result<(), String> { if args.is_empty() { return Err("Usage: poc-memory trace KEY".into()); diff --git a/src/neuro.rs b/src/neuro.rs index 7d84762..2abc91c 100644 --- a/src/neuro.rs +++ b/src/neuro.rs @@ -60,11 +60,16 @@ pub struct ReplayItem { /// Generate the replay queue: nodes ordered by consolidation priority pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> { let graph = store.build_graph(); - let fits = graph::schema_fit_all(&graph); + replay_queue_with_graph(store, count, &graph) +} + +/// Generate the replay queue using a pre-built graph (avoids redundant rebuild) +pub fn replay_queue_with_graph(store: &Store, count: usize, graph: &Graph) -> Vec<ReplayItem> { + let fits = graph::schema_fit_all(graph); let mut items: Vec<ReplayItem> = store.nodes.iter() .map(|(key, node)| { - let priority = consolidation_priority(store, key, &graph); + let priority = consolidation_priority(store, key, graph); let fit = fits.get(key).copied().unwrap_or(0.0); ReplayItem { key: key.clone(), @@ -76,7 +81,7 @@ pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> { }) .collect(); - items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap()); + items.sort_by(|a, 
b| b.priority.partial_cmp(&a.priority).unwrap_or(std::cmp::Ordering::Equal)); items.truncate(count); items } @@ -166,14 +171,39 @@ fn format_topology_header(graph: &Graph) -> String { let n = graph.nodes().len(); let e = graph.edge_count(); + // Identify saturated hubs — nodes with degree well above threshold + let threshold = hub_threshold(graph); + let mut hubs: Vec<_> = graph.nodes().iter() + .map(|k| (k.clone(), graph.degree(k))) + .filter(|(_, d)| *d >= threshold) + .collect(); + hubs.sort_by(|a, b| b.1.cmp(&a.1)); + hubs.truncate(15); + + let hub_list = if hubs.is_empty() { + String::new() + } else { + let lines: Vec<String> = hubs.iter() + .map(|(k, d)| format!(" - {} (degree {})", k, d)) + .collect(); + format!( + "### SATURATED HUBS — DO NOT LINK TO THESE\n\ + The following nodes are already over-connected. Adding more links\n\ + to them makes the graph worse (star topology). Find lateral\n\ + connections between peripheral nodes instead.\n\n{}\n\n\ + Only link to a hub if it is genuinely the ONLY reasonable target.\n\n", + lines.join("\n")) + }; + format!( "## Current graph topology\n\ Nodes: {} Edges: {} Communities: {}\n\ Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\ Avg clustering coefficient: {:.4}\n\n\ + {}\ Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\ Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n", - n, e, graph.community_count(), sigma, alpha, gini, avg_cc) + n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list) } /// Compute the hub degree threshold (top 5% by degree) @@ -415,13 +445,13 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String, String> { - let items = replay_queue(store, count); + let items = replay_queue_with_graph(store, count, &graph); let nodes_section = format_nodes_section(store, &items, &graph); load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)]) } "linker" => { // Filter to episodic 
entries - let mut items = replay_queue(store, count * 2); + let mut items = replay_queue_with_graph(store, count * 2, &graph); items.retain(|item| { store.nodes.get(&item.key) .map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession)) @@ -444,7 +474,7 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result = episodes.iter().map(|(k, _)| k.clone()).collect(); @@ -700,3 +730,215 @@ pub fn daily_check(store: &Store) -> String { out } + +// --- Pattern separation (hub differentiation) --- +// +// When a node becomes a hub (high degree, low CC), it usually means +// the concept is under-differentiated — too many things link to one +// broad idea instead of specific sub-concepts. +// +// The hippocampal fix: pattern separation. Examine the hub's neighbors, +// match each to the best-fitting child section, and move the link from +// the broad parent to the specific child. +// +// Two cases: +// 1. Hub has existing section children (identity.md → identity.md#voice etc) +// → purely structural, no Sonnet needed +// 2. Hub has no sections → needs Sonnet to propose a split +// (not implemented yet) + +/// Refine a link target: if the target is a file-level node with section +/// children, find the best-matching section by cosine similarity against +/// the source content. Returns the original key if no sections exist or +/// no section matches above threshold. +/// +/// This prevents hub formation at link creation time — every new link +/// targets the most specific available node. 
+pub fn refine_target(store: &Store, source_content: &str, target_key: &str) -> String { + // Only refine file-level nodes (no # in key) + if target_key.contains('#') { return target_key.to_string(); } + + let prefix = format!("{}#", target_key); + let sections: Vec<(&str, &str)> = store.nodes.iter() + .filter(|(k, _)| k.starts_with(&prefix)) + .map(|(k, n)| (k.as_str(), n.content.as_str())) + .collect(); + + if sections.is_empty() { return target_key.to_string(); } + + let mut best_section = ""; + let mut best_sim = 0.0f32; + + for (section_key, section_content) in &sections { + let sim = similarity::cosine_similarity(source_content, section_content); + if sim > best_sim { + best_sim = sim; + best_section = section_key; + } + } + + // Threshold: only refine if there's a meaningful match + if best_sim > 0.05 && !best_section.is_empty() { + best_section.to_string() + } else { + target_key.to_string() + } +} + +/// A proposed link move: from hub→neighbor to section→neighbor +pub struct LinkMove { + pub neighbor_key: String, + pub from_hub: String, + pub to_section: String, + pub similarity: f32, + pub neighbor_snippet: String, +} + +/// Analyze a hub node and propose redistributing its links to child sections. +/// +/// Returns None if the node isn't a hub or has no sections to redistribute to. +pub fn differentiate_hub(store: &Store, hub_key: &str) -> Option<Vec<LinkMove>> { + let graph = store.build_graph(); + differentiate_hub_with_graph(store, hub_key, &graph) +} + +/// Like differentiate_hub but uses a pre-built graph. 
+pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) -> Option<Vec<LinkMove>> { + let degree = graph.degree(hub_key); + + // Only differentiate actual hubs + if degree < 20 { return None; } + + // Only works on file-level nodes that have section children + if hub_key.contains('#') { return None; } + + let prefix = format!("{}#", hub_key); + let sections: Vec<(&str, &str)> = store.nodes.iter() + .filter(|(k, _)| k.starts_with(&prefix)) + .map(|(k, n)| (k.as_str(), n.content.as_str())) + .collect(); + + if sections.is_empty() { return None; } + + // Get all neighbors of the hub + let neighbors = graph.neighbors(hub_key); + + let mut moves = Vec::new(); + + for (neighbor_key, _strength) in &neighbors { + // Skip section children — they should stay linked to parent + if neighbor_key.starts_with(&prefix) { continue; } + + let neighbor_content = match store.nodes.get(neighbor_key.as_str()) { + Some(n) => &n.content, + None => continue, + }; + + // Find best-matching section by content similarity + let mut best_section = ""; + let mut best_sim = 0.0f32; + + for (section_key, section_content) in &sections { + let sim = similarity::cosine_similarity(neighbor_content, section_content); + if sim > best_sim { + best_sim = sim; + best_section = section_key; + } + } + + // Only propose move if there's a reasonable match + if best_sim > 0.05 && !best_section.is_empty() { + let snippet = neighbor_content.lines() + .find(|l| !l.is_empty() && !l.starts_with("