neuro: split into scoring, prompts, and rewrite modules

neuro.rs was 1164 lines wearing three hats: - scoring.rs (401 lines): pure analysis — priority, replay queues, interference detection, consolidation planning - prompts.rs (396 lines): agent prompt generation and formatting - rewrite.rs (363 lines): graph topology mutations — hub differentiation, triangle closure, orphan linking The split follows safety profiles: scoring never mutates, prompts only reads, rewrite takes &mut Store. All public API re-exported from neuro/mod.rs so callers don't change.
2026-03-05 10:24:05 -05:00 · 2026-03-05 10:24:05 -05:00 · 2f455ba29d
commit 2f455ba29d
parent 4747004b36
5 changed files with 1178 additions and 1163 deletions
--- a/src/neuro/scoring.rs
+++ b/src/neuro/scoring.rs
@ -0,0 +1,390 @@
+// Consolidation scoring, replay queues, interference detection, and
+// graph health metrics. Pure analysis — no store mutations.
+
+use crate::store::{Store, now_epoch};
+use crate::graph::{self, Graph};
+use crate::spectral::{self, SpectralEmbedding, SpectralPosition};
+
+use std::collections::HashMap;
+
+const SECS_PER_DAY: f64 = 86400.0;
+
+/// Consolidation priority: how urgently a node needs attention.
+///
+/// With spectral data:
+///   priority = spectral_displacement × overdue × emotion
+/// Without:
+///   priority = (1 - cc) × overdue × emotion
+///
+/// Spectral displacement is the outlier_score clamped and normalized —
+/// it measures how far a node sits from its community center in the
+/// eigenspace. This is a global signal (considers all graph structure)
+/// vs CC which is local (only immediate neighbors).
+pub fn consolidation_priority(
+    store: &Store,
+    key: &str,
+    graph: &Graph,
+    spectral_outlier: Option<f64>,
+) -> f64 {
+    let node = match store.nodes.get(key) {
+        Some(n) => n,
+        None => return 0.0,
+    };
+
+    // Integration factor: how poorly integrated is this node?
+    let displacement = if let Some(outlier) = spectral_outlier {
+        // outlier_score = dist_to_center / median_dist_in_community
+        // 1.0 = typical position, >2 = unusual, >5 = extreme outlier
+        // Use log scale for dynamic range: the difference between
+        // outlier=5 and outlier=10 matters less than 1 vs 2.
+        (outlier / 3.0).min(3.0)
+    } else {
+        let cc = graph.clustering_coefficient(key) as f64;
+        1.0 - cc
+    };
+
+    // Spaced repetition: how overdue is this node for replay?
+    let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY;
+    let time_since_replay = if node.last_replayed > 0 {
+        (now_epoch() - node.last_replayed).max(0) as f64
+    } else {
+        interval_secs * 3.0
+    };
+    let overdue_ratio = (time_since_replay / interval_secs).min(5.0);
+
+    // Emotional intensity: higher emotion = higher priority
+    let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);
+
+    displacement * overdue_ratio * emotion_factor
+}
+
+/// Item in the replay queue
+pub struct ReplayItem {
+    pub key: String,
+    pub priority: f64,
+    pub interval_days: u32,
+    pub emotion: f32,
+    pub cc: f32,
+    /// Spectral classification: "bridge", "outlier", "core", "peripheral"
+    pub classification: &'static str,
+    /// Raw spectral outlier score (distance / median)
+    pub outlier_score: f64,
+}
+
+/// Generate the replay queue: nodes ordered by consolidation priority.
+/// Automatically loads spectral embedding if available.
+pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
+    let graph = store.build_graph();
+    let emb = spectral::load_embedding().ok();
+    replay_queue_with_graph(store, count, &graph, emb.as_ref())
+}
+
+/// Generate the replay queue using pre-built graph and optional spectral data.
+pub fn replay_queue_with_graph(
+    store: &Store,
+    count: usize,
+    graph: &Graph,
+    emb: Option<&SpectralEmbedding>,
+) -> Vec<ReplayItem> {
+    // Build spectral position map if embedding is available
+    let positions: HashMap<String, SpectralPosition> = if let Some(emb) = emb {
+        let communities = graph.communities().clone();
+        spectral::analyze_positions(emb, &communities)
+            .into_iter()
+            .map(|p| (p.key.clone(), p))
+            .collect()
+    } else {
+        HashMap::new()
+    };
+
+    let mut items: Vec<ReplayItem> = store.nodes.iter()
+        .map(|(key, node)| {
+            let pos = positions.get(key);
+            let outlier_score = pos.map(|p| p.outlier_score).unwrap_or(0.0);
+            let classification = pos
+                .map(|p| spectral::classify_position(p))
+                .unwrap_or("unknown");
+
+            let priority = consolidation_priority(
+                store, key, graph,
+                pos.map(|p| p.outlier_score),
+            );
+            ReplayItem {
+                key: key.clone(),
+                priority,
+                interval_days: node.spaced_repetition_interval,
+                emotion: node.emotion,
+                cc: graph.clustering_coefficient(key),
+                classification,
+                outlier_score,
+            }
+        })
+        .collect();
+
+    items.sort_by(|a, b| b.priority.total_cmp(&a.priority));
+    items.truncate(count);
+    items
+}
+
+/// Detect interfering memory pairs: high text similarity but different communities
+pub fn detect_interference(
+    store: &Store,
+    graph: &Graph,
+    threshold: f32,
+) -> Vec<(String, String, f32)> {
+    use crate::similarity;
+
+    let communities = graph.communities();
+
+    // Only compare nodes within a reasonable set — take the most active ones
+    let mut docs: Vec<(String, String)> = store.nodes.iter()
+        .filter(|(_, n)| n.content.len() > 50) // skip tiny nodes
+        .map(|(k, n)| (k.clone(), n.content.clone()))
+        .collect();
+
+    // For large stores, sample to keep pairwise comparison feasible
+    if docs.len() > 200 {
+        docs.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
+        docs.truncate(200);
+    }
+
+    let similar = similarity::pairwise_similar(&docs, threshold);
+
+    // Filter to pairs in different communities
+    similar.into_iter()
+        .filter(|(a, b, _)| {
+            let ca = communities.get(a);
+            let cb = communities.get(b);
+            match (ca, cb) {
+                (Some(a), Some(b)) => a != b,
+                _ => true, // if community unknown, flag it
+            }
+        })
+        .collect()
+}
+
+/// Schema assimilation scoring for a new node.
+/// Returns how easily the node integrates into existing structure.
+///
+/// High fit (>0.5): auto-link, done
+/// Medium fit (0.2-0.5): agent reviews, proposes links
+/// Low fit (<0.2): deep examination needed — new schema seed, bridge, or noise?
+pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
+    let graph = store.build_graph();
+    let fit = graph.clustering_coefficient(key);
+
+    let recommendation = if fit > 0.5 {
+        "auto-integrate"
+    } else if fit > 0.2 {
+        "agent-review"
+    } else if graph.degree(key) > 0 {
+        "deep-examine-bridge"
+    } else {
+        "deep-examine-orphan"
+    };
+
+    (fit, recommendation)
+}
+
+/// Agent allocation from the control loop
+pub struct ConsolidationPlan {
+    pub replay_count: usize,
+    pub linker_count: usize,
+    pub separator_count: usize,
+    pub transfer_count: usize,
+    pub run_health: bool,
+    pub rationale: Vec<String>,
+}
+
+/// Analyze metrics and decide how much each agent needs to run.
+///
+/// This is the control loop: metrics → error signal → agent allocation.
+/// Target values are based on healthy small-world networks.
+pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
+    let graph = store.build_graph();
+    let alpha = graph.degree_power_law_exponent();
+    let gini = graph.degree_gini();
+    let avg_cc = graph.avg_clustering_coefficient();
+    let interference_pairs = detect_interference(store, &graph, 0.5);
+    let interference_count = interference_pairs.len();
+
+    // Count episodic vs semantic nodes
+    let episodic_count = store.nodes.iter()
+        .filter(|(k, _)| k.contains("journal") || k.contains("session"))
+        .count();
+    let _semantic_count = store.nodes.len() - episodic_count;
+    let episodic_ratio = if store.nodes.is_empty() { 0.0 }
+        else { episodic_count as f32 / store.nodes.len() as f32 };
+
+    let mut plan = ConsolidationPlan {
+        replay_count: 0,
+        linker_count: 0,
+        separator_count: 0,
+        transfer_count: 0,
+        run_health: true, // always run health first
+        rationale: Vec::new(),
+    };
+
+    // Target: α ≥ 2.5 (healthy scale-free)
+    if alpha < 2.0 {
+        plan.replay_count += 10;
+        plan.linker_count += 5;
+        plan.rationale.push(format!(
+            "α={:.2} (target ≥2.5): extreme hub dominance → 10 replay + 5 linker for lateral links",
+            alpha));
+    } else if alpha < 2.5 {
+        plan.replay_count += 5;
+        plan.linker_count += 3;
+        plan.rationale.push(format!(
+            "α={:.2} (target ≥2.5): moderate hub dominance → 5 replay + 3 linker",
+            alpha));
+    } else {
+        plan.replay_count += 3;
+        plan.rationale.push(format!(
+            "α={:.2}: healthy — 3 replay for maintenance", alpha));
+    }
+
+    // Target: Gini ≤ 0.4
+    if gini > 0.5 {
+        plan.replay_count += 3;
+        plan.rationale.push(format!(
+            "Gini={:.3} (target ≤0.4): high inequality → +3 replay (lateral focus)",
+            gini));
+    }
+
+    // Target: avg CC ≥ 0.2
+    if avg_cc < 0.1 {
+        plan.replay_count += 5;
+        plan.rationale.push(format!(
+            "CC={:.3} (target ≥0.2): very poor integration → +5 replay",
+            avg_cc));
+    } else if avg_cc < 0.2 {
+        plan.replay_count += 2;
+        plan.rationale.push(format!(
+            "CC={:.3} (target ≥0.2): low integration → +2 replay",
+            avg_cc));
+    }
+
+    // Interference: >100 pairs is a lot, <10 is clean
+    if interference_count > 100 {
+        plan.separator_count += 10;
+        plan.rationale.push(format!(
+            "Interference: {} pairs (target <50) → 10 separator",
+            interference_count));
+    } else if interference_count > 20 {
+        plan.separator_count += 5;
+        plan.rationale.push(format!(
+            "Interference: {} pairs (target <50) → 5 separator",
+            interference_count));
+    } else if interference_count > 0 {
+        plan.separator_count += interference_count.min(3);
+        plan.rationale.push(format!(
+            "Interference: {} pairs → {} separator",
+            interference_count, plan.separator_count));
+    }
+
+    // Episodic → semantic transfer
+    if episodic_ratio > 0.6 {
+        plan.transfer_count += 10;
+        plan.rationale.push(format!(
+            "Episodic ratio: {:.0}% ({}/{}) → 10 transfer (knowledge extraction needed)",
+            episodic_ratio * 100.0, episodic_count, store.nodes.len()));
+    } else if episodic_ratio > 0.4 {
+        plan.transfer_count += 5;
+        plan.rationale.push(format!(
+            "Episodic ratio: {:.0}% → 5 transfer",
+            episodic_ratio * 100.0));
+    }
+
+    plan
+}
+
+/// Format the consolidation plan for display
+pub fn format_plan(plan: &ConsolidationPlan) -> String {
+    let mut out = String::from("Consolidation Plan\n==================\n\n");
+
+    out.push_str("Analysis:\n");
+    for r in &plan.rationale {
+        out.push_str(&format!("  • {}\n", r));
+    }
+
+    out.push_str("\nAgent allocation:\n");
+    if plan.run_health {
+        out.push_str("  1. health        — system audit\n");
+    }
+    let mut step = 2;
+    if plan.replay_count > 0 {
+        out.push_str(&format!("  {}. replay ×{:2}    — schema assimilation + lateral linking\n",
+            step, plan.replay_count));
+        step += 1;
+    }
+    if plan.linker_count > 0 {
+        out.push_str(&format!("  {}. linker ×{:2}    — relational binding from episodes\n",
+            step, plan.linker_count));
+        step += 1;
+    }
+    if plan.separator_count > 0 {
+        out.push_str(&format!("  {}. separator ×{} — pattern separation\n",
+            step, plan.separator_count));
+        step += 1;
+    }
+    if plan.transfer_count > 0 {
+        out.push_str(&format!("  {}. transfer ×{:2} — episodic→semantic extraction\n",
+            step, plan.transfer_count));
+    }
+
+    let total = plan.replay_count + plan.linker_count
+        + plan.separator_count + plan.transfer_count
+        + if plan.run_health { 1 } else { 0 };
+    out.push_str(&format!("\nTotal agent runs: {}\n", total));
+
+    out
+}
+
+/// Brief daily check: compare current metrics to last snapshot
+pub fn daily_check(store: &Store) -> String {
+    let graph_obj = store.build_graph();
+    let snap = graph::current_metrics(&graph_obj);
+
+    let history = graph::load_metrics_history();
+    let prev = history.last();
+
+    let mut out = String::from("Memory daily check\n");
+
+    // Current state
+    out.push_str(&format!("  σ={:.1}  α={:.2}  gini={:.3}  cc={:.4}\n",
+        snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
+
+    // Trend
+    if let Some(p) = prev {
+        let d_sigma = snap.sigma - p.sigma;
+        let d_alpha = snap.alpha - p.alpha;
+        let d_gini = snap.gini - p.gini;
+
+        out.push_str(&format!("  Δσ={:+.1}  Δα={:+.2}  Δgini={:+.3}\n",
+            d_sigma, d_alpha, d_gini));
+
+        // Assessment
+        let mut issues = Vec::new();
+        if snap.alpha < 2.0 { issues.push("hub dominance critical"); }
+        if snap.gini > 0.5 { issues.push("high inequality"); }
+        if snap.avg_cc < 0.1 { issues.push("poor integration"); }
+        if d_sigma < -5.0 { issues.push("σ declining"); }
+        if d_alpha < -0.1 { issues.push("α declining"); }
+        if d_gini > 0.02 { issues.push("inequality increasing"); }
+
+        if issues.is_empty() {
+            out.push_str("  Status: healthy\n");
+        } else {
+            out.push_str(&format!("  Status: needs attention — {}\n", issues.join(", ")));
+            out.push_str("  Run: poc-memory consolidate-session\n");
+        }
+    } else {
+        out.push_str("  (first snapshot, no trend data yet)\n");
+    }
+
+    // Persist the snapshot
+    graph::save_metrics_snapshot(&snap);
+
+    out
+}