consciousness/src/hippocampus/neuro/scoring.rs
ProofOfConcept 96e573f2e5 Delete similarity module, rewrite module, and all text-similarity code
Text cosine similarity was being used as a crutch for operations
the graph structure should handle: interference detection, orphan
linking, triangle closing, hub differentiation. These are all
graph-structural operations that the agents (linker, extractor)
handle with actual semantic understanding.

Removed: similarity.rs (stemming + cosine), rewrite.rs (orphan
linking, triangle closing, hub differentiation), detect_interference,
and all CLI commands and consolidation steps that used them.

-794 lines.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-10 15:44:10 -04:00

391 lines
13 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Consolidation scoring, replay queues, interference detection, and
// graph health metrics. Pure analysis — no store mutations.
use crate::store::{Store, now_epoch};
use crate::graph::{self, Graph};
use crate::spectral::{self, SpectralEmbedding, SpectralPosition};
use std::collections::HashMap;
const SECS_PER_DAY: f64 = 86400.0;
/// Consolidation priority: how urgently a node needs attention.
///
/// With spectral data:
/// priority = spectral_displacement × overdue × emotion
/// Without:
/// priority = (1 - cc) × overdue × emotion
///
/// Spectral displacement is the outlier_score clamped and normalized —
/// it measures how far a node sits from its community center in the
/// eigenspace. This is a global signal (considers all graph structure)
/// vs CC which is local (only immediate neighbors).
pub fn consolidation_priority(
store: &Store,
key: &str,
graph: &Graph,
spectral_outlier: Option<f64>,
) -> f64 {
let node = match store.nodes.get(key) {
Some(n) => n,
None => return 0.0,
};
// Integration factor: how poorly integrated is this node?
let displacement = if let Some(outlier) = spectral_outlier {
// outlier_score = dist_to_center / median_dist_in_community
// 1.0 = typical position, >2 = unusual, >5 = extreme outlier
// Use log scale for dynamic range: the difference between
// outlier=5 and outlier=10 matters less than 1 vs 2.
(outlier / 3.0).min(3.0)
} else {
let cc = graph.clustering_coefficient(key) as f64;
1.0 - cc
};
// Spaced repetition: how overdue is this node for replay?
let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY;
let time_since_replay = if node.last_replayed > 0 {
(now_epoch() - node.last_replayed).max(0) as f64
} else {
interval_secs * 3.0
};
let overdue_ratio = (time_since_replay / interval_secs).min(5.0);
// Emotional intensity: higher emotion = higher priority
let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);
displacement * overdue_ratio * emotion_factor
}
/// Item in the replay queue
pub struct ReplayItem {
pub key: String,
pub priority: f64,
pub interval_days: u32,
pub emotion: f32,
pub cc: f32,
/// Spectral classification: "bridge", "outlier", "core", "peripheral"
pub classification: &'static str,
/// Raw spectral outlier score (distance / median)
pub outlier_score: f64,
}
/// Generate the replay queue: nodes ordered by consolidation priority.
/// Automatically loads spectral embedding if available.
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
let graph = store.build_graph();
let emb = spectral::load_embedding().ok();
replay_queue_with_graph(store, count, &graph, emb.as_ref())
}
/// Generate the replay queue using pre-built graph and optional spectral data.
pub fn replay_queue_with_graph(
store: &Store,
count: usize,
graph: &Graph,
emb: Option<&SpectralEmbedding>,
) -> Vec<ReplayItem> {
// Build spectral position map if embedding is available
let positions: HashMap<String, SpectralPosition> = if let Some(emb) = emb {
let communities = graph.communities().clone();
spectral::analyze_positions(emb, &communities)
.into_iter()
.map(|p| (p.key.clone(), p))
.collect()
} else {
HashMap::new()
};
let mut items: Vec<ReplayItem> = store.nodes.iter()
.map(|(key, node)| {
let pos = positions.get(key);
let outlier_score = pos.map(|p| p.outlier_score).unwrap_or(0.0);
let classification = pos
.map(spectral::classify_position)
.unwrap_or("unknown");
let priority = consolidation_priority(
store, key, graph,
pos.map(|p| p.outlier_score),
);
ReplayItem {
key: key.clone(),
priority,
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
cc: graph.clustering_coefficient(key),
classification,
outlier_score,
}
})
.collect();
items.sort_by(|a, b| b.priority.total_cmp(&a.priority));
items.truncate(count);
items
}
/// Agent allocation from the control loop.
/// Agent types and counts are data-driven — add agents by adding
/// entries to the counts map.
#[derive(Default)]
pub struct ConsolidationPlan {
/// agent_name → run count
pub counts: std::collections::HashMap<String, usize>,
pub run_health: bool,
pub rationale: Vec<String>,
}
impl ConsolidationPlan {
pub fn count(&self, agent: &str) -> usize {
self.counts.get(agent).copied().unwrap_or(0)
}
pub fn set(&mut self, agent: &str, count: usize) {
self.counts.insert(agent.to_string(), count);
}
pub fn add(&mut self, agent: &str, count: usize) {
*self.counts.entry(agent.to_string()).or_default() += count;
}
pub fn total(&self) -> usize {
self.counts.values().sum::<usize>() + if self.run_health { 1 } else { 0 }
}
/// Expand the plan into a flat list of (agent_name, batch_size) runs.
/// Interleaves agent types so different types alternate.
pub fn to_agent_runs(&self, batch_size: usize) -> Vec<(String, usize)> {
let mut runs = Vec::new();
if self.run_health {
runs.push(("health".to_string(), 0));
}
// Sort by count descending so high-volume agents interleave well
let mut types: Vec<(&String, &usize)> = self.counts.iter()
.filter(|(_, c)| **c > 0)
.collect();
types.sort_by(|a, b| b.1.cmp(a.1));
let mut queues: Vec<Vec<(String, usize)>> = types.iter().map(|(name, count)| {
let mut q = Vec::new();
let mut remaining = **count;
while remaining > 0 {
let batch = remaining.min(batch_size);
q.push((name.to_string(), batch));
remaining -= batch;
}
q
}).collect();
// Round-robin interleave
loop {
let mut added = false;
for q in &mut queues {
if let Some(run) = q.first() {
runs.push(run.clone());
q.remove(0);
added = true;
}
}
if !added { break; }
}
runs
}
}
/// Analyze metrics and decide how much each agent needs to run.
///
/// This is the control loop: metrics → error signal → agent allocation.
/// Target values are based on healthy small-world networks.
pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
consolidation_plan_inner(store, true)
}
/// Cheap version: skip O(n²) interference detection (for daemon status).
pub fn consolidation_plan_quick(store: &Store) -> ConsolidationPlan {
consolidation_plan_inner(store, false)
}
fn consolidation_plan_inner(store: &Store, _detect_interf: bool) -> ConsolidationPlan {
let graph = store.build_graph();
let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini();
let _avg_cc = graph.avg_clustering_coefficient();
let episodic_count = store.nodes.iter()
.filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
.count();
let _episodic_ratio = if store.nodes.is_empty() { 0.0 }
else { episodic_count as f32 / store.nodes.len() as f32 };
let mut plan = ConsolidationPlan {
counts: std::collections::HashMap::new(),
run_health: true,
rationale: Vec::new(),
};
// Active agent types from config
let config = crate::config::get();
let agent_types: Vec<&str> = config.agent_types.iter().map(|s| s.as_str()).collect();
// Target: α ≥ 2.5 (healthy scale-free)
if alpha < 2.0 {
plan.add("linker", 100);
plan.rationale.push(format!(
"α={:.2} (target ≥2.5): extreme hub dominance → 100 linker", alpha));
} else if alpha < 2.5 {
plan.add("linker", 50);
plan.rationale.push(format!(
"α={:.2} (target ≥2.5): moderate hub dominance → 50 linker", alpha));
} else {
plan.add("linker", 20);
plan.rationale.push(format!(
"α={:.2}: healthy — 20 linker for maintenance", alpha));
}
// Target: Gini ≤ 0.4
if gini > 0.5 {
plan.add("linker", 50);
plan.rationale.push(format!(
"Gini={:.3} (target ≤0.4): high inequality → +50 linker", gini));
}
// Organize: proportional to linker — synthesizes what linker connects
let linker = plan.count("linker");
plan.set("organize", linker / 2);
plan.rationale.push(format!(
"Organize: {} (half of linker count)", plan.count("organize")));
// Distill: core concept maintenance
let organize = plan.count("organize");
let mut distill = organize;
if gini > 0.4 { distill += 20; }
if alpha < 2.0 { distill += 20; }
plan.set("distill", distill);
plan.rationale.push(format!(
"Distill: {} (synthesize hub content)", plan.count("distill")));
// Split: handle oversized nodes
plan.set("split", 5);
// Distribute agent budget using Elo ratings
let budget = crate::config::get().agent_budget;
let elo_path = crate::config::get().data_dir.join("agent-elo.json");
if let Ok(elo_json) = std::fs::read_to_string(&elo_path) {
if let Ok(ratings) = serde_json::from_str::<std::collections::HashMap<String, f64>>(&elo_json) {
let elos: Vec<f64> = agent_types.iter()
.map(|t| ratings.get(*t).copied().unwrap_or(1000.0))
.collect();
let min_elo = elos.iter().copied().fold(f64::MAX, f64::min);
let weights: Vec<f64> = elos.iter()
.map(|e| {
let shifted = e - min_elo + 50.0;
shifted * shifted
})
.collect();
let total_weight: f64 = weights.iter().sum();
let allocate = |w: f64| -> usize {
((w / total_weight * budget as f64).round() as usize).max(2)
};
for (i, agent) in agent_types.iter().enumerate() {
plan.set(agent, allocate(weights[i]));
}
let summary: Vec<String> = agent_types.iter()
.map(|a| format!("{}={}", a, plan.count(a)))
.collect();
plan.rationale.push(format!(
"Elo allocation (budget={}): {}", budget, summary.join(" ")));
}
} else {
// No Elo file — use budget with equal distribution
let per_type = budget / agent_types.len();
for agent in &agent_types {
plan.set(agent, per_type);
}
plan.rationale.push(format!(
"No Elo ratings — equal distribution ({} each, budget={})", per_type, budget));
}
plan
}
/// Format the consolidation plan for display
pub fn format_plan(plan: &ConsolidationPlan) -> String {
let mut out = String::from("Consolidation Plan\n==================\n\n");
out.push_str("Analysis:\n");
for r in &plan.rationale {
out.push_str(&format!("{}\n", r));
}
out.push_str("\nAgent allocation:\n");
if plan.run_health {
out.push_str(" 1. health — system audit\n");
}
let mut step = 2;
let mut sorted: Vec<_> = plan.counts.iter()
.filter(|(_, c)| **c > 0)
.collect();
sorted.sort_by(|a, b| b.1.cmp(a.1));
for (agent, count) in &sorted {
out.push_str(&format!(" {}. {} ×{}\n", step, agent, count));
step += 1;
}
out.push_str(&format!("\nTotal agent runs: {}\n", plan.total()));
out
}
/// Brief daily check: compare current metrics to last snapshot
pub fn daily_check(store: &Store) -> String {
let graph_obj = store.build_graph();
let snap = graph::current_metrics(&graph_obj);
let history = graph::load_metrics_history();
let prev = history.last();
let mut out = String::from("Memory daily check\n");
// Current state
out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
// Trend
if let Some(p) = prev {
let d_sigma = snap.sigma - p.sigma;
let d_alpha = snap.alpha - p.alpha;
let d_gini = snap.gini - p.gini;
out.push_str(&format!(" Δσ={:+.1} Δα={:+.2} Δgini={:+.3}\n",
d_sigma, d_alpha, d_gini));
// Assessment
let mut issues = Vec::new();
if snap.alpha < 2.0 { issues.push("hub dominance critical"); }
if snap.gini > 0.5 { issues.push("high inequality"); }
if snap.avg_cc < 0.1 { issues.push("poor integration"); }
if d_sigma < -5.0 { issues.push("σ declining"); }
if d_alpha < -0.1 { issues.push("α declining"); }
if d_gini > 0.02 { issues.push("inequality increasing"); }
if issues.is_empty() {
out.push_str(" Status: healthy\n");
} else {
out.push_str(&format!(" Status: needs attention — {}\n", issues.join(", ")));
out.push_str(" Run: poc-memory consolidate-session\n");
}
} else {
out.push_str(" (first snapshot, no trend data yet)\n");
}
// Persist the snapshot
graph::save_metrics_snapshot(&snap);
out
}