// consciousness/src/neuro/scoring.rs

// Consolidation scoring, replay queues, interference detection, and
// graph health metrics. Pure analysis — no store mutations.

use crate::store::{Store, now_epoch};
use crate::graph::{self, Graph};
use crate::spectral::{self, SpectralEmbedding, SpectralPosition};
use std::collections::HashMap;

const SECS_PER_DAY: f64 = 86400.0;

/// Consolidation priority: how urgently a node needs attention.
///
/// With spectral data:
///   priority = spectral_displacement × overdue × emotion
/// Without:
///   priority = (1 - cc) × overdue × emotion
///
/// Spectral displacement is the outlier_score clamped and normalized —
/// it measures how far a node sits from its community center in the
/// eigenspace. This is a global signal (considers all graph structure)
/// vs CC, which is local (only immediate neighbors).
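///
/// Worked example (illustrative numbers): outlier_score = 4.5 gives
/// displacement = (4.5 / 3.0).min(3.0) = 1.5; a 7-day interval last
/// replayed 21 days ago gives overdue_ratio = 3.0; emotion = 5.0 gives
/// emotion_factor = 1.5; so priority = 1.5 × 3.0 × 1.5 = 6.75.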
pub fn consolidation_priority(
    store: &Store,
    key: &str,
    graph: &Graph,
    spectral_outlier: Option<f64>,
) -> f64 {
    let node = match store.nodes.get(key) {
        Some(n) => n,
        None => return 0.0,
    };

    // Integration factor: how poorly integrated is this node?
    let displacement = if let Some(outlier) = spectral_outlier {
        // outlier_score = dist_to_center / median_dist_in_community
        // 1.0 = typical position, >2 = unusual, >5 = extreme outlier
        // Normalize by 3.0 and clamp so extreme outliers don't dominate:
        // anything past outlier=9 saturates at displacement 3.0.
        (outlier / 3.0).min(3.0)
    } else {
        let cc = graph.clustering_coefficient(key) as f64;
        1.0 - cc
    };

    // Spaced repetition: how overdue is this node for replay?
    let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY;
    let time_since_replay = if node.last_replayed > 0 {
        (now_epoch() - node.last_replayed).max(0) as f64
    } else {
        // Never replayed: treat as three full intervals overdue.
        interval_secs * 3.0
    };
    let overdue_ratio = (time_since_replay / interval_secs).min(5.0);

    // Emotional intensity: higher emotion = higher priority
    let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);

    displacement * overdue_ratio * emotion_factor
}

/// Item in the replay queue
pub struct ReplayItem {
    pub key: String,
    pub priority: f64,
    pub interval_days: u32,
    pub emotion: f32,
    pub cc: f32,
    /// Spectral classification: "bridge", "outlier", "core", "peripheral",
    /// or "unknown" when no spectral data is available
    pub classification: &'static str,
    /// Raw spectral outlier score (distance / median)
    pub outlier_score: f64,
}

/// Generate the replay queue: nodes ordered by consolidation priority.
/// Automatically loads spectral embedding if available.
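///
/// Usage sketch (assumes a `Store` already loaded elsewhere; `store`
/// here is illustrative):
///
/// ```ignore
/// let queue = replay_queue(&store, 10);
/// for item in &queue {
///     println!("{:>7.2} {:<10} {}", item.priority, item.classification, item.key);
/// }
/// ```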
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
    let graph = store.build_graph();
    let emb = spectral::load_embedding().ok();
    replay_queue_with_graph(store, count, &graph, emb.as_ref())
}

/// Generate the replay queue using a pre-built graph and optional spectral data.
pub fn replay_queue_with_graph(
    store: &Store,
    count: usize,
    graph: &Graph,
    emb: Option<&SpectralEmbedding>,
) -> Vec<ReplayItem> {
    // Build spectral position map if embedding is available
    let positions: HashMap<String, SpectralPosition> = if let Some(emb) = emb {
        let communities = graph.communities().clone();
        spectral::analyze_positions(emb, &communities)
            .into_iter()
            .map(|p| (p.key.clone(), p))
            .collect()
    } else {
        HashMap::new()
    };

    let mut items: Vec<ReplayItem> = store.nodes.iter()
        .map(|(key, node)| {
            let pos = positions.get(key);
            let outlier_score = pos.map(|p| p.outlier_score).unwrap_or(0.0);
            let classification = pos
                .map(spectral::classify_position)
                .unwrap_or("unknown");
            let priority = consolidation_priority(
                store, key, graph,
                pos.map(|p| p.outlier_score),
            );
            ReplayItem {
                key: key.clone(),
                priority,
                interval_days: node.spaced_repetition_interval,
                emotion: node.emotion,
                cc: graph.clustering_coefficient(key),
                classification,
                outlier_score,
            }
        })
        .collect();

    items.sort_by(|a, b| b.priority.total_cmp(&a.priority));
    items.truncate(count);
    items
}

/// Detect interfering memory pairs: high text similarity but different communities.
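///
/// Usage sketch (`store` assumed loaded elsewhere; the exact threshold
/// semantics come from `similarity::pairwise_similar`):
///
/// ```ignore
/// let graph = store.build_graph();
/// let pairs = detect_interference(&store, &graph, 0.5);
/// for (a, b, sim) in &pairs {
///     println!("{} <-> {} (sim {:.2})", a, b, sim);
/// }
/// ```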
pub fn detect_interference(
    store: &Store,
    graph: &Graph,
    threshold: f32,
) -> Vec<(String, String, f32)> {
    use crate::similarity;

    let communities = graph.communities();

    // Only compare nodes within a reasonable set — take the longest ones
    let mut docs: Vec<(String, String)> = store.nodes.iter()
        .filter(|(_, n)| n.content.len() > 50) // skip tiny nodes
        .map(|(k, n)| (k.clone(), n.content.clone()))
        .collect();

    // For large stores, sample to keep pairwise comparison feasible
    if docs.len() > 200 {
        docs.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
        docs.truncate(200);
    }

    let similar = similarity::pairwise_similar(&docs, threshold);

    // Filter to pairs in different communities
    similar.into_iter()
        .filter(|(a, b, _)| {
            let ca = communities.get(a);
            let cb = communities.get(b);
            match (ca, cb) {
                (Some(a), Some(b)) => a != b,
                _ => true, // if community unknown, flag it
            }
        })
        .collect()
}

/// Agent allocation from the control loop.
/// Agent types and counts are data-driven — add agents by adding
/// entries to the counts map.
#[derive(Default)]
pub struct ConsolidationPlan {
    /// agent_name → run count
    pub counts: std::collections::HashMap<String, usize>,
    pub run_health: bool,
    pub rationale: Vec<String>,
}

impl ConsolidationPlan {
    pub fn count(&self, agent: &str) -> usize {
        self.counts.get(agent).copied().unwrap_or(0)
    }

    pub fn set(&mut self, agent: &str, count: usize) {
        self.counts.insert(agent.to_string(), count);
    }

    pub fn add(&mut self, agent: &str, count: usize) {
        *self.counts.entry(agent.to_string()).or_default() += count;
    }

    pub fn total(&self) -> usize {
        self.counts.values().sum::<usize>() + if self.run_health { 1 } else { 0 }
    }

    /// Expand the plan into a flat list of (agent_name, batch_size) runs.
    /// Interleaves agent types so different types alternate.
    pub fn to_agent_runs(&self, batch_size: usize) -> Vec<(String, usize)> {
        let mut runs = Vec::new();
        if self.run_health {
            runs.push(("health".to_string(), 0));
        }

        // Sort by count descending so high-volume agents interleave well
        let mut types: Vec<(&String, &usize)> = self.counts.iter()
            .filter(|(_, c)| **c > 0)
            .collect();
        types.sort_by(|a, b| b.1.cmp(a.1));

        let mut queues: Vec<Vec<(String, usize)>> = types.iter().map(|(name, count)| {
            let mut q = Vec::new();
            let mut remaining = **count;
            while remaining > 0 {
                let batch = remaining.min(batch_size);
                q.push((name.to_string(), batch));
                remaining -= batch;
            }
            q
        }).collect();

        // Round-robin interleave
        loop {
            let mut added = false;
            for q in &mut queues {
                if let Some(run) = q.first() {
                    runs.push(run.clone());
                    q.remove(0);
                    added = true;
                }
            }
            if !added { break; }
        }
        runs
    }
}
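
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal sketch of the round-robin interleave in `to_agent_runs`;
    // the agent names and counts here are illustrative, not a real plan.
    #[test]
    fn to_agent_runs_interleaves_batches() {
        let mut plan = ConsolidationPlan::default();
        plan.set("linker", 5);
        plan.set("organize", 2);
        // linker splits into batches [2, 2, 1], organize into [2];
        // the round-robin pass alternates between the two queues.
        let runs = plan.to_agent_runs(2);
        let expected = vec![
            ("linker".to_string(), 2),
            ("organize".to_string(), 2),
            ("linker".to_string(), 2),
            ("linker".to_string(), 1),
        ];
        assert_eq!(runs, expected);
    }
}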

/// Analyze metrics and decide how much each agent needs to run.
///
/// This is the control loop: metrics → error signal → agent allocation.
/// Target values are based on healthy small-world networks.
pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
    consolidation_plan_inner(store, true)
}

/// Cheap version: skip O(n²) interference detection (for daemon status).
pub fn consolidation_plan_quick(store: &Store) -> ConsolidationPlan {
    consolidation_plan_inner(store, false)
}

fn consolidation_plan_inner(store: &Store, detect_interf: bool) -> ConsolidationPlan {
    let graph = store.build_graph();
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let _avg_cc = graph.avg_clustering_coefficient();
    let interference_count = if detect_interf {
        detect_interference(store, &graph, 0.5).len()
    } else {
        0
    };
    let episodic_count = store.nodes.iter()
        .filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
        .count();
    let _episodic_ratio = if store.nodes.is_empty() { 0.0 }
        else { episodic_count as f32 / store.nodes.len() as f32 };

    let mut plan = ConsolidationPlan {
        counts: std::collections::HashMap::new(),
        run_health: true,
        rationale: Vec::new(),
    };

    // Active agent types from config
    let config = crate::config::get();
    let agent_types: Vec<&str> = config.agent_types.iter().map(|s| s.as_str()).collect();

    // Target: α ≥ 2.5 (healthy scale-free)
    if alpha < 2.0 {
        plan.add("linker", 100);
        plan.rationale.push(format!(
            "α={:.2} (target ≥2.5): extreme hub dominance → 100 linker", alpha));
    } else if alpha < 2.5 {
        plan.add("linker", 50);
        plan.rationale.push(format!(
            "α={:.2} (target ≥2.5): moderate hub dominance → 50 linker", alpha));
    } else {
        plan.add("linker", 20);
        plan.rationale.push(format!(
            "α={:.2}: healthy — 20 linker for maintenance", alpha));
    }

    // Target: Gini ≤ 0.4
    if gini > 0.5 {
        plan.add("linker", 50);
        plan.rationale.push(format!(
            "Gini={:.3} (target ≤0.4): high inequality → +50 linker", gini));
    }

    // Interference: separator disambiguates confusable nodes
    if interference_count > 100 {
        plan.add("separator", 10);
        plan.rationale.push(format!(
            "Interference: {} pairs (target <50) → 10 separator", interference_count));
    } else if interference_count > 20 {
        plan.add("separator", 5);
        plan.rationale.push(format!(
            "Interference: {} pairs → 5 separator", interference_count));
    } else if interference_count > 0 {
        plan.add("separator", interference_count.min(3));
    }

    // Organize: proportional to linker — synthesizes what linker connects
    let linker = plan.count("linker");
    plan.set("organize", linker / 2);
    plan.rationale.push(format!(
        "Organize: {} (half of linker count)", plan.count("organize")));

    // Distill: core concept maintenance
    let organize = plan.count("organize");
    let mut distill = organize;
    if gini > 0.4 { distill += 20; }
    if alpha < 2.0 { distill += 20; }
    plan.set("distill", distill);
    plan.rationale.push(format!(
        "Distill: {} (synthesize hub content)", plan.count("distill")));

    // Split: handle oversized nodes
    plan.set("split", 5);

    // Distribute agent budget using Elo ratings
    let budget = crate::config::get().agent_budget;
    let elo_path = crate::config::get().data_dir.join("agent-elo.json");
    if let Ok(elo_json) = std::fs::read_to_string(&elo_path) {
        if let Ok(ratings) = serde_json::from_str::<std::collections::HashMap<String, f64>>(&elo_json) {
            let elos: Vec<f64> = agent_types.iter()
                .map(|t| ratings.get(*t).copied().unwrap_or(1000.0))
                .collect();
            let min_elo = elos.iter().copied().fold(f64::MAX, f64::min);
            let weights: Vec<f64> = elos.iter()
                .map(|e| {
                    let shifted = e - min_elo + 50.0;
                    shifted * shifted
                })
                .collect();
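
            // Weighting: min-shift so the weakest agent keeps nonzero
            // weight, then square to favor higher-rated agents.
            // Worked example (illustrative ratings): Elos 1100/1000/900
            // shift to 250/150/50 and square to 62500/22500/2500; with
            // budget=100 the rounded shares are about 71/26/3 runs.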
            let total_weight: f64 = weights.iter().sum();
            let allocate = |w: f64| -> usize {
                ((w / total_weight * budget as f64).round() as usize).max(2)
            };
            for (i, agent) in agent_types.iter().enumerate() {
                plan.set(agent, allocate(weights[i]));
            }
            let summary: Vec<String> = agent_types.iter()
                .map(|a| format!("{}={}", a, plan.count(a)))
                .collect();
            plan.rationale.push(format!(
                "Elo allocation (budget={}): {}", budget, summary.join(" ")));
        }
    } else {
        // No Elo file — use budget with equal distribution
        let per_type = budget / agent_types.len();
        for agent in &agent_types {
            plan.set(agent, per_type);
        }
        plan.rationale.push(format!(
            "No Elo ratings — equal distribution ({} each, budget={})", per_type, budget));
    }

    plan
}

/// Format the consolidation plan for display.
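///
/// Output shape (illustrative; the actual lines depend on current metrics):
///
/// ```text
/// Consolidation Plan
/// ==================
///
/// Analysis:
/// α=2.31 (target ≥2.5): moderate hub dominance → 50 linker
/// ...
///
/// Agent allocation:
///  1. health — system audit
///  2. linker ×50
///  ...
///
/// Total agent runs: ...
/// ```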
pub fn format_plan(plan: &ConsolidationPlan) -> String {
    let mut out = String::from("Consolidation Plan\n==================\n\n");
    out.push_str("Analysis:\n");
    for r in &plan.rationale {
        out.push_str(&format!("{}\n", r));
    }

    out.push_str("\nAgent allocation:\n");
    if plan.run_health {
        out.push_str(" 1. health — system audit\n");
    }
    let mut step = 2;
    let mut sorted: Vec<_> = plan.counts.iter()
        .filter(|(_, c)| **c > 0)
        .collect();
    sorted.sort_by(|a, b| b.1.cmp(a.1));
    for (agent, count) in &sorted {
        out.push_str(&format!(" {}. {} ×{}\n", step, agent, count));
        step += 1;
    }

    out.push_str(&format!("\nTotal agent runs: {}\n", plan.total()));
    out
}

/// Brief daily check: compare current metrics to the last snapshot.
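///
/// Example output once a prior snapshot exists (numbers illustrative):
///
/// ```text
/// Memory daily check
///  σ=14.2 α=2.41 gini=0.382 cc=0.1873
///  Δσ=+0.4 Δα=-0.03 Δgini=+0.005
///  Status: healthy
/// ```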
pub fn daily_check(store: &Store) -> String {
    let graph_obj = store.build_graph();
    let snap = graph::current_metrics(&graph_obj);
    let history = graph::load_metrics_history();
    let prev = history.last();

    let mut out = String::from("Memory daily check\n");

    // Current state
    out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
        snap.sigma, snap.alpha, snap.gini, snap.avg_cc));

    // Trend
    if let Some(p) = prev {
        let d_sigma = snap.sigma - p.sigma;
        let d_alpha = snap.alpha - p.alpha;
        let d_gini = snap.gini - p.gini;
        out.push_str(&format!(" Δσ={:+.1} Δα={:+.2} Δgini={:+.3}\n",
            d_sigma, d_alpha, d_gini));

        // Assessment
        let mut issues = Vec::new();
        if snap.alpha < 2.0 { issues.push("hub dominance critical"); }
        if snap.gini > 0.5 { issues.push("high inequality"); }
        if snap.avg_cc < 0.1 { issues.push("poor integration"); }
        if d_sigma < -5.0 { issues.push("σ declining"); }
        if d_alpha < -0.1 { issues.push("α declining"); }
        if d_gini > 0.02 { issues.push("inequality increasing"); }

        if issues.is_empty() {
            out.push_str(" Status: healthy\n");
        } else {
            out.push_str(&format!(" Status: needs attention — {}\n", issues.join(", ")));
            out.push_str(" Run: poc-memory consolidate-session\n");
        }
    } else {
        out.push_str(" (first snapshot, no trend data yet)\n");
    }

    // Persist the snapshot
    graph::save_metrics_snapshot(&snap);
    out
}