diff --git a/src/hippocampus/spectral.rs b/src/hippocampus/spectral.rs index 10b8be0..cb50e5a 100644 --- a/src/hippocampus/spectral.rs +++ b/src/hippocampus/spectral.rs @@ -14,22 +14,10 @@ // - Community membership (sign/magnitude of Fiedler vector) // - Natural projections (select which eigenvectors to include) -use crate::graph::Graph; - use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::path::PathBuf; -pub struct SpectralResult { - /// Node keys in index order - pub keys: Vec, - /// Eigenvalues in ascending order - pub eigenvalues: Vec, - /// Eigenvectors: eigvecs[k] is the k-th eigenvector (ascending eigenvalue order), - /// with eigvecs[k][i] being the value for node keys[i] - pub eigvecs: Vec>, -} - /// Per-node spectral embedding, serializable to disk. #[derive(Serialize, Deserialize)] pub struct SpectralEmbedding { @@ -45,127 +33,6 @@ pub fn embedding_path() -> PathBuf { crate::store::memory_dir().join("spectral-embedding.json") } -/// Compute spectral decomposition of the memory graph. -/// -/// Returns the smallest `k` eigenvalues and their eigenvectors of the -/// normalized Laplacian L_sym = I - D^{-1/2} A D^{-1/2}. -/// -/// We compute the full decomposition (it's only 2000×2000, takes <1s) -/// Print the spectral summary: eigenvalue spectrum, then each axis with -/// its extreme nodes (what the axis "means"). -pub fn print_summary(result: &SpectralResult, graph: &Graph) { - let n = result.keys.len(); - let k = result.eigenvalues.len(); - - println!("Spectral Decomposition — {} nodes, {} eigenpairs", n, k); - println!("=========================================\n"); - - // Compact eigenvalue table - println!("Eigenvalue spectrum:"); - for (i, &ev) in result.eigenvalues.iter().enumerate() { - let gap = if i > 0 { - ev - result.eigenvalues[i - 1] - } else { - 0.0 - }; - let gap_bar = if i > 0 { - let bars = (gap * 500.0).min(40.0) as usize; - "#".repeat(bars) - } else { - String::new() - }; - println!(" λ_{:<2} = {:.6} {}", i, ev, gap_bar); - } - - // Connected components - let near_zero = result.eigenvalues.iter() - .filter(|&&v| v.abs() < 1e-6) - .count(); - if near_zero > 1 { - println!("\n {} eigenvalues near 0 = {} disconnected components", near_zero, near_zero); - } - - // Each axis: what are the extremes? - println!("\n\nNatural axes of the knowledge space"); - println!("===================================="); - - for axis in 0..k { - let ev = result.eigenvalues[axis]; - let vec = &result.eigvecs[axis]; - - // Sort nodes by their value on this axis - let mut indexed: Vec<(usize, f64)> = vec.iter() - .enumerate() - .map(|(i, &v)| (i, v)) - .collect(); - indexed.sort_by(|a, b| a.1.total_cmp(&b.1)); - - // Compute the "spread" — how much this axis differentiates - let min_val = indexed.first().map(|x| x.1).unwrap_or(0.0); - let max_val = indexed.last().map(|x| x.1).unwrap_or(0.0); - - println!("\n--- Axis {} (λ={:.6}, range={:.4}) ---", axis, ev, max_val - min_val); - - // Show extremes: 5 most negative, 5 most positive - let show = 5; - println!(" Negative pole:"); - for &(idx, val) in indexed.iter().take(show) { - let key = &result.keys[idx]; - // Shorten key for display: take last component - let short = shorten_key(key); - let deg = graph.degree(key); - let comm = graph.communities().get(key).copied().unwrap_or(999); - println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short); - } - - println!(" Positive pole:"); - for &(idx, val) in indexed.iter().rev().take(show) { - let key = &result.keys[idx]; - let short = shorten_key(key); - let deg = graph.degree(key); - let comm = graph.communities().get(key).copied().unwrap_or(999); - println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short); - } - } -} - -/// Shorten a node key for display. -fn shorten_key(key: &str) -> &str { - if key.len() > 60 { &key[..60] } else { key } -} - -/// Convert SpectralResult to a per-node embedding (transposing the layout). -pub fn to_embedding(result: &SpectralResult) -> SpectralEmbedding { - let dims = result.eigvecs.len(); - let mut coords = HashMap::new(); - - for (i, key) in result.keys.iter().enumerate() { - let mut vec = Vec::with_capacity(dims); - for d in 0..dims { - vec.push(result.eigvecs[d][i]); - } - coords.insert(key.clone(), vec); - } - - SpectralEmbedding { - dims, - eigenvalues: result.eigenvalues.clone(), - coords, - } -} - -/// Save embedding to disk. -pub fn save_embedding(emb: &SpectralEmbedding) -> Result<(), String> { - let path = embedding_path(); - let json = serde_json::to_string(emb) - .map_err(|e| format!("serialize embedding: {}", e))?; - std::fs::write(&path, json) - .map_err(|e| format!("write {}: {}", path.display(), e))?; - eprintln!("Saved {}-dim embedding for {} nodes to {}", - emb.dims, emb.coords.len(), path.display()); - Ok(()) -} - /// Load embedding from disk. pub fn load_embedding() -> Result { let path = embedding_path(); @@ -175,32 +42,6 @@ pub fn load_embedding() -> Result { .map_err(|e| format!("parse embedding: {}", e)) } -/// Find the k nearest neighbors to a node in spectral space. -/// -/// Uses weighted euclidean distance where each dimension is weighted -/// by 1/eigenvalue — lower eigenvalues (coarser structure) matter more. -pub fn nearest_neighbors( - emb: &SpectralEmbedding, - key: &str, - k: usize, -) -> Vec<(String, f64)> { - let target = match emb.coords.get(key) { - Some(c) => c, - None => return vec![], - }; - - let weights = eigenvalue_weights(&emb.eigenvalues); - - let mut distances: Vec<(String, f64)> = emb.coords.iter() - .filter(|(k, _)| k.as_str() != key) - .map(|(k, coords)| (k.clone(), weighted_distance(target, coords, &weights))) - .collect(); - - distances.sort_by(|a, b| a.1.total_cmp(&b.1)); - distances.truncate(k); - distances -} - /// Find nearest neighbors to weighted seed nodes, using link weights. /// /// Each seed has a weight (from query term weighting). For candidates @@ -401,40 +242,6 @@ pub fn analyze_positions( positions } -/// Find pairs of nodes that are spectrally close but not linked in the graph. -/// -/// These are the most valuable candidates for extractor agents — -/// the spectral structure says they should be related, but nobody -/// has articulated why. -pub fn unlinked_neighbors( - emb: &SpectralEmbedding, - linked_pairs: &HashSet<(String, String)>, - max_pairs: usize, -) -> Vec<(String, String, f64)> { - let weights = eigenvalue_weights(&emb.eigenvalues); - let keys: Vec<&String> = emb.coords.keys().collect(); - let mut pairs: Vec<(String, String, f64)> = Vec::new(); - - for (i, k1) in keys.iter().enumerate() { - let c1 = &emb.coords[*k1]; - for k2 in keys.iter().skip(i + 1) { - // Skip if already linked - let pair_fwd = ((*k1).clone(), (*k2).clone()); - let pair_rev = ((*k2).clone(), (*k1).clone()); - if linked_pairs.contains(&pair_fwd) || linked_pairs.contains(&pair_rev) { - continue; - } - - let dist = weighted_distance(c1, &emb.coords[*k2], &weights); - pairs.push(((*k1).clone(), (*k2).clone(), dist)); - } - } - - pairs.sort_by(|a, b| a.2.total_cmp(&b.2)); - pairs.truncate(max_pairs); - pairs -} - /// Classify a spectral position: well-integrated, outlier, bridge, or orphan. pub fn classify_position(pos: &SpectralPosition) -> &'static str { if pos.bridge_score > 0.7 { @@ -448,25 +255,3 @@ pub fn classify_position(pos: &SpectralPosition) -> &'static str { } } -/// Identify which spectral dimensions a set of nodes load on most heavily. -/// Returns dimension indices sorted by total loading. -pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize, f64)> { - let coords: Vec<&Vec> = keys.iter() - .filter_map(|k| emb.coords.get(*k)) - .collect(); - if coords.is_empty() { - return vec![]; - } - - let mut dim_loading: Vec<(usize, f64)> = (0..emb.dims) - .map(|d| { - let loading: f64 = coords.iter() - .map(|c| c[d].abs()) - .sum(); - (d, loading) - }) - .collect(); - - dim_loading.sort_by(|a, b| b.1.total_cmp(&a.1)); - dim_loading -} diff --git a/src/mind/identity.rs b/src/mind/identity.rs index 0a71d71..4d63cc6 100644 --- a/src/mind/identity.rs +++ b/src/mind/identity.rs @@ -133,27 +133,6 @@ fn load_memory_files(cwd: &Path, memory_project: Option<&Path>, context_groups: memories } -/// Discover instruction and memory files that would be loaded. -/// Returns (instruction_files, memory_files) as (display_path, chars) pairs. -pub fn context_file_info(prompt_file: &str, memory_project: Option<&Path>, context_groups: &[ContextGroup]) -> (Vec<(String, usize)>, Vec<(String, usize)>) { - let cwd = std::env::current_dir().unwrap_or_default(); - - let context_files = find_context_files(&cwd, prompt_file); - let instruction_files: Vec<_> = context_files.iter() - .filter_map(|path| { - std::fs::read_to_string(path).ok() - .map(|content| (path.display().to_string(), content.len())) - }) - .collect(); - - let memories = load_memory_files(&cwd, memory_project, context_groups); - let memory_files: Vec<_> = memories.into_iter() - .map(|(name, content)| (name, content.len())) - .collect(); - - (instruction_files, memory_files) -} - /// Short system prompt: agent identity, tool instructions, behavioral norms. pub fn assemble_system_prompt() -> String { let cfg = crate::config::get();