// Spectral decomposition of the memory graph. // // Computes eigenvalues and eigenvectors of the normalized graph Laplacian. // The eigenvectors provide natural coordinates for each node — connected // nodes land nearby, communities form clusters, bridges sit between clusters. // // The eigenvalue spectrum reveals: // - Number of connected components (count of zero eigenvalues) // - Number of natural communities (eigenvalues near zero, before the gap) // - How well-connected the graph is (Fiedler value = second eigenvalue) // // The eigenvectors provide: // - Spectral coordinates for each node (the embedding) // - Community membership (sign/magnitude of Fiedler vector) // - Natural projections (select which eigenvectors to include) use crate::graph::Graph; use faer::Mat; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::path::PathBuf; pub struct SpectralResult { /// Node keys in index order pub keys: Vec, /// Eigenvalues in ascending order pub eigenvalues: Vec, /// Eigenvectors: eigvecs[k] is the k-th eigenvector (ascending eigenvalue order), /// with eigvecs[k][i] being the value for node keys[i] pub eigvecs: Vec>, } /// Per-node spectral embedding, serializable to disk. #[derive(Serialize, Deserialize)] pub struct SpectralEmbedding { /// Number of dimensions (eigenvectors) pub dims: usize, /// Eigenvalues for each dimension pub eigenvalues: Vec, /// Node key → coordinate vector pub coords: HashMap>, } pub fn embedding_path() -> PathBuf { crate::store::memory_dir().join("spectral-embedding.json") } /// Compute spectral decomposition of the memory graph. /// /// Returns the smallest `k` eigenvalues and their eigenvectors of the /// normalized Laplacian L_sym = I - D^{-1/2} A D^{-1/2}. /// /// We compute the full decomposition (it's only 2000×2000, takes <1s) /// and return the bottom k. pub fn decompose(graph: &Graph, k: usize) -> SpectralResult { // Only include nodes with edges (filter isolates) let mut keys: Vec = graph.nodes().iter() .filter(|k| graph.degree(k) > 0) .cloned() .collect(); keys.sort(); let n = keys.len(); let isolates = graph.nodes().len() - n; if isolates > 0 { eprintln!("note: filtered {} isolated nodes, decomposing {} connected nodes", isolates, n); } let key_to_idx: HashMap<&str, usize> = keys.iter() .enumerate() .map(|(i, k)| (k.as_str(), i)) .collect(); // Build weighted degree vector and adjacency let mut degree = vec![0.0f64; n]; let mut adj_entries: Vec<(usize, usize, f64)> = Vec::new(); for (i, key) in keys.iter().enumerate() { for (neighbor, strength) in graph.neighbors(key) { if let Some(&j) = key_to_idx.get(neighbor.as_str()) && j > i { // each edge once let w = strength as f64; adj_entries.push((i, j, w)); degree[i] += w; degree[j] += w; } } } // Build normalized Laplacian: L_sym = I - D^{-1/2} A D^{-1/2} let mut laplacian = Mat::::zeros(n, n); // Diagonal = 1 for nodes with edges, 0 for isolates for i in 0..n { if degree[i] > 0.0 { laplacian[(i, i)] = 1.0; } } // Off-diagonal: -w / sqrt(d_i * d_j) for &(i, j, w) in &adj_entries { if degree[i] > 0.0 && degree[j] > 0.0 { let val = -w / (degree[i] * degree[j]).sqrt(); laplacian[(i, j)] = val; laplacian[(j, i)] = val; } } // Eigendecompose let eig = laplacian.self_adjoint_eigen(faer::Side::Lower) .expect("eigendecomposition failed"); let s = eig.S(); let u = eig.U(); let mut eigenvalues = Vec::with_capacity(k); let mut eigvecs = Vec::with_capacity(k); let s_col = s.column_vector(); // Skip trivial eigenvalues (near-zero = null space from disconnected components). // The number of zero eigenvalues equals the number of connected components. let mut start = 0; while start < n && s_col[start].abs() < 1e-8 { start += 1; } let k = k.min(n.saturating_sub(start)); for col in start..start + k { eigenvalues.push(s_col[col]); let mut vec = Vec::with_capacity(n); for row in 0..n { vec.push(u[(row, col)]); } eigvecs.push(vec); } SpectralResult { keys, eigenvalues, eigvecs } } /// Print the spectral summary: eigenvalue spectrum, then each axis with /// its extreme nodes (what the axis "means"). pub fn print_summary(result: &SpectralResult, graph: &Graph) { let n = result.keys.len(); let k = result.eigenvalues.len(); println!("Spectral Decomposition — {} nodes, {} eigenpairs", n, k); println!("=========================================\n"); // Compact eigenvalue table println!("Eigenvalue spectrum:"); for (i, &ev) in result.eigenvalues.iter().enumerate() { let gap = if i > 0 { ev - result.eigenvalues[i - 1] } else { 0.0 }; let gap_bar = if i > 0 { let bars = (gap * 500.0).min(40.0) as usize; "#".repeat(bars) } else { String::new() }; println!(" λ_{:<2} = {:.6} {}", i, ev, gap_bar); } // Connected components let near_zero = result.eigenvalues.iter() .filter(|&&v| v.abs() < 1e-6) .count(); if near_zero > 1 { println!("\n {} eigenvalues near 0 = {} disconnected components", near_zero, near_zero); } // Each axis: what are the extremes? println!("\n\nNatural axes of the knowledge space"); println!("===================================="); for axis in 0..k { let ev = result.eigenvalues[axis]; let vec = &result.eigvecs[axis]; // Sort nodes by their value on this axis let mut indexed: Vec<(usize, f64)> = vec.iter() .enumerate() .map(|(i, &v)| (i, v)) .collect(); indexed.sort_by(|a, b| a.1.total_cmp(&b.1)); // Compute the "spread" — how much this axis differentiates let min_val = indexed.first().map(|x| x.1).unwrap_or(0.0); let max_val = indexed.last().map(|x| x.1).unwrap_or(0.0); println!("\n--- Axis {} (λ={:.6}, range={:.4}) ---", axis, ev, max_val - min_val); // Show extremes: 5 most negative, 5 most positive let show = 5; println!(" Negative pole:"); for &(idx, val) in indexed.iter().take(show) { let key = &result.keys[idx]; // Shorten key for display: take last component let short = shorten_key(key); let deg = graph.degree(key); let comm = graph.communities().get(key).copied().unwrap_or(999); println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short); } println!(" Positive pole:"); for &(idx, val) in indexed.iter().rev().take(show) { let key = &result.keys[idx]; let short = shorten_key(key); let deg = graph.degree(key); let comm = graph.communities().get(key).copied().unwrap_or(999); println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short); } } } /// Shorten a node key for display. fn shorten_key(key: &str) -> &str { if key.len() > 60 { &key[..60] } else { key } } /// Convert SpectralResult to a per-node embedding (transposing the layout). pub fn to_embedding(result: &SpectralResult) -> SpectralEmbedding { let dims = result.eigvecs.len(); let mut coords = HashMap::new(); for (i, key) in result.keys.iter().enumerate() { let mut vec = Vec::with_capacity(dims); for d in 0..dims { vec.push(result.eigvecs[d][i]); } coords.insert(key.clone(), vec); } SpectralEmbedding { dims, eigenvalues: result.eigenvalues.clone(), coords, } } /// Save embedding to disk. pub fn save_embedding(emb: &SpectralEmbedding) -> Result<(), String> { let path = embedding_path(); let json = serde_json::to_string(emb) .map_err(|e| format!("serialize embedding: {}", e))?; std::fs::write(&path, json) .map_err(|e| format!("write {}: {}", path.display(), e))?; eprintln!("Saved {}-dim embedding for {} nodes to {}", emb.dims, emb.coords.len(), path.display()); Ok(()) } /// Load embedding from disk. pub fn load_embedding() -> Result { let path = embedding_path(); let data = std::fs::read_to_string(&path) .map_err(|e| format!("read {}: {}", path.display(), e))?; serde_json::from_str(&data) .map_err(|e| format!("parse embedding: {}", e)) } /// Find the k nearest neighbors to a node in spectral space. /// /// Uses weighted euclidean distance where each dimension is weighted /// by 1/eigenvalue — lower eigenvalues (coarser structure) matter more. pub fn nearest_neighbors( emb: &SpectralEmbedding, key: &str, k: usize, ) -> Vec<(String, f64)> { let target = match emb.coords.get(key) { Some(c) => c, None => return vec![], }; let weights = eigenvalue_weights(&emb.eigenvalues); let mut distances: Vec<(String, f64)> = emb.coords.iter() .filter(|(k, _)| k.as_str() != key) .map(|(k, coords)| (k.clone(), weighted_distance(target, coords, &weights))) .collect(); distances.sort_by(|a, b| a.1.total_cmp(&b.1)); distances.truncate(k); distances } /// Find nearest neighbors to a set of seed nodes (multi-seed query). /// Returns nodes ranked by minimum distance to any seed. pub fn nearest_to_seeds( emb: &SpectralEmbedding, seeds: &[&str], k: usize, ) -> Vec<(String, f64)> { nearest_to_seeds_weighted(emb, &seeds.iter().map(|&s| (s, 1.0)).collect::>(), None, k) } /// Find nearest neighbors to weighted seed nodes, using link weights. /// /// Each seed has a weight (from query term weighting). For candidates /// directly linked to a seed, the spectral distance is scaled by /// 1/link_strength — strong links make effective distance shorter. /// Seed weight scales the contribution: high-weight seeds pull harder. /// /// Returns (key, effective_distance) sorted by distance ascending. pub fn nearest_to_seeds_weighted( emb: &SpectralEmbedding, seeds: &[(&str, f64)], // (key, seed_weight) graph: Option<&crate::graph::Graph>, k: usize, ) -> Vec<(String, f64)> { let seed_set: HashSet<&str> = seeds.iter().map(|(s, _)| *s).collect(); let seed_data: Vec<(&str, &Vec, f64)> = seeds.iter() .filter_map(|(s, w)| { emb.coords.get(*s) .filter(|c| c.iter().any(|&v| v.abs() > 1e-12)) // skip degenerate seeds .map(|c| (*s, c, *w)) }) .collect(); if seed_data.is_empty() { return vec![]; } // Build seed→neighbor link strength lookup let link_strengths: HashMap<(&str, &str), f32> = if let Some(g) = graph { let mut map = HashMap::new(); for &(seed_key, _) in seeds { for (neighbor, strength) in g.neighbors(seed_key) { map.insert((seed_key, neighbor.as_str()), strength); } } map } else { HashMap::new() }; let dim_weights = eigenvalue_weights(&emb.eigenvalues); let mut distances: Vec<(String, f64)> = emb.coords.iter() .filter(|(k, coords)| { !seed_set.contains(k.as_str()) && coords.iter().any(|&v| v.abs() > 1e-12) // skip degenerate zero-coord nodes }) .map(|(candidate_key, coords)| { let min_dist = seed_data.iter() .map(|(seed_key, sc, seed_weight)| { let raw_dist = weighted_distance(coords, sc, &dim_weights); // Scale by link strength if directly connected let link_scale = link_strengths .get(&(*seed_key, candidate_key.as_str())) .map(|&s| 1.0 / (1.0 + s as f64)) // strong link → smaller distance .unwrap_or(1.0); raw_dist * link_scale / seed_weight }) .fold(f64::MAX, f64::min); (candidate_key.clone(), min_dist) }) .collect(); distances.sort_by(|a, b| a.1.total_cmp(&b.1)); distances.truncate(k); distances } /// Weighted euclidean distance in spectral space. /// Dimensions weighted by 1/eigenvalue — coarser structure matters more. fn weighted_distance(a: &[f64], b: &[f64], weights: &[f64]) -> f64 { a.iter() .zip(b.iter()) .zip(weights.iter()) .map(|((&x, &y), &w)| w * (x - y) * (x - y)) .sum::() .sqrt() } /// Compute eigenvalue-inverse weights for distance calculations. fn eigenvalue_weights(eigenvalues: &[f64]) -> Vec { eigenvalues.iter() .map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 }) .collect() } /// Compute cluster centers (centroids) in spectral space. pub fn cluster_centers( emb: &SpectralEmbedding, communities: &HashMap, ) -> HashMap> { let mut sums: HashMap, usize)> = HashMap::new(); for (key, coords) in &emb.coords { if let Some(&comm) = communities.get(key) { let entry = sums.entry(comm) .or_insert_with(|| (vec![0.0; emb.dims], 0)); for (i, &c) in coords.iter().enumerate() { entry.0[i] += c; } entry.1 += 1; } } sums.into_iter() .map(|(comm, (sum, count))| { let center: Vec = sum.iter() .map(|s| s / count as f64) .collect(); (comm, center) }) .collect() } /// Per-node analysis of spectral position relative to communities. pub struct SpectralPosition { pub key: String, pub community: u32, /// Distance to own community center pub dist_to_center: f64, /// Distance to nearest OTHER community center pub dist_to_nearest: f64, /// Which community is nearest (other than own) pub nearest_community: u32, /// dist_to_center / median_dist_in_community (>1 = outlier) pub outlier_score: f64, /// dist_to_center / dist_to_nearest (>1 = between clusters, potential bridge) pub bridge_score: f64, } /// Analyze spectral positions for all nodes. /// /// Returns positions sorted by outlier_score descending (most displaced first). pub fn analyze_positions( emb: &SpectralEmbedding, communities: &HashMap, ) -> Vec { let centers = cluster_centers(emb, communities); let weights = eigenvalue_weights(&emb.eigenvalues); // Compute distances to own community center let mut by_community: HashMap> = HashMap::new(); let mut node_dists: Vec<(String, u32, f64)> = Vec::new(); for (key, coords) in &emb.coords { if let Some(&comm) = communities.get(key) && let Some(center) = centers.get(&comm) { let dist = weighted_distance(coords, center, &weights); by_community.entry(comm).or_default().push(dist); node_dists.push((key.clone(), comm, dist)); } } // Median distance per community for outlier scoring let medians: HashMap = by_community.into_iter() .map(|(comm, mut dists)| { dists.sort_by(|a, b| a.total_cmp(b)); let median = if dists.is_empty() { 1.0 } else if dists.len() % 2 == 0 { (dists[dists.len() / 2 - 1] + dists[dists.len() / 2]) / 2.0 } else { dists[dists.len() / 2] }; (comm, median.max(1e-6)) }) .collect(); let mut positions: Vec = node_dists.into_iter() .map(|(key, comm, dist_to_center)| { let coords = &emb.coords[&key]; let (nearest_community, dist_to_nearest) = centers.iter() .filter(|&(&c, _)| c != comm) .map(|(&c, center)| (c, weighted_distance(coords, center, &weights))) .min_by(|a, b| a.1.total_cmp(&b.1)) .unwrap_or((comm, f64::MAX)); let median = medians.get(&comm).copied().unwrap_or(1.0); let outlier_score = dist_to_center / median; let bridge_score = if dist_to_nearest > 1e-8 { dist_to_center / dist_to_nearest } else { 0.0 }; SpectralPosition { key, community: comm, dist_to_center, dist_to_nearest, nearest_community, outlier_score, bridge_score, } }) .collect(); positions.sort_by(|a, b| b.outlier_score.total_cmp(&a.outlier_score)); positions } /// Find pairs of nodes that are spectrally close but not linked in the graph. /// /// These are the most valuable candidates for extractor agents — /// the spectral structure says they should be related, but nobody /// has articulated why. pub fn unlinked_neighbors( emb: &SpectralEmbedding, linked_pairs: &HashSet<(String, String)>, max_pairs: usize, ) -> Vec<(String, String, f64)> { let weights = eigenvalue_weights(&emb.eigenvalues); let keys: Vec<&String> = emb.coords.keys().collect(); let mut pairs: Vec<(String, String, f64)> = Vec::new(); for (i, k1) in keys.iter().enumerate() { let c1 = &emb.coords[*k1]; for k2 in keys.iter().skip(i + 1) { // Skip if already linked let pair_fwd = ((*k1).clone(), (*k2).clone()); let pair_rev = ((*k2).clone(), (*k1).clone()); if linked_pairs.contains(&pair_fwd) || linked_pairs.contains(&pair_rev) { continue; } let dist = weighted_distance(c1, &emb.coords[*k2], &weights); pairs.push(((*k1).clone(), (*k2).clone(), dist)); } } pairs.sort_by(|a, b| a.2.total_cmp(&b.2)); pairs.truncate(max_pairs); pairs } /// Approximate spectral coordinates for a new node using Nyström extension. /// /// Given a new node's edges to existing nodes, estimate where it would /// land in spectral space without recomputing the full decomposition. /// Uses weighted average of neighbors' coordinates, weighted by edge strength. pub fn nystrom_project( emb: &SpectralEmbedding, neighbors: &[(&str, f32)], // (key, edge_strength) ) -> Option> { let mut weighted_sum = vec![0.0f64; emb.dims]; let mut total_weight = 0.0f64; for &(key, strength) in neighbors { if let Some(coords) = emb.coords.get(key) { let w = strength as f64; for (i, &c) in coords.iter().enumerate() { weighted_sum[i] += w * c; } total_weight += w; } } if total_weight < 1e-8 { return None; } Some(weighted_sum.iter().map(|s| s / total_weight).collect()) } /// Classify a spectral position: well-integrated, outlier, bridge, or orphan. pub fn classify_position(pos: &SpectralPosition) -> &'static str { if pos.bridge_score > 0.7 { "bridge" // between two communities } else if pos.outlier_score > 2.0 { "outlier" // far from own community center } else if pos.outlier_score < 0.5 { "core" // close to community center } else { "peripheral" // normal community member } } /// Identify which spectral dimensions a set of nodes load on most heavily. /// Returns dimension indices sorted by total loading. pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize, f64)> { let coords: Vec<&Vec> = keys.iter() .filter_map(|k| emb.coords.get(*k)) .collect(); if coords.is_empty() { return vec![]; } let mut dim_loading: Vec<(usize, f64)> = (0..emb.dims) .map(|d| { let loading: f64 = coords.iter() .map(|c| c[d].abs()) .sum(); (d, loading) }) .collect(); dim_loading.sort_by(|a, b| b.1.total_cmp(&a.1)); dim_loading }