// Spectral decomposition of the memory graph. // // Computes eigenvalues and eigenvectors of the normalized graph Laplacian. // The eigenvectors provide natural coordinates for each node — connected // nodes land nearby, communities form clusters, bridges sit between clusters. // // The eigenvalue spectrum reveals: // - Number of connected components (count of zero eigenvalues) // - Number of natural communities (eigenvalues near zero, before the gap) // - How well-connected the graph is (Fiedler value = second eigenvalue) // // The eigenvectors provide: // - Spectral coordinates for each node (the embedding) // - Community membership (sign/magnitude of Fiedler vector) // - Natural projections (select which eigenvectors to include) use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::path::PathBuf; /// Per-node spectral embedding, serializable to disk. #[derive(Serialize, Deserialize)] pub struct SpectralEmbedding { /// Number of dimensions (eigenvectors) pub dims: usize, /// Eigenvalues for each dimension pub eigenvalues: Vec, /// Node key → coordinate vector pub coords: HashMap>, } pub fn embedding_path() -> PathBuf { crate::store::memory_dir().join("spectral-embedding.json") } /// Load embedding from disk. pub fn load_embedding() -> Result { let path = embedding_path(); let data = std::fs::read_to_string(&path) .map_err(|e| format!("read {}: {}", path.display(), e))?; serde_json::from_str(&data) .map_err(|e| format!("parse embedding: {}", e)) } /// Find nearest neighbors to weighted seed nodes, using link weights. /// /// Each seed has a weight (from query term weighting). For candidates /// directly linked to a seed, the spectral distance is scaled by /// 1/link_strength — strong links make effective distance shorter. /// Seed weight scales the contribution: high-weight seeds pull harder. /// /// Returns (key, effective_distance) sorted by distance ascending. pub fn nearest_to_seeds_weighted( emb: &SpectralEmbedding, seeds: &[(&str, f64)], // (key, seed_weight) graph: Option<&crate::graph::Graph>, k: usize, ) -> Vec<(String, f64)> { let seed_set: HashSet<&str> = seeds.iter().map(|(s, _)| *s).collect(); let seed_data: Vec<(&str, &Vec, f64)> = seeds.iter() .filter_map(|(s, w)| { emb.coords.get(*s) .filter(|c| c.iter().any(|&v| v.abs() > 1e-12)) // skip degenerate seeds .map(|c| (*s, c, *w)) }) .collect(); if seed_data.is_empty() { return vec![]; } // Build seed→neighbor link strength lookup let link_strengths: HashMap<(&str, &str), f32> = if let Some(g) = graph { let mut map = HashMap::new(); for &(seed_key, _) in seeds { for (neighbor, strength) in g.neighbors(seed_key) { map.insert((seed_key, neighbor.as_str()), strength); } } map } else { HashMap::new() }; let dim_weights = eigenvalue_weights(&emb.eigenvalues); let mut distances: Vec<(String, f64)> = emb.coords.iter() .filter(|(k, coords)| { !seed_set.contains(k.as_str()) && coords.iter().any(|&v| v.abs() > 1e-12) // skip degenerate zero-coord nodes }) .map(|(candidate_key, coords)| { let min_dist = seed_data.iter() .map(|(seed_key, sc, seed_weight)| { let raw_dist = weighted_distance(coords, sc, &dim_weights); // Scale by link strength if directly connected let link_scale = link_strengths .get(&(*seed_key, candidate_key.as_str())) .map(|&s| 1.0 / (1.0 + s as f64)) // strong link → smaller distance .unwrap_or(1.0); raw_dist * link_scale / seed_weight }) .fold(f64::MAX, f64::min); (candidate_key.clone(), min_dist) }) .collect(); distances.sort_by(|a, b| a.1.total_cmp(&b.1)); distances.truncate(k); distances } /// Weighted euclidean distance in spectral space. /// Dimensions weighted by 1/eigenvalue — coarser structure matters more. fn weighted_distance(a: &[f64], b: &[f64], weights: &[f64]) -> f64 { a.iter() .zip(b.iter()) .zip(weights.iter()) .map(|((&x, &y), &w)| w * (x - y) * (x - y)) .sum::() .sqrt() } /// Compute eigenvalue-inverse weights for distance calculations. fn eigenvalue_weights(eigenvalues: &[f64]) -> Vec { eigenvalues.iter() .map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 }) .collect() } /// Compute cluster centers (centroids) in spectral space. pub fn cluster_centers( emb: &SpectralEmbedding, communities: &HashMap, ) -> HashMap> { let mut sums: HashMap, usize)> = HashMap::new(); for (key, coords) in &emb.coords { if let Some(&comm) = communities.get(key) { let entry = sums.entry(comm) .or_insert_with(|| (vec![0.0; emb.dims], 0)); for (i, &c) in coords.iter().enumerate() { entry.0[i] += c; } entry.1 += 1; } } sums.into_iter() .map(|(comm, (sum, count))| { let center: Vec = sum.iter() .map(|s| s / count as f64) .collect(); (comm, center) }) .collect() } /// Per-node analysis of spectral position relative to communities. pub struct SpectralPosition { pub key: String, pub community: u32, /// Distance to own community center pub dist_to_center: f64, /// Distance to nearest OTHER community center pub dist_to_nearest: f64, /// Which community is nearest (other than own) pub nearest_community: u32, /// dist_to_center / median_dist_in_community (>1 = outlier) pub outlier_score: f64, /// dist_to_center / dist_to_nearest (>1 = between clusters, potential bridge) pub bridge_score: f64, } /// Analyze spectral positions for all nodes. /// /// Returns positions sorted by outlier_score descending (most displaced first). pub fn analyze_positions( emb: &SpectralEmbedding, communities: &HashMap, ) -> Vec { let centers = cluster_centers(emb, communities); let weights = eigenvalue_weights(&emb.eigenvalues); // Compute distances to own community center let mut by_community: HashMap> = HashMap::new(); let mut node_dists: Vec<(String, u32, f64)> = Vec::new(); for (key, coords) in &emb.coords { if let Some(&comm) = communities.get(key) && let Some(center) = centers.get(&comm) { let dist = weighted_distance(coords, center, &weights); by_community.entry(comm).or_default().push(dist); node_dists.push((key.clone(), comm, dist)); } } // Median distance per community for outlier scoring let medians: HashMap = by_community.into_iter() .map(|(comm, mut dists)| { dists.sort_by(|a, b| a.total_cmp(b)); let median = if dists.is_empty() { 1.0 } else if dists.len() % 2 == 0 { (dists[dists.len() / 2 - 1] + dists[dists.len() / 2]) / 2.0 } else { dists[dists.len() / 2] }; (comm, median.max(1e-6)) }) .collect(); let mut positions: Vec = node_dists.into_iter() .map(|(key, comm, dist_to_center)| { let coords = &emb.coords[&key]; let (nearest_community, dist_to_nearest) = centers.iter() .filter(|&(&c, _)| c != comm) .map(|(&c, center)| (c, weighted_distance(coords, center, &weights))) .min_by(|a, b| a.1.total_cmp(&b.1)) .unwrap_or((comm, f64::MAX)); let median = medians.get(&comm).copied().unwrap_or(1.0); let outlier_score = dist_to_center / median; let bridge_score = if dist_to_nearest > 1e-8 { dist_to_center / dist_to_nearest } else { 0.0 }; SpectralPosition { key, community: comm, dist_to_center, dist_to_nearest, nearest_community, outlier_score, bridge_score, } }) .collect(); positions.sort_by(|a, b| b.outlier_score.total_cmp(&a.outlier_score)); positions } /// Classify a spectral position: well-integrated, outlier, bridge, or orphan. pub fn classify_position(pos: &SpectralPosition) -> &'static str { if pos.bridge_score > 0.7 { "bridge" // between two communities } else if pos.outlier_score > 2.0 { "outlier" // far from own community center } else if pos.outlier_score < 0.5 { "core" // close to community center } else { "peripheral" // normal community member } }