2026-03-03 01:33:31 -05:00
|
|
|
|
// Spectral decomposition of the memory graph.
|
|
|
|
|
|
//
|
|
|
|
|
|
// Computes eigenvalues and eigenvectors of the normalized graph Laplacian.
|
|
|
|
|
|
// The eigenvectors provide natural coordinates for each node — connected
|
|
|
|
|
|
// nodes land nearby, communities form clusters, bridges sit between clusters.
|
|
|
|
|
|
//
|
|
|
|
|
|
// The eigenvalue spectrum reveals:
|
|
|
|
|
|
// - Number of connected components (count of zero eigenvalues)
|
|
|
|
|
|
// - Number of natural communities (eigenvalues near zero, before the gap)
|
|
|
|
|
|
// - How well-connected the graph is (Fiedler value = second eigenvalue)
|
|
|
|
|
|
//
|
|
|
|
|
|
// The eigenvectors provide:
|
|
|
|
|
|
// - Spectral coordinates for each node (the embedding)
|
|
|
|
|
|
// - Community membership (sign/magnitude of Fiedler vector)
|
|
|
|
|
|
// - Natural projections (select which eigenvectors to include)
|
|
|
|
|
|
|
|
|
|
|
|
use crate::graph::Graph;
|
|
|
|
|
|
|
|
|
|
|
|
use faer::Mat;
|
|
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
|
|
use std::collections::{HashMap, HashSet};
|
|
|
|
|
|
use std::path::PathBuf;
|
|
|
|
|
|
|
|
|
|
|
|
pub struct SpectralResult {
|
|
|
|
|
|
/// Node keys in index order
|
|
|
|
|
|
pub keys: Vec<String>,
|
|
|
|
|
|
/// Eigenvalues in ascending order
|
|
|
|
|
|
pub eigenvalues: Vec<f64>,
|
|
|
|
|
|
/// Eigenvectors: eigvecs[k] is the k-th eigenvector (ascending eigenvalue order),
|
|
|
|
|
|
/// with eigvecs[k][i] being the value for node keys[i]
|
|
|
|
|
|
pub eigvecs: Vec<Vec<f64>>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Per-node spectral embedding, serializable to disk.
|
|
|
|
|
|
#[derive(Serialize, Deserialize)]
|
|
|
|
|
|
pub struct SpectralEmbedding {
|
|
|
|
|
|
/// Number of dimensions (eigenvectors)
|
|
|
|
|
|
pub dims: usize,
|
|
|
|
|
|
/// Eigenvalues for each dimension
|
|
|
|
|
|
pub eigenvalues: Vec<f64>,
|
|
|
|
|
|
/// Node key → coordinate vector
|
|
|
|
|
|
pub coords: HashMap<String, Vec<f64>>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn embedding_path() -> PathBuf {
|
2026-03-08 21:16:52 -04:00
|
|
|
|
crate::store::memory_dir().join("spectral-embedding.json")
|
2026-03-03 01:33:31 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Compute spectral decomposition of the memory graph.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Returns the smallest `k` eigenvalues and their eigenvectors of the
|
|
|
|
|
|
/// normalized Laplacian L_sym = I - D^{-1/2} A D^{-1/2}.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// We compute the full decomposition (it's only 2000×2000, takes <1s)
|
|
|
|
|
|
/// and return the bottom k.
|
|
|
|
|
|
pub fn decompose(graph: &Graph, k: usize) -> SpectralResult {
|
|
|
|
|
|
// Only include nodes with edges (filter isolates)
|
|
|
|
|
|
let mut keys: Vec<String> = graph.nodes().iter()
|
|
|
|
|
|
.filter(|k| graph.degree(k) > 0)
|
|
|
|
|
|
.cloned()
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
keys.sort();
|
|
|
|
|
|
let n = keys.len();
|
|
|
|
|
|
let isolates = graph.nodes().len() - n;
|
|
|
|
|
|
if isolates > 0 {
|
|
|
|
|
|
eprintln!("note: filtered {} isolated nodes, decomposing {} connected nodes", isolates, n);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let key_to_idx: HashMap<&str, usize> = keys.iter()
|
|
|
|
|
|
.enumerate()
|
|
|
|
|
|
.map(|(i, k)| (k.as_str(), i))
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
|
|
// Build weighted degree vector and adjacency
|
|
|
|
|
|
let mut degree = vec![0.0f64; n];
|
|
|
|
|
|
let mut adj_entries: Vec<(usize, usize, f64)> = Vec::new();
|
|
|
|
|
|
|
|
|
|
|
|
for (i, key) in keys.iter().enumerate() {
|
|
|
|
|
|
for (neighbor, strength) in graph.neighbors(key) {
|
|
|
|
|
|
if let Some(&j) = key_to_idx.get(neighbor.as_str()) {
|
|
|
|
|
|
if j > i { // each edge once
|
|
|
|
|
|
let w = strength as f64;
|
|
|
|
|
|
adj_entries.push((i, j, w));
|
|
|
|
|
|
degree[i] += w;
|
|
|
|
|
|
degree[j] += w;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Build normalized Laplacian: L_sym = I - D^{-1/2} A D^{-1/2}
|
|
|
|
|
|
let mut laplacian = Mat::<f64>::zeros(n, n);
|
|
|
|
|
|
|
|
|
|
|
|
// Diagonal = 1 for nodes with edges, 0 for isolates
|
|
|
|
|
|
for i in 0..n {
|
|
|
|
|
|
if degree[i] > 0.0 {
|
|
|
|
|
|
laplacian[(i, i)] = 1.0;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Off-diagonal: -w / sqrt(d_i * d_j)
|
|
|
|
|
|
for &(i, j, w) in &adj_entries {
|
|
|
|
|
|
if degree[i] > 0.0 && degree[j] > 0.0 {
|
|
|
|
|
|
let val = -w / (degree[i] * degree[j]).sqrt();
|
|
|
|
|
|
laplacian[(i, j)] = val;
|
|
|
|
|
|
laplacian[(j, i)] = val;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Eigendecompose
|
|
|
|
|
|
let eig = laplacian.self_adjoint_eigen(faer::Side::Lower)
|
|
|
|
|
|
.expect("eigendecomposition failed");
|
|
|
|
|
|
let s = eig.S();
|
|
|
|
|
|
let u = eig.U();
|
|
|
|
|
|
|
|
|
|
|
|
let mut eigenvalues = Vec::with_capacity(k);
|
|
|
|
|
|
let mut eigvecs = Vec::with_capacity(k);
|
|
|
|
|
|
|
|
|
|
|
|
let s_col = s.column_vector();
|
2026-03-09 01:19:04 -04:00
|
|
|
|
|
|
|
|
|
|
// Skip trivial eigenvalues (near-zero = null space from disconnected components).
|
|
|
|
|
|
// The number of zero eigenvalues equals the number of connected components.
|
|
|
|
|
|
let mut start = 0;
|
|
|
|
|
|
while start < n && s_col[start].abs() < 1e-8 {
|
|
|
|
|
|
start += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let k = k.min(n.saturating_sub(start));
|
|
|
|
|
|
for col in start..start + k {
|
2026-03-03 01:33:31 -05:00
|
|
|
|
eigenvalues.push(s_col[col]);
|
|
|
|
|
|
let mut vec = Vec::with_capacity(n);
|
|
|
|
|
|
for row in 0..n {
|
|
|
|
|
|
vec.push(u[(row, col)]);
|
|
|
|
|
|
}
|
|
|
|
|
|
eigvecs.push(vec);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
SpectralResult { keys, eigenvalues, eigvecs }
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Print the spectral summary: eigenvalue spectrum, then each axis with
|
|
|
|
|
|
/// its extreme nodes (what the axis "means").
|
|
|
|
|
|
pub fn print_summary(result: &SpectralResult, graph: &Graph) {
|
|
|
|
|
|
let n = result.keys.len();
|
|
|
|
|
|
let k = result.eigenvalues.len();
|
|
|
|
|
|
|
|
|
|
|
|
println!("Spectral Decomposition — {} nodes, {} eigenpairs", n, k);
|
|
|
|
|
|
println!("=========================================\n");
|
|
|
|
|
|
|
|
|
|
|
|
// Compact eigenvalue table
|
|
|
|
|
|
println!("Eigenvalue spectrum:");
|
|
|
|
|
|
for (i, &ev) in result.eigenvalues.iter().enumerate() {
|
|
|
|
|
|
let gap = if i > 0 {
|
|
|
|
|
|
ev - result.eigenvalues[i - 1]
|
|
|
|
|
|
} else {
|
|
|
|
|
|
0.0
|
|
|
|
|
|
};
|
|
|
|
|
|
let gap_bar = if i > 0 {
|
|
|
|
|
|
let bars = (gap * 500.0).min(40.0) as usize;
|
|
|
|
|
|
"#".repeat(bars)
|
|
|
|
|
|
} else {
|
|
|
|
|
|
String::new()
|
|
|
|
|
|
};
|
|
|
|
|
|
println!(" λ_{:<2} = {:.6} {}", i, ev, gap_bar);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Connected components
|
|
|
|
|
|
let near_zero = result.eigenvalues.iter()
|
|
|
|
|
|
.filter(|&&v| v.abs() < 1e-6)
|
|
|
|
|
|
.count();
|
|
|
|
|
|
if near_zero > 1 {
|
|
|
|
|
|
println!("\n {} eigenvalues near 0 = {} disconnected components", near_zero, near_zero);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Each axis: what are the extremes?
|
|
|
|
|
|
println!("\n\nNatural axes of the knowledge space");
|
|
|
|
|
|
println!("====================================");
|
|
|
|
|
|
|
|
|
|
|
|
for axis in 0..k {
|
|
|
|
|
|
let ev = result.eigenvalues[axis];
|
|
|
|
|
|
let vec = &result.eigvecs[axis];
|
|
|
|
|
|
|
|
|
|
|
|
// Sort nodes by their value on this axis
|
|
|
|
|
|
let mut indexed: Vec<(usize, f64)> = vec.iter()
|
|
|
|
|
|
.enumerate()
|
|
|
|
|
|
.map(|(i, &v)| (i, v))
|
|
|
|
|
|
.collect();
|
2026-03-08 21:22:05 -04:00
|
|
|
|
indexed.sort_by(|a, b| a.1.total_cmp(&b.1));
|
2026-03-03 01:33:31 -05:00
|
|
|
|
|
|
|
|
|
|
// Compute the "spread" — how much this axis differentiates
|
|
|
|
|
|
let min_val = indexed.first().map(|x| x.1).unwrap_or(0.0);
|
|
|
|
|
|
let max_val = indexed.last().map(|x| x.1).unwrap_or(0.0);
|
|
|
|
|
|
|
|
|
|
|
|
println!("\n--- Axis {} (λ={:.6}, range={:.4}) ---", axis, ev, max_val - min_val);
|
|
|
|
|
|
|
|
|
|
|
|
// Show extremes: 5 most negative, 5 most positive
|
|
|
|
|
|
let show = 5;
|
|
|
|
|
|
println!(" Negative pole:");
|
|
|
|
|
|
for &(idx, val) in indexed.iter().take(show) {
|
|
|
|
|
|
let key = &result.keys[idx];
|
|
|
|
|
|
// Shorten key for display: take last component
|
|
|
|
|
|
let short = shorten_key(key);
|
|
|
|
|
|
let deg = graph.degree(key);
|
|
|
|
|
|
let comm = graph.communities().get(key).copied().unwrap_or(999);
|
|
|
|
|
|
println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
println!(" Positive pole:");
|
|
|
|
|
|
for &(idx, val) in indexed.iter().rev().take(show) {
|
|
|
|
|
|
let key = &result.keys[idx];
|
|
|
|
|
|
let short = shorten_key(key);
|
|
|
|
|
|
let deg = graph.degree(key);
|
|
|
|
|
|
let comm = graph.communities().get(key).copied().unwrap_or(999);
|
|
|
|
|
|
println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Shorten a node key for display.
|
|
|
|
|
|
fn shorten_key(key: &str) -> &str {
|
|
|
|
|
|
if key.len() > 60 { &key[..60] } else { key }
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Convert SpectralResult to a per-node embedding (transposing the layout).
|
|
|
|
|
|
pub fn to_embedding(result: &SpectralResult) -> SpectralEmbedding {
|
|
|
|
|
|
let dims = result.eigvecs.len();
|
|
|
|
|
|
let mut coords = HashMap::new();
|
|
|
|
|
|
|
|
|
|
|
|
for (i, key) in result.keys.iter().enumerate() {
|
|
|
|
|
|
let mut vec = Vec::with_capacity(dims);
|
|
|
|
|
|
for d in 0..dims {
|
|
|
|
|
|
vec.push(result.eigvecs[d][i]);
|
|
|
|
|
|
}
|
|
|
|
|
|
coords.insert(key.clone(), vec);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
SpectralEmbedding {
|
|
|
|
|
|
dims,
|
|
|
|
|
|
eigenvalues: result.eigenvalues.clone(),
|
|
|
|
|
|
coords,
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Save embedding to disk.
|
|
|
|
|
|
pub fn save_embedding(emb: &SpectralEmbedding) -> Result<(), String> {
|
|
|
|
|
|
let path = embedding_path();
|
|
|
|
|
|
let json = serde_json::to_string(emb)
|
|
|
|
|
|
.map_err(|e| format!("serialize embedding: {}", e))?;
|
|
|
|
|
|
std::fs::write(&path, json)
|
|
|
|
|
|
.map_err(|e| format!("write {}: {}", path.display(), e))?;
|
|
|
|
|
|
eprintln!("Saved {}-dim embedding for {} nodes to {}",
|
|
|
|
|
|
emb.dims, emb.coords.len(), path.display());
|
|
|
|
|
|
Ok(())
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Load embedding from disk.
|
|
|
|
|
|
pub fn load_embedding() -> Result<SpectralEmbedding, String> {
|
|
|
|
|
|
let path = embedding_path();
|
|
|
|
|
|
let data = std::fs::read_to_string(&path)
|
|
|
|
|
|
.map_err(|e| format!("read {}: {}", path.display(), e))?;
|
|
|
|
|
|
serde_json::from_str(&data)
|
|
|
|
|
|
.map_err(|e| format!("parse embedding: {}", e))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Find the k nearest neighbors to a node in spectral space.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Uses weighted euclidean distance where each dimension is weighted
|
|
|
|
|
|
/// by 1/eigenvalue — lower eigenvalues (coarser structure) matter more.
|
|
|
|
|
|
pub fn nearest_neighbors(
|
|
|
|
|
|
emb: &SpectralEmbedding,
|
|
|
|
|
|
key: &str,
|
|
|
|
|
|
k: usize,
|
|
|
|
|
|
) -> Vec<(String, f64)> {
|
|
|
|
|
|
let target = match emb.coords.get(key) {
|
|
|
|
|
|
Some(c) => c,
|
|
|
|
|
|
None => return vec![],
|
|
|
|
|
|
};
|
|
|
|
|
|
|
2026-03-08 21:22:05 -04:00
|
|
|
|
let weights = eigenvalue_weights(&emb.eigenvalues);
|
2026-03-03 01:33:31 -05:00
|
|
|
|
|
|
|
|
|
|
let mut distances: Vec<(String, f64)> = emb.coords.iter()
|
|
|
|
|
|
.filter(|(k, _)| k.as_str() != key)
|
2026-03-08 21:22:05 -04:00
|
|
|
|
.map(|(k, coords)| (k.clone(), weighted_distance(target, coords, &weights)))
|
2026-03-03 01:33:31 -05:00
|
|
|
|
.collect();
|
|
|
|
|
|
|
2026-03-08 21:22:05 -04:00
|
|
|
|
distances.sort_by(|a, b| a.1.total_cmp(&b.1));
|
2026-03-03 01:33:31 -05:00
|
|
|
|
distances.truncate(k);
|
|
|
|
|
|
distances
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Find nearest neighbors to a set of seed nodes (multi-seed query).
|
|
|
|
|
|
/// Returns nodes ranked by minimum distance to any seed.
|
|
|
|
|
|
pub fn nearest_to_seeds(
|
|
|
|
|
|
emb: &SpectralEmbedding,
|
|
|
|
|
|
seeds: &[&str],
|
|
|
|
|
|
k: usize,
|
|
|
|
|
|
) -> Vec<(String, f64)> {
|
2026-03-09 01:19:04 -04:00
|
|
|
|
nearest_to_seeds_weighted(emb, &seeds.iter().map(|&s| (s, 1.0)).collect::<Vec<_>>(), None, k)
|
|
|
|
|
|
}
|
2026-03-03 01:33:31 -05:00
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
/// Find nearest neighbors to weighted seed nodes, using link weights.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Each seed has a weight (from query term weighting). For candidates
|
|
|
|
|
|
/// directly linked to a seed, the spectral distance is scaled by
|
|
|
|
|
|
/// 1/link_strength — strong links make effective distance shorter.
|
|
|
|
|
|
/// Seed weight scales the contribution: high-weight seeds pull harder.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Returns (key, effective_distance) sorted by distance ascending.
|
|
|
|
|
|
pub fn nearest_to_seeds_weighted(
|
|
|
|
|
|
emb: &SpectralEmbedding,
|
|
|
|
|
|
seeds: &[(&str, f64)], // (key, seed_weight)
|
|
|
|
|
|
graph: Option<&crate::graph::Graph>,
|
|
|
|
|
|
k: usize,
|
|
|
|
|
|
) -> Vec<(String, f64)> {
|
|
|
|
|
|
let seed_set: HashSet<&str> = seeds.iter().map(|(s, _)| *s).collect();
|
|
|
|
|
|
|
|
|
|
|
|
let seed_data: Vec<(&str, &Vec<f64>, f64)> = seeds.iter()
|
|
|
|
|
|
.filter_map(|(s, w)| {
|
|
|
|
|
|
emb.coords.get(*s)
|
|
|
|
|
|
.filter(|c| c.iter().any(|&v| v.abs() > 1e-12)) // skip degenerate seeds
|
|
|
|
|
|
.map(|c| (*s, c, *w))
|
|
|
|
|
|
})
|
2026-03-03 01:33:31 -05:00
|
|
|
|
.collect();
|
2026-03-09 01:19:04 -04:00
|
|
|
|
if seed_data.is_empty() {
|
2026-03-03 01:33:31 -05:00
|
|
|
|
return vec![];
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
// Build seed→neighbor link strength lookup
|
|
|
|
|
|
let link_strengths: HashMap<(&str, &str), f32> = if let Some(g) = graph {
|
|
|
|
|
|
let mut map = HashMap::new();
|
|
|
|
|
|
for &(seed_key, _) in seeds {
|
|
|
|
|
|
for (neighbor, strength) in g.neighbors(seed_key) {
|
|
|
|
|
|
map.insert((seed_key, neighbor.as_str()), strength);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
map
|
|
|
|
|
|
} else {
|
|
|
|
|
|
HashMap::new()
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
let dim_weights = eigenvalue_weights(&emb.eigenvalues);
|
2026-03-03 01:33:31 -05:00
|
|
|
|
|
|
|
|
|
|
let mut distances: Vec<(String, f64)> = emb.coords.iter()
|
2026-03-09 01:19:04 -04:00
|
|
|
|
.filter(|(k, coords)| {
|
|
|
|
|
|
!seed_set.contains(k.as_str())
|
|
|
|
|
|
&& coords.iter().any(|&v| v.abs() > 1e-12) // skip degenerate zero-coord nodes
|
|
|
|
|
|
})
|
|
|
|
|
|
.map(|(candidate_key, coords)| {
|
|
|
|
|
|
let min_dist = seed_data.iter()
|
|
|
|
|
|
.map(|(seed_key, sc, seed_weight)| {
|
|
|
|
|
|
let raw_dist = weighted_distance(coords, sc, &dim_weights);
|
|
|
|
|
|
|
|
|
|
|
|
// Scale by link strength if directly connected
|
|
|
|
|
|
let link_scale = link_strengths
|
|
|
|
|
|
.get(&(*seed_key, candidate_key.as_str()))
|
|
|
|
|
|
.map(|&s| 1.0 / (1.0 + s as f64)) // strong link → smaller distance
|
|
|
|
|
|
.unwrap_or(1.0);
|
|
|
|
|
|
|
|
|
|
|
|
raw_dist * link_scale / seed_weight
|
|
|
|
|
|
})
|
2026-03-03 01:33:31 -05:00
|
|
|
|
.fold(f64::MAX, f64::min);
|
2026-03-09 01:19:04 -04:00
|
|
|
|
(candidate_key.clone(), min_dist)
|
2026-03-03 01:33:31 -05:00
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
2026-03-08 21:22:05 -04:00
|
|
|
|
distances.sort_by(|a, b| a.1.total_cmp(&b.1));
|
2026-03-03 01:33:31 -05:00
|
|
|
|
distances.truncate(k);
|
|
|
|
|
|
distances
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Weighted euclidean distance in spectral space.
|
|
|
|
|
|
/// Dimensions weighted by 1/eigenvalue — coarser structure matters more.
|
|
|
|
|
|
fn weighted_distance(a: &[f64], b: &[f64], weights: &[f64]) -> f64 {
|
|
|
|
|
|
a.iter()
|
|
|
|
|
|
.zip(b.iter())
|
|
|
|
|
|
.zip(weights.iter())
|
|
|
|
|
|
.map(|((&x, &y), &w)| w * (x - y) * (x - y))
|
|
|
|
|
|
.sum::<f64>()
|
|
|
|
|
|
.sqrt()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Compute eigenvalue-inverse weights for distance calculations.
|
|
|
|
|
|
fn eigenvalue_weights(eigenvalues: &[f64]) -> Vec<f64> {
|
|
|
|
|
|
eigenvalues.iter()
|
|
|
|
|
|
.map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
|
|
|
|
|
|
.collect()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Compute cluster centers (centroids) in spectral space.
|
|
|
|
|
|
pub fn cluster_centers(
|
|
|
|
|
|
emb: &SpectralEmbedding,
|
|
|
|
|
|
communities: &HashMap<String, u32>,
|
|
|
|
|
|
) -> HashMap<u32, Vec<f64>> {
|
|
|
|
|
|
let mut sums: HashMap<u32, (Vec<f64>, usize)> = HashMap::new();
|
|
|
|
|
|
|
|
|
|
|
|
for (key, coords) in &emb.coords {
|
|
|
|
|
|
if let Some(&comm) = communities.get(key) {
|
|
|
|
|
|
let entry = sums.entry(comm)
|
|
|
|
|
|
.or_insert_with(|| (vec![0.0; emb.dims], 0));
|
|
|
|
|
|
for (i, &c) in coords.iter().enumerate() {
|
|
|
|
|
|
entry.0[i] += c;
|
|
|
|
|
|
}
|
|
|
|
|
|
entry.1 += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
sums.into_iter()
|
|
|
|
|
|
.map(|(comm, (sum, count))| {
|
|
|
|
|
|
let center: Vec<f64> = sum.iter()
|
|
|
|
|
|
.map(|s| s / count as f64)
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
(comm, center)
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Per-node analysis of spectral position relative to communities.
|
|
|
|
|
|
pub struct SpectralPosition {
|
|
|
|
|
|
pub key: String,
|
|
|
|
|
|
pub community: u32,
|
|
|
|
|
|
/// Distance to own community center
|
|
|
|
|
|
pub dist_to_center: f64,
|
|
|
|
|
|
/// Distance to nearest OTHER community center
|
|
|
|
|
|
pub dist_to_nearest: f64,
|
|
|
|
|
|
/// Which community is nearest (other than own)
|
|
|
|
|
|
pub nearest_community: u32,
|
|
|
|
|
|
/// dist_to_center / median_dist_in_community (>1 = outlier)
|
|
|
|
|
|
pub outlier_score: f64,
|
|
|
|
|
|
/// dist_to_center / dist_to_nearest (>1 = between clusters, potential bridge)
|
|
|
|
|
|
pub bridge_score: f64,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Analyze spectral positions for all nodes.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Returns positions sorted by outlier_score descending (most displaced first).
|
|
|
|
|
|
pub fn analyze_positions(
|
|
|
|
|
|
emb: &SpectralEmbedding,
|
|
|
|
|
|
communities: &HashMap<String, u32>,
|
|
|
|
|
|
) -> Vec<SpectralPosition> {
|
|
|
|
|
|
let centers = cluster_centers(emb, communities);
|
|
|
|
|
|
let weights = eigenvalue_weights(&emb.eigenvalues);
|
|
|
|
|
|
|
|
|
|
|
|
// Compute distances to own community center
|
|
|
|
|
|
let mut by_community: HashMap<u32, Vec<f64>> = HashMap::new();
|
|
|
|
|
|
let mut node_dists: Vec<(String, u32, f64)> = Vec::new();
|
|
|
|
|
|
|
|
|
|
|
|
for (key, coords) in &emb.coords {
|
|
|
|
|
|
if let Some(&comm) = communities.get(key) {
|
|
|
|
|
|
if let Some(center) = centers.get(&comm) {
|
|
|
|
|
|
let dist = weighted_distance(coords, center, &weights);
|
|
|
|
|
|
by_community.entry(comm).or_default().push(dist);
|
|
|
|
|
|
node_dists.push((key.clone(), comm, dist));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Median distance per community for outlier scoring
|
|
|
|
|
|
let medians: HashMap<u32, f64> = by_community.into_iter()
|
|
|
|
|
|
.map(|(comm, mut dists)| {
|
2026-03-08 21:22:05 -04:00
|
|
|
|
dists.sort_by(|a, b| a.total_cmp(b));
|
2026-03-03 01:33:31 -05:00
|
|
|
|
let median = if dists.is_empty() {
|
|
|
|
|
|
1.0
|
|
|
|
|
|
} else if dists.len() % 2 == 0 {
|
|
|
|
|
|
(dists[dists.len() / 2 - 1] + dists[dists.len() / 2]) / 2.0
|
|
|
|
|
|
} else {
|
|
|
|
|
|
dists[dists.len() / 2]
|
|
|
|
|
|
};
|
|
|
|
|
|
(comm, median.max(1e-6))
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
|
|
let mut positions: Vec<SpectralPosition> = node_dists.into_iter()
|
|
|
|
|
|
.map(|(key, comm, dist_to_center)| {
|
|
|
|
|
|
let coords = &emb.coords[&key];
|
|
|
|
|
|
|
|
|
|
|
|
let (nearest_community, dist_to_nearest) = centers.iter()
|
|
|
|
|
|
.filter(|(&c, _)| c != comm)
|
|
|
|
|
|
.map(|(&c, center)| (c, weighted_distance(coords, center, &weights)))
|
2026-03-08 21:22:05 -04:00
|
|
|
|
.min_by(|a, b| a.1.total_cmp(&b.1))
|
2026-03-03 01:33:31 -05:00
|
|
|
|
.unwrap_or((comm, f64::MAX));
|
|
|
|
|
|
|
|
|
|
|
|
let median = medians.get(&comm).copied().unwrap_or(1.0);
|
|
|
|
|
|
let outlier_score = dist_to_center / median;
|
|
|
|
|
|
let bridge_score = if dist_to_nearest > 1e-8 {
|
|
|
|
|
|
dist_to_center / dist_to_nearest
|
|
|
|
|
|
} else {
|
|
|
|
|
|
0.0
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
SpectralPosition {
|
|
|
|
|
|
key, community: comm,
|
|
|
|
|
|
dist_to_center, dist_to_nearest, nearest_community,
|
|
|
|
|
|
outlier_score, bridge_score,
|
|
|
|
|
|
}
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
2026-03-08 21:22:05 -04:00
|
|
|
|
positions.sort_by(|a, b| b.outlier_score.total_cmp(&a.outlier_score));
|
2026-03-03 01:33:31 -05:00
|
|
|
|
positions
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Find pairs of nodes that are spectrally close but not linked in the graph.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// These are the most valuable candidates for extractor agents —
|
|
|
|
|
|
/// the spectral structure says they should be related, but nobody
|
|
|
|
|
|
/// has articulated why.
|
|
|
|
|
|
pub fn unlinked_neighbors(
|
|
|
|
|
|
emb: &SpectralEmbedding,
|
|
|
|
|
|
linked_pairs: &HashSet<(String, String)>,
|
|
|
|
|
|
max_pairs: usize,
|
|
|
|
|
|
) -> Vec<(String, String, f64)> {
|
|
|
|
|
|
let weights = eigenvalue_weights(&emb.eigenvalues);
|
|
|
|
|
|
let keys: Vec<&String> = emb.coords.keys().collect();
|
|
|
|
|
|
let mut pairs: Vec<(String, String, f64)> = Vec::new();
|
|
|
|
|
|
|
|
|
|
|
|
for (i, k1) in keys.iter().enumerate() {
|
|
|
|
|
|
let c1 = &emb.coords[*k1];
|
|
|
|
|
|
for k2 in keys.iter().skip(i + 1) {
|
|
|
|
|
|
// Skip if already linked
|
|
|
|
|
|
let pair_fwd = ((*k1).clone(), (*k2).clone());
|
|
|
|
|
|
let pair_rev = ((*k2).clone(), (*k1).clone());
|
|
|
|
|
|
if linked_pairs.contains(&pair_fwd) || linked_pairs.contains(&pair_rev) {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let dist = weighted_distance(c1, &emb.coords[*k2], &weights);
|
|
|
|
|
|
pairs.push(((*k1).clone(), (*k2).clone(), dist));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 21:22:05 -04:00
|
|
|
|
pairs.sort_by(|a, b| a.2.total_cmp(&b.2));
|
2026-03-03 01:33:31 -05:00
|
|
|
|
pairs.truncate(max_pairs);
|
|
|
|
|
|
pairs
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Approximate spectral coordinates for a new node using Nyström extension.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Given a new node's edges to existing nodes, estimate where it would
|
|
|
|
|
|
/// land in spectral space without recomputing the full decomposition.
|
|
|
|
|
|
/// Uses weighted average of neighbors' coordinates, weighted by edge strength.
|
|
|
|
|
|
pub fn nystrom_project(
|
|
|
|
|
|
emb: &SpectralEmbedding,
|
|
|
|
|
|
neighbors: &[(&str, f32)], // (key, edge_strength)
|
|
|
|
|
|
) -> Option<Vec<f64>> {
|
|
|
|
|
|
let mut weighted_sum = vec![0.0f64; emb.dims];
|
|
|
|
|
|
let mut total_weight = 0.0f64;
|
|
|
|
|
|
|
|
|
|
|
|
for &(key, strength) in neighbors {
|
|
|
|
|
|
if let Some(coords) = emb.coords.get(key) {
|
|
|
|
|
|
let w = strength as f64;
|
|
|
|
|
|
for (i, &c) in coords.iter().enumerate() {
|
|
|
|
|
|
weighted_sum[i] += w * c;
|
|
|
|
|
|
}
|
|
|
|
|
|
total_weight += w;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if total_weight < 1e-8 {
|
|
|
|
|
|
return None;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Some(weighted_sum.iter().map(|s| s / total_weight).collect())
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Classify a spectral position: well-integrated, outlier, bridge, or orphan.
|
|
|
|
|
|
pub fn classify_position(pos: &SpectralPosition) -> &'static str {
|
|
|
|
|
|
if pos.bridge_score > 0.7 {
|
|
|
|
|
|
"bridge" // between two communities
|
|
|
|
|
|
} else if pos.outlier_score > 2.0 {
|
|
|
|
|
|
"outlier" // far from own community center
|
|
|
|
|
|
} else if pos.outlier_score < 0.5 {
|
|
|
|
|
|
"core" // close to community center
|
|
|
|
|
|
} else {
|
|
|
|
|
|
"peripheral" // normal community member
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Identify which spectral dimensions a set of nodes load on most heavily.
|
|
|
|
|
|
/// Returns dimension indices sorted by total loading.
|
|
|
|
|
|
pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize, f64)> {
|
|
|
|
|
|
let coords: Vec<&Vec<f64>> = keys.iter()
|
|
|
|
|
|
.filter_map(|k| emb.coords.get(*k))
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
if coords.is_empty() {
|
|
|
|
|
|
return vec![];
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let mut dim_loading: Vec<(usize, f64)> = (0..emb.dims)
|
|
|
|
|
|
.map(|d| {
|
|
|
|
|
|
let loading: f64 = coords.iter()
|
|
|
|
|
|
.map(|c| c[d].abs())
|
|
|
|
|
|
.sum();
|
|
|
|
|
|
(d, loading)
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
2026-03-08 21:22:05 -04:00
|
|
|
|
dim_loading.sort_by(|a, b| b.1.total_cmp(&a.1));
|
2026-03-03 01:33:31 -05:00
|
|
|
|
dim_loading
|
|
|
|
|
|
}
|