spectral decomposition, search improvements, char boundary fix

- New spectral module: Laplacian eigendecomposition of the memory graph.
  Commands: spectral, spectral-save, spectral-neighbors, spectral-positions,
  spectral-suggest. Spectral neighbors expand search results beyond keyword
  matching to structural proximity.

- Search: use StoreView trait to avoid 6MB state.bin rewrite on every query.
  Append-only retrieval logging. Spectral expansion shows structurally
  nearby nodes after text results.

- Fix panic in journal-tail: string truncation at byte 67 could land inside
  a multi-byte character (em dash). Now walks back to char boundary.

- Replay queue: show classification and spectral outlier score.

- Knowledge agents: extractor, challenger, connector prompts and runner
  scripts for automated graph enrichment.

- memory-search hook: stale state file cleanup (24h expiry).
This commit is contained in:
ProofOfConcept 2026-03-03 01:33:31 -05:00
parent 94dbca6018
commit 71e6f15d82
16 changed files with 3600 additions and 103 deletions

View file

@ -12,6 +12,7 @@ use std::fs;
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, SystemTime};
fn main() {
let mut input = String::new();
@ -66,6 +67,9 @@ fn main() {
let state_dir = PathBuf::from("/tmp/claude-memory-search");
fs::create_dir_all(&state_dir).ok();
// Clean up state files older than 24h (opportunistic, best-effort)
cleanup_stale_files(&state_dir, Duration::from_secs(86400));
let cookie = load_or_create_cookie(&state_dir, session_id);
let seen = load_seen(&state_dir, session_id);
@ -172,3 +176,20 @@ fn mark_seen(dir: &Path, session_id: &str, key: &str) {
writeln!(f, "{}", key).ok();
}
}
/// Best-effort removal of directory entries in `dir` whose modification time
/// is older than `max_age`.
///
/// Every failure (unreadable directory, unreadable metadata, failed removal)
/// is deliberately ignored: this is opportunistic housekeeping and must never
/// abort the caller. Entries that `fs::remove_file` cannot delete (for example
/// sub-directories) are simply left in place.
fn cleanup_stale_files(dir: &Path, max_age: Duration) {
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    // `SystemTime - Duration` panics when the result is not representable
    // (e.g. an enormous `max_age`). In that case nothing can be older than
    // the cutoff, so there is nothing to clean up.
    let cutoff = match SystemTime::now().checked_sub(max_age) {
        Some(cutoff) => cutoff,
        None => return,
    };
    for entry in entries.flatten() {
        let is_stale = entry
            .metadata()
            .and_then(|meta| meta.modified())
            .map(|modified| modified < cutoff)
            .unwrap_or(false);
        if is_stale {
            fs::remove_file(entry.path()).ok();
        }
    }
}

View file

@ -5,8 +5,11 @@
// relations.capnp - Relation messages
//
// The Store struct is the derived cache: latest version per UUID,
// rebuilt from logs when stale. Persisted as serde_json for now
// (state.json), will move to bincode/capnp later.
// rebuilt from logs when stale. Three-tier load strategy:
// 1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
// 2. bincode cache (state.bin) — ~10ms
// 3. capnp log replay — ~40ms
// Staleness: log file sizes embedded in cache headers.
use crate::memory_capnp;
use crate::graph::{self, Graph};
@ -109,7 +112,8 @@ pub fn today() -> String {
}
// In-memory node representation
#[derive(Clone, Debug, Serialize, Deserialize)]
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct Node {
pub uuid: [u8; 16],
pub version: u32,
@ -146,7 +150,8 @@ pub struct Node {
pub degree: Option<u32>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct Relation {
pub uuid: [u8; 16],
pub version: u32,
@ -161,7 +166,8 @@ pub struct Relation {
pub target_key: String,
}
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub enum NodeType {
EpisodicSession,
EpisodicDaily,
@ -169,7 +175,8 @@ pub enum NodeType {
Semantic,
}
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub enum Provenance {
Manual,
Journal,
@ -178,7 +185,8 @@ pub enum Provenance {
Derived,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub enum Category {
General,
Core,
@ -220,14 +228,16 @@ impl Category {
}
}
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub enum RelationType {
Link,
Causal,
Auto,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct RetrievalEvent {
pub query: String,
pub timestamp: String,
@ -235,7 +245,8 @@ pub struct RetrievalEvent {
pub used: Option<Vec<String>>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct Params {
pub default_weight: f64,
pub decay_factor: f64,
@ -261,7 +272,8 @@ impl Default for Params {
}
// Gap record — something we looked for but didn't find
#[derive(Clone, Debug, Serialize, Deserialize)]
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct GapRecord {
pub description: String,
pub timestamp: String,
@ -279,19 +291,299 @@ pub struct Store {
pub params: Params,
}
/// Snapshot for mmap: full store state minus retrieval_log (which
/// is append-only in retrieval.log). rkyv zero-copy serialization
/// lets us mmap this and access archived data without deserialization.
///
/// Written by `Store::save_snapshot_inner` and read back either by
/// deserializing into an owned `Store` or through the archived form
/// (`ArchivedSnapshot`) generated by the `rkyv::Archive` derive.
#[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
struct Snapshot {
// Copy of `Store::nodes`, keyed by node key.
nodes: HashMap<String, Node>,
// Copy of `Store::relations`.
relations: Vec<Relation>,
// Copy of `Store::gaps`.
gaps: Vec<GapRecord>,
// Copy of `Store::params`.
params: Params,
}
fn snapshot_path() -> PathBuf { memory_dir().join("snapshot.rkyv") }
// rkyv snapshot header: 32 bytes (multiple of 16 for alignment after mmap)
// [0..4] magic "RKV\x01"
// [4..8] format version (u32 LE)
// [8..16] nodes.capnp file size (u64 LE) — staleness check
// [16..24] relations.capnp file size (u64 LE)
// [24..32] rkyv data length (u64 LE)
const RKYV_MAGIC: [u8; 4] = *b"RKV\x01";
const RKYV_HEADER_LEN: usize = 32;
// state.bin header: magic + log file sizes for staleness detection.
// File sizes are race-free for append-only logs (they only grow),
// unlike mtimes which race with concurrent writers.
const CACHE_MAGIC: [u8; 4] = *b"POC\x01";
const CACHE_HEADER_LEN: usize = 4 + 8 + 8; // magic + nodes_size + rels_size
// ---------------------------------------------------------------------------
// StoreView: read-only access trait for search and graph code.
//
// Abstracts over owned Store and zero-copy MmapView so the same
// spreading-activation and graph code works with either.
// ---------------------------------------------------------------------------
/// Read-only view of the memory store.
///
/// Implemented in this file by `Store` (owned data), `MmapView`
/// (archived data read from the mmap'd snapshot) and `AnyView`
/// (dispatch over the two).
pub trait StoreView {
/// Iterate all nodes. Callback receives (key, content, weight).
/// The weight is passed as `f32`, unlike `node_weight` which returns `f64`.
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F);
/// Iterate all relations. Callback receives (source_key, target_key, strength, rel_type).
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F);
/// Node weight by key, or the default weight (`params().default_weight`
/// in the implementations in this file) if the node is missing.
fn node_weight(&self, key: &str) -> f64;
/// Node content by key, or `None` if no node has that key.
fn node_content(&self, key: &str) -> Option<&str>;
/// Check if a node exists.
fn has_node(&self, key: &str) -> bool;
/// Search/graph parameters, returned by value.
fn params(&self) -> Params;
}
// Owned-store implementation: every accessor reads directly from the
// in-memory `nodes` map, `relations` list and `params` value.
impl StoreView for Store {
    /// Visit every node as (key, content, weight).
    fn for_each_node<F: FnMut(&str, &str, f32)>(&self, mut f: F) {
        self.nodes
            .iter()
            .for_each(|(key, node)| f(key, &node.content, node.weight));
    }

    /// Visit every relation as (source_key, target_key, strength, rel_type).
    fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
        self.relations
            .iter()
            .for_each(|rel| f(&rel.source_key, &rel.target_key, rel.strength, rel.rel_type));
    }

    /// Weight of the node stored under `key`, widened to `f64`; falls back
    /// to `params.default_weight` when the key is unknown.
    fn node_weight(&self, key: &str) -> f64 {
        match self.nodes.get(key) {
            Some(node) => node.weight as f64,
            None => self.params.default_weight,
        }
    }

    /// Borrowed content of the node stored under `key`, if any.
    fn node_content(&self, key: &str) -> Option<&str> {
        let node = self.nodes.get(key)?;
        Some(node.content.as_str())
    }

    /// Whether a node is stored under `key`.
    fn has_node(&self, key: &str) -> bool {
        self.nodes.get(key).is_some()
    }

    /// Copy of the store's parameters.
    fn params(&self) -> Params {
        self.params
    }
}
// ---------------------------------------------------------------------------
// MmapView: zero-copy store access via mmap'd rkyv snapshot.
//
// Holds the mmap alive; all string reads go directly into the mapped
// pages without allocation. Falls back to None if snapshot is stale.
// ---------------------------------------------------------------------------
/// Read-only view over the mmap'd rkyv snapshot file.
///
/// Constructed by `MmapView::open`, which validates the header before
/// handing out a view.
pub struct MmapView {
// The mapped snapshot file (header + rkyv payload).
mmap: memmap2::Mmap,
// Handle of the mapped file, never read after construction.
// NOTE(review): presumably kept only so the descriptor lives as long as
// the mapping — confirm whether memmap2 actually requires this.
_file: fs::File,
// Byte offset of the rkyv payload inside `mmap` (set to RKYV_HEADER_LEN by `open`).
data_offset: usize,
// Payload length in bytes, as recorded in header bytes [24..32].
data_len: usize,
}
impl MmapView {
    /// Try to open a fresh rkyv snapshot.
    ///
    /// Returns `None` when the snapshot file cannot be opened or mapped,
    /// is shorter than the header, carries the wrong magic or an unknown
    /// format version, records log sizes that differ from the current
    /// `nodes`/`relations` log files (stale), or is shorter than the payload
    /// length its header claims (truncated or corrupt).
    pub fn open() -> Option<Self> {
        // Must match the version written by `Store::save_snapshot_inner`.
        const FORMAT_VERSION: u32 = 1;

        let path = snapshot_path();
        let file = fs::File::open(&path).ok()?;
        // SAFETY (to be confirmed by review): the mapping is only sound while
        // nobody modifies the file in place. Within this file the snapshot is
        // only ever replaced via tmp-file + rename, never rewritten in place.
        let mmap = unsafe { memmap2::Mmap::map(&file) }.ok()?;

        let header = mmap.get(..RKYV_HEADER_LEN)?;
        if header[..4] != RKYV_MAGIC {
            return None;
        }
        let version = u32::from_le_bytes(header[4..8].try_into().ok()?);
        if version != FORMAT_VERSION {
            return None;
        }

        let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
        let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);
        let cached_nodes = u64::from_le_bytes(header[8..16].try_into().ok()?);
        let cached_rels = u64::from_le_bytes(header[16..24].try_into().ok()?);
        if cached_nodes != nodes_size || cached_rels != rels_size {
            return None;
        }

        // The length comes from disk: reject values that do not fit in
        // `usize` or that would overflow the end offset instead of wrapping
        // (which would later panic when slicing the mapping).
        let data_len: usize = u64::from_le_bytes(header[24..32].try_into().ok()?)
            .try_into()
            .ok()?;
        let data_end = RKYV_HEADER_LEN.checked_add(data_len)?;
        if mmap.len() < data_end {
            return None;
        }

        Some(MmapView { mmap, _file: file, data_offset: RKYV_HEADER_LEN, data_len })
    }

    /// Access the archived snapshot inside the mapping.
    fn snapshot(&self) -> &ArchivedSnapshot {
        // `open` verified that `data_offset + data_len` lies within the mapping.
        let data = &self.mmap[self.data_offset..self.data_offset + self.data_len];
        // SAFETY (unverified): the payload is trusted to be a valid archived
        // `Snapshot` because the header matched; the bytes themselves are not
        // validated here. NOTE(review): `Snapshot` derives `check_bytes`, so
        // a validating accessor could be used if untrusted files are a concern.
        unsafe { rkyv::archived_root::<Snapshot>(data) }
    }
}
impl StoreView for MmapView {
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, mut f: F) {
let snap = self.snapshot();
for (key, node) in snap.nodes.iter() {
f(&key, &node.content, node.weight);
}
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
let snap = self.snapshot();
for rel in snap.relations.iter() {
let rt = match rel.rel_type {
ArchivedRelationType::Link => RelationType::Link,
ArchivedRelationType::Causal => RelationType::Causal,
ArchivedRelationType::Auto => RelationType::Auto,
};
f(&rel.source_key, &rel.target_key, rel.strength, rt);
}
}
fn node_weight(&self, key: &str) -> f64 {
let snap = self.snapshot();
snap.nodes.get(key)
.map(|n| n.weight as f64)
.unwrap_or(snap.params.default_weight)
}
fn node_content(&self, key: &str) -> Option<&str> {
let snap = self.snapshot();
snap.nodes.get(key).map(|n| &*n.content)
}
fn has_node(&self, key: &str) -> bool {
self.snapshot().nodes.get(key).is_some()
}
fn params(&self) -> Params {
let p = &self.snapshot().params;
Params {
default_weight: p.default_weight,
decay_factor: p.decay_factor,
use_boost: p.use_boost,
prune_threshold: p.prune_threshold,
edge_decay: p.edge_decay,
max_hops: p.max_hops,
min_activation: p.min_activation,
}
}
}
// ---------------------------------------------------------------------------
// AnyView: enum dispatch for read-only access.
//
// MmapView when the snapshot is fresh, owned Store as fallback.
// The match on each call is a single, well-predicted branch, so the dispatch overhead is negligible.
// ---------------------------------------------------------------------------
/// Read-only store access backed by either representation.
/// Built by `AnyView::load`, which prefers the mmap snapshot.
pub enum AnyView {
/// Archived data read from the mmap'd snapshot.
Mmap(MmapView),
/// Fully loaded, owned store (used when no fresh snapshot could be opened).
Owned(Store),
}
impl AnyView {
    /// Pick the cheapest usable view: the mmap snapshot when
    /// `MmapView::open` succeeds, otherwise a fully loaded `Store`.
    ///
    /// Errors only when falling back to `Store::load` fails.
    pub fn load() -> Result<Self, String> {
        match MmapView::open() {
            Some(mapped) => Ok(AnyView::Mmap(mapped)),
            None => Store::load().map(AnyView::Owned),
        }
    }
}
// Pure dispatch: each method forwards to the same method on whichever
// variant is held. No logic of its own lives here.
impl StoreView for AnyView {
// Forward node iteration to the active variant.
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F) {
match self {
AnyView::Mmap(v) => v.for_each_node(f),
AnyView::Owned(s) => s.for_each_node(f),
}
}
// Forward relation iteration to the active variant.
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F) {
match self {
AnyView::Mmap(v) => v.for_each_relation(f),
AnyView::Owned(s) => s.for_each_relation(f),
}
}
// Forward the weight lookup to the active variant.
fn node_weight(&self, key: &str) -> f64 {
match self {
AnyView::Mmap(v) => v.node_weight(key),
// Fully-qualified call selects the trait method explicitly.
// NOTE(review): presumably needed because `Store` also has an inherent
// `node_weight` with a different return type — confirm before simplifying.
AnyView::Owned(s) => StoreView::node_weight(s, key),
}
}
// Forward the content lookup to the active variant.
fn node_content(&self, key: &str) -> Option<&str> {
match self {
AnyView::Mmap(v) => v.node_content(key),
AnyView::Owned(s) => s.node_content(key),
}
}
// Forward the existence check to the active variant.
fn has_node(&self, key: &str) -> bool {
match self {
AnyView::Mmap(v) => v.has_node(key),
AnyView::Owned(s) => s.has_node(key),
}
}
// Forward the parameter read to the active variant.
fn params(&self) -> Params {
match self {
AnyView::Mmap(v) => v.params(),
AnyView::Owned(s) => s.params(),
}
}
}
impl Store {
/// Load store: try state.json cache first, rebuild from capnp logs if stale
/// Load store from state.bin cache if fresh, otherwise rebuild from capnp logs.
///
/// Staleness check uses log file sizes (not mtimes). Since logs are
/// append-only, any write grows the file, invalidating the cache.
/// This avoids the mtime race that caused data loss with concurrent
/// writers (dream loop, link audit, journal enrichment).
pub fn load() -> Result<Store, String> {
// 1. Try rkyv mmap snapshot (~4ms with deserialize, <1ms zero-copy)
match Self::load_snapshot_mmap() {
Ok(Some(store)) => return Ok(store),
Ok(None) => {},
Err(e) => eprintln!("rkyv snapshot: {}", e),
}
// 2. Try bincode state.bin cache (~10ms)
let nodes_p = nodes_path();
let rels_p = relations_path();
let state_p = state_path();
// Always rebuild from capnp logs (source of truth).
// The mtime-based cache was causing data loss: concurrent
// writers (dream loop, link audit, journal enrichment) would
// load stale state.bin, make changes, and save — overwriting
// entries from other processes. Replaying from the append-only
// log costs ~10ms extra at 2K nodes and is always correct.
let nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
if let Ok(data) = fs::read(&state_p) {
if data.len() >= CACHE_HEADER_LEN && data[..4] == CACHE_MAGIC {
let cached_nodes = u64::from_le_bytes(data[4..12].try_into().unwrap());
let cached_rels = u64::from_le_bytes(data[12..20].try_into().unwrap());
if cached_nodes == nodes_size && cached_rels == rels_size {
if let Ok(mut store) = bincode::deserialize::<Store>(&data[CACHE_HEADER_LEN..]) {
// Rebuild uuid_to_key (skipped by serde)
for (key, node) in &store.nodes {
store.uuid_to_key.insert(node.uuid, key.clone());
}
// Bootstrap: write rkyv snapshot if missing
if !snapshot_path().exists() {
if let Err(e) = store.save_snapshot_inner() {
eprintln!("rkyv bootstrap: {}", e);
}
}
return Ok(store);
}
}
}
}
// Stale or no cache — rebuild from capnp logs
let mut store = Store::default();
if nodes_p.exists() {
@ -307,7 +599,6 @@ impl Store {
store.nodes.contains_key(&r.target_key)
);
// Save cache (still useful for tools that read state.bin directly)
store.save()?;
Ok(store)
}
@ -419,7 +710,8 @@ impl Store {
Ok(())
}
/// Save the derived cache (state.json)
/// Save the derived cache with log size header for staleness detection.
/// Uses atomic write (tmp + rename) to prevent partial reads.
pub fn save(&self) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
@ -427,19 +719,124 @@ impl Store {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).ok();
}
let data = bincode::serialize(self)
.map_err(|e| format!("bincode serialize: {}", e))?;
fs::write(&path, data)
.map_err(|e| format!("write {}: {}", path.display(), e))?;
// Clean up old JSON cache if it exists
let json_path = state_json_path();
if json_path.exists() {
fs::remove_file(&json_path).ok();
let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);
let bincode_data = bincode::serialize(self)
.map_err(|e| format!("bincode serialize: {}", e))?;
let mut data = Vec::with_capacity(CACHE_HEADER_LEN + bincode_data.len());
data.extend_from_slice(&CACHE_MAGIC);
data.extend_from_slice(&nodes_size.to_le_bytes());
data.extend_from_slice(&rels_size.to_le_bytes());
data.extend_from_slice(&bincode_data);
// Atomic write: tmp file + rename
let tmp_path = path.with_extension("bin.tmp");
fs::write(&tmp_path, &data)
.map_err(|e| format!("write {}: {}", tmp_path.display(), e))?;
fs::rename(&tmp_path, &path)
.map_err(|e| format!("rename {}{}: {}", tmp_path.display(), path.display(), e))?;
// Also write rkyv snapshot (mmap-friendly)
if let Err(e) = self.save_snapshot_inner() {
eprintln!("rkyv snapshot save: {}", e);
}
Ok(())
}
/// Serialize store as rkyv snapshot with staleness header.
///
/// File layout: magic, format version (1), nodes-log size, relations-log
/// size and payload length (all little-endian), followed by the rkyv bytes.
/// The data is written to `snapshot.rkyv.tmp` and then renamed over the
/// real snapshot so readers never observe a partially written file.
///
/// Assumes StoreLock is already held by caller.
///
/// # Errors
/// Returns a message when rkyv serialization, the tmp-file write, or the
/// rename fails.
fn save_snapshot_inner(&self) -> Result<(), String> {
    let snap = Snapshot {
        nodes: self.nodes.clone(),
        relations: self.relations.clone(),
        gaps: self.gaps.clone(),
        // `Params` is `Copy`, so no explicit clone is needed.
        params: self.params,
    };

    let rkyv_data = rkyv::to_bytes::<_, 256>(&snap)
        .map_err(|e| format!("rkyv serialize: {}", e))?;

    // NOTE(review): the log sizes are sampled at save time, not when the
    // store contents were loaded; if another process appended to the logs in
    // between, the header would mark this snapshot as fresh for data it does
    // not contain — confirm the locking makes that impossible.
    let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
    let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);

    let mut data = Vec::with_capacity(RKYV_HEADER_LEN + rkyv_data.len());
    data.extend_from_slice(&RKYV_MAGIC);
    data.extend_from_slice(&1u32.to_le_bytes()); // format version
    data.extend_from_slice(&nodes_size.to_le_bytes());
    data.extend_from_slice(&rels_size.to_le_bytes());
    data.extend_from_slice(&(rkyv_data.len() as u64).to_le_bytes());
    data.extend_from_slice(&rkyv_data);

    // Atomic replace: write the tmp file, then rename it into place.
    let path = snapshot_path();
    let tmp_path = path.with_extension("rkyv.tmp");
    fs::write(&tmp_path, &data)
        .map_err(|e| format!("write {}: {}", tmp_path.display(), e))?;
    fs::rename(&tmp_path, &path)
        .map_err(|e| format!("rename {} -> {}: {}", tmp_path.display(), path.display(), e))?;

    Ok(())
}
/// Try loading store from mmap'd rkyv snapshot.
///
/// Returns `Ok(None)` if the snapshot is missing, has a bad magic or an
/// unknown format version, is stale (recorded log sizes don't match the
/// current log files), or is truncated/corrupt (shorter than the payload
/// length in its header).
///
/// # Errors
/// Returns a message when the snapshot exists but cannot be opened or mapped.
fn load_snapshot_mmap() -> Result<Option<Store>, String> {
    // Must match the version written by `save_snapshot_inner`.
    const FORMAT_VERSION: u32 = 1;

    let path = snapshot_path();

    // Open directly instead of checking `exists()` first: a snapshot that
    // disappears between the check and the open is "missing", not an error.
    let file = match fs::File::open(&path) {
        Ok(file) => file,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
        Err(e) => return Err(format!("open {}: {}", path.display(), e)),
    };

    let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
    let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);

    // SAFETY (to be confirmed by review): sound only while the file is not
    // modified in place; within this file the snapshot is replaced via
    // tmp-file + rename only.
    let mmap = unsafe { memmap2::Mmap::map(&file) }
        .map_err(|e| format!("mmap {}: {}", path.display(), e))?;

    if mmap.len() < RKYV_HEADER_LEN { return Ok(None); }
    if mmap[..4] != RKYV_MAGIC { return Ok(None); }

    let version = u32::from_le_bytes(mmap[4..8].try_into().unwrap());
    if version != FORMAT_VERSION {
        return Ok(None); // written by an incompatible format
    }

    let cached_nodes = u64::from_le_bytes(mmap[8..16].try_into().unwrap());
    let cached_rels = u64::from_le_bytes(mmap[16..24].try_into().unwrap());
    if cached_nodes != nodes_size || cached_rels != rels_size {
        return Ok(None); // stale
    }

    // The payload length comes from disk: reject values that do not fit in
    // `usize` or whose end offset would overflow, rather than wrapping and
    // panicking on the slice below.
    let raw_len = u64::from_le_bytes(mmap[24..32].try_into().unwrap());
    let data_len: usize = match raw_len.try_into() {
        Ok(len) => len,
        Err(_) => return Ok(None), // corrupt
    };
    let data_end = match RKYV_HEADER_LEN.checked_add(data_len) {
        Some(end) => end,
        None => return Ok(None), // corrupt
    };
    if mmap.len() < data_end {
        return Ok(None); // truncated
    }

    let rkyv_data = &mmap[RKYV_HEADER_LEN..data_end];

    // SAFETY: we wrote this file ourselves via save_snapshot_inner().
    // Skip full validation (check_archived_root) — the staleness header
    // already confirms this snapshot matches the current log state.
    let archived = unsafe { rkyv::archived_root::<Snapshot>(rkyv_data) };

    // Deserializing with `rkyv::Infallible` cannot fail, so `unwrap` is safe.
    let snap: Snapshot = <ArchivedSnapshot as rkyv::Deserialize<Snapshot, rkyv::Infallible>>
        ::deserialize(archived, &mut rkyv::Infallible).unwrap();

    let mut store = Store {
        nodes: snap.nodes,
        relations: snap.relations,
        gaps: snap.gaps,
        params: snap.params,
        ..Default::default()
    };

    // Rebuild uuid_to_key (not serialized)
    for (key, node) in &store.nodes {
        store.uuid_to_key.insert(node.uuid, key.clone());
    }

    Ok(Some(store))
}
/// Add or update a node (appends to log + updates cache)
pub fn upsert_node(&mut self, mut node: Node) -> Result<(), String> {
if let Some(existing) = self.nodes.get(&node.key) {
@ -822,6 +1219,22 @@ impl Store {
}
}
/// Lightweight retrieval logging — appends one line to retrieval.log
/// instead of rewriting the entire state.bin.
///
/// Thin wrapper around `log_retrieval_static`; `self` is not read, so
/// nothing in the in-memory store changes.
pub fn log_retrieval_append(&self, query: &str, results: &[String]) {
Self::log_retrieval_static(query, results);
}
/// Append retrieval event to retrieval.log without needing a Store instance.
///
/// One line is written per call: `[<today>] q="<query>" hits=<count>`.
/// The query is written with Rust's `Debug` escaping, so embedded quotes,
/// backslashes and newlines cannot break the one-event-per-line format;
/// queries without such characters produce exactly the same line as before.
///
/// Best-effort: failures to open or write the log are silently ignored so
/// that logging can never fail a search.
pub fn log_retrieval_static(query: &str, results: &[String]) {
    let path = memory_dir().join("retrieval.log");
    let line = format!("[{}] q={:?} hits={}\n", today(), query, results.len());
    if let Ok(mut f) = fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&path)
    {
        let _ = f.write_all(line.as_bytes());
    }
}
pub fn mark_used(&mut self, key: &str) {
let updated = if let Some(node) = self.nodes.get_mut(key) {
node.uses += 1;

View file

@ -7,7 +7,7 @@
// connections), but relation type and direction are preserved for
// specific queries.
use crate::capnp_store::{Store, RelationType};
use crate::capnp_store::{Store, RelationType, StoreView};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
@ -377,38 +377,46 @@ impl Graph {
}
}
/// Build graph from store data
pub fn build_graph(store: &Store) -> Graph {
/// Build graph from store data (with community detection)
pub fn build_graph(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store);
let communities = label_propagation(&keys, &adj, 20);
Graph { adj, keys, communities }
}
/// Build graph without community detection — for spreading activation
/// searches where we only need the adjacency list.
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store);
Graph { adj, keys, communities: HashMap::new() }
}
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
let keys: HashSet<String> = store.nodes.keys().cloned().collect();
let mut keys: HashSet<String> = HashSet::new();
// Build adjacency from relations
for rel in &store.relations {
let source_key = &rel.source_key;
let target_key = &rel.target_key;
store.for_each_node(|key, _, _| {
keys.insert(key.to_owned());
});
// Both keys must exist as nodes
store.for_each_relation(|source_key, target_key, strength, rel_type| {
if !keys.contains(source_key) || !keys.contains(target_key) {
continue;
return;
}
// Add bidirectional edges (even for causal — direction is metadata)
adj.entry(source_key.clone()).or_default().push(Edge {
target: target_key.clone(),
strength: rel.strength,
rel_type: rel.rel_type,
adj.entry(source_key.to_owned()).or_default().push(Edge {
target: target_key.to_owned(),
strength,
rel_type,
});
adj.entry(target_key.clone()).or_default().push(Edge {
target: source_key.clone(),
strength: rel.strength,
rel_type: rel.rel_type,
adj.entry(target_key.to_owned()).or_default().push(Edge {
target: source_key.to_owned(),
strength,
rel_type,
});
}
});
// Run community detection
let communities = label_propagation(&keys, &adj, 20);
Graph { adj, keys, communities }
(adj, keys)
}
/// Label propagation community detection.

View file

@ -20,6 +20,7 @@ mod search;
mod similarity;
mod migrate;
mod neuro;
mod spectral;
pub mod memory_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
@ -101,6 +102,11 @@ fn main() {
"differentiate" => cmd_differentiate(&args[2..]),
"link-audit" => cmd_link_audit(&args[2..]),
"trace" => cmd_trace(&args[2..]),
"spectral" => cmd_spectral(&args[2..]),
"spectral-save" => cmd_spectral_save(&args[2..]),
"spectral-neighbors" => cmd_spectral_neighbors(&args[2..]),
"spectral-positions" => cmd_spectral_positions(&args[2..]),
"spectral-suggest" => cmd_spectral_suggest(&args[2..]),
"list-keys" => cmd_list_keys(),
"list-edges" => cmd_list_edges(),
"dump-json" => cmd_dump_json(),
@ -171,6 +177,11 @@ Commands:
Redistribute hub links to section-level children
link-audit [--apply] Walk every link, send to Sonnet for quality review
trace KEY Walk temporal links: semantic episodic conversation
spectral [K] Spectral decomposition of the memory graph (default K=30)
spectral-save [K] Compute and save spectral embedding (default K=20)
spectral-neighbors KEY [N] Find N spectrally nearest nodes (default N=15)
spectral-positions [N] Show N nodes ranked by outlier/bridge score (default 30)
spectral-suggest [N] Find N spectrally close but unlinked pairs (default 20)
list-keys List all node keys (one per line)
list-edges List all edges (tsv: source target strength type)
dump-json Dump entire store as JSON
@ -185,34 +196,76 @@ Commands:
}
fn cmd_search(args: &[String]) -> Result<(), String> {
use capnp_store::StoreView;
if args.is_empty() {
return Err("Usage: poc-memory search QUERY [QUERY...]".into());
}
let query = args.join(" ");
let mut store = capnp_store::Store::load()?;
let results = search::search(&query, &store);
let view = capnp_store::AnyView::load()?;
let results = search::search(&query, &view);
if results.is_empty() {
eprintln!("No results for '{}'", query);
return Ok(());
}
// Log retrieval
store.log_retrieval(&query, &results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
store.save()?;
// Log retrieval to a small append-only file (avoid 6MB state.bin rewrite)
capnp_store::Store::log_retrieval_static(&query,
&results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
// Show text results
let text_keys: std::collections::HashSet<String> = results.iter()
.take(15).map(|r| r.key.clone()).collect();
for (i, r) in results.iter().enumerate().take(15) {
let marker = if r.is_direct { "" } else { " " };
let weight = store.node_weight(&r.key).unwrap_or(0.0);
let weight = view.node_weight(&r.key);
print!("{}{:2}. [{:.2}/{:.2}] {}", marker, i + 1, r.activation, weight, r.key);
if let Some(community) = store.node_community(&r.key) {
print!(" (c{})", community);
}
println!();
if let Some(ref snippet) = r.snippet {
println!(" {}", snippet);
}
}
// Spectral expansion: find neighbors of top text hits
if let Ok(emb) = spectral::load_embedding() {
let seeds: Vec<&str> = results.iter()
.take(5)
.map(|r| r.key.as_str())
.filter(|k| emb.coords.contains_key(*k))
.collect();
if !seeds.is_empty() {
let spectral_hits = spectral::nearest_to_seeds(&emb, &seeds, 10);
// Filter to nodes not already in text results
let new_hits: Vec<_> = spectral_hits.into_iter()
.filter(|(k, _)| !text_keys.contains(k))
.take(5)
.collect();
if !new_hits.is_empty() {
println!("\nSpectral neighbors (structural, not keyword):");
for (k, _dist) in &new_hits {
let weight = view.node_weight(k);
print!(" ~ [{:.2}] {}", weight, k);
println!();
// Show first line of content as snippet
if let Some(content) = view.node_content(k) {
let snippet: String = content.lines()
.find(|l| !l.trim().is_empty() && !l.starts_with('#'))
.unwrap_or("")
.chars().take(100).collect();
if !snippet.is_empty() {
println!(" {}", snippet);
}
}
}
}
}
}
Ok(())
}
@ -457,8 +510,9 @@ fn cmd_replay_queue(args: &[String]) -> Result<(), String> {
let queue = neuro::replay_queue(&store, count);
println!("Replay queue ({} items):", queue.len());
for (i, item) in queue.iter().enumerate() {
println!(" {:2}. [{:.3}] {} (interval={}d, emotion={:.1})",
i + 1, item.priority, item.key, item.interval_days, item.emotion);
println!(" {:2}. [{:.3}] {:>10} {} (interval={}d, emotion={:.1}, spectral={:.1})",
i + 1, item.priority, item.classification, item.key,
item.interval_days, item.emotion, item.outlier_score);
}
Ok(())
}
@ -1003,6 +1057,166 @@ fn cmd_trace(args: &[String]) -> Result<(), String> {
Ok(())
}
/// `spectral [K]`: decompose the memory graph and print a summary.
///
/// `K` is taken from the first argument; a missing or unparseable value
/// falls back to 30.
fn cmd_spectral(args: &[String]) -> Result<(), String> {
    let k = match args.first().map(|raw| raw.parse::<usize>()) {
        Some(Ok(parsed)) => parsed,
        _ => 30,
    };

    let store = capnp_store::Store::load()?;
    let g = graph::build_graph(&store);
    let decomposition = spectral::decompose(&g, k);
    spectral::print_summary(&decomposition, &g);
    Ok(())
}
/// `spectral-save [K]`: decompose the memory graph, convert the result to an
/// embedding and save it.
///
/// `K` is taken from the first argument; a missing or unparseable value
/// falls back to 20.
fn cmd_spectral_save(args: &[String]) -> Result<(), String> {
    let k = match args.first().map(|raw| raw.parse::<usize>()) {
        Some(Ok(parsed)) => parsed,
        _ => 20,
    };

    let store = capnp_store::Store::load()?;
    let g = graph::build_graph(&store);
    let embedding = spectral::to_embedding(&spectral::decompose(&g, k));
    spectral::save_embedding(&embedding)?;
    Ok(())
}
/// `spectral-neighbors KEY [N]`: print the dominant spectral axes of `KEY`
/// and its `N` nearest neighbors in the saved embedding.
///
/// `N` falls back to 15 when missing or unparseable. Errors when `KEY` is
/// missing or the embedding cannot be loaded.
fn cmd_spectral_neighbors(args: &[String]) -> Result<(), String> {
    let key = match args.first() {
        Some(key) => key,
        None => return Err("usage: spectral-neighbors KEY [N]".to_string()),
    };
    let n = match args.get(1).map(|raw| raw.parse::<usize>()) {
        Some(Ok(parsed)) => parsed,
        _ => 15,
    };

    let emb = spectral::load_embedding()?;

    // Which dimensions does this node load on?
    let dims = spectral::dominant_dimensions(&emb, &[key.as_str()]);
    println!("Node: {} (embedding: {} dims)", key, emb.dims);
    println!("Top spectral axes:");
    for &(d, loading) in dims.iter().take(5) {
        let eigenvalue = emb.eigenvalues[d];
        println!(" axis {:<2} (λ={:.4}): loading={:.5}", d, eigenvalue, loading);
    }

    println!("\nNearest neighbors in spectral space:");
    let neighbors = spectral::nearest_neighbors(&emb, key, n);
    for ((k, dist), rank) in neighbors.iter().zip(1usize..) {
        println!(" {:>2}. {:.5} {}", rank, dist, k);
    }

    Ok(())
}
/// `spectral-positions [N]`: print the spectral position analysis for the
/// first `N` positions returned by `spectral::analyze_positions`.
///
/// Output has two sections: the positions classified as "bridge" (omitted
/// when there are none), then all `N` positions with their classification.
/// `N` falls back to 30 when missing or unparseable. Errors when the store
/// or the saved embedding cannot be loaded.
fn cmd_spectral_positions(args: &[String]) -> Result<(), String> {
    let n: usize = args.first()
        .and_then(|s| s.parse().ok())
        .unwrap_or(30);

    let store = capnp_store::Store::load()?;
    let emb = spectral::load_embedding()?;

    // Build communities fresh from graph (don't rely on cached node fields)
    let g = store.build_graph();
    let communities = g.communities().clone();

    let positions = spectral::analyze_positions(&emb, &communities);

    println!("Spectral position analysis — {} nodes", positions.len());
    println!(" outlier: dist_to_center / median (>1 = unusual position)");
    println!(" bridge: dist_to_center / dist_to_nearest_other_community");
    println!();

    // Only bridges get their own section; every other classification is
    // shown in the combined listing below, so there is no need to bucket them.
    let bridges: Vec<&spectral::SpectralPosition> = positions.iter()
        .take(n)
        .filter(|pos| spectral::classify_position(pos) == "bridge")
        .collect();

    if !bridges.is_empty() {
        println!("=== Bridges (between communities) ===");
        for pos in &bridges {
            println!(" [{:.2}/{:.2}] c{} → c{} {}",
                pos.outlier_score, pos.bridge_score,
                pos.community, pos.nearest_community, pos.key);
        }
        println!();
    }

    // NOTE(review): the heading assumes `analyze_positions` returns positions
    // ordered by outlier score — confirm in the spectral module.
    println!("=== Top outliers (far from own community center) ===");
    for pos in positions.iter().take(n) {
        let class = spectral::classify_position(pos);
        println!(" {:>10} outlier={:.2} bridge={:.2} c{:<3} {}",
            class, pos.outlier_score, pos.bridge_score,
            pos.community, pos.key);
    }

    Ok(())
}
/// `spectral-suggest [N]`: list up to `N` pairs of nodes that are close in
/// the saved spectral embedding but have no relation between them.
///
/// Only nodes whose graph degree is at least 3 are considered. `N` falls
/// back to 20 when missing or unparseable. Errors when the store or the
/// saved embedding cannot be loaded.
fn cmd_spectral_suggest(args: &[String]) -> Result<(), String> {
    let n = match args.first().map(|raw| raw.parse::<usize>()) {
        Some(Ok(parsed)) => parsed,
        _ => 20,
    };

    let store = capnp_store::Store::load()?;
    let emb = spectral::load_embedding()?;
    let g = store.build_graph();
    let communities = g.communities();

    // Nodes with too few edges have no meaningful spectral position, so the
    // embedding is restricted to those with enough edges.
    let min_degree = 3;
    let filtered_emb = spectral::SpectralEmbedding {
        dims: emb.dims,
        eigenvalues: emb.eigenvalues.clone(),
        coords: emb.coords.iter()
            .filter(|(k, _)| g.degree(k) >= min_degree)
            .map(|(k, v)| (k.clone(), v.clone()))
            .collect(),
    };

    // Existing links, recorded in both directions.
    let mut linked: std::collections::HashSet<(String, String)> =
        std::collections::HashSet::new();
    for rel in store.relations.iter() {
        let (src, dst) = (&rel.source_key, &rel.target_key);
        linked.insert((src.clone(), dst.clone()));
        linked.insert((dst.clone(), src.clone()));
    }

    eprintln!("Searching {} well-connected nodes (degree >= {})...",
        filtered_emb.coords.len(), min_degree);
    let pairs = spectral::unlinked_neighbors(&filtered_emb, &linked, n);

    println!("{} closest unlinked pairs (candidates for extractor agents):", pairs.len());
    for (i, (k1, k2, dist)) in pairs.iter().enumerate() {
        // Community label, or "?" when the node has no community entry.
        let c1 = match communities.get(k1) {
            Some(c) => format!("c{}", c),
            None => String::from("?"),
        };
        let c2 = match communities.get(k2) {
            Some(c) => format!("c{}", c),
            None => String::from("?"),
        };
        let cross = if c1 == c2 { "" } else { " [cross-community]" };
        println!(" {:>2}. dist={:.4} {} ({}) ↔ {} ({}){}",
            i + 1, dist, k1, c1, k2, c2, cross);
    }

    Ok(())
}
fn cmd_list_keys() -> Result<(), String> {
let store = capnp_store::Store::load()?;
let mut keys: Vec<_> = store.nodes.keys().collect();
@ -1353,7 +1567,9 @@ fn cmd_journal_tail(args: &[String]) -> Result<(), String> {
} else {
// Use first content line, truncated
title = if stripped.len() > 70 {
format!("{}...", &stripped[..67])
let mut end = 67;
while !stripped.is_char_boundary(end) { end -= 1; }
format!("{}...", &stripped[..end])
} else {
stripped.to_string()
};

View file

@ -7,7 +7,9 @@
use crate::capnp_store::Store;
use crate::graph::{self, Graph};
use crate::similarity;
use crate::spectral::{self, SpectralEmbedding, SpectralPosition};
use std::collections::HashMap;
use std::time::{SystemTime, UNIX_EPOCH};
fn now_epoch() -> f64 {
@ -19,25 +21,45 @@ fn now_epoch() -> f64 {
const SECS_PER_DAY: f64 = 86400.0;
/// Consolidation priority: how urgently a node needs attention
/// Consolidation priority: how urgently a node needs attention.
///
/// priority = (1 - schema_fit) × spaced_repetition_due × emotion × (1 + interference)
pub fn consolidation_priority(store: &Store, key: &str, graph: &Graph) -> f64 {
/// With spectral data:
/// priority = spectral_displacement × overdue × emotion
/// Without:
/// priority = (1 - schema_fit) × overdue × emotion
///
/// Spectral displacement is the outlier_score clamped and normalized —
/// it measures how far a node sits from its community center in the
/// eigenspace. This is a global signal (considers all graph structure)
/// vs schema_fit which is local (only immediate neighbors).
pub fn consolidation_priority(
store: &Store,
key: &str,
graph: &Graph,
spectral_outlier: Option<f64>,
) -> f64 {
let node = match store.nodes.get(key) {
Some(n) => n,
None => return 0.0,
};
// Schema fit: 0 = poorly integrated, 1 = well integrated
let fit = graph::schema_fit(graph, key) as f64;
let fit_factor = 1.0 - fit;
// Integration factor: how poorly integrated is this node?
let displacement = if let Some(outlier) = spectral_outlier {
// outlier_score = dist_to_center / median_dist_in_community
// 1.0 = typical position, >2 = unusual, >5 = extreme outlier
// Scale linearly (outlier / 3) and cap the factor at 3.0 to bound the
// dynamic range: every outlier score of 9 or more contributes the same maximum.
(outlier / 3.0).min(3.0)
} else {
let fit = graph::schema_fit(graph, key) as f64;
1.0 - fit
};
// Spaced repetition: how overdue is this node for replay?
let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY;
let time_since_replay = if node.last_replayed > 0.0 {
(now_epoch() - node.last_replayed).max(0.0)
} else {
// Never replayed — treat as very overdue
interval_secs * 3.0
};
let overdue_ratio = (time_since_replay / interval_secs).min(5.0);
@ -45,7 +67,7 @@ pub fn consolidation_priority(store: &Store, key: &str, graph: &Graph) -> f64 {
// Emotional intensity: higher emotion = higher priority
let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);
fit_factor * overdue_ratio * emotion_factor
displacement * overdue_ratio * emotion_factor
}
/// Item in the replay queue
@ -55,28 +77,62 @@ pub struct ReplayItem {
pub interval_days: u32,
pub emotion: f32,
pub schema_fit: f32,
/// Spectral classification: "bridge", "outlier", "core", "peripheral"
pub classification: &'static str,
/// Raw spectral outlier score (distance / median)
pub outlier_score: f64,
}
/// Generate the replay queue: nodes ordered by consolidation priority
/// Generate the replay queue: nodes ordered by consolidation priority.
/// Automatically loads spectral embedding if available.
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
let graph = store.build_graph();
replay_queue_with_graph(store, count, &graph)
let emb = spectral::load_embedding().ok();
replay_queue_with_graph(store, count, &graph, emb.as_ref())
}
/// Generate the replay queue using a pre-built graph (avoids redundant rebuild)
pub fn replay_queue_with_graph(store: &Store, count: usize, graph: &Graph) -> Vec<ReplayItem> {
/// Generate the replay queue using pre-built graph and optional spectral data.
pub fn replay_queue_with_graph(
store: &Store,
count: usize,
graph: &Graph,
emb: Option<&SpectralEmbedding>,
) -> Vec<ReplayItem> {
let fits = graph::schema_fit_all(graph);
// Build spectral position map if embedding is available
let positions: HashMap<String, SpectralPosition> = if let Some(emb) = emb {
let communities = graph.communities().clone();
spectral::analyze_positions(emb, &communities)
.into_iter()
.map(|p| (p.key.clone(), p))
.collect()
} else {
HashMap::new()
};
let mut items: Vec<ReplayItem> = store.nodes.iter()
.map(|(key, node)| {
let priority = consolidation_priority(store, key, graph);
let pos = positions.get(key);
let outlier_score = pos.map(|p| p.outlier_score).unwrap_or(0.0);
let classification = pos
.map(|p| spectral::classify_position(p))
.unwrap_or("unknown");
let priority = consolidation_priority(
store, key, graph,
pos.map(|p| p.outlier_score),
);
let fit = fits.get(key).copied().unwrap_or(0.0);
ReplayItem {
key: key.clone(),
priority,
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
schema_fit: fit,
classification,
outlier_score,
}
})
.collect();
@ -234,6 +290,10 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
item.priority, item.schema_fit, item.emotion));
out.push_str(&format!("Category: {} Interval: {}d\n",
node.category.label(), node.spaced_repetition_interval));
if item.outlier_score > 0.0 {
out.push_str(&format!("Spectral: {} (outlier={:.1})\n",
item.classification, item.outlier_score));
}
if let Some(community) = node.community_id {
out.push_str(&format!("Community: {} ", community));
@ -474,15 +534,17 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
let graph = store.build_graph();
let topology = format_topology_header(&graph);
let emb = spectral::load_embedding().ok();
match agent {
"replay" => {
let items = replay_queue_with_graph(store, count, &graph);
let items = replay_queue_with_graph(store, count, &graph, emb.as_ref());
let nodes_section = format_nodes_section(store, &items, &graph);
load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
}
"linker" => {
// Filter to episodic entries
let mut items = replay_queue_with_graph(store, count * 2, &graph);
let mut items = replay_queue_with_graph(store, count * 2, &graph, emb.as_ref());
items.retain(|item| {
store.nodes.get(&item.key)
.map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession))
@ -516,10 +578,12 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
let fit = graph::schema_fit(&graph, k);
Some(ReplayItem {
key: k.clone(),
priority: consolidation_priority(store, k, &graph),
priority: consolidation_priority(store, k, &graph, None),
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
schema_fit: fit,
classification: "unknown",
outlier_score: 0.0,
})
})
.collect();

View file

@ -4,7 +4,7 @@
// supports circumscription parameter for blending associative vs
// causal walks, and benefits from community-aware result grouping.
use crate::capnp_store::Store;
use crate::capnp_store::StoreView;
use crate::graph::Graph;
use std::cmp::Ordering;
@ -24,10 +24,10 @@ pub struct SearchResult {
fn spreading_activation(
seeds: &[(String, f64)],
graph: &Graph,
store: &Store,
store: &impl StoreView,
_circumscription: f64,
) -> Vec<(String, f64)> {
let params = &store.params;
let params = store.params();
let mut activation: HashMap<String, f64> = HashMap::new();
let mut queue: VecDeque<(String, f64, u32)> = VecDeque::new();
@ -44,10 +44,7 @@ fn spreading_activation(
if depth >= params.max_hops { continue; }
for (neighbor, strength) in graph.neighbors(&key) {
let neighbor_weight = store.nodes.get(neighbor.as_str())
.map(|n| n.weight as f64)
.unwrap_or(params.default_weight);
let neighbor_weight = store.node_weight(neighbor.as_str());
let propagated = act * params.edge_decay * neighbor_weight * strength as f64;
if propagated < params.min_activation { continue; }
@ -65,27 +62,26 @@ fn spreading_activation(
}
/// Full search: find direct hits, spread activation, return ranked results
pub fn search(query: &str, store: &Store) -> Vec<SearchResult> {
let graph = store.build_graph();
pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
let graph = crate::graph::build_graph_fast(store);
let query_lower = query.to_lowercase();
let query_tokens: Vec<&str> = query_lower.split_whitespace().collect();
let mut seeds: Vec<(String, f64)> = Vec::new();
let mut snippets: HashMap<String, String> = HashMap::new();
for (key, node) in &store.nodes {
let content_lower = node.content.to_lowercase();
store.for_each_node(|key, content, weight| {
let content_lower = content.to_lowercase();
let exact_match = content_lower.contains(&query_lower);
let token_match = query_tokens.len() > 1
&& query_tokens.iter().all(|t| content_lower.contains(t));
if exact_match || token_match {
let weight = node.weight as f64;
let activation = if exact_match { weight } else { weight * 0.85 };
seeds.push((key.clone(), activation));
let activation = if exact_match { weight as f64 } else { weight as f64 * 0.85 };
seeds.push((key.to_owned(), activation));
let snippet: String = node.content.lines()
let snippet: String = content.lines()
.filter(|l| {
let ll = l.to_lowercase();
if exact_match && ll.contains(&query_lower) { return true; }
@ -103,9 +99,9 @@ pub fn search(query: &str, store: &Store) -> Vec<SearchResult> {
})
.collect::<Vec<_>>()
.join("\n ");
snippets.insert(key.clone(), snippet);
snippets.insert(key.to_owned(), snippet);
}
}
});
if seeds.is_empty() {
return Vec::new();

566
src/spectral.rs Normal file
View file

@ -0,0 +1,566 @@
// Spectral decomposition of the memory graph.
//
// Computes eigenvalues and eigenvectors of the normalized graph Laplacian.
// The eigenvectors provide natural coordinates for each node — connected
// nodes land nearby, communities form clusters, bridges sit between clusters.
//
// The eigenvalue spectrum reveals:
// - Number of connected components (count of zero eigenvalues)
// - Number of natural communities (eigenvalues near zero, before the gap)
// - How well-connected the graph is (Fiedler value = second eigenvalue)
//
// The eigenvectors provide:
// - Spectral coordinates for each node (the embedding)
// - Community membership (sign/magnitude of Fiedler vector)
// - Natural projections (select which eigenvectors to include)
use crate::graph::Graph;
use faer::Mat;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
/// Raw output of a spectral decomposition, laid out one vector per eigenpair.
#[derive(Debug, Clone, PartialEq)]
pub struct SpectralResult {
    /// Node keys in index order
    pub keys: Vec<String>,
    /// Eigenvalues in ascending order
    pub eigenvalues: Vec<f64>,
    /// Eigenvectors: eigvecs[k] is the k-th eigenvector (ascending eigenvalue order),
    /// with eigvecs[k][i] being the value for node keys[i]
    pub eigvecs: Vec<Vec<f64>>,
}
/// Per-node spectral embedding, serializable to disk.
///
/// This is the transposed layout of [`SpectralResult`]: one coordinate
/// vector per node rather than one vector per eigenpair.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpectralEmbedding {
    /// Number of dimensions (eigenvectors)
    pub dims: usize,
    /// Eigenvalues for each dimension
    pub eigenvalues: Vec<f64>,
    /// Node key → coordinate vector
    pub coords: HashMap<String, Vec<f64>>,
}
/// Location of the persisted embedding: `$HOME/.claude/memory/spectral-embedding.json`.
///
/// When `HOME` is unset or not valid Unicode, the path is relative to the
/// current directory.
fn embedding_path() -> PathBuf {
    let mut path = PathBuf::from(std::env::var("HOME").unwrap_or_default());
    path.push(".claude/memory/spectral-embedding.json");
    path
}
/// Spectral decomposition of the memory graph.
///
/// Builds the normalized Laplacian L_sym = I - D^{-1/2} A D^{-1/2} over all
/// nodes that have at least one edge, eigendecomposes it in full, and keeps
/// the first `k` eigenpairs (capped at the number of connected nodes).
/// Node keys are sorted, so the row order of the result is reproducible.
///
/// # Panics
/// Panics if the eigendecomposition reports failure.
pub fn decompose(graph: &Graph, k: usize) -> SpectralResult {
    // Isolated nodes are dropped before building the matrix.
    let mut keys: Vec<String> = graph.nodes().iter()
        .filter(|key| graph.degree(key) > 0)
        .cloned()
        .collect();
    keys.sort();

    let n = keys.len();
    let isolates = graph.nodes().len() - n;
    if isolates > 0 {
        eprintln!("note: filtered {} isolated nodes, decomposing {} connected nodes", isolates, n);
    }

    let mut key_to_idx: HashMap<&str, usize> = HashMap::with_capacity(n);
    for (idx, key) in keys.iter().enumerate() {
        key_to_idx.insert(key.as_str(), idx);
    }

    // Weighted degree per node plus an upper-triangle edge list.
    let mut degree = vec![0.0f64; n];
    let mut adj_entries: Vec<(usize, usize, f64)> = Vec::new();
    for (i, key) in keys.iter().enumerate() {
        for (neighbor, strength) in graph.neighbors(key) {
            match key_to_idx.get(neighbor.as_str()) {
                // `j > i` records each undirected edge exactly once.
                Some(&j) if j > i => {
                    let w = strength as f64;
                    adj_entries.push((i, j, w));
                    degree[i] += w;
                    degree[j] += w;
                }
                _ => {}
            }
        }
    }

    // Normalized Laplacian: 1 on the diagonal for nodes with positive
    // weighted degree, -w / sqrt(d_i * d_j) for each edge.
    let mut laplacian = Mat::<f64>::zeros(n, n);
    for (i, &d) in degree.iter().enumerate() {
        if d > 0.0 {
            laplacian[(i, i)] = 1.0;
        }
    }
    for &(i, j, w) in &adj_entries {
        if degree[i] > 0.0 && degree[j] > 0.0 {
            let val = -w / (degree[i] * degree[j]).sqrt();
            laplacian[(i, j)] = val;
            laplacian[(j, i)] = val;
        }
    }

    let eig = laplacian.self_adjoint_eigen(faer::Side::Lower)
        .expect("eigendecomposition failed");
    let s = eig.S();
    let u = eig.U();

    // Copy out the leading k eigenpairs; column `col` of U pairs with s[col].
    let k = k.min(n);
    let s_col = s.column_vector();
    let eigenvalues: Vec<f64> = (0..k).map(|col| s_col[col]).collect();
    let eigvecs: Vec<Vec<f64>> = (0..k)
        .map(|col| (0..n).map(|row| u[(row, col)]).collect())
        .collect();

    SpectralResult { keys, eigenvalues, eigvecs }
}
/// Print the spectral summary: eigenvalue spectrum, then each axis with
/// its extreme nodes (what the axis "means").
///
/// Each spectrum line carries a bar of `#` proportional to the gap from the
/// previous eigenvalue (capped at 40 characters). For every axis the five
/// most negative and five most positive nodes are listed with their degree
/// and community (999 when the node has no community).
///
/// # Panics
/// Panics if an eigenvector has more entries than `result.keys`.
pub fn print_summary(result: &SpectralResult, graph: &Graph) {
    let n = result.keys.len();
    let k = result.eigenvalues.len();
    println!("Spectral Decomposition — {} nodes, {} eigenpairs", n, k);
    println!("=========================================\n");

    // Compact eigenvalue table
    println!("Eigenvalue spectrum:");
    let mut prev: Option<f64> = None;
    for (i, &ev) in result.eigenvalues.iter().enumerate() {
        let gap_bar = match prev {
            Some(p) => "#".repeat(((ev - p) * 500.0).min(40.0) as usize),
            None => String::new(),
        };
        println!(" λ_{:<2} = {:.6} {}", i, ev, gap_bar);
        prev = Some(ev);
    }

    // Connected components
    let near_zero = result.eigenvalues.iter()
        .filter(|v| v.abs() < 1e-6)
        .count();
    if near_zero > 1 {
        println!("\n {} eigenvalues near 0 = {} disconnected components", near_zero, near_zero);
    }

    // Each axis: what are the extremes?
    println!("\n\nNatural axes of the knowledge space");
    println!("====================================");

    // Number of nodes shown at each pole of an axis.
    let show = 5;
    let print_row = |idx: usize, val: f64| {
        let key = &result.keys[idx];
        let deg = graph.degree(key);
        let comm = graph.communities().get(key).copied().unwrap_or(999);
        // Keys are truncated for display.
        println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, shorten_key(key));
    };

    // Zipping avoids an out-of-bounds panic if the two vectors differ in length.
    for (axis, (&ev, vec)) in result.eigenvalues.iter().zip(&result.eigvecs).enumerate() {
        // Sort nodes by their value on this axis; total_cmp cannot panic on NaN.
        let mut indexed: Vec<(usize, f64)> = vec.iter().copied().enumerate().collect();
        indexed.sort_by(|a, b| a.1.total_cmp(&b.1));

        // The "spread": how much this axis differentiates nodes.
        let min_val = indexed.first().map_or(0.0, |x| x.1);
        let max_val = indexed.last().map_or(0.0, |x| x.1);
        println!("\n--- Axis {} (λ={:.6}, range={:.4}) ---", axis, ev, max_val - min_val);

        println!(" Negative pole:");
        for &(idx, val) in indexed.iter().take(show) {
            print_row(idx, val);
        }
        println!(" Positive pole:");
        for &(idx, val) in indexed.iter().rev().take(show) {
            print_row(idx, val);
        }
    }
}
/// Shorten a node key for display.
///
/// Returns at most the first 60 bytes of `key`. When byte 60 falls inside a
/// multi-byte character the cut moves back to the previous character
/// boundary, so the slice never panics on non-ASCII keys.
fn shorten_key(key: &str) -> &str {
    const MAX_BYTES: usize = 60;
    if key.len() <= MAX_BYTES {
        return key;
    }
    // Offset 0 is always a boundary, so this loop terminates.
    let mut end = MAX_BYTES;
    while !key.is_char_boundary(end) {
        end -= 1;
    }
    &key[..end]
}
/// Transpose a `SpectralResult` (one vector per eigenpair) into a per-node
/// embedding (one coordinate vector per node key).
///
/// The embedding has one dimension per eigenvector; eigenvalues are copied
/// over unchanged.
pub fn to_embedding(result: &SpectralResult) -> SpectralEmbedding {
    let coords: HashMap<String, Vec<f64>> = result.keys.iter()
        .enumerate()
        .map(|(node_idx, key)| {
            // Component d of this node's position is entry node_idx of eigenvector d.
            let point: Vec<f64> = result.eigvecs.iter()
                .map(|axis| axis[node_idx])
                .collect();
            (key.clone(), point)
        })
        .collect();

    SpectralEmbedding {
        dims: result.eigvecs.len(),
        eigenvalues: result.eigenvalues.clone(),
        coords,
    }
}
/// Write the embedding as JSON to the path given by `embedding_path()`.
///
/// Reports the dimension count, node count, and destination on stderr.
/// Serialization and write failures are returned as descriptive strings.
pub fn save_embedding(emb: &SpectralEmbedding) -> Result<(), String> {
    let path = embedding_path();

    let json = match serde_json::to_string(emb) {
        Ok(text) => text,
        Err(e) => return Err(format!("serialize embedding: {}", e)),
    };
    if let Err(e) = std::fs::write(&path, json) {
        return Err(format!("write {}: {}", path.display(), e));
    }

    eprintln!("Saved {}-dim embedding for {} nodes to {}",
        emb.dims, emb.coords.len(), path.display());
    Ok(())
}
/// Read the JSON embedding from the path given by `embedding_path()`.
///
/// Returns a descriptive error string if the file cannot be read or parsed.
pub fn load_embedding() -> Result<SpectralEmbedding, String> {
    let path = embedding_path();
    let data = match std::fs::read_to_string(&path) {
        Ok(text) => text,
        Err(e) => return Err(format!("read {}: {}", path.display(), e)),
    };
    match serde_json::from_str::<SpectralEmbedding>(&data) {
        Ok(emb) => Ok(emb),
        Err(e) => Err(format!("parse embedding: {}", e)),
    }
}
/// Find the k nearest neighbors to a node in spectral space.
///
/// Uses weighted euclidean distance where each dimension is weighted
/// by 1/eigenvalue — lower eigenvalues (coarser structure) matter more.
/// Dimensions whose eigenvalue is <= 1e-8 get weight 0.
///
/// Returns up to `k` `(key, distance)` pairs in ascending distance order,
/// with ties broken by key so the output is deterministic. Returns an empty
/// vector when `key` is not in the embedding.
pub fn nearest_neighbors(
    emb: &SpectralEmbedding,
    key: &str,
    k: usize,
) -> Vec<(String, f64)> {
    let target = match emb.coords.get(key) {
        Some(c) => c,
        None => return Vec::new(),
    };

    // Weight by inverse eigenvalue (coarser axes matter more)
    let weights: Vec<f64> = emb.eigenvalues.iter()
        .map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
        .collect();

    // Rank borrowed keys first; only the k survivors are cloned below.
    let mut ranked: Vec<(&String, f64)> = emb.coords.iter()
        .filter(|(other, _)| other.as_str() != key)
        .map(|(other, coords)| {
            let dist = target.iter()
                .zip(coords)
                .zip(&weights)
                .map(|((&a, &b), &w)| w * (a - b) * (a - b))
                .sum::<f64>()
                .sqrt();
            (other, dist)
        })
        .collect();

    // total_cmp is a total order, so a NaN distance cannot panic the sort.
    ranked.sort_by(|a, b| a.1.total_cmp(&b.1).then_with(|| a.0.cmp(b.0)));
    ranked.into_iter()
        .take(k)
        .map(|(other, dist)| (other.clone(), dist))
        .collect()
}
/// Find nearest neighbors to a set of seed nodes (multi-seed query).
///
/// Each non-seed node is scored by its minimum weighted distance to any
/// seed that exists in the embedding (dimension weight = 1/eigenvalue,
/// or 0 when the eigenvalue is <= 1e-8).
///
/// Returns up to `k` `(key, distance)` pairs in ascending distance order,
/// with ties broken by key. Returns an empty vector when none of the seeds
/// is present in the embedding.
pub fn nearest_to_seeds(
    emb: &SpectralEmbedding,
    seeds: &[&str],
    k: usize,
) -> Vec<(String, f64)> {
    let seed_set: HashSet<&str> = seeds.iter().copied().collect();
    let seed_coords: Vec<&Vec<f64>> = seeds.iter()
        .filter_map(|s| emb.coords.get(*s))
        .collect();
    if seed_coords.is_empty() {
        return Vec::new();
    }

    let weights: Vec<f64> = emb.eigenvalues.iter()
        .map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
        .collect();

    // Rank borrowed keys first; only the k survivors are cloned below.
    let mut ranked: Vec<(&String, f64)> = emb.coords.iter()
        .filter(|(key, _)| !seed_set.contains(key.as_str()))
        .map(|(key, coords)| {
            // Distance to nearest seed
            let min_dist = seed_coords.iter()
                .map(|seed| {
                    coords.iter()
                        .zip(seed.iter())
                        .zip(&weights)
                        .map(|((&a, &b), &w)| w * (a - b) * (a - b))
                        .sum::<f64>()
                        .sqrt()
                })
                .fold(f64::MAX, f64::min);
            (key, min_dist)
        })
        .collect();

    // total_cmp is a total order, so a NaN distance cannot panic the sort.
    ranked.sort_by(|a, b| a.1.total_cmp(&b.1).then_with(|| a.0.cmp(b.0)));
    ranked.into_iter()
        .take(k)
        .map(|(key, dist)| (key.clone(), dist))
        .collect()
}
/// Weighted euclidean distance between two points:
/// sqrt(sum_i w_i * (a_i - b_i)^2).
///
/// Only the leading components present in all three slices contribute.
/// Callers pass 1/eigenvalue weights so coarser structure matters more.
fn weighted_distance(a: &[f64], b: &[f64], weights: &[f64]) -> f64 {
    let sum_sq: f64 = a.iter()
        .zip(b)
        .zip(weights)
        .map(|((x, y), w)| {
            let diff = x - y;
            w * diff * diff
        })
        .sum();
    sum_sq.sqrt()
}
/// Per-dimension distance weights: 1/eigenvalue for eigenvalues above 1e-8,
/// and 0 otherwise, so (near-)zero eigenvalues drop out of distances
/// instead of dividing by zero.
fn eigenvalue_weights(eigenvalues: &[f64]) -> Vec<f64> {
    let mut weights = Vec::with_capacity(eigenvalues.len());
    for &lambda in eigenvalues {
        let w = if lambda > 1e-8 { 1.0 / lambda } else { 0.0 };
        weights.push(w);
    }
    weights
}
/// Compute cluster centers (centroids) in spectral space.
///
/// Returns, for each community with at least one embedded node, the
/// unweighted mean of its members' coordinates as a vector of length
/// `emb.dims`. Nodes without a community assignment are skipped.
/// Coordinate components beyond `emb.dims` are ignored rather than
/// indexed out of bounds.
pub fn cluster_centers(
    emb: &SpectralEmbedding,
    communities: &HashMap<String, u32>,
) -> HashMap<u32, Vec<f64>> {
    // community -> (component-wise sum, member count)
    let mut sums: HashMap<u32, (Vec<f64>, usize)> = HashMap::new();

    for (key, coords) in &emb.coords {
        if let Some(&comm) = communities.get(key) {
            let (sum, count) = sums.entry(comm)
                .or_insert_with(|| (vec![0.0; emb.dims], 0));
            // zip stops at the shorter side, so a malformed (too long)
            // coordinate vector cannot panic here.
            for (acc, &c) in sum.iter_mut().zip(coords) {
                *acc += c;
            }
            *count += 1;
        }
    }

    sums.into_iter()
        .map(|(comm, (sum, count))| {
            let members = count as f64;
            let center: Vec<f64> = sum.into_iter().map(|s| s / members).collect();
            (comm, center)
        })
        .collect()
}
/// Per-node analysis of spectral position relative to communities.
#[derive(Debug, Clone, PartialEq)]
pub struct SpectralPosition {
    /// Node key this position describes
    pub key: String,
    /// Community the node is assigned to
    pub community: u32,
    /// Distance to own community center
    pub dist_to_center: f64,
    /// Distance to nearest OTHER community center
    pub dist_to_nearest: f64,
    /// Which community is nearest (other than own)
    pub nearest_community: u32,
    /// dist_to_center / median_dist_in_community (>1 = outlier)
    pub outlier_score: f64,
    /// dist_to_center / dist_to_nearest (>1 = between clusters, potential bridge)
    pub bridge_score: f64,
}
/// Analyze spectral positions for all nodes.
///
/// Returns positions sorted by outlier_score descending (most displaced first).
pub fn analyze_positions(
emb: &SpectralEmbedding,
communities: &HashMap<String, u32>,
) -> Vec<SpectralPosition> {
let centers = cluster_centers(emb, communities);
let weights = eigenvalue_weights(&emb.eigenvalues);
// Compute distances to own community center
let mut by_community: HashMap<u32, Vec<f64>> = HashMap::new();
let mut node_dists: Vec<(String, u32, f64)> = Vec::new();
for (key, coords) in &emb.coords {
if let Some(&comm) = communities.get(key) {
if let Some(center) = centers.get(&comm) {
let dist = weighted_distance(coords, center, &weights);
by_community.entry(comm).or_default().push(dist);
node_dists.push((key.clone(), comm, dist));
}
}
}
// Median distance per community for outlier scoring
let medians: HashMap<u32, f64> = by_community.into_iter()
.map(|(comm, mut dists)| {
dists.sort_by(|a, b| a.partial_cmp(b).unwrap());
let median = if dists.is_empty() {
1.0
} else if dists.len() % 2 == 0 {
(dists[dists.len() / 2 - 1] + dists[dists.len() / 2]) / 2.0
} else {
dists[dists.len() / 2]
};
(comm, median.max(1e-6))
})
.collect();
let mut positions: Vec<SpectralPosition> = node_dists.into_iter()
.map(|(key, comm, dist_to_center)| {
let coords = &emb.coords[&key];
let (nearest_community, dist_to_nearest) = centers.iter()
.filter(|(&c, _)| c != comm)
.map(|(&c, center)| (c, weighted_distance(coords, center, &weights)))
.min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
.unwrap_or((comm, f64::MAX));
let median = medians.get(&comm).copied().unwrap_or(1.0);
let outlier_score = dist_to_center / median;
let bridge_score = if dist_to_nearest > 1e-8 {
dist_to_center / dist_to_nearest
} else {
0.0
};
SpectralPosition {
key, community: comm,
dist_to_center, dist_to_nearest, nearest_community,
outlier_score, bridge_score,
}
})
.collect();
positions.sort_by(|a, b| b.outlier_score.partial_cmp(&a.outlier_score).unwrap());
positions
}
/// Find pairs of nodes that are spectrally close but not linked in the graph.
///
/// These are the most valuable candidates for extractor agents —
/// the spectral structure says they should be related, but nobody
/// has articulated why.
///
/// A pair is treated as linked when `linked_pairs` contains it in either
/// orientation. Returns up to `max_pairs` `(key1, key2, distance)` triples
/// in ascending distance order. Within a triple `key1 < key2`, and ties are
/// broken by key, so the output does not depend on hash-map iteration order.
///
/// All unlinked pairs are scored before truncation, so time and memory grow
/// quadratically with the number of embedded nodes.
pub fn unlinked_neighbors(
    emb: &SpectralEmbedding,
    linked_pairs: &HashSet<(String, String)>,
    max_pairs: usize,
) -> Vec<(String, String, f64)> {
    let weights = eigenvalue_weights(&emb.eigenvalues);

    // Sorted key order makes pair orientation and tie order deterministic.
    let mut nodes: Vec<(&str, &[f64])> = emb.coords.iter()
        .map(|(key, coords)| (key.as_str(), coords.as_slice()))
        .collect();
    nodes.sort_unstable_by(|a, b| a.0.cmp(b.0));

    // Borrowed view of the link set: membership tests inside the O(n^2)
    // loop then need no String allocation.
    let linked: HashSet<(&str, &str)> = linked_pairs.iter()
        .map(|(a, b)| (a.as_str(), b.as_str()))
        .collect();

    let mut candidates: Vec<(&str, &str, f64)> = Vec::new();
    for (i, &(k1, c1)) in nodes.iter().enumerate() {
        for &(k2, c2) in &nodes[i + 1..] {
            // Skip if already linked in either direction
            if linked.contains(&(k1, k2)) || linked.contains(&(k2, k1)) {
                continue;
            }
            candidates.push((k1, k2, weighted_distance(c1, c2, &weights)));
        }
    }

    // total_cmp is a total order, so a NaN distance cannot panic the sort.
    candidates.sort_by(|a, b| {
        a.2.total_cmp(&b.2)
            .then_with(|| (a.0, a.1).cmp(&(b.0, b.1)))
    });

    // Only the surviving pairs pay for owned key copies.
    candidates.into_iter()
        .take(max_pairs)
        .map(|(k1, k2, dist)| (k1.to_owned(), k2.to_owned(), dist))
        .collect()
}
/// Approximate spectral coordinates for a new node using Nyström extension.
///
/// Given a new node's edges to existing nodes, estimate where it would
/// land in spectral space without recomputing the full decomposition.
/// Uses weighted average of neighbors' coordinates, weighted by edge strength.
///
/// Neighbors missing from the embedding are ignored. Returns `None` when
/// the total weight of the known neighbors is below 1e-8 (including the
/// case of no known neighbors). Coordinate components beyond `emb.dims`
/// are ignored rather than indexed out of bounds.
pub fn nystrom_project(
    emb: &SpectralEmbedding,
    neighbors: &[(&str, f32)], // (key, edge_strength)
) -> Option<Vec<f64>> {
    let mut weighted_sum = vec![0.0f64; emb.dims];
    let mut total_weight = 0.0f64;

    for &(key, strength) in neighbors {
        if let Some(coords) = emb.coords.get(key) {
            let w = f64::from(strength);
            // zip stops at the shorter side, so a malformed (too long)
            // coordinate vector cannot panic here.
            for (acc, &c) in weighted_sum.iter_mut().zip(coords) {
                *acc += w * c;
            }
            total_weight += w;
        }
    }

    if total_weight < 1e-8 {
        return None;
    }
    Some(weighted_sum.into_iter().map(|s| s / total_weight).collect())
}
/// Classify a spectral position as "bridge", "outlier", "core", or
/// "peripheral".
///
/// The bridge test takes precedence: a bridge score above 0.7 yields
/// "bridge" regardless of the outlier score. Otherwise an outlier score
/// above 2.0 is "outlier", below 0.5 is "core", and anything else is
/// "peripheral".
pub fn classify_position(pos: &SpectralPosition) -> &'static str {
    // Between two communities.
    if pos.bridge_score > 0.7 {
        return "bridge";
    }
    match pos.outlier_score {
        // Far from own community center.
        score if score > 2.0 => "outlier",
        // Close to community center.
        score if score < 0.5 => "core",
        // Normal community member.
        _ => "peripheral",
    }
}
/// Identify which spectral dimensions a set of nodes load on most heavily.
///
/// The loading of a dimension is the sum of absolute coordinate values of
/// the given nodes on that dimension. Keys missing from the embedding are
/// ignored, and a coordinate vector shorter than `emb.dims` contributes 0
/// to the dimensions it lacks.
///
/// Returns `(dimension index, loading)` for every dimension, sorted by
/// loading descending (equal loadings keep ascending index order). Returns
/// an empty vector when none of the keys is in the embedding.
pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize, f64)> {
    let coords: Vec<&Vec<f64>> = keys.iter()
        .filter_map(|k| emb.coords.get(*k))
        .collect();
    if coords.is_empty() {
        return Vec::new();
    }

    let mut dim_loading: Vec<(usize, f64)> = (0..emb.dims)
        .map(|d| {
            // `get` instead of indexing: a short vector must not panic.
            let loading: f64 = coords.iter()
                .map(|c| c.get(d).map_or(0.0, |v| v.abs()))
                .sum();
            (d, loading)
        })
        .collect();

    // Stable sort with a total order: NaN cannot panic, ties keep index order.
    dim_loading.sort_by(|a, b| b.1.total_cmp(&a.1));
    dim_loading
}