spectral decomposition, search improvements, char boundary fix

- New spectral module: Laplacian eigendecomposition of the memory graph.
  Commands: spectral, spectral-save, spectral-neighbors, spectral-positions,
  spectral-suggest. Spectral neighbors expand search results beyond keyword
  matching to structural proximity.

- Search: use StoreView trait to avoid 6MB state.bin rewrite on every query.
  Append-only retrieval logging. Spectral expansion shows structurally
  nearby nodes after text results.

- Fix panic in journal-tail: string truncation at byte 67 could land inside
  a multi-byte character (em dash). Now walks back to char boundary.

- Replay queue: show classification and spectral outlier score.

- Knowledge agents: extractor, challenger, connector prompts and runner
  scripts for automated graph enrichment.

- memory-search hook: stale state file cleanup (24h expiry).
This commit is contained in:
ProofOfConcept 2026-03-03 01:33:31 -05:00
parent 94dbca6018
commit 71e6f15d82
16 changed files with 3600 additions and 103 deletions

View file

@ -7,7 +7,9 @@
use crate::capnp_store::Store;
use crate::graph::{self, Graph};
use crate::similarity;
use crate::spectral::{self, SpectralEmbedding, SpectralPosition};
use std::collections::HashMap;
use std::time::{SystemTime, UNIX_EPOCH};
fn now_epoch() -> f64 {
@ -19,25 +21,45 @@ fn now_epoch() -> f64 {
const SECS_PER_DAY: f64 = 86400.0;
/// Consolidation priority: how urgently a node needs attention
/// Consolidation priority: how urgently a node needs attention.
///
/// priority = (1 - schema_fit) × spaced_repetition_due × emotion × (1 + interference)
pub fn consolidation_priority(store: &Store, key: &str, graph: &Graph) -> f64 {
/// With spectral data:
/// priority = spectral_displacement × overdue × emotion
/// Without:
/// priority = (1 - schema_fit) × overdue × emotion
///
/// Spectral displacement is the outlier_score clamped and normalized —
/// it measures how far a node sits from its community center in the
/// eigenspace. This is a global signal (considers all graph structure)
/// vs schema_fit which is local (only immediate neighbors).
pub fn consolidation_priority(
store: &Store,
key: &str,
graph: &Graph,
spectral_outlier: Option<f64>,
) -> f64 {
let node = match store.nodes.get(key) {
Some(n) => n,
None => return 0.0,
};
// Schema fit: 0 = poorly integrated, 1 = well integrated
let fit = graph::schema_fit(graph, key) as f64;
let fit_factor = 1.0 - fit;
// Integration factor: how poorly integrated is this node?
let displacement = if let Some(outlier) = spectral_outlier {
// outlier_score = dist_to_center / median_dist_in_community
// 1.0 = typical position, >2 = unusual, >5 = extreme outlier
// Use log scale for dynamic range: the difference between
// outlier=5 and outlier=10 matters less than 1 vs 2.
(outlier / 3.0).min(3.0)
} else {
let fit = graph::schema_fit(graph, key) as f64;
1.0 - fit
};
// Spaced repetition: how overdue is this node for replay?
let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY;
let time_since_replay = if node.last_replayed > 0.0 {
(now_epoch() - node.last_replayed).max(0.0)
} else {
// Never replayed — treat as very overdue
interval_secs * 3.0
};
let overdue_ratio = (time_since_replay / interval_secs).min(5.0);
@ -45,7 +67,7 @@ pub fn consolidation_priority(store: &Store, key: &str, graph: &Graph) -> f64 {
// Emotional intensity: higher emotion = higher priority
let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);
fit_factor * overdue_ratio * emotion_factor
displacement * overdue_ratio * emotion_factor
}
/// Item in the replay queue
@ -55,28 +77,62 @@ pub struct ReplayItem {
pub interval_days: u32,
pub emotion: f32,
pub schema_fit: f32,
/// Spectral classification: "bridge", "outlier", "core", "peripheral"
pub classification: &'static str,
/// Raw spectral outlier score (distance / median)
pub outlier_score: f64,
}
/// Generate the replay queue: nodes ordered by consolidation priority
/// Generate the replay queue: nodes ordered by consolidation priority.
/// Automatically loads spectral embedding if available.
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
let graph = store.build_graph();
replay_queue_with_graph(store, count, &graph)
let emb = spectral::load_embedding().ok();
replay_queue_with_graph(store, count, &graph, emb.as_ref())
}
/// Generate the replay queue using a pre-built graph (avoids redundant rebuild)
pub fn replay_queue_with_graph(store: &Store, count: usize, graph: &Graph) -> Vec<ReplayItem> {
/// Generate the replay queue using pre-built graph and optional spectral data.
pub fn replay_queue_with_graph(
store: &Store,
count: usize,
graph: &Graph,
emb: Option<&SpectralEmbedding>,
) -> Vec<ReplayItem> {
let fits = graph::schema_fit_all(graph);
// Build spectral position map if embedding is available
let positions: HashMap<String, SpectralPosition> = if let Some(emb) = emb {
let communities = graph.communities().clone();
spectral::analyze_positions(emb, &communities)
.into_iter()
.map(|p| (p.key.clone(), p))
.collect()
} else {
HashMap::new()
};
let mut items: Vec<ReplayItem> = store.nodes.iter()
.map(|(key, node)| {
let priority = consolidation_priority(store, key, graph);
let pos = positions.get(key);
let outlier_score = pos.map(|p| p.outlier_score).unwrap_or(0.0);
let classification = pos
.map(|p| spectral::classify_position(p))
.unwrap_or("unknown");
let priority = consolidation_priority(
store, key, graph,
pos.map(|p| p.outlier_score),
);
let fit = fits.get(key).copied().unwrap_or(0.0);
ReplayItem {
key: key.clone(),
priority,
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
schema_fit: fit,
classification,
outlier_score,
}
})
.collect();
@ -234,6 +290,10 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
item.priority, item.schema_fit, item.emotion));
out.push_str(&format!("Category: {} Interval: {}d\n",
node.category.label(), node.spaced_repetition_interval));
if item.outlier_score > 0.0 {
out.push_str(&format!("Spectral: {} (outlier={:.1})\n",
item.classification, item.outlier_score));
}
if let Some(community) = node.community_id {
out.push_str(&format!("Community: {} ", community));
@ -474,15 +534,17 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
let graph = store.build_graph();
let topology = format_topology_header(&graph);
let emb = spectral::load_embedding().ok();
match agent {
"replay" => {
let items = replay_queue_with_graph(store, count, &graph);
let items = replay_queue_with_graph(store, count, &graph, emb.as_ref());
let nodes_section = format_nodes_section(store, &items, &graph);
load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
}
"linker" => {
// Filter to episodic entries
let mut items = replay_queue_with_graph(store, count * 2, &graph);
let mut items = replay_queue_with_graph(store, count * 2, &graph, emb.as_ref());
items.retain(|item| {
store.nodes.get(&item.key)
.map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession))
@ -516,10 +578,12 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
let fit = graph::schema_fit(&graph, k);
Some(ReplayItem {
key: k.clone(),
priority: consolidation_priority(store, k, &graph),
priority: consolidation_priority(store, k, &graph, None),
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
schema_fit: fit,
classification: "unknown",
outlier_score: 0.0,
})
})
.collect();