search: composable algorithm pipeline

Break search into composable stages that chain left-to-right:
each stage takes seeds Vec<(String, f64)> and returns modified seeds.

Available algorithms:
  spread              — spreading activation through graph edges
  spectral            — nearest neighbors in spectral embedding
  manifold            — (placeholder) extrapolation along seed direction

Stages accept inline params: spread,max_hops=4,edge_decay=0.5

memory-search gets --hook, --debug, --seen modes plus positional
pipeline args. poc-memory search gets -p/--pipeline flags.

Also: fix spectral decompose() to skip zero eigenvalues from
disconnected components, filter degenerate zero-coord nodes out of the
spectral projection, add a POC_AGENT bail-out for daemon agents, and
send all debug output to stdout.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-09 01:19:04 -04:00
parent 0a35a17fad
commit c1664bf76b
4 changed files with 723 additions and 151 deletions

View file

@ -1,13 +1,22 @@
// Spreading activation search across the memory graph
// Memory search: composable algorithm pipeline.
//
// Same model as the old system but richer: uses graph edge strengths,
// supports circumscription parameter for blending associative vs
// causal walks, and benefits from community-aware result grouping.
// Each algorithm is a stage: takes seeds Vec<(String, f64)>, returns
// new/modified seeds. Stages compose left-to-right in a pipeline.
//
// Available algorithms:
// spread — spreading activation through graph edges
// spectral — nearest neighbors in spectral embedding space
// manifold — extrapolation along direction defined by seeds (TODO)
//
// Seed extraction (matching query terms to node keys) is shared
// infrastructure, not an algorithm stage.
use crate::store::StoreView;
use crate::graph::Graph;
use crate::spectral;
use std::collections::{HashMap, HashSet, VecDeque};
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt;
pub struct SearchResult {
pub key: String,
@ -16,18 +25,211 @@ pub struct SearchResult {
pub snippet: Option<String>,
}
/// A parsed algorithm stage with its parameters.
///
/// One element of a search pipeline: the algorithm to run plus the inline
/// `key=value` options given for it (e.g. `spread,max_hops=4,edge_decay=0.5`).
#[derive(Clone, Debug)]
pub struct AlgoStage {
    /// Which algorithm this stage runs.
    pub algo: Algorithm,
    /// Inline parameters, stored as raw strings keyed by parameter name.
    pub params: HashMap<String, String>,
}
/// The search algorithms that can appear as a pipeline stage.
///
/// The enum carries no data, so it is `Copy` and comparable/hashable.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Algorithm {
    /// Spreading activation through graph edges.
    Spread,
    /// Nearest neighbors in the spectral embedding.
    Spectral,
    /// Placeholder stage; currently passes seeds through unchanged.
    Manifold,
}

/// Formats the algorithm as the lowercase name used in pipeline strings.
impl fmt::Display for Algorithm {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Algorithm::Spread => "spread",
            Algorithm::Spectral => "spectral",
            Algorithm::Manifold => "manifold",
        };
        // write_str (not pad) so width/precision flags stay ignored, exactly
        // as they were with `write!(f, "literal")`.
        f.write_str(name)
    }
}
impl AlgoStage {
    /// Parse a stage spec such as `"spread,max_hops=4,edge_decay=0.5"`.
    ///
    /// The first comma-separated field is the algorithm name; every following
    /// field must be `key=value`. Whitespace around the name, keys and values
    /// is ignored, so `"spread, max_hops = 4"` parses the same as
    /// `"spread,max_hops=4"`. If a key is repeated, the last value wins.
    ///
    /// # Errors
    ///
    /// Returns a message if the algorithm name is unknown or a field after
    /// the name contains no `=`.
    pub fn parse(s: &str) -> Result<Self, String> {
        let mut parts = s.split(',').map(str::trim);

        // `split` always yields at least one item; the fallback only guards
        // against that invariant changing.
        let name = parts.next().unwrap_or("");
        let algo = match name {
            "spread" => Algorithm::Spread,
            "spectral" => Algorithm::Spectral,
            "manifold" => Algorithm::Manifold,
            _ => return Err(format!("unknown algorithm: {}", name)),
        };

        // Collecting into Result stops at the first malformed field.
        let params = parts
            .map(|part| {
                part.split_once('=')
                    .map(|(k, v)| (k.trim().to_string(), v.trim().to_string()))
                    .ok_or_else(|| format!("bad param (expected key=val): {}", part))
            })
            .collect::<Result<HashMap<_, _>, _>>()?;

        Ok(AlgoStage { algo, params })
    }

    /// Look up `key` and parse it as `T`; a missing or unparseable value
    /// yields `default`.
    fn param<T: std::str::FromStr>(&self, key: &str, default: T) -> T {
        self.params
            .get(key)
            .and_then(|raw| raw.parse().ok())
            .unwrap_or(default)
    }

    /// Value of `key` as `f64`, or `default` if absent or not a valid number.
    fn param_f64(&self, key: &str, default: f64) -> f64 {
        self.param(key, default)
    }

    /// Value of `key` as `u32`, or `default` if absent or not a valid number.
    fn param_u32(&self, key: &str, default: u32) -> u32 {
        self.param(key, default)
    }

    /// Value of `key` as `usize`, or `default` if absent or not a valid number.
    fn param_usize(&self, key: &str, default: usize) -> usize {
        self.param(key, default)
    }
}
/// Turn weighted query terms into seeds by exact lookup against node keys.
///
/// Node keys are indexed by their lowercased form; each term is looked up
/// as given (no lowercasing is applied to the term here). A matching node
/// becomes a seed scored `term_weight * node_weight`.
///
/// Returns `(seeds, direct_hits)`, where `direct_hits` holds the original
/// keys of every node matched here (as opposed to nodes later added by an
/// algorithm stage).
pub fn match_seeds(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
) -> (Vec<(String, f64)>, HashSet<String>) {
    // lowercased key -> (original key, node weight)
    let mut by_lower: HashMap<String, (String, f64)> = HashMap::new();
    store.for_each_node(|key, _content, weight| {
        by_lower.insert(key.to_lowercase(), (key.to_owned(), weight as f64));
    });

    let seeds: Vec<(String, f64)> = terms
        .iter()
        .filter_map(|(term, &term_weight)| {
            by_lower
                .get(term)
                .map(|(orig_key, node_weight)| (orig_key.clone(), term_weight * node_weight))
        })
        .collect();

    let direct_hits: HashSet<String> = seeds.iter().map(|(key, _)| key.clone()).collect();

    (seeds, direct_hits)
}
/// Run a pipeline of algorithm stages, left to right.
///
/// Each stage receives the previous stage's output as its seeds. The
/// `Manifold` stage is not implemented and passes its input through
/// unchanged. After the last stage the list is truncated to `max_results`.
///
/// With `debug` set, a header and up to 15 results per stage are printed to
/// stdout; the entry at position `max_results` is tagged as the cutoff.
pub fn run_pipeline(
    stages: &[AlgoStage],
    seeds: Vec<(String, f64)>,
    graph: &Graph,
    store: &impl StoreView,
    debug: bool,
    max_results: usize,
) -> Vec<(String, f64)> {
    // Number of per-stage results shown in debug output.
    const DEBUG_PREVIEW: usize = 15;

    let mut current = seeds;

    for stage in stages {
        if debug {
            println!("\n[search] === {} ({} seeds in) ===", stage.algo, current.len());
        }

        current = match stage.algo {
            Algorithm::Spread => run_spread(&current, graph, store, stage, debug),
            Algorithm::Spectral => run_spectral(&current, graph, stage, debug),
            Algorithm::Manifold => {
                if debug {
                    println!(" (manifold not yet implemented, passing through)");
                }
                current
            }
        };

        if debug {
            // Separator added: the name and the count previously ran together
            // (e.g. "spread20 results").
            println!("[search] {} -> {} results", stage.algo, current.len());
            for (i, (key, score)) in current.iter().enumerate().take(DEBUG_PREVIEW) {
                let cutoff = if i + 1 == max_results { " <-- cutoff" } else { "" };
                println!(" [{:.4}] {}{}", score, key, cutoff);
            }
            if current.len() > DEBUG_PREVIEW {
                println!(" ... ({} more)", current.len() - DEBUG_PREVIEW);
            }
        }
    }

    current.truncate(max_results);
    current
}
/// Spreading-activation stage: propagate seed scores through graph edges.
///
/// Stage params `max_hops`, `edge_decay` and `min_activation` override the
/// corresponding values from `store.params()`; any that are not supplied
/// (or fail to parse) fall back to the store's value.
fn run_spread(
    seeds: &[(String, f64)],
    graph: &Graph,
    store: &impl StoreView,
    stage: &AlgoStage,
    _debug: bool,
) -> Vec<(String, f64)> {
    let defaults = store.params();

    spreading_activation(
        seeds,
        graph,
        store,
        stage.param_u32("max_hops", defaults.max_hops),
        stage.param_f64("edge_decay", defaults.edge_decay),
        stage.param_f64("min_activation", defaults.min_activation),
    )
}
/// Spectral stage: add nearest neighbors of the seeds in the spectral embedding.
///
/// Stage param `k` (default 20) is passed to the neighbor search as the
/// neighbor count. If the embedding cannot be loaded, the seeds are returned
/// unchanged. Otherwise the result is the original seeds, untouched and in
/// their original order, followed by every projected node that was not
/// already a seed, scored `1 / (1 + dist)`.
fn run_spectral(
    seeds: &[(String, f64)],
    graph: &Graph,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let neighbor_count = stage.param_usize("k", 20);

    let embedding = match spectral::load_embedding() {
        Ok(loaded) => loaded,
        Err(err) => {
            if debug {
                println!(" no spectral embedding: {}", err);
            }
            return seeds.to_vec();
        }
    };

    let mut weighted_seeds: Vec<(&str, f64)> = Vec::with_capacity(seeds.len());
    for (key, weight) in seeds {
        weighted_seeds.push((key.as_str(), *weight));
    }

    let projected = spectral::nearest_to_seeds_weighted(
        &embedding,
        &weighted_seeds,
        Some(graph),
        neighbor_count,
    );

    if debug {
        for (key, dist) in &projected {
            let score = 1.0 / (1.0 + dist);
            println!(" dist={:.6} score={:.4} {}", dist, score, key);
        }
    }

    // Existing seeds win: projected nodes are only appended when new.
    let known: HashSet<&str> = seeds.iter().map(|(key, _)| key.as_str()).collect();
    let mut merged = seeds.to_vec();
    merged.extend(
        projected
            .into_iter()
            .filter(|(key, _)| !known.contains(key.as_str()))
            .map(|(key, dist)| (key, 1.0 / (1.0 + dist))),
    );
    merged
}
fn spreading_activation(
seeds: &[(String, f64)],
graph: &Graph,
store: &impl StoreView,
_circumscription: f64,
max_hops: u32,
edge_decay: f64,
min_activation: f64,
) -> Vec<(String, f64)> {
let params = store.params();
let mut activation: HashMap<String, f64> = HashMap::new();
let mut queue: VecDeque<(String, f64, u32)> = VecDeque::new();
@ -40,12 +242,12 @@ fn spreading_activation(
}
while let Some((key, act, depth)) = queue.pop_front() {
if depth >= params.max_hops { continue; }
if depth >= max_hops { continue; }
for (neighbor, strength) in graph.neighbors(&key) {
let neighbor_weight = store.node_weight(neighbor.as_str());
let propagated = act * params.edge_decay * neighbor_weight * strength as f64;
if propagated < params.min_activation { continue; }
let propagated = act * edge_decay * neighbor_weight * strength as f64;
if propagated < min_activation { continue; }
let current = activation.entry(neighbor.clone()).or_insert(0.0);
if propagated > *current {
@ -60,55 +262,71 @@ fn spreading_activation(
results
}
/// Full search: find direct hits, spread activation, return ranked results
pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
/// Search with weighted terms, quietly, returning at most five results.
///
/// Delegates to `search_weighted_inner` with debug output disabled and a
/// result limit of 5.
pub fn search_weighted(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
) -> Vec<SearchResult> {
    const DEFAULT_MAX_RESULTS: usize = 5;
    let debug = false;
    search_weighted_inner(terms, store, debug, DEFAULT_MAX_RESULTS)
}
/// Debug variant of `search_weighted`: debug output is enabled and the
/// caller chooses the result limit via `max_results`.
pub fn search_weighted_debug(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
    max_results: usize,
) -> Vec<SearchResult> {
    let debug = true;
    search_weighted_inner(terms, store, debug, max_results)
}
fn search_weighted_inner(
terms: &BTreeMap<String, f64>,
store: &impl StoreView,
debug: bool,
max_results: usize,
) -> Vec<SearchResult> {
let graph = crate::graph::build_graph_fast(store);
let query_lower = query.to_lowercase();
let query_tokens: Vec<&str> = query_lower.split_whitespace().collect();
let mut seeds: Vec<(String, f64)> = Vec::new();
let mut snippets: HashMap<String, String> = HashMap::new();
store.for_each_node(|key, content, weight| {
let content_lower = content.to_lowercase();
let exact_match = content_lower.contains(&query_lower);
let token_match = query_tokens.len() > 1
&& query_tokens.iter().all(|t| content_lower.contains(t));
if exact_match || token_match {
let activation = if exact_match { weight as f64 } else { weight as f64 * 0.85 };
seeds.push((key.to_owned(), activation));
let snippet: String = content.lines()
.filter(|l| {
let ll = l.to_lowercase();
if exact_match && ll.contains(&query_lower) { return true; }
query_tokens.iter().any(|t| ll.contains(t))
})
.take(3)
.map(|l| {
let t = l.trim();
crate::util::truncate(t, 97, "...")
})
.collect::<Vec<_>>()
.join("\n ");
snippets.insert(key.to_owned(), snippet);
}
});
let (seeds, direct_hits) = match_seeds(terms, store);
if seeds.is_empty() {
return Vec::new();
}
let direct_hits: HashSet<String> = seeds.iter().map(|(k, _)| k.clone()).collect();
let raw_results = spreading_activation(&seeds, &graph, store, 0.0);
if debug {
println!("\n[search] === SEEDS ({}) ===", seeds.len());
let mut sorted_seeds = seeds.clone();
sorted_seeds.sort_by(|a, b| b.1.total_cmp(&a.1));
for (key, score) in sorted_seeds.iter().take(20) {
println!(" {:.4} {}", score, key);
}
}
raw_results.into_iter().map(|(key, activation)| {
let is_direct = direct_hits.contains(&key);
let snippet = snippets.get(&key).cloned();
SearchResult { key, activation, is_direct, snippet }
}).collect()
// Default pipeline: spectral → spread (legacy behavior)
let pipeline = vec![
AlgoStage { algo: Algorithm::Spectral, params: HashMap::new() },
AlgoStage { algo: Algorithm::Spread, params: HashMap::new() },
];
let raw_results = run_pipeline(&pipeline, seeds, &graph, store, debug, max_results);
raw_results.into_iter()
.take(max_results)
.map(|(key, activation)| {
let is_direct = direct_hits.contains(&key);
SearchResult { key, activation, is_direct, snippet: None }
}).collect()
}
/// Search a free-text query, giving every term equal weight (for interactive use).
///
/// The query is split on whitespace; each token is lowercased and assigned
/// weight 1.0, then handed to `search_weighted`. Repeated tokens collapse
/// into a single term.
pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
    let mut terms: BTreeMap<String, f64> = BTreeMap::new();
    for token in query.split_whitespace() {
        terms.insert(token.to_lowercase(), 1.0);
    }
    search_weighted(&terms, store)
}
/// Extract meaningful search terms from natural language.