search: composable algorithm pipeline
Break search into composable stages that chain left-to-right: each stage takes seeds Vec<(String, f64)> and returns modified seeds.

Available algorithms:
  spread   — spreading activation through graph edges
  spectral — nearest neighbors in spectral embedding
  manifold — (placeholder) extrapolation along seed direction

Stages accept inline params: spread,max_hops=4,edge_decay=0.5

memory-search gets --hook, --debug, --seen modes plus positional pipeline args. poc-memory search gets -p/--pipeline flags.

Also: fix spectral decompose() to skip zero eigenvalues from disconnected components, filter degenerate zero-coord nodes from spectral projection, POC_AGENT bail-out for daemon agents, all debug output to stdout.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
0a35a17fad
commit
c1664bf76b
4 changed files with 723 additions and 151 deletions
|
|
@ -1,13 +1,22 @@
|
|||
// Spreading activation search across the memory graph
|
||||
// Memory search: composable algorithm pipeline.
|
||||
//
|
||||
// Same model as the old system but richer: uses graph edge strengths,
|
||||
// supports circumscription parameter for blending associative vs
|
||||
// causal walks, and benefits from community-aware result grouping.
|
||||
// Each algorithm is a stage: takes seeds Vec<(String, f64)>, returns
|
||||
// new/modified seeds. Stages compose left-to-right in a pipeline.
|
||||
//
|
||||
// Available algorithms:
|
||||
// spread — spreading activation through graph edges
|
||||
// spectral — nearest neighbors in spectral embedding space
|
||||
// manifold — extrapolation along direction defined by seeds (TODO)
|
||||
//
|
||||
// Seed extraction (matching query terms to node keys) is shared
|
||||
// infrastructure, not an algorithm stage.
|
||||
|
||||
use crate::store::StoreView;
|
||||
use crate::graph::Graph;
|
||||
use crate::spectral;
|
||||
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
|
||||
use std::fmt;
|
||||
|
||||
pub struct SearchResult {
|
||||
pub key: String,
|
||||
|
|
@ -16,18 +25,211 @@ pub struct SearchResult {
|
|||
pub snippet: Option<String>,
|
||||
}
|
||||
|
||||
/// Spreading activation with circumscription parameter.
|
||||
/// A parsed algorithm stage with its parameters.
///
/// Usually built from a textual spec such as
/// `spread,max_hops=4,edge_decay=0.5` via [`AlgoStage::parse`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AlgoStage {
    /// Which algorithm this stage runs.
    pub algo: Algorithm,
    /// Raw `key=value` parameters. Values stay as strings and are parsed
    /// on lookup by the `param_*` helpers.
    pub params: HashMap<String, String>,
}

/// The algorithms a pipeline stage can select.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Algorithm {
    Spread,
    Spectral,
    Manifold,
}

impl fmt::Display for Algorithm {
    /// Writes the same lowercase name that [`AlgoStage::parse`] accepts.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Algorithm::Spread => "spread",
            Algorithm::Spectral => "spectral",
            Algorithm::Manifold => "manifold",
        })
    }
}

impl AlgoStage {
    /// Parse "spread,max_hops=4,edge_decay=0.5" into an AlgoStage.
    ///
    /// The first comma-separated field is the algorithm name; every
    /// following field must be `key=value`. Whitespace around the name,
    /// keys and values is ignored, so `"spread, max_hops = 4"` is
    /// equivalent to `"spread,max_hops=4"`.
    ///
    /// # Errors
    ///
    /// Returns a message when the algorithm name is unknown, or when a
    /// parameter field has no `=` or has an empty key.
    pub fn parse(s: &str) -> Result<Self, String> {
        let mut parts = s.split(',').map(str::trim);
        let name = parts.next().unwrap_or("");
        let algo = match name {
            "spread" => Algorithm::Spread,
            "spectral" => Algorithm::Spectral,
            "manifold" => Algorithm::Manifold,
            _ => return Err(format!("unknown algorithm: {}", name)),
        };

        let mut params = HashMap::new();
        for part in parts {
            match part.split_once('=') {
                // Trim both sides so a spec written with spaces after the
                // commas still yields keys the param_* lookups can find.
                Some((key, value)) if !key.trim().is_empty() => {
                    params.insert(key.trim().to_string(), value.trim().to_string());
                }
                _ => return Err(format!("bad param (expected key=val): {}", part)),
            }
        }

        Ok(AlgoStage { algo, params })
    }

    /// Look up `key` and parse it as `T`.
    ///
    /// Falls back to `default` when the key is absent or when its value
    /// does not parse as `T`.
    fn param<T: std::str::FromStr>(&self, key: &str, default: T) -> T {
        self.params
            .get(key)
            .and_then(|raw| raw.trim().parse().ok())
            .unwrap_or(default)
    }

    /// Float parameter lookup; `default` when missing or unparsable.
    fn param_f64(&self, key: &str, default: f64) -> f64 {
        self.param(key, default)
    }

    /// `u32` parameter lookup; `default` when missing or unparsable.
    fn param_u32(&self, key: &str, default: u32) -> u32 {
        self.param(key, default)
    }

    /// `usize` parameter lookup; `default` when missing or unparsable.
    fn param_usize(&self, key: &str, default: usize) -> usize {
        self.param(key, default)
    }
}
|
||||
|
||||
/// Extract seeds from weighted terms by matching against node keys.
|
||||
///
|
||||
/// circ = 0.0: field mode — all edges (default, broad resonance)
|
||||
/// circ = 1.0: causal mode — prefer causal edges
|
||||
/// Returns (seeds, direct_hits) where direct_hits tracks which keys
|
||||
/// were matched directly (vs found by an algorithm stage).
|
||||
pub fn match_seeds(
|
||||
terms: &BTreeMap<String, f64>,
|
||||
store: &impl StoreView,
|
||||
) -> (Vec<(String, f64)>, HashSet<String>) {
|
||||
let mut seeds: Vec<(String, f64)> = Vec::new();
|
||||
let mut direct_hits: HashSet<String> = HashSet::new();
|
||||
|
||||
let mut key_map: HashMap<String, (String, f64)> = HashMap::new();
|
||||
store.for_each_node(|key, _content, weight| {
|
||||
key_map.insert(key.to_lowercase(), (key.to_owned(), weight as f64));
|
||||
});
|
||||
|
||||
for (term, &term_weight) in terms {
|
||||
if let Some((orig_key, node_weight)) = key_map.get(term) {
|
||||
let score = term_weight * node_weight;
|
||||
seeds.push((orig_key.clone(), score));
|
||||
direct_hits.insert(orig_key.clone());
|
||||
}
|
||||
}
|
||||
|
||||
(seeds, direct_hits)
|
||||
}
|
||||
|
||||
/// Run a pipeline of algorithm stages.
|
||||
pub fn run_pipeline(
|
||||
stages: &[AlgoStage],
|
||||
seeds: Vec<(String, f64)>,
|
||||
graph: &Graph,
|
||||
store: &impl StoreView,
|
||||
debug: bool,
|
||||
max_results: usize,
|
||||
) -> Vec<(String, f64)> {
|
||||
let mut current = seeds;
|
||||
|
||||
for stage in stages {
|
||||
if debug {
|
||||
println!("\n[search] === {} ({} seeds in) ===", stage.algo, current.len());
|
||||
}
|
||||
|
||||
current = match stage.algo {
|
||||
Algorithm::Spread => run_spread(¤t, graph, store, stage, debug),
|
||||
Algorithm::Spectral => run_spectral(¤t, graph, stage, debug),
|
||||
Algorithm::Manifold => {
|
||||
if debug { println!(" (manifold not yet implemented, passing through)"); }
|
||||
current
|
||||
}
|
||||
};
|
||||
|
||||
if debug {
|
||||
println!("[search] {} → {} results", stage.algo, current.len());
|
||||
for (i, (key, score)) in current.iter().enumerate().take(15) {
|
||||
let cutoff = if i + 1 == max_results { " <-- cutoff" } else { "" };
|
||||
println!(" [{:.4}] {}{}", score, key, cutoff);
|
||||
}
|
||||
if current.len() > 15 {
|
||||
println!(" ... ({} more)", current.len() - 15);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
current.truncate(max_results);
|
||||
current
|
||||
}
|
||||
|
||||
/// Spreading activation: propagate scores through graph edges.
|
||||
///
|
||||
/// Tunable params: max_hops (default from store), edge_decay (default from store),
|
||||
/// min_activation (default from store).
|
||||
fn run_spread(
|
||||
seeds: &[(String, f64)],
|
||||
graph: &Graph,
|
||||
store: &impl StoreView,
|
||||
stage: &AlgoStage,
|
||||
_debug: bool,
|
||||
) -> Vec<(String, f64)> {
|
||||
let store_params = store.params();
|
||||
let max_hops = stage.param_u32("max_hops", store_params.max_hops);
|
||||
let edge_decay = stage.param_f64("edge_decay", store_params.edge_decay);
|
||||
let min_activation = stage.param_f64("min_activation", store_params.min_activation);
|
||||
|
||||
spreading_activation(seeds, graph, store, max_hops, edge_decay, min_activation)
|
||||
}
|
||||
|
||||
/// Spectral projection: find nearest neighbors in spectral embedding space.
|
||||
///
|
||||
/// Tunable params: k (default 20, number of neighbors to find).
|
||||
fn run_spectral(
|
||||
seeds: &[(String, f64)],
|
||||
graph: &Graph,
|
||||
stage: &AlgoStage,
|
||||
debug: bool,
|
||||
) -> Vec<(String, f64)> {
|
||||
let k = stage.param_usize("k", 20);
|
||||
|
||||
let emb = match spectral::load_embedding() {
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
if debug { println!(" no spectral embedding: {}", e); }
|
||||
return seeds.to_vec();
|
||||
}
|
||||
};
|
||||
|
||||
let weighted_seeds: Vec<(&str, f64)> = seeds.iter()
|
||||
.map(|(k, w)| (k.as_str(), *w))
|
||||
.collect();
|
||||
let projected = spectral::nearest_to_seeds_weighted(
|
||||
&emb, &weighted_seeds, Some(graph), k,
|
||||
);
|
||||
|
||||
if debug {
|
||||
for (key, dist) in &projected {
|
||||
let score = 1.0 / (1.0 + dist);
|
||||
println!(" dist={:.6} score={:.4} {}", dist, score, key);
|
||||
}
|
||||
}
|
||||
|
||||
// Merge: keep original seeds, add spectral results as new seeds
|
||||
let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();
|
||||
let mut result = seeds.to_vec();
|
||||
for (key, dist) in projected {
|
||||
if !seed_set.contains(key.as_str()) {
|
||||
let score = 1.0 / (1.0 + dist);
|
||||
result.push((key, score));
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn spreading_activation(
|
||||
seeds: &[(String, f64)],
|
||||
graph: &Graph,
|
||||
store: &impl StoreView,
|
||||
_circumscription: f64,
|
||||
max_hops: u32,
|
||||
edge_decay: f64,
|
||||
min_activation: f64,
|
||||
) -> Vec<(String, f64)> {
|
||||
let params = store.params();
|
||||
|
||||
let mut activation: HashMap<String, f64> = HashMap::new();
|
||||
let mut queue: VecDeque<(String, f64, u32)> = VecDeque::new();
|
||||
|
||||
|
|
@ -40,12 +242,12 @@ fn spreading_activation(
|
|||
}
|
||||
|
||||
while let Some((key, act, depth)) = queue.pop_front() {
|
||||
if depth >= params.max_hops { continue; }
|
||||
if depth >= max_hops { continue; }
|
||||
|
||||
for (neighbor, strength) in graph.neighbors(&key) {
|
||||
let neighbor_weight = store.node_weight(neighbor.as_str());
|
||||
let propagated = act * params.edge_decay * neighbor_weight * strength as f64;
|
||||
if propagated < params.min_activation { continue; }
|
||||
let propagated = act * edge_decay * neighbor_weight * strength as f64;
|
||||
if propagated < min_activation { continue; }
|
||||
|
||||
let current = activation.entry(neighbor.clone()).or_insert(0.0);
|
||||
if propagated > *current {
|
||||
|
|
@ -60,55 +262,71 @@ fn spreading_activation(
|
|||
results
|
||||
}
|
||||
|
||||
/// Full search: find direct hits, spread activation, return ranked results
|
||||
pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
|
||||
/// Search with weighted terms: exact key matching + spectral projection.
|
||||
///
|
||||
/// Terms are matched against node keys. Matching nodes become seeds,
|
||||
/// scored by term_weight × node_weight. Seeds are then projected into
|
||||
/// spectral space to find nearby nodes, with link weights modulating distance.
|
||||
pub fn search_weighted(
|
||||
terms: &BTreeMap<String, f64>,
|
||||
store: &impl StoreView,
|
||||
) -> Vec<SearchResult> {
|
||||
search_weighted_inner(terms, store, false, 5)
|
||||
}
|
||||
|
||||
/// Like search_weighted but with debug output and configurable result count.
|
||||
pub fn search_weighted_debug(
|
||||
terms: &BTreeMap<String, f64>,
|
||||
store: &impl StoreView,
|
||||
max_results: usize,
|
||||
) -> Vec<SearchResult> {
|
||||
search_weighted_inner(terms, store, true, max_results)
|
||||
}
|
||||
|
||||
fn search_weighted_inner(
|
||||
terms: &BTreeMap<String, f64>,
|
||||
store: &impl StoreView,
|
||||
debug: bool,
|
||||
max_results: usize,
|
||||
) -> Vec<SearchResult> {
|
||||
let graph = crate::graph::build_graph_fast(store);
|
||||
let query_lower = query.to_lowercase();
|
||||
let query_tokens: Vec<&str> = query_lower.split_whitespace().collect();
|
||||
|
||||
let mut seeds: Vec<(String, f64)> = Vec::new();
|
||||
let mut snippets: HashMap<String, String> = HashMap::new();
|
||||
|
||||
store.for_each_node(|key, content, weight| {
|
||||
let content_lower = content.to_lowercase();
|
||||
|
||||
let exact_match = content_lower.contains(&query_lower);
|
||||
let token_match = query_tokens.len() > 1
|
||||
&& query_tokens.iter().all(|t| content_lower.contains(t));
|
||||
|
||||
if exact_match || token_match {
|
||||
let activation = if exact_match { weight as f64 } else { weight as f64 * 0.85 };
|
||||
seeds.push((key.to_owned(), activation));
|
||||
|
||||
let snippet: String = content.lines()
|
||||
.filter(|l| {
|
||||
let ll = l.to_lowercase();
|
||||
if exact_match && ll.contains(&query_lower) { return true; }
|
||||
query_tokens.iter().any(|t| ll.contains(t))
|
||||
})
|
||||
.take(3)
|
||||
.map(|l| {
|
||||
let t = l.trim();
|
||||
crate::util::truncate(t, 97, "...")
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n ");
|
||||
snippets.insert(key.to_owned(), snippet);
|
||||
}
|
||||
});
|
||||
let (seeds, direct_hits) = match_seeds(terms, store);
|
||||
|
||||
if seeds.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let direct_hits: HashSet<String> = seeds.iter().map(|(k, _)| k.clone()).collect();
|
||||
let raw_results = spreading_activation(&seeds, &graph, store, 0.0);
|
||||
if debug {
|
||||
println!("\n[search] === SEEDS ({}) ===", seeds.len());
|
||||
let mut sorted_seeds = seeds.clone();
|
||||
sorted_seeds.sort_by(|a, b| b.1.total_cmp(&a.1));
|
||||
for (key, score) in sorted_seeds.iter().take(20) {
|
||||
println!(" {:.4} {}", score, key);
|
||||
}
|
||||
}
|
||||
|
||||
raw_results.into_iter().map(|(key, activation)| {
|
||||
let is_direct = direct_hits.contains(&key);
|
||||
let snippet = snippets.get(&key).cloned();
|
||||
SearchResult { key, activation, is_direct, snippet }
|
||||
}).collect()
|
||||
// Default pipeline: spectral → spread (legacy behavior)
|
||||
let pipeline = vec![
|
||||
AlgoStage { algo: Algorithm::Spectral, params: HashMap::new() },
|
||||
AlgoStage { algo: Algorithm::Spread, params: HashMap::new() },
|
||||
];
|
||||
|
||||
let raw_results = run_pipeline(&pipeline, seeds, &graph, store, debug, max_results);
|
||||
|
||||
raw_results.into_iter()
|
||||
.take(max_results)
|
||||
.map(|(key, activation)| {
|
||||
let is_direct = direct_hits.contains(&key);
|
||||
SearchResult { key, activation, is_direct, snippet: None }
|
||||
}).collect()
|
||||
}
|
||||
|
||||
/// Search with equal-weight terms (for interactive use).
|
||||
pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
|
||||
let terms: BTreeMap<String, f64> = query.split_whitespace()
|
||||
.map(|t| (t.to_lowercase(), 1.0))
|
||||
.collect();
|
||||
search_weighted(&terms, store)
|
||||
}
|
||||
|
||||
/// Extract meaningful search terms from natural language.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue