search: composable algorithm pipeline

Break search into composable stages that chain left-to-right:
each stage takes seeds Vec<(String, f64)> and returns modified seeds.

Available algorithms:
  spread              — spreading activation through graph edges
  spectral            — nearest neighbors in spectral embedding
  manifold            — (placeholder) extrapolation along seed direction

Stages accept inline params: spread,max_hops=4,edge_decay=0.5

memory-search gets --hook, --debug, --seen modes plus positional
pipeline args. poc-memory search gets -p/--pipeline flags.

Also: fix spectral decompose() to skip zero eigenvalues from
disconnected components, filter degenerate zero-coord nodes from
the spectral projection, add a POC_AGENT bail-out for daemon
agents, and send all debug output to stdout.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-09 01:19:04 -04:00
parent 0a35a17fad
commit c1664bf76b
4 changed files with 723 additions and 151 deletions

View file

@ -59,12 +59,21 @@ struct Cli {
#[derive(Subcommand)]
enum Command {
/// Search memory (AND logic across terms)
///
/// Pipeline: -p spread -p spectral,k=20
/// Default pipeline: spread
Search {
/// Search terms
query: Vec<String>,
/// Show 15 results instead of 5, plus spectral neighbors
/// Algorithm pipeline stages (repeatable)
#[arg(short, long = "pipeline")]
pipeline: Vec<String>,
/// Show more results
#[arg(long)]
expand: bool,
/// Show debug output for each pipeline stage
#[arg(long)]
debug: bool,
},
/// Scan markdown files, index all memory units
Init,
@ -469,8 +478,8 @@ fn main() {
let cli = Cli::parse();
let result = match cli.command {
Command::Search { query, expand }
=> cmd_search(&query, expand),
Command::Search { query, pipeline, expand, debug }
=> cmd_search(&query, &pipeline, expand, debug),
Command::Init => cmd_init(),
Command::Migrate => cmd_migrate(),
Command::Health => cmd_health(),
@ -575,8 +584,9 @@ fn main() {
// ── Command implementations ─────────────────────────────────────────
fn cmd_search(terms: &[String], expand: bool) -> Result<(), String> {
fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, debug: bool) -> Result<(), String> {
use store::StoreView;
use std::collections::BTreeMap;
if terms.is_empty() {
return Err("search requires at least one term".into());
@ -584,70 +594,68 @@ fn cmd_search(terms: &[String], expand: bool) -> Result<(), String> {
let query: String = terms.join(" ");
// Parse pipeline (default: spread)
let pipeline: Vec<search::AlgoStage> = if pipeline_args.is_empty() {
vec![search::AlgoStage::parse("spread").unwrap()]
} else {
pipeline_args.iter()
.map(|a| search::AlgoStage::parse(a))
.collect::<Result<Vec<_>, _>>()?
};
if debug {
let names: Vec<String> = pipeline.iter().map(|s| format!("{}", s.algo)).collect();
println!("[search] pipeline: {}", names.join(""));
}
let view = store::AnyView::load()?;
let results = search::search(&query, &view);
let graph = graph::build_graph_fast(&view);
// Build equal-weight terms from query
let terms: BTreeMap<String, f64> = query.split_whitespace()
.map(|t| (t.to_lowercase(), 1.0))
.collect();
let (seeds, direct_hits) = search::match_seeds(&terms, &view);
if seeds.is_empty() {
eprintln!("No results for '{}'", query);
return Ok(());
}
if debug {
println!("[search] {} seeds from query '{}'", seeds.len(), query);
for (key, score) in &seeds {
println!(" {:.4} {}", score, key);
}
}
let max_results = if expand { 15 } else { 5 };
let raw = search::run_pipeline(&pipeline, seeds, &graph, &view, debug, max_results);
let results: Vec<search::SearchResult> = raw.into_iter()
.map(|(key, activation)| {
let is_direct = direct_hits.contains(&key);
search::SearchResult { key, activation, is_direct, snippet: None }
})
.collect();
if results.is_empty() {
eprintln!("No results for '{}'", query);
return Ok(());
}
let limit = if expand { 15 } else { 5 };
// Log retrieval to a small append-only file (avoid 6MB state.bin rewrite)
// Log retrieval
store::Store::log_retrieval_static(&query,
&results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
// Bump daily lookup counters (fast path, no store needed)
let bump_keys: Vec<&str> = results.iter().take(limit).map(|r| r.key.as_str()).collect();
let bump_keys: Vec<&str> = results.iter().take(max_results).map(|r| r.key.as_str()).collect();
let _ = lookups::bump_many(&bump_keys);
let text_keys: std::collections::HashSet<String> = results.iter()
.take(limit).map(|r| r.key.clone()).collect();
for (i, r) in results.iter().enumerate().take(limit) {
for (i, r) in results.iter().enumerate().take(max_results) {
let marker = if r.is_direct { "" } else { " " };
let weight = view.node_weight(&r.key);
println!("{}{:2}. [{:.2}/{:.2}] {}", marker, i + 1, r.activation, weight, r.key);
if let Some(ref snippet) = r.snippet {
println!(" {}", snippet);
}
}
if expand {
if let Ok(emb) = spectral::load_embedding() {
let seeds: Vec<&str> = results.iter()
.take(5)
.map(|r| r.key.as_str())
.filter(|k| emb.coords.contains_key(*k))
.collect();
if !seeds.is_empty() {
let spectral_hits = spectral::nearest_to_seeds(&emb, &seeds, 10);
let new_hits: Vec<_> = spectral_hits.into_iter()
.filter(|(k, _)| !text_keys.contains(k))
.take(5)
.collect();
if !new_hits.is_empty() {
println!("\nSpectral neighbors (structural, not keyword):");
for (k, _dist) in &new_hits {
let weight = view.node_weight(k);
println!(" ~ [{:.2}] {}", weight, k);
if let Some(content) = view.node_content(k) {
let snippet = util::first_n_chars(
content.lines()
.find(|l| !l.trim().is_empty() && !l.starts_with('#'))
.unwrap_or(""),
100);
if !snippet.is_empty() {
println!(" {}", snippet);
}
}
}
}
}
}
}
Ok(())