search: composable algorithm pipeline
Break search into composable stages that chain left-to-right: each stage takes seeds (Vec<(String, f64)>) and returns modified seeds.

Available algorithms:
  spread   — spreading activation through graph edges
  spectral — nearest neighbors in spectral embedding
  manifold — (placeholder) extrapolation along seed direction

Stages accept inline params: spread,max_hops=4,edge_decay=0.5

memory-search gets --hook, --debug, --seen modes plus positional pipeline args. poc-memory search gets -p/--pipeline flags.

Also:
  - fix spectral decompose() to skip zero eigenvalues from disconnected components
  - filter degenerate zero-coord nodes from spectral projection
  - POC_AGENT bail-out for daemon agents
  - all debug output to stdout

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
0a35a17fad
commit
c1664bf76b
4 changed files with 723 additions and 151 deletions
|
|
@ -59,12 +59,21 @@ struct Cli {
|
|||
#[derive(Subcommand)]
|
||||
enum Command {
|
||||
/// Search memory (AND logic across terms)
|
||||
///
|
||||
/// Pipeline: -p spread -p spectral,k=20
|
||||
/// Default pipeline: spread
|
||||
Search {
|
||||
/// Search terms
|
||||
query: Vec<String>,
|
||||
/// Show 15 results instead of 5, plus spectral neighbors
|
||||
/// Algorithm pipeline stages (repeatable)
|
||||
#[arg(short, long = "pipeline")]
|
||||
pipeline: Vec<String>,
|
||||
/// Show more results
|
||||
#[arg(long)]
|
||||
expand: bool,
|
||||
/// Show debug output for each pipeline stage
|
||||
#[arg(long)]
|
||||
debug: bool,
|
||||
},
|
||||
/// Scan markdown files, index all memory units
|
||||
Init,
|
||||
|
|
@ -469,8 +478,8 @@ fn main() {
|
|||
let cli = Cli::parse();
|
||||
|
||||
let result = match cli.command {
|
||||
Command::Search { query, expand }
|
||||
=> cmd_search(&query, expand),
|
||||
Command::Search { query, pipeline, expand, debug }
|
||||
=> cmd_search(&query, &pipeline, expand, debug),
|
||||
Command::Init => cmd_init(),
|
||||
Command::Migrate => cmd_migrate(),
|
||||
Command::Health => cmd_health(),
|
||||
|
|
@ -575,8 +584,9 @@ fn main() {
|
|||
|
||||
// ── Command implementations ─────────────────────────────────────────
|
||||
|
||||
fn cmd_search(terms: &[String], expand: bool) -> Result<(), String> {
|
||||
fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, debug: bool) -> Result<(), String> {
|
||||
use store::StoreView;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
if terms.is_empty() {
|
||||
return Err("search requires at least one term".into());
|
||||
|
|
@ -584,70 +594,68 @@ fn cmd_search(terms: &[String], expand: bool) -> Result<(), String> {
|
|||
|
||||
let query: String = terms.join(" ");
|
||||
|
||||
// Parse pipeline (default: spread)
|
||||
let pipeline: Vec<search::AlgoStage> = if pipeline_args.is_empty() {
|
||||
vec![search::AlgoStage::parse("spread").unwrap()]
|
||||
} else {
|
||||
pipeline_args.iter()
|
||||
.map(|a| search::AlgoStage::parse(a))
|
||||
.collect::<Result<Vec<_>, _>>()?
|
||||
};
|
||||
|
||||
if debug {
|
||||
let names: Vec<String> = pipeline.iter().map(|s| format!("{}", s.algo)).collect();
|
||||
println!("[search] pipeline: {}", names.join(" → "));
|
||||
}
|
||||
|
||||
let view = store::AnyView::load()?;
|
||||
let results = search::search(&query, &view);
|
||||
let graph = graph::build_graph_fast(&view);
|
||||
|
||||
// Build equal-weight terms from query
|
||||
let terms: BTreeMap<String, f64> = query.split_whitespace()
|
||||
.map(|t| (t.to_lowercase(), 1.0))
|
||||
.collect();
|
||||
|
||||
let (seeds, direct_hits) = search::match_seeds(&terms, &view);
|
||||
|
||||
if seeds.is_empty() {
|
||||
eprintln!("No results for '{}'", query);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if debug {
|
||||
println!("[search] {} seeds from query '{}'", seeds.len(), query);
|
||||
for (key, score) in &seeds {
|
||||
println!(" {:.4} {}", score, key);
|
||||
}
|
||||
}
|
||||
|
||||
let max_results = if expand { 15 } else { 5 };
|
||||
let raw = search::run_pipeline(&pipeline, seeds, &graph, &view, debug, max_results);
|
||||
|
||||
let results: Vec<search::SearchResult> = raw.into_iter()
|
||||
.map(|(key, activation)| {
|
||||
let is_direct = direct_hits.contains(&key);
|
||||
search::SearchResult { key, activation, is_direct, snippet: None }
|
||||
})
|
||||
.collect();
|
||||
|
||||
if results.is_empty() {
|
||||
eprintln!("No results for '{}'", query);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let limit = if expand { 15 } else { 5 };
|
||||
|
||||
// Log retrieval to a small append-only file (avoid 6MB state.bin rewrite)
|
||||
// Log retrieval
|
||||
store::Store::log_retrieval_static(&query,
|
||||
&results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
|
||||
|
||||
// Bump daily lookup counters (fast path, no store needed)
|
||||
let bump_keys: Vec<&str> = results.iter().take(limit).map(|r| r.key.as_str()).collect();
|
||||
let bump_keys: Vec<&str> = results.iter().take(max_results).map(|r| r.key.as_str()).collect();
|
||||
let _ = lookups::bump_many(&bump_keys);
|
||||
|
||||
let text_keys: std::collections::HashSet<String> = results.iter()
|
||||
.take(limit).map(|r| r.key.clone()).collect();
|
||||
|
||||
for (i, r) in results.iter().enumerate().take(limit) {
|
||||
for (i, r) in results.iter().enumerate().take(max_results) {
|
||||
let marker = if r.is_direct { "→" } else { " " };
|
||||
let weight = view.node_weight(&r.key);
|
||||
println!("{}{:2}. [{:.2}/{:.2}] {}", marker, i + 1, r.activation, weight, r.key);
|
||||
if let Some(ref snippet) = r.snippet {
|
||||
println!(" {}", snippet);
|
||||
}
|
||||
}
|
||||
|
||||
if expand {
|
||||
if let Ok(emb) = spectral::load_embedding() {
|
||||
let seeds: Vec<&str> = results.iter()
|
||||
.take(5)
|
||||
.map(|r| r.key.as_str())
|
||||
.filter(|k| emb.coords.contains_key(*k))
|
||||
.collect();
|
||||
|
||||
if !seeds.is_empty() {
|
||||
let spectral_hits = spectral::nearest_to_seeds(&emb, &seeds, 10);
|
||||
let new_hits: Vec<_> = spectral_hits.into_iter()
|
||||
.filter(|(k, _)| !text_keys.contains(k))
|
||||
.take(5)
|
||||
.collect();
|
||||
|
||||
if !new_hits.is_empty() {
|
||||
println!("\nSpectral neighbors (structural, not keyword):");
|
||||
for (k, _dist) in &new_hits {
|
||||
let weight = view.node_weight(k);
|
||||
println!(" ~ [{:.2}] {}", weight, k);
|
||||
if let Some(content) = view.node_content(k) {
|
||||
let snippet = util::first_n_chars(
|
||||
content.lines()
|
||||
.find(|l| !l.trim().is_empty() && !l.starts_with('#'))
|
||||
.unwrap_or(""),
|
||||
100);
|
||||
if !snippet.is_empty() {
|
||||
println!(" {}", snippet);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue