search: unified query pipeline with filters, transforms, generators
Extend the pipeline with four stage types composing left-to-right:
Generators: all, match:TERM
Filters: type:, key:, weight:, age:, content-len:, provenance:,
not-visited:, visited: (plus ! negation)
Transforms: sort:(priority|timestamp|content-len|degree|weight), limit:N
Algorithms: spread, spectral, confluence, geodesic, manifold (unchanged)
Also adds duration syntax (7d, 24h, 30m) and glob matching on keys.
The CLI auto-detects filter, transform, and generator stages and loads the full Store;
algorithm-only pipelines keep the fast MmapView path.
Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
c6bb7c3910
commit
e736471d99
2 changed files with 591 additions and 70 deletions
|
|
@ -598,81 +598,129 @@ fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bo
|
|||
use store::StoreView;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
if terms.is_empty() {
|
||||
return Err("search requires at least one term".into());
|
||||
// Parse pipeline stages (unified: algorithms, filters, transforms, generators)
|
||||
let stages: Vec<search::Stage> = if pipeline_args.is_empty() {
|
||||
vec![search::Stage::Algorithm(search::AlgoStage::parse("spread").unwrap())]
|
||||
} else {
|
||||
pipeline_args.iter()
|
||||
.map(|a| search::Stage::parse(a))
|
||||
.collect::<Result<Vec<_>, _>>()?
|
||||
};
|
||||
|
||||
// Check if pipeline needs full Store (has filters/transforms/generators)
|
||||
let needs_store = stages.iter().any(|s| !matches!(s, search::Stage::Algorithm(_)));
|
||||
// Check if pipeline starts with a generator (doesn't need seed terms)
|
||||
let has_generator = stages.first().map(|s| matches!(s, search::Stage::Generator(_))).unwrap_or(false);
|
||||
|
||||
if terms.is_empty() && !has_generator {
|
||||
return Err("search requires terms or a generator stage (e.g. 'all')".into());
|
||||
}
|
||||
|
||||
let query: String = terms.join(" ");
|
||||
|
||||
// Parse pipeline (default: spread)
|
||||
let pipeline: Vec<search::AlgoStage> = if pipeline_args.is_empty() {
|
||||
vec![search::AlgoStage::parse("spread").unwrap()]
|
||||
} else {
|
||||
pipeline_args.iter()
|
||||
.map(|a| search::AlgoStage::parse(a))
|
||||
.collect::<Result<Vec<_>, _>>()?
|
||||
};
|
||||
|
||||
if debug {
|
||||
let names: Vec<String> = pipeline.iter().map(|s| format!("{}", s.algo)).collect();
|
||||
let names: Vec<String> = stages.iter().map(|s| format!("{}", s)).collect();
|
||||
println!("[search] pipeline: {}", names.join(" → "));
|
||||
}
|
||||
|
||||
let view = store::AnyView::load()?;
|
||||
let graph = graph::build_graph_fast(&view);
|
||||
|
||||
// Build equal-weight terms from query
|
||||
let terms: BTreeMap<String, f64> = query.split_whitespace()
|
||||
.map(|t| (t.to_lowercase(), 1.0))
|
||||
.collect();
|
||||
|
||||
let (seeds, direct_hits) = search::match_seeds(&terms, &view);
|
||||
|
||||
if seeds.is_empty() {
|
||||
eprintln!("No results for '{}'", query);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if debug {
|
||||
println!("[search] {} seeds from query '{}'", seeds.len(), query);
|
||||
for (key, score) in &seeds {
|
||||
println!(" {:.4} {}", score, key);
|
||||
}
|
||||
}
|
||||
|
||||
let max_results = if expand { 15 } else { 5 };
|
||||
let raw = search::run_pipeline(&pipeline, seeds, &graph, &view, debug, max_results);
|
||||
|
||||
let results: Vec<search::SearchResult> = raw.into_iter()
|
||||
.map(|(key, activation)| {
|
||||
let is_direct = direct_hits.contains(&key);
|
||||
search::SearchResult { key, activation, is_direct, snippet: None }
|
||||
})
|
||||
.collect();
|
||||
if needs_store {
|
||||
// Full Store path — needed for filter/transform/generator stages
|
||||
let store = store::Store::load()?;
|
||||
let graph = store.build_graph();
|
||||
|
||||
if results.is_empty() {
|
||||
eprintln!("No results for '{}'", query);
|
||||
return Ok(());
|
||||
}
|
||||
let seeds = if has_generator {
|
||||
vec![] // generator will produce its own result set
|
||||
} else {
|
||||
let terms_map: BTreeMap<String, f64> = query.split_whitespace()
|
||||
.map(|t| (t.to_lowercase(), 1.0))
|
||||
.collect();
|
||||
let (seeds, _) = search::match_seeds(&terms_map, &store);
|
||||
seeds
|
||||
};
|
||||
|
||||
// Log retrieval
|
||||
store::Store::log_retrieval_static(&query,
|
||||
&results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
|
||||
let raw = search::run_query(&stages, seeds, &graph, &store, debug, max_results);
|
||||
|
||||
let bump_keys: Vec<&str> = results.iter().take(max_results).map(|r| r.key.as_str()).collect();
|
||||
let _ = lookups::bump_many(&bump_keys);
|
||||
if raw.is_empty() {
|
||||
eprintln!("No results");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for (i, r) in results.iter().enumerate().take(max_results) {
|
||||
let marker = if r.is_direct { "→" } else { " " };
|
||||
let weight = view.node_weight(&r.key);
|
||||
println!("{}{:2}. [{:.2}/{:.2}] {}", marker, i + 1, r.activation, weight, r.key);
|
||||
if full {
|
||||
if let Some(content) = view.node_content(&r.key) {
|
||||
println!();
|
||||
for line in content.lines() {
|
||||
println!(" {}", line);
|
||||
for (i, (key, score)) in raw.iter().enumerate().take(max_results) {
|
||||
let weight = store.nodes.get(key).map(|n| n.weight).unwrap_or(0.0);
|
||||
println!("{:2}. [{:.2}/{:.2}] {}", i + 1, score, weight, key);
|
||||
if full {
|
||||
if let Some(node) = store.nodes.get(key) {
|
||||
println!();
|
||||
for line in node.content.lines() {
|
||||
println!(" {}", line);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Fast MmapView path — algorithm-only pipeline
|
||||
let view = store::AnyView::load()?;
|
||||
let graph = graph::build_graph_fast(&view);
|
||||
|
||||
let terms_map: BTreeMap<String, f64> = query.split_whitespace()
|
||||
.map(|t| (t.to_lowercase(), 1.0))
|
||||
.collect();
|
||||
let (seeds, direct_hits) = search::match_seeds(&terms_map, &view);
|
||||
|
||||
if seeds.is_empty() {
|
||||
eprintln!("No results for '{}'", query);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if debug {
|
||||
println!("[search] {} seeds from query '{}'", seeds.len(), query);
|
||||
}
|
||||
|
||||
// Extract AlgoStages from the unified stages
|
||||
let algo_stages: Vec<&search::AlgoStage> = stages.iter()
|
||||
.filter_map(|s| match s {
|
||||
search::Stage::Algorithm(a) => Some(a),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
let algo_owned: Vec<search::AlgoStage> = algo_stages.into_iter().cloned().collect();
|
||||
|
||||
let raw = search::run_pipeline(&algo_owned, seeds, &graph, &view, debug, max_results);
|
||||
|
||||
let results: Vec<search::SearchResult> = raw.into_iter()
|
||||
.map(|(key, activation)| {
|
||||
let is_direct = direct_hits.contains(&key);
|
||||
search::SearchResult { key, activation, is_direct, snippet: None }
|
||||
})
|
||||
.collect();
|
||||
|
||||
if results.is_empty() {
|
||||
eprintln!("No results for '{}'", query);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Log retrieval
|
||||
store::Store::log_retrieval_static(&query,
|
||||
&results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
|
||||
|
||||
let bump_keys: Vec<&str> = results.iter().take(max_results).map(|r| r.key.as_str()).collect();
|
||||
let _ = lookups::bump_many(&bump_keys);
|
||||
|
||||
for (i, r) in results.iter().enumerate().take(max_results) {
|
||||
let marker = if r.is_direct { "→" } else { " " };
|
||||
let weight = view.node_weight(&r.key);
|
||||
println!("{}{:2}. [{:.2}/{:.2}] {}", marker, i + 1, r.activation, weight, r.key);
|
||||
if full {
|
||||
if let Some(content) = view.node_content(&r.key) {
|
||||
println!();
|
||||
for line in content.lines() {
|
||||
println!(" {}", line);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue