consciousness/poc-memory/src/search.rs
Kent Overstreet 05c7d55949 spread: simultaneous wavefront instead of independent BFS
All seeds emit at once. At each hop, activations from all sources
sum at each node, and the combined map propagates on the next hop.
Nodes where multiple wavefronts overlap get reinforced and radiate
stronger — natural interference patterns.

Lower default min_activation threshold (×0.1) since individual
contributions are smaller in additive mode.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
2026-03-09 01:35:27 -04:00

790 lines
28 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Memory search: composable algorithm pipeline.
//
// Each algorithm is a stage: takes seeds Vec<(String, f64)>, returns
// new/modified seeds. Stages compose left-to-right in a pipeline.
//
// Available algorithms:
// spread — spreading activation through graph edges
// spectral — nearest neighbors in spectral embedding space
// manifold — extrapolation along direction defined by seeds (TODO)
//
// Seed extraction (matching query terms to node keys) is shared
// infrastructure, not an algorithm stage.
use crate::store::StoreView;
use crate::graph::Graph;
use crate::spectral;
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt;
/// A single scored hit returned by the search entry points.
pub struct SearchResult {
    /// Node key that matched (original casing as stored).
    pub key: String,
    /// Final score from the algorithm pipeline; higher = more relevant.
    pub activation: f64,
    /// True when the key was matched directly from query terms,
    /// false when it was discovered by an algorithm stage.
    pub is_direct: bool,
    /// Optional content excerpt. The search functions in this module
    /// always leave this as `None`; callers may fill it in.
    pub snippet: Option<String>,
}
/// A parsed algorithm stage with its parameters.
#[derive(Clone, Debug)]
pub struct AlgoStage {
    /// Which algorithm this stage runs.
    pub algo: Algorithm,
    /// Free-form key=value parameters from the stage spec
    /// (e.g. "max_hops=4"); each algorithm reads the keys it knows.
    pub params: HashMap<String, String>,
}
/// The available pipeline algorithms (see the run_* functions below).
#[derive(Clone, Debug)]
pub enum Algorithm {
    /// Spreading activation through graph edges.
    Spread,
    /// Nearest neighbors in spectral embedding space.
    Spectral,
    /// Extrapolation along the direction defined by the seeds.
    Manifold,
    /// Multi-source reachability: rewards nodes reachable from several seeds.
    Confluence,
    /// Straightest graph paths between seed pairs in spectral space.
    Geodesic,
}
impl fmt::Display for Algorithm {
    /// Renders the canonical lowercase name — the same spelling that
    /// `AlgoStage::parse` accepts.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match self {
            Algorithm::Spread => "spread",
            Algorithm::Spectral => "spectral",
            Algorithm::Manifold => "manifold",
            Algorithm::Confluence => "confluence",
            Algorithm::Geodesic => "geodesic",
        };
        f.write_str(name)
    }
}
impl AlgoStage {
    /// Parse "spread,max_hops=4,edge_decay=0.5" into an AlgoStage.
    ///
    /// The first comma-separated token is the algorithm name; every
    /// following token must be `key=val`.
    ///
    /// # Errors
    /// Returns `Err` for an unknown algorithm name or a parameter token
    /// that is missing `=`.
    pub fn parse(s: &str) -> Result<Self, String> {
        let mut parts = s.split(',');
        let name = parts.next().unwrap_or("");
        let algo = match name {
            "spread" => Algorithm::Spread,
            "spectral" => Algorithm::Spectral,
            "manifold" => Algorithm::Manifold,
            "confluence" => Algorithm::Confluence,
            "geodesic" => Algorithm::Geodesic,
            _ => return Err(format!("unknown algorithm: {}", name)),
        };
        let mut params = HashMap::new();
        for part in parts {
            if let Some((k, v)) = part.split_once('=') {
                params.insert(k.to_string(), v.to_string());
            } else {
                return Err(format!("bad param (expected key=val): {}", part));
            }
        }
        Ok(AlgoStage { algo, params })
    }

    /// Generic parameter lookup: parse `params[key]` as `T`, falling back
    /// to `default` when the key is absent or fails to parse. The three
    /// typed accessors below were identical except for the type; they now
    /// share this one implementation.
    fn param<T: std::str::FromStr>(&self, key: &str, default: T) -> T {
        self.params.get(key)
            .and_then(|v| v.parse().ok())
            .unwrap_or(default)
    }

    /// `f64` parameter with a default.
    fn param_f64(&self, key: &str, default: f64) -> f64 {
        self.param(key, default)
    }

    /// `u32` parameter with a default.
    fn param_u32(&self, key: &str, default: u32) -> u32 {
        self.param(key, default)
    }

    /// `usize` parameter with a default.
    fn param_usize(&self, key: &str, default: usize) -> usize {
        self.param(key, default)
    }
}
/// Extract seeds from weighted terms by matching against node keys.
///
/// Matching is case-insensitive (terms are expected lowercased; node keys
/// are lowercased for comparison). A matching node becomes a seed scored
/// by term_weight × node_weight.
///
/// Returns (seeds, direct_hits) where direct_hits tracks which keys
/// were matched directly (vs found by an algorithm stage).
pub fn match_seeds(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
) -> (Vec<(String, f64)>, HashSet<String>) {
    // Lowercased key → (original key, node weight), built once up front.
    let mut by_lower: HashMap<String, (String, f64)> = HashMap::new();
    store.for_each_node(|key, _content, weight| {
        by_lower.insert(key.to_lowercase(), (key.to_owned(), weight as f64));
    });
    let mut seeds: Vec<(String, f64)> = Vec::new();
    let mut direct_hits: HashSet<String> = HashSet::new();
    for (term, &term_weight) in terms {
        if let Some((orig_key, node_weight)) = by_lower.get(term) {
            seeds.push((orig_key.clone(), term_weight * node_weight));
            direct_hits.insert(orig_key.clone());
        }
    }
    (seeds, direct_hits)
}
/// Run a pipeline of algorithm stages.
///
/// Each stage consumes the previous stage's (key, score) list and produces
/// a new one; the final list is truncated to `max_results`.
///
/// With `debug`, prints per-stage input counts and the top 15 results,
/// marking the entry that sits exactly at the `max_results` cutoff.
pub fn run_pipeline(
    stages: &[AlgoStage],
    seeds: Vec<(String, f64)>,
    graph: &Graph,
    store: &impl StoreView,
    debug: bool,
    max_results: usize,
) -> Vec<(String, f64)> {
    let mut current = seeds;
    for stage in stages {
        if debug {
            println!("\n[search] === {} ({} seeds in) ===", stage.algo, current.len());
        }
        current = match stage.algo {
            Algorithm::Spread => run_spread(&current, graph, store, stage, debug),
            Algorithm::Spectral => run_spectral(&current, graph, stage, debug),
            Algorithm::Manifold => run_manifold(&current, graph, stage, debug),
            Algorithm::Confluence => run_confluence(&current, graph, store, stage, debug),
            Algorithm::Geodesic => run_geodesic(&current, graph, stage, debug),
        };
        if debug {
            // Fixed: format string was "{}{} results", which printed the
            // stage name and count run together (e.g. "spread42 results").
            println!("[search] {}: {} results", stage.algo, current.len());
            for (i, (key, score)) in current.iter().enumerate().take(15) {
                let cutoff = if i + 1 == max_results { " <-- cutoff" } else { "" };
                println!(" [{:.4}] {}{}", score, key, cutoff);
            }
            if current.len() > 15 {
                println!(" ... ({} more)", current.len() - 15);
            }
        }
    }
    current.truncate(max_results);
    current
}
/// Spreading activation: propagate scores through graph edges.
///
/// Thin adapter that resolves parameters (stage overrides beat the
/// store-level defaults) and delegates to `spreading_activation`.
///
/// Tunable params: max_hops (default from store), edge_decay (default from store),
/// min_activation (default from store).
fn run_spread(
    seeds: &[(String, f64)],
    graph: &Graph,
    store: &impl StoreView,
    stage: &AlgoStage,
    _debug: bool,
) -> Vec<(String, f64)> {
    let store_params = store.params();
    let max_hops = stage.param_u32("max_hops", store_params.max_hops);
    let edge_decay = stage.param_f64("edge_decay", store_params.edge_decay);
    // ×0.1: in additive wavefront mode each individual contribution is
    // smaller than under the old per-source BFS, so the store's threshold
    // would prune too aggressively.
    let min_activation = stage.param_f64("min_activation", store_params.min_activation * 0.1);
    spreading_activation(seeds, graph, store, max_hops, edge_decay, min_activation)
}
/// Spectral projection: find nearest neighbors in spectral embedding space.
///
/// Tunable params: k (default 20, number of neighbors to find).
fn run_spectral(
    seeds: &[(String, f64)],
    graph: &Graph,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let k = stage.param_usize("k", 20);
    // No embedding available → this stage is a passthrough.
    let emb = match spectral::load_embedding() {
        Ok(e) => e,
        Err(e) => {
            if debug { println!(" no spectral embedding: {}", e); }
            return seeds.to_vec();
        }
    };
    let weighted_seeds: Vec<(&str, f64)> =
        seeds.iter().map(|(key, w)| (key.as_str(), *w)).collect();
    let projected = spectral::nearest_to_seeds_weighted(&emb, &weighted_seeds, Some(graph), k);
    if debug {
        for (key, dist) in &projected {
            let score = 1.0 / (1.0 + dist);
            println!(" dist={:.6} score={:.4} {}", dist, score, key);
        }
    }
    // Keep the original seeds; append projected nodes not already seeded,
    // converting distance to a similarity score in (0, 1].
    let known: HashSet<&str> = seeds.iter().map(|(key, _)| key.as_str()).collect();
    let mut merged = seeds.to_vec();
    merged.extend(
        projected
            .into_iter()
            .filter(|(key, _)| !known.contains(key.as_str()))
            .map(|(key, dist)| (key, 1.0 / (1.0 + dist))),
    );
    merged
}
/// Confluence: multi-source reachability scoring.
///
/// Unlike spreading activation (which takes max activation from any source),
/// confluence rewards nodes reachable from *multiple* seeds. For each candidate
/// node within k hops, score = sum of (seed_weight * edge_decay^distance) across
/// all seeds that can reach it. Nodes at the intersection of multiple seeds'
/// neighborhoods score highest.
///
/// This naturally handles mixed seeds: unrelated seeds activate disjoint
/// neighborhoods that don't overlap, so their results separate naturally.
///
/// Tunable params: max_hops (default 3), edge_decay (default 0.5),
/// min_sources (default 2, minimum number of distinct seeds that must reach a node).
fn run_confluence(
    seeds: &[(String, f64)],
    graph: &Graph,
    store: &impl StoreView,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let max_hops = stage.param_u32("max_hops", 3);
    let edge_decay = stage.param_f64("edge_decay", 0.5);
    let min_sources = stage.param_usize("min_sources", 2);
    // For each seed, BFS outward collecting (node → activation) at each
    // distance, and track which seeds contributed to each node's score.
    let mut node_scores: HashMap<String, f64> = HashMap::new();
    let mut node_sources: HashMap<String, HashSet<usize>> = HashMap::new();
    for (seed_idx, (seed_key, seed_weight)) in seeds.iter().enumerate() {
        let mut visited: HashMap<String, f64> = HashMap::new();
        let mut queue: VecDeque<(String, u32)> = VecDeque::new();
        visited.insert(seed_key.clone(), *seed_weight);
        queue.push_back((seed_key.clone(), 0));
        while let Some((key, depth)) = queue.pop_front() {
            if depth >= max_hops { continue; }
            let act = visited[&key];
            for (neighbor, strength) in graph.neighbors(&key) {
                let neighbor_weight = store.node_weight(neighbor.as_str());
                let propagated = act * edge_decay * neighbor_weight * strength as f64;
                // Hard floor so negligible activations don't keep the BFS alive.
                if propagated < 0.001 { continue; }
                // Re-enqueue a node only when this path activates it more strongly.
                if !visited.contains_key(neighbor.as_str()) || visited[neighbor.as_str()] < propagated {
                    visited.insert(neighbor.clone(), propagated);
                    queue.push_back((neighbor.clone(), depth + 1));
                }
            }
        }
        // Accumulate into global scores (additive across seeds).
        for (key, act) in visited {
            *node_scores.entry(key.clone()).or_insert(0.0) += act;
            node_sources.entry(key).or_default().insert(seed_idx);
        }
    }
    // Filter to nodes reached by min_sources distinct seeds.
    let mut results: Vec<(String, f64)> = node_scores.into_iter()
        .filter(|(key, _)| {
            node_sources.get(key).map(|s| s.len()).unwrap_or(0) >= min_sources
        })
        .collect();
    // Sort BEFORE the debug dump: previously the sort ran after the print,
    // so the "top 15" shown were in arbitrary HashMap order rather than
    // the highest-scoring nodes.
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    if debug {
        // Show source counts for the actual top results.
        for (key, score) in results.iter().take(15) {
            let sources = node_sources.get(key).map(|s| s.len()).unwrap_or(0);
            println!(" [{:.4}] {} (from {} seeds)", score, key, sources);
        }
    }
    results
}
/// Geodesic: straightest paths between seed pairs in spectral space.
///
/// For each pair of seeds, walk the graph from one to the other, at each
/// step choosing the neighbor whose spectral direction most aligns with
/// the target direction. Nodes along these geodesic paths score higher
/// the more paths pass through them and the straighter those paths are.
///
/// Tunable params: max_path (default 6), k (default 20 results).
fn run_geodesic(
    seeds: &[(String, f64)],
    graph: &Graph,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let max_path = stage.param_usize("max_path", 6);
    let k = stage.param_usize("k", 20);
    // No embedding available → this stage is a passthrough.
    let emb = match spectral::load_embedding() {
        Ok(e) => e,
        Err(e) => {
            if debug { println!(" no spectral embedding: {}", e); }
            return seeds.to_vec();
        }
    };
    // Filter seeds to those with valid spectral coords
    // (an all-zero coordinate vector is treated as "no embedding").
    let valid_seeds: Vec<(&str, f64, &Vec<f64>)> = seeds.iter()
        .filter_map(|(key, weight)| {
            emb.coords.get(key.as_str())
                .filter(|c| c.iter().any(|&v| v.abs() > 1e-12))
                .map(|c| (key.as_str(), *weight, c))
        })
        .collect();
    if valid_seeds.len() < 2 {
        if debug { println!(" need ≥2 seeds with spectral coords, have {}", valid_seeds.len()); }
        return seeds.to_vec();
    }
    // For each pair of seeds, find the geodesic path.
    // path_counts accumulates, per node, pair_weight × alignment over every
    // path that passes through it.
    let mut path_counts: HashMap<String, f64> = HashMap::new();
    let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();
    for i in 0..valid_seeds.len() {
        for j in (i + 1)..valid_seeds.len() {
            let (key_a, weight_a, coords_a) = &valid_seeds[i];
            let (key_b, weight_b, coords_b) = &valid_seeds[j];
            let pair_weight = weight_a * weight_b;
            // Walk from A toward B
            let path_ab = geodesic_walk(
                key_a, coords_a, coords_b, graph, &emb, max_path,
            );
            // Walk from B toward A
            let path_ba = geodesic_walk(
                key_b, coords_b, coords_a, graph, &emb, max_path,
            );
            // Score nodes on both paths (nodes found from both directions score double)
            for (node, alignment) in path_ab.iter().chain(path_ba.iter()) {
                if !seed_set.contains(node.as_str()) {
                    *path_counts.entry(node.clone()).or_insert(0.0) += pair_weight * alignment;
                }
            }
        }
    }
    if debug && !path_counts.is_empty() {
        println!(" {} pairs examined, {} distinct nodes on paths",
            valid_seeds.len() * (valid_seeds.len() - 1) / 2,
            path_counts.len());
    }
    // Merge with original seeds: keep the top-k path nodes (by accumulated
    // score) that aren't themselves seeds, then rank everything together.
    let mut results = seeds.to_vec();
    let mut path_results: Vec<(String, f64)> = path_counts.into_iter().collect();
    path_results.sort_by(|a, b| b.1.total_cmp(&a.1));
    path_results.truncate(k);
    for (key, score) in path_results {
        if !seed_set.contains(key.as_str()) {
            results.push((key, score));
        }
    }
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    results
}
/// Walk from `start` toward `target_coords` in spectral space, choosing
/// the neighbor at each step whose direction most aligns with the target.
/// Returns (node_key, alignment_score) for each intermediate node.
fn geodesic_walk(
    start: &str,
    start_coords: &[f64],
    target_coords: &[f64],
    graph: &Graph,
    emb: &spectral::SpectralEmbedding,
    max_steps: usize,
) -> Vec<(String, f64)> {
    let mut path = Vec::new();
    let mut current = start.to_string();
    let mut current_coords = start_coords.to_vec();
    // Visited set prevents the walk from looping back on itself.
    let mut visited: HashSet<String> = HashSet::new();
    visited.insert(current.clone());
    for _ in 0..max_steps {
        // Direction we want to travel: from current toward target
        let direction: Vec<f64> = target_coords.iter()
            .zip(current_coords.iter())
            .map(|(t, c)| t - c)
            .collect();
        let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();
        if dir_norm < 1e-12 { break; } // arrived
        // Among neighbors with spectral coords, find the one most aligned
        let mut best: Option<(String, Vec<f64>, f64)> = None;
        for (neighbor, _strength) in graph.neighbors(&current) {
            if visited.contains(neighbor.as_str()) { continue; }
            // Skip neighbors without usable embeddings (all-zero vector).
            let neighbor_coords = match emb.coords.get(neighbor.as_str()) {
                Some(c) if c.iter().any(|&v| v.abs() > 1e-12) => c,
                _ => continue,
            };
            // Direction to this neighbor
            let step: Vec<f64> = neighbor_coords.iter()
                .zip(current_coords.iter())
                .map(|(n, c)| n - c)
                .collect();
            let step_norm = step.iter().map(|s| s * s).sum::<f64>().sqrt();
            if step_norm < 1e-12 { continue; }
            // Cosine similarity between desired direction and step direction
            let dot: f64 = direction.iter().zip(step.iter()).map(|(d, s)| d * s).sum();
            let alignment = dot / (dir_norm * step_norm);
            if alignment > 0.0 { // only consider forward-facing neighbors
                // Strict > keeps the first-seen neighbor on an exact tie.
                if best.as_ref().map(|(_, _, a)| alignment > *a).unwrap_or(true) {
                    best = Some((neighbor.clone(), neighbor_coords.clone(), alignment));
                }
            }
        }
        match best {
            Some((next_key, next_coords, alignment)) => {
                path.push((next_key.clone(), alignment));
                visited.insert(next_key.clone());
                current = next_key;
                current_coords = next_coords;
            }
            None => break, // no forward-facing neighbors
        }
    }
    path
}
/// Manifold: extrapolation along the direction defined by seeds.
///
/// Instead of finding what's *near* the seeds in spectral space (proximity),
/// find what's in the *direction* the seeds define. Given a weighted centroid
/// of seeds and the principal direction they span, find nodes that continue
/// along that direction.
///
/// Tunable params: k (default 20 results).
fn run_manifold(
    seeds: &[(String, f64)],
    graph: &Graph,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let k = stage.param_usize("k", 20);
    // No embedding available → this stage is a passthrough.
    let emb = match spectral::load_embedding() {
        Ok(e) => e,
        Err(e) => {
            if debug { println!(" no spectral embedding: {}", e); }
            return seeds.to_vec();
        }
    };
    // Collect seeds with valid spectral coordinates
    // (an all-zero vector is treated as "no embedding for this node").
    let seed_data: Vec<(&str, f64, &Vec<f64>)> = seeds.iter()
        .filter_map(|(key, weight)| {
            emb.coords.get(key.as_str())
                .filter(|c| c.iter().any(|&v| v.abs() > 1e-12))
                .map(|c| (key.as_str(), *weight, c))
        })
        .collect();
    if seed_data.is_empty() {
        if debug { println!(" no seeds with spectral coords"); }
        return seeds.to_vec();
    }
    let dims = emb.dims;
    // Compute weighted centroid of seeds
    let mut centroid = vec![0.0f64; dims];
    let mut total_weight = 0.0;
    for (_, weight, coords) in &seed_data {
        for (i, &c) in coords.iter().enumerate() {
            centroid[i] += c * weight;
        }
        total_weight += weight;
    }
    if total_weight > 0.0 {
        for c in &mut centroid {
            *c /= total_weight;
        }
    }
    // Compute principal direction via power iteration on seed covariance.
    // Initialize with the two most separated seeds (largest spectral distance).
    let mut direction = vec![0.0f64; dims];
    if seed_data.len() >= 2 {
        // Find the two seeds furthest apart in spectral space
        let mut best_dist = 0.0f64;
        for i in 0..seed_data.len() {
            for j in (i + 1)..seed_data.len() {
                let dist: f64 = seed_data[i].2.iter().zip(seed_data[j].2.iter())
                    .map(|(a, b)| (a - b).powi(2)).sum::<f64>().sqrt();
                if dist > best_dist {
                    best_dist = dist;
                    for d in 0..dims {
                        direction[d] = seed_data[j].2[d] - seed_data[i].2[d];
                    }
                }
            }
        }
        // Power iteration: 3 rounds on the weighted covariance matrix
        for _ in 0..3 {
            let mut new_dir = vec![0.0f64; dims];
            for (_, weight, coords) in &seed_data {
                // Deviation of this seed from the centroid.
                let dev: Vec<f64> = coords.iter().zip(centroid.iter()).map(|(c, m)| c - m).collect();
                // new_dir += weight * (dev·direction) * dev — one covariance
                // matrix-vector product term per seed, without materializing
                // the matrix.
                let dot: f64 = dev.iter().zip(direction.iter()).map(|(d, v)| d * v).sum();
                for d in 0..dims {
                    new_dir[d] += weight * dot * dev[d];
                }
            }
            // Normalize
            let norm = new_dir.iter().map(|d| d * d).sum::<f64>().sqrt();
            if norm > 1e-12 {
                for d in &mut new_dir { *d /= norm; }
            }
            direction = new_dir;
        }
    }
    // Recomputed here rather than assumed 1.0: with < 2 valid seeds (or a
    // degenerate iteration) `direction` can still be the zero vector.
    let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();
    let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();
    // Score each non-seed node by projection onto the direction from centroid
    let mut candidates: Vec<(String, f64)> = emb.coords.iter()
        .filter(|(key, coords)| {
            !seed_set.contains(key.as_str())
            && coords.iter().any(|&v| v.abs() > 1e-12)
        })
        .map(|(key, coords)| {
            let deviation: Vec<f64> = coords.iter().zip(centroid.iter())
                .map(|(c, m)| c - m)
                .collect();
            let score = if dir_norm > 1e-12 {
                // Project onto direction: how far along the principal axis
                let projection: f64 = deviation.iter().zip(direction.iter())
                    .map(|(d, v)| d * v)
                    .sum::<f64>() / dir_norm;
                // Distance from the axis (perpendicular component)
                let proj_vec: Vec<f64> = direction.iter()
                    .map(|&d| d * projection / dir_norm)
                    .collect();
                let perp_dist: f64 = deviation.iter().zip(proj_vec.iter())
                    .map(|(d, p)| (d - p).powi(2))
                    .sum::<f64>()
                    .sqrt();
                // Score: prefer nodes far along the direction but close to the axis
                // Use absolute projection (both directions from centroid are interesting)
                let along = projection.abs();
                if perp_dist < 1e-12 {
                    along
                } else {
                    along / (1.0 + perp_dist)
                }
            } else {
                // No direction (single seed or all seeds coincide): use distance from centroid
                let dist: f64 = deviation.iter().map(|d| d * d).sum::<f64>().sqrt();
                1.0 / (1.0 + dist)
            };
            // Bonus for being connected to seeds in the graph
            let graph_bonus: f64 = graph.neighbors(key).iter()
                .filter(|(n, _)| seed_set.contains(n.as_str()))
                .map(|(_, s)| *s as f64 * 0.1)
                .sum();
            (key.clone(), score + graph_bonus)
        })
        .collect();
    candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
    candidates.truncate(k);
    if debug {
        for (key, score) in candidates.iter().take(15) {
            println!(" [{:.4}] {}", score, key);
        }
    }
    // Merge with original seeds
    let mut results = seeds.to_vec();
    for (key, score) in candidates {
        results.push((key, score));
    }
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    results
}
/// Simultaneous wavefront spreading activation.
///
/// All seeds emit at once. At each hop, activations from all sources
/// sum at each node, and the combined activation map propagates on
/// the next hop. This creates interference patterns — nodes where
/// multiple wavefronts overlap get reinforced and radiate stronger.
fn spreading_activation(
    seeds: &[(String, f64)],
    graph: &Graph,
    store: &impl StoreView,
    max_hops: u32,
    edge_decay: f64,
    min_activation: f64,
) -> Vec<(String, f64)> {
    // Total activation accumulated per node across all hops.
    let mut total: HashMap<String, f64> = HashMap::new();
    // Current wavefront: the nodes that radiate on the next hop.
    let mut wave: HashMap<String, f64> = HashMap::new();
    for (key, act) in seeds {
        *wave.entry(key.clone()).or_insert(0.0) += act;
        *total.entry(key.clone()).or_insert(0.0) += act;
    }
    // Propagate hop by hop — all sources simultaneously.
    for _ in 0..max_hops {
        let mut next: HashMap<String, f64> = HashMap::new();
        for (key, act) in &wave {
            for (neighbor, strength) in graph.neighbors(key) {
                let neighbor_weight = store.node_weight(neighbor.as_str());
                let propagated = act * edge_decay * neighbor_weight * strength as f64;
                if propagated >= min_activation {
                    *next.entry(neighbor.clone()).or_insert(0.0) += propagated;
                }
            }
        }
        // Wavefront died out entirely — nothing left to propagate.
        if next.is_empty() {
            break;
        }
        // Fold this hop into the totals, then advance the front.
        for (key, act) in &next {
            *total.entry(key.clone()).or_insert(0.0) += act;
        }
        wave = next;
    }
    let mut ranked: Vec<_> = total.into_iter().collect();
    ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
    ranked
}
/// Search with weighted terms: exact key matching + spectral projection.
///
/// Terms are matched against node keys. Matching nodes become seeds,
/// scored by term_weight × node_weight. Seeds are then projected into
/// spectral space to find nearby nodes, with link weights modulating distance.
///
/// Non-debug entry point; returns at most 5 results.
pub fn search_weighted(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
) -> Vec<SearchResult> {
    search_weighted_inner(terms, store, false, 5)
}
/// Like search_weighted but with debug output and configurable result count.
pub fn search_weighted_debug(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
    max_results: usize,
) -> Vec<SearchResult> {
    search_weighted_inner(terms, store, true, max_results)
}
/// Shared implementation behind `search_weighted` / `search_weighted_debug`:
/// build the graph, extract seeds, run the default pipeline, and wrap the
/// results, flagging direct term matches.
fn search_weighted_inner(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
    debug: bool,
    max_results: usize,
) -> Vec<SearchResult> {
    let graph = crate::graph::build_graph_fast(store);
    let (seeds, direct_hits) = match_seeds(terms, store);
    if seeds.is_empty() {
        return Vec::new();
    }
    if debug {
        println!("\n[search] === SEEDS ({}) ===", seeds.len());
        let mut sorted_seeds = seeds.clone();
        sorted_seeds.sort_by(|a, b| b.1.total_cmp(&a.1));
        for (key, score) in sorted_seeds.iter().take(20) {
            println!(" {:.4} {}", score, key);
        }
    }
    // Default pipeline: spectral → spread (legacy behavior)
    let pipeline = [
        AlgoStage { algo: Algorithm::Spectral, params: HashMap::new() },
        AlgoStage { algo: Algorithm::Spread, params: HashMap::new() },
    ];
    run_pipeline(&pipeline, seeds, &graph, store, debug, max_results)
        .into_iter()
        .take(max_results)
        .map(|(key, activation)| SearchResult {
            is_direct: direct_hits.contains(&key),
            key,
            activation,
            snippet: None,
        })
        .collect()
}
/// Search with equal-weight terms (for interactive use).
pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
    // Every whitespace-separated token becomes a weight-1.0 term;
    // duplicates simply overwrite each other in the map.
    let mut terms: BTreeMap<String, f64> = BTreeMap::new();
    for token in query.split_whitespace() {
        terms.insert(token.to_lowercase(), 1.0);
    }
    search_weighted(&terms, store)
}
/// Extract meaningful search terms from natural language.
///
/// Lowercases `text`, splits on every non-alphanumeric character, drops
/// stop words and words shorter than 3 bytes, and returns up to
/// `max_terms` words joined by single spaces.
pub fn extract_query_terms(text: &str, max_terms: usize) -> String {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
        "have", "has", "had", "will", "would", "could", "should", "can",
        "may", "might", "shall", "been", "being", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
        "no", "if", "then", "than", "that", "this", "it", "its", "my",
        "your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
        "what", "how", "why", "when", "where", "about", "just", "let",
        "want", "tell", "show", "think", "know", "see", "look", "make",
        "get", "go", "some", "any", "all", "very", "really", "also", "too",
        "so", "up", "out", "here", "there",
    ];
    text.to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        // len() is byte length, so a 2-char multibyte word can pass; fine
        // for this heuristic filter. len > 2 also subsumes the previous
        // redundant !w.is_empty() check.
        .filter(|w| w.len() > 2 && !STOP_WORDS.contains(w))
        .take(max_terms)
        .collect::<Vec<_>>()
        .join(" ")
}
/// Format search results as text lines (for hook consumption).
///
/// Emits at most 5 results, one numbered line each, with the snippet (when
/// present) on an indented follow-up line.
pub fn format_results(results: &[SearchResult]) -> String {
    let mut out = String::new();
    for (i, r) in results.iter().enumerate().take(5) {
        // NOTE(review): direct hits get an empty marker while indirect hits
        // get a leading space — verify against the hook that this isn't
        // inverted (a marker on the *direct* hit seems more likely intended).
        let marker = if r.is_direct { "" } else { " " };
        // NOTE(review): activation is printed twice in the [x/y] slot —
        // presumably the second field was meant to be a different metric
        // (e.g. node weight); confirm with whatever parses this format
        // before changing it.
        out.push_str(&format!("{}{:2}. [{:.2}/{:.2}] {}",
            marker, i + 1, r.activation, r.activation, r.key));
        out.push('\n');
        if let Some(ref snippet) = r.snippet {
            out.push_str(&format!(" {}\n", snippet));
        }
    }
    out
}