search: make component and content matching opt-in

Default search now uses exact key match only. Component matching
(--fuzzy) and content search (--content) are explicit flags. This
makes missing graph structure visible instead of silently falling
back to broad matching.
This commit is contained in:
ProofOfConcept 2026-03-10 23:01:46 -04:00
parent 12dd320a29
commit 15dedea322
2 changed files with 43 additions and 23 deletions

View file

@ -77,6 +77,12 @@ enum Command {
/// Show debug output for each pipeline stage
#[arg(long)]
debug: bool,
/// Also match key components (e.g. "irc" matches "irc-access")
#[arg(long)]
fuzzy: bool,
/// Also search node content (slow, use when graph search misses)
#[arg(long)]
content: bool,
},
/// Scan markdown files, index all memory units
Init,
@ -487,8 +493,8 @@ fn main() {
let cli = Cli::parse();
let result = match cli.command {
Command::Search { query, pipeline, expand, full, debug }
=> cmd_search(&query, &pipeline, expand, full, debug),
Command::Search { query, pipeline, expand, full, debug, fuzzy, content }
=> cmd_search(&query, &pipeline, expand, full, debug, fuzzy, content),
Command::Init => cmd_init(),
Command::Migrate => cmd_migrate(),
Command::Health => cmd_health(),
@ -594,7 +600,7 @@ fn main() {
// ── Command implementations ─────────────────────────────────────────
fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bool, debug: bool) -> Result<(), String> {
fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bool, debug: bool, fuzzy: bool, content: bool) -> Result<(), String> {
use store::StoreView;
use std::collections::BTreeMap;
@ -636,7 +642,7 @@ fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bo
let terms_map: BTreeMap<String, f64> = query.split_whitespace()
.map(|t| (t.to_lowercase(), 1.0))
.collect();
let (seeds, _) = search::match_seeds(&terms_map, &store);
let (seeds, _) = search::match_seeds_opts(&terms_map, &store, fuzzy, content);
seeds
};
@ -668,7 +674,7 @@ fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bo
let terms_map: BTreeMap<String, f64> = query.split_whitespace()
.map(|t| (t.to_lowercase(), 1.0))
.collect();
let (seeds, direct_hits) = search::match_seeds(&terms_map, &view);
let (seeds, direct_hits) = search::match_seeds_opts(&terms_map, &view, fuzzy, content);
if seeds.is_empty() {
eprintln!("No results for '{}'", query);

View file

@ -581,6 +581,15 @@ fn run_transform(
/// Match query terms against the store with both opt-in strategies disabled.
///
/// Thin wrapper around `match_seeds_opts` that passes `false` for both
/// `component_match` and `content_fallback`, so neither the key-component
/// strategy nor the content-scan strategy runs.
///
/// Returns whatever `match_seeds_opts` returns: a list of `(key, score)`
/// seeds and the set of keys recorded as direct hits.
pub fn match_seeds(
terms: &BTreeMap<String, f64>,
store: &impl StoreView,
) -> (Vec<(String, f64)>, HashSet<String>) {
// component_match = false, content_fallback = false
match_seeds_opts(terms, store, false, false)
}
pub fn match_seeds_opts(
terms: &BTreeMap<String, f64>,
store: &impl StoreView,
component_match: bool,
content_fallback: bool,
) -> (Vec<(String, f64)>, HashSet<String>) {
let mut seed_map: HashMap<String, f64> = HashMap::new();
let mut direct_hits: HashSet<String> = HashSet::new();
@ -613,28 +622,33 @@ pub fn match_seeds(
continue;
}
// Strategy 2: key component match (0.5× weight)
if let Some(matches) = component_map.get(term.as_str()) {
for (orig_key, node_weight) in matches {
let score = term_weight * node_weight * 0.5;
*seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
direct_hits.insert(orig_key.clone());
// Strategy 2: key component match (0.5× weight) — only when explicitly requested
if component_match {
if let Some(matches) = component_map.get(term.as_str()) {
for (orig_key, node_weight) in matches {
let score = term_weight * node_weight * 0.5;
*seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
direct_hits.insert(orig_key.clone());
}
continue;
}
continue;
}
// Strategy 3: content match (0.2× weight, limited to avoid O(n*m) explosion)
let term_lower = term.to_lowercase();
if term_lower.len() < 3 { continue; }
let mut content_hits = 0;
store.for_each_node(|key, content, weight| {
if content_hits >= 50 { return; }
if content.to_lowercase().contains(&term_lower) {
let score = term_weight * weight as f64 * 0.2;
*seed_map.entry(key.to_owned()).or_insert(0.0) += score;
content_hits += 1;
// Strategy 3: content match (0.2× weight) — only when explicitly requested
if content_fallback {
let term_lower = term.to_lowercase();
if term_lower.len() >= 3 {
let mut content_hits = 0;
store.for_each_node(|key, content, weight| {
if content_hits >= 50 { return; }
if content.to_lowercase().contains(&term_lower) {
let score = term_weight * weight as f64 * 0.2;
*seed_map.entry(key.to_owned()).or_insert(0.0) += score;
content_hits += 1;
}
});
}
});
}
}
let seeds: Vec<(String, f64)> = seed_map.into_iter().collect();