search: make component and content matching opt-in
Default search now uses exact key matching only. Component matching (--fuzzy) and content search (--content) are now opt-in via explicit flags. This makes missing graph structure visible instead of silently falling back to broader matching.
This commit is contained in:
parent
12dd320a29
commit
15dedea322
2 changed files with 43 additions and 23 deletions
|
|
@ -77,6 +77,12 @@ enum Command {
|
|||
/// Show debug output for each pipeline stage
|
||||
#[arg(long)]
|
||||
debug: bool,
|
||||
/// Also match key components (e.g. "irc" matches "irc-access")
|
||||
#[arg(long)]
|
||||
fuzzy: bool,
|
||||
/// Also search node content (slow, use when graph search misses)
|
||||
#[arg(long)]
|
||||
content: bool,
|
||||
},
|
||||
/// Scan markdown files, index all memory units
|
||||
Init,
|
||||
|
|
@ -487,8 +493,8 @@ fn main() {
|
|||
let cli = Cli::parse();
|
||||
|
||||
let result = match cli.command {
|
||||
Command::Search { query, pipeline, expand, full, debug }
|
||||
=> cmd_search(&query, &pipeline, expand, full, debug),
|
||||
Command::Search { query, pipeline, expand, full, debug, fuzzy, content }
|
||||
=> cmd_search(&query, &pipeline, expand, full, debug, fuzzy, content),
|
||||
Command::Init => cmd_init(),
|
||||
Command::Migrate => cmd_migrate(),
|
||||
Command::Health => cmd_health(),
|
||||
|
|
@ -594,7 +600,7 @@ fn main() {
|
|||
|
||||
// ── Command implementations ─────────────────────────────────────────
|
||||
|
||||
fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bool, debug: bool) -> Result<(), String> {
|
||||
fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bool, debug: bool, fuzzy: bool, content: bool) -> Result<(), String> {
|
||||
use store::StoreView;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
|
|
@ -636,7 +642,7 @@ fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bo
|
|||
let terms_map: BTreeMap<String, f64> = query.split_whitespace()
|
||||
.map(|t| (t.to_lowercase(), 1.0))
|
||||
.collect();
|
||||
let (seeds, _) = search::match_seeds(&terms_map, &store);
|
||||
let (seeds, _) = search::match_seeds_opts(&terms_map, &store, fuzzy, content);
|
||||
seeds
|
||||
};
|
||||
|
||||
|
|
@ -668,7 +674,7 @@ fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bo
|
|||
let terms_map: BTreeMap<String, f64> = query.split_whitespace()
|
||||
.map(|t| (t.to_lowercase(), 1.0))
|
||||
.collect();
|
||||
let (seeds, direct_hits) = search::match_seeds(&terms_map, &view);
|
||||
let (seeds, direct_hits) = search::match_seeds_opts(&terms_map, &view, fuzzy, content);
|
||||
|
||||
if seeds.is_empty() {
|
||||
eprintln!("No results for '{}'", query);
|
||||
|
|
|
|||
|
|
@ -581,6 +581,15 @@ fn run_transform(
|
|||
pub fn match_seeds(
|
||||
terms: &BTreeMap<String, f64>,
|
||||
store: &impl StoreView,
|
||||
) -> (Vec<(String, f64)>, HashSet<String>) {
|
||||
match_seeds_opts(terms, store, false, false)
|
||||
}
|
||||
|
||||
pub fn match_seeds_opts(
|
||||
terms: &BTreeMap<String, f64>,
|
||||
store: &impl StoreView,
|
||||
component_match: bool,
|
||||
content_fallback: bool,
|
||||
) -> (Vec<(String, f64)>, HashSet<String>) {
|
||||
let mut seed_map: HashMap<String, f64> = HashMap::new();
|
||||
let mut direct_hits: HashSet<String> = HashSet::new();
|
||||
|
|
@ -613,28 +622,33 @@ pub fn match_seeds(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Strategy 2: key component match (0.5× weight)
|
||||
if let Some(matches) = component_map.get(term.as_str()) {
|
||||
for (orig_key, node_weight) in matches {
|
||||
let score = term_weight * node_weight * 0.5;
|
||||
*seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
|
||||
direct_hits.insert(orig_key.clone());
|
||||
// Strategy 2: key component match (0.5× weight) — only when explicitly requested
|
||||
if component_match {
|
||||
if let Some(matches) = component_map.get(term.as_str()) {
|
||||
for (orig_key, node_weight) in matches {
|
||||
let score = term_weight * node_weight * 0.5;
|
||||
*seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
|
||||
direct_hits.insert(orig_key.clone());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Strategy 3: content match (0.2× weight, limited to avoid O(n*m) explosion)
|
||||
let term_lower = term.to_lowercase();
|
||||
if term_lower.len() < 3 { continue; }
|
||||
let mut content_hits = 0;
|
||||
store.for_each_node(|key, content, weight| {
|
||||
if content_hits >= 50 { return; }
|
||||
if content.to_lowercase().contains(&term_lower) {
|
||||
let score = term_weight * weight as f64 * 0.2;
|
||||
*seed_map.entry(key.to_owned()).or_insert(0.0) += score;
|
||||
content_hits += 1;
|
||||
// Strategy 3: content match (0.2× weight) — only when explicitly requested
|
||||
if content_fallback {
|
||||
let term_lower = term.to_lowercase();
|
||||
if term_lower.len() >= 3 {
|
||||
let mut content_hits = 0;
|
||||
store.for_each_node(|key, content, weight| {
|
||||
if content_hits >= 50 { return; }
|
||||
if content.to_lowercase().contains(&term_lower) {
|
||||
let score = term_weight * weight as f64 * 0.2;
|
||||
*seed_map.entry(key.to_owned()).or_insert(0.0) += score;
|
||||
content_hits += 1;
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let seeds: Vec<(String, f64)> = seed_map.into_iter().collect();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue