search: make component and content matching opt-in
Default search now uses exact key matching only. Component matching (--fuzzy) and content search (--content) are now opt-in and must be requested with explicit flags. This makes missing graph structure visible instead of silently falling back to broad matching.
This commit is contained in:
parent
12dd320a29
commit
15dedea322
2 changed files with 43 additions and 23 deletions
|
|
@ -581,6 +581,15 @@ fn run_transform(
|
|||
/// Matches the weighted search `terms` against `store` using the default
/// (strict) matching behaviour.
///
/// This is a thin convenience wrapper around `match_seeds_opts` that passes
/// `false` for both `component_match` and `content_fallback`, so neither the
/// key-component strategy nor the content-scan strategy is attempted.
///
/// Returns the same pair as `match_seeds_opts`: a list of `(key, score)`
/// seeds and a set of keys (built there as `direct_hits`).
pub fn match_seeds(
|
||||
terms: &BTreeMap<String, f64>,
|
||||
store: &impl StoreView,
|
||||
) -> (Vec<(String, f64)>, HashSet<String>) {
|
||||
// Both opt-in strategies are disabled for the default entry point.
match_seeds_opts(terms, store, false, false)
|
||||
}
|
||||
|
||||
pub fn match_seeds_opts(
|
||||
terms: &BTreeMap<String, f64>,
|
||||
store: &impl StoreView,
|
||||
component_match: bool,
|
||||
content_fallback: bool,
|
||||
) -> (Vec<(String, f64)>, HashSet<String>) {
|
||||
let mut seed_map: HashMap<String, f64> = HashMap::new();
|
||||
let mut direct_hits: HashSet<String> = HashSet::new();
|
||||
|
|
@ -613,28 +622,33 @@ pub fn match_seeds(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Strategy 2: key component match (0.5× weight)
|
||||
if let Some(matches) = component_map.get(term.as_str()) {
|
||||
for (orig_key, node_weight) in matches {
|
||||
let score = term_weight * node_weight * 0.5;
|
||||
*seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
|
||||
direct_hits.insert(orig_key.clone());
|
||||
// Strategy 2: key component match (0.5× weight) — only when explicitly requested
|
||||
if component_match {
|
||||
if let Some(matches) = component_map.get(term.as_str()) {
|
||||
for (orig_key, node_weight) in matches {
|
||||
let score = term_weight * node_weight * 0.5;
|
||||
*seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
|
||||
direct_hits.insert(orig_key.clone());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Strategy 3: content match (0.2× weight, limited to avoid O(n*m) explosion)
|
||||
let term_lower = term.to_lowercase();
|
||||
if term_lower.len() < 3 { continue; }
|
||||
let mut content_hits = 0;
|
||||
store.for_each_node(|key, content, weight| {
|
||||
if content_hits >= 50 { return; }
|
||||
if content.to_lowercase().contains(&term_lower) {
|
||||
let score = term_weight * weight as f64 * 0.2;
|
||||
*seed_map.entry(key.to_owned()).or_insert(0.0) += score;
|
||||
content_hits += 1;
|
||||
// Strategy 3: content match (0.2× weight) — only when explicitly requested
|
||||
if content_fallback {
|
||||
let term_lower = term.to_lowercase();
|
||||
if term_lower.len() >= 3 {
|
||||
let mut content_hits = 0;
|
||||
store.for_each_node(|key, content, weight| {
|
||||
if content_hits >= 50 { return; }
|
||||
if content.to_lowercase().contains(&term_lower) {
|
||||
let score = term_weight * weight as f64 * 0.2;
|
||||
*seed_map.entry(key.to_owned()).or_insert(0.0) += score;
|
||||
content_hits += 1;
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let seeds: Vec<(String, f64)> = seed_map.into_iter().collect();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue