memory-search: add fuzzy key matching and content-based seed extraction

match_seeds() previously only found nodes whose keys exactly matched
search terms. This meant searches like "formal verification" or
"bcachefs plan" returned nothing — no nodes are keyed with those
exact strings.

Three-tier matching strategy:
1. Exact key match (full weight) — unchanged
2. Key component match (0.5× weight) — split keys on -/_/./#,
   match individual words. "plan" now finds "the-plan", "verification"
   finds "c-to-rust-verification-workflow", etc.
3. Content match (0.2× weight, capped at 50 hits) — search node
   content for terms that didn't match any key. Catches nodes whose
   keys are opaque but whose content is relevant.

Also adds prompt-based seeding to the hook pipeline: extract_query_terms
from the user's prompt and merge into the term set. Previously the hook
only seeded from transcript scanning (finding node keys as substrings
in conversation history), which meant fresh sessions or queries about
new topics produced no search results at all.
This commit is contained in:
ProofOfConcept 2026-03-10 00:41:08 -04:00
parent 2f896bca2c
commit 06df66cf4c
2 changed files with 64 additions and 7 deletions

View file

@ -192,10 +192,22 @@ fn main() {
// Search for node keys in last ~150k tokens of transcript // Search for node keys in last ~150k tokens of transcript
if debug { println!("[memory-search] transcript: {}", transcript_path); } if debug { println!("[memory-search] transcript: {}", transcript_path); }
let terms = extract_weighted_terms(transcript_path, 150_000, &store); let mut terms = extract_weighted_terms(transcript_path, 150_000, &store);
// Also extract terms from the prompt itself (handles fresh sessions
// and queries about topics not yet mentioned in the transcript)
let prompt_terms = search::extract_query_terms(prompt, 8);
if !prompt_terms.is_empty() {
if debug { println!("[memory-search] prompt terms: {}", prompt_terms); }
for word in prompt_terms.split_whitespace() {
let lower = word.to_lowercase();
// Prompt terms get weight 1.0 (same as direct mention)
terms.entry(lower).or_insert(1.0);
}
}
if debug { if debug {
println!("[memory-search] {} node keys found in transcript", terms.len()); println!("[memory-search] {} terms total", terms.len());
let mut by_weight: Vec<_> = terms.iter().collect(); let mut by_weight: Vec<_> = terms.iter().collect();
by_weight.sort_by(|a, b| b.1.total_cmp(a.1)); by_weight.sort_by(|a, b| b.1.total_cmp(a.1));
for (term, weight) in by_weight.iter().take(20) { for (term, weight) in by_weight.iter().take(20) {
@ -204,7 +216,7 @@ fn main() {
} }
if terms.is_empty() { if terms.is_empty() {
if debug { println!("[memory-search] no node keys found, done"); } if debug { println!("[memory-search] no terms found, done"); }
return; return;
} }

View file

@ -96,7 +96,12 @@ impl AlgoStage {
} }
} }
/// Extract seeds from weighted terms by matching against node keys. /// Extract seeds from weighted terms by matching against node keys and content.
///
/// Three matching strategies, in priority order:
/// 1. Exact key match: term matches a node key exactly → full weight
/// 2. Key component match: term matches a word in a hyphenated/underscored key → 0.5× weight
/// 3. Content match: term appears in node content → 0.2× weight (capped at 50 nodes)
/// ///
/// Returns (seeds, direct_hits) where direct_hits tracks which keys /// Returns (seeds, direct_hits) where direct_hits tracks which keys
/// were matched directly (vs found by an algorithm stage). /// were matched directly (vs found by an algorithm stage).
@ -104,22 +109,62 @@ pub fn match_seeds(
terms: &BTreeMap<String, f64>, terms: &BTreeMap<String, f64>,
store: &impl StoreView, store: &impl StoreView,
) -> (Vec<(String, f64)>, HashSet<String>) { ) -> (Vec<(String, f64)>, HashSet<String>) {
let mut seeds: Vec<(String, f64)> = Vec::new(); let mut seed_map: HashMap<String, f64> = HashMap::new();
let mut direct_hits: HashSet<String> = HashSet::new(); let mut direct_hits: HashSet<String> = HashSet::new();
// Build key lookup: lowercase key → (original key, weight)
let mut key_map: HashMap<String, (String, f64)> = HashMap::new(); let mut key_map: HashMap<String, (String, f64)> = HashMap::new();
// Build component index: word → vec of (original key, weight)
let mut component_map: HashMap<String, Vec<(String, f64)>> = HashMap::new();
store.for_each_node(|key, _content, weight| { store.for_each_node(|key, _content, weight| {
key_map.insert(key.to_lowercase(), (key.to_owned(), weight as f64)); let lkey = key.to_lowercase();
key_map.insert(lkey.clone(), (key.to_owned(), weight as f64));
// Split key on hyphens, underscores, dots, hashes for component matching
for component in lkey.split(|c: char| c == '-' || c == '_' || c == '.' || c == '#') {
if component.len() >= 3 {
component_map.entry(component.to_owned())
.or_default()
.push((key.to_owned(), weight as f64));
}
}
}); });
for (term, &term_weight) in terms { for (term, &term_weight) in terms {
// Strategy 1: exact key match
if let Some((orig_key, node_weight)) = key_map.get(term) { if let Some((orig_key, node_weight)) = key_map.get(term) {
let score = term_weight * node_weight; let score = term_weight * node_weight;
seeds.push((orig_key.clone(), score)); *seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
direct_hits.insert(orig_key.clone()); direct_hits.insert(orig_key.clone());
continue;
} }
// Strategy 2: key component match (0.5× weight)
if let Some(matches) = component_map.get(term.as_str()) {
for (orig_key, node_weight) in matches {
let score = term_weight * node_weight * 0.5;
*seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
direct_hits.insert(orig_key.clone());
}
continue;
}
// Strategy 3: content match (0.2× weight, limited to avoid O(n*m) explosion)
let term_lower = term.to_lowercase();
if term_lower.len() < 3 { continue; }
let mut content_hits = 0;
store.for_each_node(|key, content, weight| {
if content_hits >= 50 { return; }
if content.to_lowercase().contains(&term_lower) {
let score = term_weight * weight as f64 * 0.2;
*seed_map.entry(key.to_owned()).or_insert(0.0) += score;
content_hits += 1;
}
});
} }
let seeds: Vec<(String, f64)> = seed_map.into_iter().collect();
(seeds, direct_hits) (seeds, direct_hits)
} }