memory-search: add fuzzy key matching and content-based seed extraction

match_seeds() previously only found nodes whose keys exactly matched
search terms. This meant searches like "formal verification" or
"bcachefs plan" returned nothing — no nodes are keyed with those
exact strings.

Three-tier matching strategy:
1. Exact key match (full weight) — unchanged
2. Key component match (0.5× weight) — split keys on -/_/./#,
   match individual words. "plan" now finds "the-plan", "verification"
   finds "c-to-rust-verification-workflow", etc.
3. Content match (0.2× weight, capped at 50 hits) — search node
   content for terms that didn't match any key. Catches nodes whose
   keys are opaque but whose content is relevant.

Also adds prompt-based seeding to the hook pipeline: run extract_query_terms
on the user's prompt and merge the resulting terms into the term set. Previously the hook
only seeded from transcript scanning (finding node keys as substrings
in conversation history), which meant fresh sessions or queries about
new topics produced no search results at all.
This commit is contained in:
ProofOfConcept 2026-03-10 00:41:08 -04:00
parent 2f896bca2c
commit 06df66cf4c
2 changed files with 64 additions and 7 deletions

View file

@ -192,10 +192,22 @@ fn main() {
// Search for node keys in last ~150k tokens of transcript
if debug { println!("[memory-search] transcript: {}", transcript_path); }
let terms = extract_weighted_terms(transcript_path, 150_000, &store);
let mut terms = extract_weighted_terms(transcript_path, 150_000, &store);
// Also extract terms from the prompt itself (handles fresh sessions
// and queries about topics not yet mentioned in the transcript)
let prompt_terms = search::extract_query_terms(prompt, 8);
if !prompt_terms.is_empty() {
if debug { println!("[memory-search] prompt terms: {}", prompt_terms); }
for word in prompt_terms.split_whitespace() {
let lower = word.to_lowercase();
// Prompt terms get weight 1.0 (same as direct mention) unless the term is
// already present, in which case its existing weight is kept
terms.entry(lower).or_insert(1.0);
}
}
if debug {
println!("[memory-search] {} node keys found in transcript", terms.len());
println!("[memory-search] {} terms total", terms.len());
let mut by_weight: Vec<_> = terms.iter().collect();
by_weight.sort_by(|a, b| b.1.total_cmp(a.1));
for (term, weight) in by_weight.iter().take(20) {
@ -204,7 +216,7 @@ fn main() {
}
if terms.is_empty() {
if debug { println!("[memory-search] no node keys found, done"); }
if debug { println!("[memory-search] no terms found, done"); }
return;
}