memory-search: add fuzzy key matching and content-based seed extraction
match_seeds() previously only found nodes whose keys exactly matched the search terms. This meant searches like "formal verification" or "bcachefs plan" returned nothing — no nodes are keyed with those exact strings.

Three-tier matching strategy:

1. Exact key match (full weight) — unchanged.
2. Key component match (0.5× weight) — split keys on -/_/./# and match individual words. "plan" now finds "the-plan", "verification" finds "c-to-rust-verification-workflow", etc.
3. Content match (0.2× weight, capped at 50 hits) — search node content for terms that didn't match any key. Catches nodes whose keys are opaque but whose content is relevant.

Also adds prompt-based seeding to the hook pipeline: run extract_query_terms on the user's prompt and merge the resulting terms into the term set. Previously the hook only seeded from transcript scanning (finding node keys as substrings in conversation history), which meant fresh sessions or queries about new topics produced no search results at all.
This commit is contained in:
parent
2f896bca2c
commit
06df66cf4c
2 changed files with 64 additions and 7 deletions
|
|
@ -192,10 +192,22 @@ fn main() {
|
|||
|
||||
// Search for node keys in last ~150k tokens of transcript
|
||||
if debug { println!("[memory-search] transcript: {}", transcript_path); }
|
||||
let terms = extract_weighted_terms(transcript_path, 150_000, &store);
|
||||
let mut terms = extract_weighted_terms(transcript_path, 150_000, &store);
|
||||
|
||||
// Also extract terms from the prompt itself (handles fresh sessions
|
||||
// and queries about topics not yet mentioned in the transcript)
|
||||
let prompt_terms = search::extract_query_terms(prompt, 8);
|
||||
if !prompt_terms.is_empty() {
|
||||
if debug { println!("[memory-search] prompt terms: {}", prompt_terms); }
|
||||
for word in prompt_terms.split_whitespace() {
|
||||
let lower = word.to_lowercase();
|
||||
// Prompt terms get weight 1.0 (same as direct mention)
|
||||
terms.entry(lower).or_insert(1.0);
|
||||
}
|
||||
}
|
||||
|
||||
if debug {
|
||||
println!("[memory-search] {} node keys found in transcript", terms.len());
|
||||
println!("[memory-search] {} terms total", terms.len());
|
||||
let mut by_weight: Vec<_> = terms.iter().collect();
|
||||
by_weight.sort_by(|a, b| b.1.total_cmp(a.1));
|
||||
for (term, weight) in by_weight.iter().take(20) {
|
||||
|
|
@ -204,7 +216,7 @@ fn main() {
|
|||
}
|
||||
|
||||
if terms.is_empty() {
|
||||
if debug { println!("[memory-search] no node keys found, done"); }
|
||||
if debug { println!("[memory-search] no terms found, done"); }
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue