From 32d17997af14947e8814e8009ca1f9ebdaeb1e80 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 9 Mar 2026 17:15:24 -0400 Subject: [PATCH] memory-search: fix returned-set deduplication and pre-seeded count mark_returned() was append-only without checking if the key already existed, causing duplicates to accumulate across hook invocations. load_returned() then returned all entries including duplicates, which made the returned count exceed the seen count, causing a u64 underflow in the pre-seeded calculation. Fix: check load_returned() before appending in mark_returned(), dedup on read in load_returned(), and use saturating_sub for the pre-seeded count as a safety net. Co-Authored-By: ProofOfConcept --- poc-memory/src/bin/memory-search.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/poc-memory/src/bin/memory-search.rs b/poc-memory/src/bin/memory-search.rs index 34669fa..8fe2da4 100644 --- a/poc-memory/src/bin/memory-search.rs +++ b/poc-memory/src/bin/memory-search.rs @@ -408,6 +408,8 @@ fn mark_seen(dir: &Path, session_id: &str, key: &str) { } fn mark_returned(dir: &Path, session_id: &str, key: &str) { + let returned = load_returned(dir, session_id); + if returned.contains(&key.to_string()) { return; } let path = dir.join(format!("returned-{}", session_id)); if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) { writeln!(f, "{}", key).ok(); @@ -417,10 +419,12 @@ fn mark_returned(dir: &Path, session_id: &str, key: &str) { fn load_returned(dir: &Path, session_id: &str) -> Vec { let path = dir.join(format!("returned-{}", session_id)); if path.exists() { + let mut seen = HashSet::new(); fs::read_to_string(path) .unwrap_or_default() .lines() .filter(|s| !s.is_empty()) + .filter(|s| seen.insert(s.to_string())) .map(|s| s.to_string()) .collect() } else { @@ -476,7 +480,8 @@ fn show_seen() { .map(|s| s.to_string()) .collect(); let returned_set: HashSet<_> = returned.iter().cloned().collect(); - println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), seen_lines.len() - returned.len()); + let pre_seeded = seen_lines.len().saturating_sub(returned.len()); + println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), pre_seeded); if Args::parse().seen_full { for line in &seen_lines {