memory-search: fix returned-set deduplication and pre-seeded count

mark_returned() appended unconditionally, without checking whether the
key was already present, so duplicates accumulated across hook
invocations. load_returned() then returned every entry, duplicates
included, so the returned count could exceed the seen count. The
pre-seeded calculation, seen_lines.len() - returned.len(), is an
unsigned (usize) subtraction and therefore underflowed.

Fix: check load_returned() before appending in mark_returned(), dedup
on read in load_returned(), and use saturating_sub for the pre-seeded
count as a safety net.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-09 17:15:24 -04:00
parent 6dc300fcf8
commit 32d17997af

View file

@ -408,6 +408,8 @@ fn mark_seen(dir: &Path, session_id: &str, key: &str) {
} }
fn mark_returned(dir: &Path, session_id: &str, key: &str) { fn mark_returned(dir: &Path, session_id: &str, key: &str) {
let returned = load_returned(dir, session_id);
if returned.contains(&key.to_string()) { return; }
let path = dir.join(format!("returned-{}", session_id)); let path = dir.join(format!("returned-{}", session_id));
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) { if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
writeln!(f, "{}", key).ok(); writeln!(f, "{}", key).ok();
@ -417,10 +419,12 @@ fn mark_returned(dir: &Path, session_id: &str, key: &str) {
fn load_returned(dir: &Path, session_id: &str) -> Vec<String> { fn load_returned(dir: &Path, session_id: &str) -> Vec<String> {
let path = dir.join(format!("returned-{}", session_id)); let path = dir.join(format!("returned-{}", session_id));
if path.exists() { if path.exists() {
let mut seen = HashSet::new();
fs::read_to_string(path) fs::read_to_string(path)
.unwrap_or_default() .unwrap_or_default()
.lines() .lines()
.filter(|s| !s.is_empty()) .filter(|s| !s.is_empty())
.filter(|s| seen.insert(s.to_string()))
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect() .collect()
} else { } else {
@ -476,7 +480,8 @@ fn show_seen() {
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(); .collect();
let returned_set: HashSet<_> = returned.iter().cloned().collect(); let returned_set: HashSet<_> = returned.iter().cloned().collect();
println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), seen_lines.len() - returned.len()); let pre_seeded = seen_lines.len().saturating_sub(returned.len());
println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), pre_seeded);
if Args::parse().seen_full { if Args::parse().seen_full {
for line in &seen_lines { for line in &seen_lines {