memory-search: fix returned-set deduplication and pre-seeded count
mark_returned() was append-only and never checked whether the key already existed, so duplicates accumulated across hook invocations. load_returned() then returned every entry, duplicates included, which let the returned count exceed the seen count and caused an unsigned (usize) underflow in the pre-seeded calculation. Fix: check load_returned() before appending in mark_returned(), dedup on read in load_returned(), and use saturating_sub for the pre-seeded count as a safety net. Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
6dc300fcf8
commit
32d17997af
1 changed file with 6 additions and 1 deletion
|
|
@@ -408,6 +408,8 @@ fn mark_seen(dir: &Path, session_id: &str, key: &str) {
|
|||
}
|
||||
|
||||
fn mark_returned(dir: &Path, session_id: &str, key: &str) {
|
||||
let returned = load_returned(dir, session_id);
|
||||
if returned.contains(&key.to_string()) { return; }
|
||||
let path = dir.join(format!("returned-{}", session_id));
|
||||
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
|
||||
writeln!(f, "{}", key).ok();
|
||||
|
|
@@ -417,10 +419,12 @@ fn mark_returned(dir: &Path, session_id: &str, key: &str) {
|
|||
fn load_returned(dir: &Path, session_id: &str) -> Vec<String> {
|
||||
let path = dir.join(format!("returned-{}", session_id));
|
||||
if path.exists() {
|
||||
let mut seen = HashSet::new();
|
||||
fs::read_to_string(path)
|
||||
.unwrap_or_default()
|
||||
.lines()
|
||||
.filter(|s| !s.is_empty())
|
||||
.filter(|s| seen.insert(s.to_string()))
|
||||
.map(|s| s.to_string())
|
||||
.collect()
|
||||
} else {
|
||||
|
|
@@ -476,7 +480,8 @@ fn show_seen() {
|
|||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
let returned_set: HashSet<_> = returned.iter().cloned().collect();
|
||||
println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), seen_lines.len() - returned.len());
|
||||
let pre_seeded = seen_lines.len().saturating_sub(returned.len());
|
||||
println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), pre_seeded);
|
||||
|
||||
if Args::parse().seen_full {
|
||||
for line in &seen_lines {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue