// Link audit: walk every link in the graph, batch to Sonnet for quality review. // // Each batch of links gets reviewed by Sonnet, which returns per-link actions: // KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon. use crate::store::{self, Store, new_relation}; use std::collections::HashSet; struct LinkInfo { rel_idx: usize, source_key: String, target_key: String, source_content: String, target_content: String, strength: f32, target_sections: Vec, } pub struct AuditStats { pub kept: usize, pub deleted: usize, pub retargeted: usize, pub weakened: usize, pub strengthened: usize, pub errors: usize, } fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String { let mut prompt = format!( "You are auditing memory graph links for quality (batch {}/{}).\n\n\ For each numbered link, decide what to do:\n\n\ KEEP N — link is meaningful, leave it\n\ DELETE N — link is noise, accidental, or too generic to be useful\n\ RETARGET N new_key — link points to the right topic area but wrong node;\n\ \x20 retarget to a more specific section (listed under each link)\n\ WEAKEN N strength — link is marginal; reduce strength (0.1-0.3)\n\ STRENGTHEN N strength — link is important but underweighted; increase (0.8-1.0)\n\n\ Output exactly one action per link number, nothing else.\n\n\ Links to review:\n\n", batch_num, total_batches); for (i, link) in batch.iter().enumerate() { let n = i + 1; prompt.push_str(&format!( "--- Link {} ---\n\ {} → {} (strength={:.2})\n\n\ Source content:\n{}\n\n\ Target content:\n{}\n", n, link.source_key, link.target_key, link.strength, &link.source_content, &link.target_content)); if !link.target_sections.is_empty() { prompt.push_str( "\nTarget has sections (consider RETARGET to a more specific one):\n"); for s in &link.target_sections { prompt.push_str(&format!(" - {}\n", s)); } } prompt.push('\n'); } prompt } fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> { let mut actions = Vec::new(); for line in response.lines() { let line = line.trim(); if line.is_empty() { continue; } let parts: Vec<&str> = line.splitn(3, ' ').collect(); if parts.len() < 2 { continue; } let action = parts[0].to_uppercase(); let idx: usize = match parts[1].parse::() { Ok(n) if n >= 1 && n <= batch_size => n - 1, _ => continue, }; let audit_action = match action.as_str() { "KEEP" => AuditAction::Keep, "DELETE" => AuditAction::Delete, "RETARGET" => { if parts.len() < 3 { continue; } AuditAction::Retarget(parts[2].trim().to_string()) } "WEAKEN" => { if parts.len() < 3 { continue; } match parts[2].trim().parse::() { Ok(s) => AuditAction::Weaken(s), Err(_) => continue, } } "STRENGTHEN" => { if parts.len() < 3 { continue; } match parts[2].trim().parse::() { Ok(s) => AuditAction::Strengthen(s), Err(_) => continue, } } _ => continue, }; actions.push((idx, audit_action)); } actions } enum AuditAction { Keep, Delete, Retarget(String), Weaken(f32), Strengthen(f32), } /// Run a full link audit: walk every link, batch to Sonnet, apply results. pub fn link_audit(store: &mut Store, apply: bool) -> Result { // Collect all non-deleted relations with their info let mut links: Vec = Vec::new(); for (idx, rel) in store.relations.iter().enumerate() { if rel.deleted { continue; } let source_content = store.nodes.get(&rel.source_key) .map(|n| n.content.clone()).unwrap_or_default(); let target_content = store.nodes.get(&rel.target_key) .map(|n| n.content.clone()).unwrap_or_default(); // Find section children of target if it's file-level let target_sections = if !rel.target_key.contains('#') { let prefix = format!("{}#", rel.target_key); store.nodes.keys() .filter(|k| k.starts_with(&prefix)) .cloned() .collect() } else { Vec::new() }; links.push(LinkInfo { rel_idx: idx, source_key: rel.source_key.clone(), target_key: rel.target_key.clone(), source_content, target_content, strength: rel.strength, target_sections, }); } let total = links.len(); println!("Link audit: {} links to review", total); if !apply { println!("DRY RUN — use --apply to make changes"); } // Batch by char budget (~100K chars per prompt) let char_budget = 100_000usize; let mut batches: Vec> = Vec::new(); let mut current_batch: Vec = Vec::new(); let mut current_chars = 0usize; for (i, link) in links.iter().enumerate() { let link_chars = link.source_content.len() + link.target_content.len() + 200; if !current_batch.is_empty() && current_chars + link_chars > char_budget { batches.push(std::mem::take(&mut current_batch)); current_chars = 0; } current_batch.push(i); current_chars += link_chars; } if !current_batch.is_empty() { batches.push(current_batch); } let total_batches = batches.len(); println!("{} batches (avg {} links/batch)\n", total_batches, if total_batches > 0 { total / total_batches } else { 0 }); use rayon::prelude::*; use std::sync::atomic::{AtomicUsize, Ordering}; // Build all batch prompts up front let batch_data: Vec<(usize, Vec, String)> = batches.iter().enumerate() .map(|(batch_idx, batch_indices)| { let batch_infos: Vec = batch_indices.iter().map(|&i| { let l = &links[i]; LinkInfo { rel_idx: l.rel_idx, source_key: l.source_key.clone(), target_key: l.target_key.clone(), source_content: l.source_content.clone(), target_content: l.target_content.clone(), strength: l.strength, target_sections: l.target_sections.clone(), } }).collect(); let prompt = build_audit_prompt(&batch_infos, batch_idx + 1, total_batches); (batch_idx, batch_infos, prompt) }) .collect(); // Progress counter let done = AtomicUsize::new(0); // Run batches in parallel via rayon let batch_results: Vec<_> = batch_data.par_iter() .map(|(batch_idx, batch_infos, prompt)| { let response = crate::agent::oneshot::call_api_with_tools_sync( "audit", &[prompt.clone()], &[], None, 10, &[], None); let completed = done.fetch_add(1, Ordering::Relaxed) + 1; eprint!("\r Batches: {}/{} done", completed, total_batches); (*batch_idx, batch_infos, response) }) .collect(); eprintln!(); // newline after progress // Process results sequentially let mut stats = AuditStats { kept: 0, deleted: 0, retargeted: 0, weakened: 0, strengthened: 0, errors: 0, }; let mut deletions: Vec = Vec::new(); let mut retargets: Vec<(usize, String)> = Vec::new(); let mut strength_changes: Vec<(usize, f32)> = Vec::new(); for (batch_idx, batch_infos, response) in &batch_results { let response = match response { Ok(r) => r, Err(e) => { eprintln!(" Batch {}: error: {}", batch_idx + 1, e); stats.errors += batch_infos.len(); continue; } }; let actions = parse_audit_response(response, batch_infos.len()); let mut responded: HashSet = HashSet::new(); for (idx, action) in &actions { responded.insert(*idx); let link = &batch_infos[*idx]; match action { AuditAction::Keep => { stats.kept += 1; } AuditAction::Delete => { println!(" DELETE {} → {}", link.source_key, link.target_key); deletions.push(link.rel_idx); stats.deleted += 1; } AuditAction::Retarget(new_target) => { println!(" RETARGET {} → {} (was {})", link.source_key, new_target, link.target_key); retargets.push((link.rel_idx, new_target.clone())); stats.retargeted += 1; } AuditAction::Weaken(s) => { println!(" WEAKEN {} → {} (str {:.2} → {:.2})", link.source_key, link.target_key, link.strength, s); strength_changes.push((link.rel_idx, *s)); stats.weakened += 1; } AuditAction::Strengthen(s) => { println!(" STRENGTHEN {} → {} (str {:.2} → {:.2})", link.source_key, link.target_key, link.strength, s); strength_changes.push((link.rel_idx, *s)); stats.strengthened += 1; } } } for i in 0..batch_infos.len() { if !responded.contains(&i) { stats.kept += 1; } } println!(" Batch {}/{}: +{}kept +{}del +{}retarget +{}weak +{}strong", batch_idx + 1, total_batches, stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened); } // Apply changes if apply && (stats.deleted > 0 || stats.retargeted > 0 || stats.weakened > 0 || stats.strengthened > 0) { println!("\nApplying changes..."); // Deletions: soft-delete for rel_idx in &deletions { store.relations[*rel_idx].deleted = true; } // Strength changes for (rel_idx, new_strength) in &strength_changes { store.relations[*rel_idx].strength = *new_strength; } // Retargets: soft-delete old, create new for (rel_idx, new_target) in &retargets { let source_key = store.relations[*rel_idx].source_key.clone(); let old_strength = store.relations[*rel_idx].strength; let source_uuid = store.nodes.get(&source_key) .map(|n| n.uuid).unwrap_or([0u8; 16]); let target_uuid = store.nodes.get(new_target) .map(|n| n.uuid).unwrap_or([0u8; 16]); // Soft-delete old store.relations[*rel_idx].deleted = true; // Create new if target_uuid != [0u8; 16] { let new_rel = new_relation( source_uuid, target_uuid, store::RelationType::Auto, old_strength, &source_key, new_target, ); store.add_relation(new_rel).ok(); } } store.save()?; println!("Saved."); } Ok(stats) }