From 7264bdc39c791b56581de82d203fc022961ffcfc Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sun, 1 Mar 2026 00:48:44 -0500
Subject: [PATCH] link-audit: walk every link through Sonnet for quality review

Batch all non-deleted links (~3,800) into char-budgeted groups, send each
batch to Sonnet with full content of both endpoints, and apply
KEEP/DELETE/RETARGET/WEAKEN/STRENGTHEN decisions.

One-time cleanup for links created before refine_target existed.

Co-Authored-By: ProofOfConcept
---
 src/digest.rs | 352 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs   |  14 ++
 2 files changed, 366 insertions(+)

diff --git a/src/digest.rs b/src/digest.rs
index d7521b8..78550f0 100644
--- a/src/digest.rs
+++ b/src/digest.rs
@@ -1377,3 +1377,355 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
 
     Ok(())
 }
+
+// --- Link audit ---
+// Walk every link in the graph, send batches to Sonnet for quality review.
+
+/// One live relation plus the node content the reviewer is shown.
+struct LinkInfo {
+    /// Index of the relation in `store.relations`.
+    rel_idx: usize,
+    source_key: String,
+    target_key: String,
+    source_content: String,
+    target_content: String,
+    strength: f32,
+    /// Keys of the `target_key#...` nodes; empty when the target is a section.
+    target_sections: Vec<String>,
+}
+
+/// Per-action tallies returned by `link_audit`.
+#[derive(Debug, Default, Clone, Copy)]
+pub struct AuditStats {
+    pub kept: usize,
+    pub deleted: usize,
+    pub retargeted: usize,
+    pub weakened: usize,
+    pub strengthened: usize,
+    /// Links whose batch failed or whose requested change was rejected.
+    pub errors: usize,
+}
+
+/// Build the review prompt for one batch; links are numbered from 1.
+fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String {
+    let mut prompt = format!(
+        "You are auditing memory graph links for quality (batch {}/{}).\n\n\
+         For each numbered link, decide what to do:\n\n\
+         KEEP N — link is meaningful, leave it\n\
+         DELETE N — link is noise, accidental, or too generic to be useful\n\
+         RETARGET N new_key — link points to the right topic area but wrong node;\n\
+         \x20 retarget to a more specific section (listed under each link)\n\
+         WEAKEN N strength — link is marginal; reduce strength (0.1-0.3)\n\
+         STRENGTHEN N strength — link is important but underweighted; increase (0.8-1.0)\n\n\
+         Output exactly one action per link number, nothing else.\n\n\
+         Links to review:\n\n",
+        batch_num, total_batches);
+
+    for (i, link) in batch.iter().enumerate() {
+        let n = i + 1;
+        prompt.push_str(&format!(
+            "--- Link {} ---\n\
+             {} → {} (strength={:.2})\n\n\
+             Source content:\n{}\n\n\
+             Target content:\n{}\n",
+            n, link.source_key, link.target_key, link.strength,
+            &link.source_content, &link.target_content));
+
+        if !link.target_sections.is_empty() {
+            prompt.push_str(
+                "\nTarget has sections (consider RETARGET to a more specific one):\n");
+            for s in &link.target_sections {
+                prompt.push_str(&format!(" - {}\n", s));
+            }
+        }
+        prompt.push('\n');
+    }
+
+    prompt
+}
+
+/// Split off the first whitespace-delimited token of `s`.
+///
+/// Returns `(token, rest)`; both have their leading whitespace removed.
+fn split_token(s: &str) -> (&str, &str) {
+    let s = s.trim_start();
+    match s.find(char::is_whitespace) {
+        Some(end) => (&s[..end], s[end..].trim_start()),
+        None => (s, ""),
+    }
+}
+
+/// Parse the strength argument of WEAKEN/STRENGTHEN from the first token of `arg`.
+///
+/// `f32`'s parser accepts "nan" and "inf", so non-finite values are rejected.
+/// Finite values are clamped to 0.0..=1.0; the prompt never asks for more.
+fn parse_strength(arg: &str) -> Option<f32> {
+    match split_token(arg).0.parse::<f32>() {
+        Ok(v) if v.is_finite() => Some(v.max(0.0).min(1.0)),
+        _ => None,
+    }
+}
+
+/// Parse the model's reply into `(zero-based link index, action)` pairs.
+///
+/// Lines that are not `VERB N [arg]` with `1 <= N <= batch_size` are skipped.
+/// Tokens may be separated by any run of whitespace, and text after a
+/// strength value is ignored. A RETARGET key is the rest of the line.
+fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> {
+    let mut actions = Vec::new();
+
+    for line in response.lines() {
+        let (verb, rest) = split_token(line);
+        let (number, arg) = split_token(rest);
+        let arg = arg.trim_end();
+
+        let idx = match number.parse::<usize>() {
+            Ok(n) if n >= 1 && n <= batch_size => n - 1,
+            _ => continue,
+        };
+
+        let verb = verb.to_uppercase();
+        let audit_action = match verb.as_str() {
+            "KEEP" => AuditAction::Keep,
+            "DELETE" => AuditAction::Delete,
+            "RETARGET" if !arg.is_empty() => AuditAction::Retarget(arg.to_string()),
+            "WEAKEN" => match parse_strength(arg) {
+                Some(s) => AuditAction::Weaken(s),
+                None => continue,
+            },
+            "STRENGTHEN" => match parse_strength(arg) {
+                Some(s) => AuditAction::Strengthen(s),
+                None => continue,
+            },
+            _ => continue,
+        };
+
+        actions.push((idx, audit_action));
+    }
+
+    actions
+}
+
+/// One decision parsed from the reviewer's reply.
+#[derive(Debug, Clone, PartialEq)]
+enum AuditAction {
+    Keep,
+    Delete,
+    /// Key of the node the link should point to instead.
+    Retarget(String),
+    /// Replacement strength.
+    Weaken(f32),
+    /// Replacement strength.
+    Strengthen(f32),
+}
+
+/// Run a full link audit: walk every link, batch to Sonnet, apply results.
+///
+/// With `apply == false` this is a dry run: decisions are printed and counted
+/// but the store is not modified. Links the model does not mention count as
+/// kept, and only the first action given for a link is honoured. A RETARGET
+/// to a key that is not in `store.nodes` is rejected and counted as an error.
+pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String> {
+    // Collect all non-deleted relations with their info
+    let mut links: Vec<LinkInfo> = Vec::new();
+
+    for (idx, rel) in store.relations.iter().enumerate() {
+        if rel.deleted { continue; }
+
+        let source_content = store.nodes.get(&rel.source_key)
+            .map(|n| n.content.clone()).unwrap_or_default();
+        let target_content = store.nodes.get(&rel.target_key)
+            .map(|n| n.content.clone()).unwrap_or_default();
+
+        // Find section children of target if it's file-level
+        let target_sections = if !rel.target_key.contains('#') {
+            let prefix = format!("{}#", rel.target_key);
+            store.nodes.keys()
+                .filter(|k| k.starts_with(&prefix))
+                .cloned()
+                .collect()
+        } else {
+            Vec::new()
+        };
+
+        links.push(LinkInfo {
+            rel_idx: idx,
+            source_key: rel.source_key.clone(),
+            target_key: rel.target_key.clone(),
+            source_content,
+            target_content,
+            strength: rel.strength,
+            target_sections,
+        });
+    }
+
+    let total = links.len();
+    println!("Link audit: {} links to review", total);
+    if !apply {
+        println!("DRY RUN — use --apply to make changes");
+    }
+
+    // Batch by char budget (~100K chars per prompt). Links are taken in order,
+    // so each batch is a contiguous range of `links` and nothing is copied.
+    let char_budget = 100_000usize;
+    let mut batches: Vec<std::ops::Range<usize>> = Vec::new();
+    let mut batch_start = 0usize;
+    let mut current_chars = 0usize;
+
+    for (i, link) in links.iter().enumerate() {
+        let link_chars = link.source_content.len() + link.target_content.len() + 200;
+        if i > batch_start && current_chars + link_chars > char_budget {
+            batches.push(batch_start..i);
+            batch_start = i;
+            current_chars = 0;
+        }
+        current_chars += link_chars;
+    }
+    if batch_start < total {
+        batches.push(batch_start..total);
+    }
+
+    let total_batches = batches.len();
+    println!("{} batches (avg {} links/batch)\n", total_batches,
+        if total_batches > 0 { total / total_batches } else { 0 });
+
+    let mut stats = AuditStats::default();
+
+    // Track changes to apply at the end
+    let mut deletions: Vec<usize> = Vec::new();
+    let mut retargets: Vec<(usize, String)> = Vec::new();
+    let mut strength_changes: Vec<(usize, f32)> = Vec::new();
+
+    for (batch_idx, range) in batches.iter().enumerate() {
+        let batch = &links[range.clone()];
+        let before = stats;
+
+        let prompt = build_audit_prompt(batch, batch_idx + 1, total_batches);
+        let response = match call_sonnet(&prompt, 300) {
+            Ok(r) => r,
+            Err(e) => {
+                eprintln!(" Batch {}: error: {}", batch_idx + 1, e);
+                stats.errors += batch.len();
+                continue;
+            }
+        };
+
+        let actions = parse_audit_response(&response, batch.len());
+
+        // If the model repeats a link number, honour only its first action so
+        // no link is counted twice or given conflicting changes.
+        let mut responded = vec![false; batch.len()];
+
+        for (idx, action) in actions {
+            if responded[idx] { continue; }
+            responded[idx] = true;
+            let link = &batch[idx];
+
+            match action {
+                AuditAction::Keep => {
+                    stats.kept += 1;
+                }
+                AuditAction::Delete => {
+                    println!(" DELETE {} → {}", link.source_key, link.target_key);
+                    deletions.push(link.rel_idx);
+                    stats.deleted += 1;
+                }
+                AuditAction::Retarget(new_target) => {
+                    if new_target == link.target_key {
+                        // Already points there; nothing to change.
+                        stats.kept += 1;
+                    } else if !store.nodes.contains_key(&new_target) {
+                        // Never drop a link in favour of a key that does not exist.
+                        eprintln!(" RETARGET {} → {}: no such node, keeping {}",
+                            link.source_key, new_target, link.target_key);
+                        stats.errors += 1;
+                    } else {
+                        println!(" RETARGET {} → {} (was {})",
+                            link.source_key, new_target, link.target_key);
+                        retargets.push((link.rel_idx, new_target));
+                        stats.retargeted += 1;
+                    }
+                }
+                AuditAction::Weaken(s) => {
+                    println!(" WEAKEN {} → {} (str {:.2} → {:.2})",
+                        link.source_key, link.target_key, link.strength, s);
+                    strength_changes.push((link.rel_idx, s));
+                    stats.weakened += 1;
+                }
+                AuditAction::Strengthen(s) => {
+                    println!(" STRENGTHEN {} → {} (str {:.2} → {:.2})",
+                        link.source_key, link.target_key, link.strength, s);
+                    strength_changes.push((link.rel_idx, s));
+                    stats.strengthened += 1;
+                }
+            }
+        }
+
+        // Count unresponded links as kept
+        stats.kept += responded.iter().filter(|&&seen| !seen).count();
+
+        // Report this batch's own counts, not the running totals.
+        println!(" Batch {}/{}: +{}kept +{}del +{}retarget +{}weak +{}strong",
+            batch_idx + 1, total_batches,
+            stats.kept - before.kept, stats.deleted - before.deleted,
+            stats.retargeted - before.retargeted, stats.weakened - before.weakened,
+            stats.strengthened - before.strengthened);
+    }
+
+    // Apply changes
+    let have_changes = !deletions.is_empty() || !retargets.is_empty()
+        || !strength_changes.is_empty();
+    if apply && have_changes {
+        println!("\nApplying changes...");
+
+        // Deletions: soft-delete
+        for &rel_idx in &deletions {
+            store.relations[rel_idx].deleted = true;
+        }
+
+        // Strength changes
+        for &(rel_idx, new_strength) in &strength_changes {
+            store.relations[rel_idx].strength = new_strength;
+        }
+
+        // Retargets: create the replacement first and soft-delete the old link
+        // only once it exists, so a failure never loses the link.
+        // NOTE(review): the replacement is always RelationType::Auto, as
+        // before — confirm the old relation's type need not be carried over.
+        for (rel_idx, new_target) in &retargets {
+            let source_key = store.relations[*rel_idx].source_key.clone();
+            let old_strength = store.relations[*rel_idx].strength;
+            let uuids = match (store.nodes.get(&source_key), store.nodes.get(new_target)) {
+                (Some(src), Some(dst)) => Some((src.uuid, dst.uuid)),
+                _ => None,
+            };
+
+            let created = match uuids {
+                Some((source_uuid, target_uuid)) => {
+                    let new_rel = Store::new_relation(
+                        source_uuid, target_uuid,
+                        capnp_store::RelationType::Auto,
+                        old_strength,
+                        &source_key, new_target,
+                    );
+                    store.add_relation(new_rel).is_ok()
+                }
+                None => false,
+            };
+
+            if created {
+                store.relations[*rel_idx].deleted = true;
+            } else {
+                eprintln!(" RETARGET {} → {}: could not create link, keeping original",
+                    source_key, new_target);
+                stats.retargeted -= 1;
+                stats.errors += 1;
+            }
+        }
+
+        store.save()?;
+        println!("Saved.");
+    }
+
+    Ok(stats)
+}
diff --git a/src/main.rs b/src/main.rs
index 6f11c20..67af5f2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -93,6 +93,7 @@ fn main() {
         "journal-enrich" => cmd_journal_enrich(&args[2..]),
         "apply-consolidation" => cmd_apply_consolidation(&args[2..]),
         "differentiate" => cmd_differentiate(&args[2..]),
+        "link-audit" => cmd_link_audit(&args[2..]),
         "trace" => cmd_trace(&args[2..]),
         "list-keys" => cmd_list_keys(),
         "list-edges" => cmd_list_edges(),
@@ -157,6 +158,7 @@ Commands:
                                    Extract and apply actions from consolidation reports
   differentiate [KEY] [--apply]
                                    Redistribute hub links to section-level children
+  link-audit [--apply]             Walk every link, send to Sonnet for quality review
   trace KEY                        Walk temporal links: semantic ↔ episodic ↔ conversation
   list-keys                        List all node keys (one per line)
   list-edges                       List all edges (tsv: source target strength type)
@@ -783,6 +785,18 @@ fn cmd_differentiate(args: &[String]) -> Result<(), String> {
     Ok(())
 }
 
+fn cmd_link_audit(args: &[String]) -> Result<(), String> {
+    let apply = args.iter().any(|a| a == "--apply");
+    let mut store = capnp_store::Store::load()?;
+    let stats = digest::link_audit(&mut store, apply)?;
+    println!("\n{}", "=".repeat(60));
+    println!("Link audit complete:");
+    println!(" Kept: {} Deleted: {} Retargeted: {} Weakened: {} Strengthened: {} Errors: {}",
+        stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened, stats.errors);
+    println!("{}", "=".repeat(60));
+    Ok(())
+}
+
 fn cmd_trace(args: &[String]) -> Result<(), String> {
     if args.is_empty() {
         return Err("Usage: poc-memory trace KEY".into());