link-audit: walk every link through Sonnet for quality review
Batch all non-deleted links (~3,800) into char-budgeted groups, send each batch to Sonnet with full content of both endpoints, and apply KEEP/DELETE/RETARGET/WEAKEN/STRENGTHEN decisions. One-time cleanup for links created before refine_target existed. Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
3e883b7ba7
commit
7264bdc39c
2 changed files with 322 additions and 0 deletions
308
src/digest.rs
308
src/digest.rs
|
|
@ -1377,3 +1377,311 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Link audit ---
|
||||||
|
// Walk every link in the graph, send batches to Sonnet for quality review.
|
||||||
|
|
||||||
|
/// Snapshot of one relation together with the text of both endpoints,
/// as shown to the model during a link audit.
#[derive(Debug, Clone)]
struct LinkInfo {
    /// Index of the relation inside `store.relations`.
    rel_idx: usize,
    /// Key of the node the link starts from.
    source_key: String,
    /// Key of the node the link points to.
    target_key: String,
    /// Content of the source node (empty when the node is not in the store).
    source_content: String,
    /// Content of the target node (empty when the node is not in the store).
    target_content: String,
    /// Current strength of the link.
    strength: f32,
    /// Keys of the `target_key#...` section nodes; only collected when the
    /// target key itself contains no `#`.
    target_sections: Vec<String>,
}
|
||||||
|
|
||||||
|
/// Totals of the decisions taken during a link audit.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AuditStats {
    /// Links left untouched (explicit KEEP, or no usable action returned).
    pub kept: usize,
    /// Links marked for soft-deletion.
    pub deleted: usize,
    /// Links moved to a different target node.
    pub retargeted: usize,
    /// Links whose strength was lowered.
    pub weakened: usize,
    /// Links whose strength was raised.
    pub strengthened: usize,
    /// Links that could not be audited (for example, the batch request failed).
    pub errors: usize,
}
|
||||||
|
|
||||||
|
fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String {
|
||||||
|
let mut prompt = format!(
|
||||||
|
"You are auditing memory graph links for quality (batch {}/{}).\n\n\
|
||||||
|
For each numbered link, decide what to do:\n\n\
|
||||||
|
KEEP N — link is meaningful, leave it\n\
|
||||||
|
DELETE N — link is noise, accidental, or too generic to be useful\n\
|
||||||
|
RETARGET N new_key — link points to the right topic area but wrong node;\n\
|
||||||
|
\x20 retarget to a more specific section (listed under each link)\n\
|
||||||
|
WEAKEN N strength — link is marginal; reduce strength (0.1-0.3)\n\
|
||||||
|
STRENGTHEN N strength — link is important but underweighted; increase (0.8-1.0)\n\n\
|
||||||
|
Output exactly one action per link number, nothing else.\n\n\
|
||||||
|
Links to review:\n\n",
|
||||||
|
batch_num, total_batches);
|
||||||
|
|
||||||
|
for (i, link) in batch.iter().enumerate() {
|
||||||
|
let n = i + 1;
|
||||||
|
prompt.push_str(&format!(
|
||||||
|
"--- Link {} ---\n\
|
||||||
|
{} → {} (strength={:.2})\n\n\
|
||||||
|
Source content:\n{}\n\n\
|
||||||
|
Target content:\n{}\n",
|
||||||
|
n, link.source_key, link.target_key, link.strength,
|
||||||
|
&link.source_content, &link.target_content));
|
||||||
|
|
||||||
|
if !link.target_sections.is_empty() {
|
||||||
|
prompt.push_str(
|
||||||
|
"\nTarget has sections (consider RETARGET to a more specific one):\n");
|
||||||
|
for s in &link.target_sections {
|
||||||
|
prompt.push_str(&format!(" - {}\n", s));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prompt.push('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Split off the first whitespace-delimited token of `text`.
///
/// Returns the token and the remainder with its leading whitespace removed,
/// or `None` when `text` contains no token at all.
fn split_first_token(text: &str) -> Option<(&str, &str)> {
    let text = text.trim_start();
    if text.is_empty() {
        return None;
    }
    match text.find(char::is_whitespace) {
        Some(end) => Some((&text[..end], text[end..].trim_start())),
        None => Some((text, "")),
    }
}

/// Parse the strength argument of a WEAKEN/STRENGTHEN line.
///
/// Only the first token is read, so trailing commentary is tolerated.
/// `f32::from_str` accepts "NaN" and "inf"; those, and anything outside
/// `0.0..=1.0` (the scale the prompt asks for), are rejected so they never
/// reach the store.
fn parse_strength(arg: &str) -> Option<f32> {
    let (token, _) = split_first_token(arg)?;
    let value = token.parse::<f32>().ok()?;
    // The comparisons are false for NaN, so NaN is rejected here as well.
    if value >= 0.0 && value <= 1.0 {
        Some(value)
    } else {
        None
    }
}

/// Parse the model's reply into per-link actions.
///
/// Every line is expected to look like `ACTION N [argument]`, where `ACTION`
/// is matched case-insensitively and `N` is the 1-based link number used in
/// the prompt. Tokens may be separated by any amount of whitespace.
///
/// Lines that cannot be parsed are skipped: unknown action, number outside
/// `1..=batch_size`, missing RETARGET key, or missing/invalid strength.
/// At most one action is returned per link; the first valid line for a link
/// wins and later lines for the same link are ignored.
///
/// Returns `(index, action)` pairs in reply order, with `index` 0-based into
/// the batch.
fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> {
    let mut actions = Vec::new();
    // Tracks which links already have an accepted action.
    let mut seen = vec![false; batch_size];

    for line in response.lines() {
        let (verb, rest) = match split_first_token(line) {
            Some(pair) => pair,
            None => continue,
        };
        let (number, arg) = match split_first_token(rest) {
            Some(pair) => pair,
            None => continue,
        };

        let idx = match number.parse::<usize>() {
            Ok(n) if n >= 1 && n <= batch_size => n - 1,
            _ => continue,
        };
        if seen[idx] {
            continue;
        }

        let audit_action = match verb.to_uppercase().as_str() {
            "KEEP" => AuditAction::Keep,
            "DELETE" => AuditAction::Delete,
            "RETARGET" => {
                // The key is the rest of the line, so keys containing
                // spaces survive intact.
                let key = arg.trim();
                if key.is_empty() {
                    continue;
                }
                AuditAction::Retarget(key.to_string())
            }
            "WEAKEN" => match parse_strength(arg) {
                Some(s) => AuditAction::Weaken(s),
                None => continue,
            },
            "STRENGTHEN" => match parse_strength(arg) {
                Some(s) => AuditAction::Strengthen(s),
                None => continue,
            },
            _ => continue,
        };

        // Mark only after the line fully parsed, so a malformed line does
        // not block a later valid line for the same link.
        seen[idx] = true;
        actions.push((idx, audit_action));
    }

    actions
}

/// Decision returned by the model for a single link.
#[derive(Debug, Clone, PartialEq)]
enum AuditAction {
    /// Leave the link unchanged.
    Keep,
    /// Soft-delete the link.
    Delete,
    /// Point the link at the node with this key instead.
    Retarget(String),
    /// Lower the link strength to this value.
    Weaken(f32),
    /// Raise the link strength to this value.
    Strengthen(f32),
}
|
||||||
|
|
||||||
|
/// Run a full link audit: walk every link, batch to Sonnet, apply results.
|
||||||
|
pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String> {
|
||||||
|
// Collect all non-deleted relations with their info
|
||||||
|
let mut links: Vec<LinkInfo> = Vec::new();
|
||||||
|
|
||||||
|
for (idx, rel) in store.relations.iter().enumerate() {
|
||||||
|
if rel.deleted { continue; }
|
||||||
|
|
||||||
|
let source_content = store.nodes.get(&rel.source_key)
|
||||||
|
.map(|n| n.content.clone()).unwrap_or_default();
|
||||||
|
let target_content = store.nodes.get(&rel.target_key)
|
||||||
|
.map(|n| n.content.clone()).unwrap_or_default();
|
||||||
|
|
||||||
|
// Find section children of target if it's file-level
|
||||||
|
let target_sections = if !rel.target_key.contains('#') {
|
||||||
|
let prefix = format!("{}#", rel.target_key);
|
||||||
|
store.nodes.keys()
|
||||||
|
.filter(|k| k.starts_with(&prefix))
|
||||||
|
.cloned()
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
Vec::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
links.push(LinkInfo {
|
||||||
|
rel_idx: idx,
|
||||||
|
source_key: rel.source_key.clone(),
|
||||||
|
target_key: rel.target_key.clone(),
|
||||||
|
source_content,
|
||||||
|
target_content,
|
||||||
|
strength: rel.strength,
|
||||||
|
target_sections,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let total = links.len();
|
||||||
|
println!("Link audit: {} links to review", total);
|
||||||
|
if !apply {
|
||||||
|
println!("DRY RUN — use --apply to make changes");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Batch by char budget (~100K chars per prompt)
|
||||||
|
let char_budget = 100_000usize;
|
||||||
|
let mut batches: Vec<Vec<usize>> = Vec::new();
|
||||||
|
let mut current_batch: Vec<usize> = Vec::new();
|
||||||
|
let mut current_chars = 0usize;
|
||||||
|
|
||||||
|
for (i, link) in links.iter().enumerate() {
|
||||||
|
let link_chars = link.source_content.len() + link.target_content.len() + 200;
|
||||||
|
if !current_batch.is_empty() && current_chars + link_chars > char_budget {
|
||||||
|
batches.push(std::mem::take(&mut current_batch));
|
||||||
|
current_chars = 0;
|
||||||
|
}
|
||||||
|
current_batch.push(i);
|
||||||
|
current_chars += link_chars;
|
||||||
|
}
|
||||||
|
if !current_batch.is_empty() {
|
||||||
|
batches.push(current_batch);
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_batches = batches.len();
|
||||||
|
println!("{} batches (avg {} links/batch)\n", total_batches,
|
||||||
|
if total_batches > 0 { total / total_batches } else { 0 });
|
||||||
|
|
||||||
|
let mut stats = AuditStats {
|
||||||
|
kept: 0, deleted: 0, retargeted: 0, weakened: 0, strengthened: 0, errors: 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Track changes to apply at the end
|
||||||
|
let mut deletions: Vec<usize> = Vec::new();
|
||||||
|
let mut retargets: Vec<(usize, String)> = Vec::new();
|
||||||
|
let mut strength_changes: Vec<(usize, f32)> = Vec::new();
|
||||||
|
|
||||||
|
for (batch_idx, batch_indices) in batches.iter().enumerate() {
|
||||||
|
let batch_links: Vec<&LinkInfo> = batch_indices.iter()
|
||||||
|
.map(|&i| &links[i])
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let batch_infos: Vec<LinkInfo> = batch_links.iter().map(|l| LinkInfo {
|
||||||
|
rel_idx: l.rel_idx,
|
||||||
|
source_key: l.source_key.clone(),
|
||||||
|
target_key: l.target_key.clone(),
|
||||||
|
source_content: l.source_content.clone(),
|
||||||
|
target_content: l.target_content.clone(),
|
||||||
|
strength: l.strength,
|
||||||
|
target_sections: l.target_sections.clone(),
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
let prompt = build_audit_prompt(&batch_infos, batch_idx + 1, total_batches);
|
||||||
|
let response = match call_sonnet(&prompt, 300) {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!(" Batch {}: error: {}", batch_idx + 1, e);
|
||||||
|
stats.errors += batch_infos.len();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let actions = parse_audit_response(&response, batch_infos.len());
|
||||||
|
|
||||||
|
// Count unresponded links as kept
|
||||||
|
let mut responded: std::collections::HashSet<usize> = std::collections::HashSet::new();
|
||||||
|
|
||||||
|
for (idx, action) in &actions {
|
||||||
|
responded.insert(*idx);
|
||||||
|
let link = &batch_infos[*idx];
|
||||||
|
|
||||||
|
match action {
|
||||||
|
AuditAction::Keep => {
|
||||||
|
stats.kept += 1;
|
||||||
|
}
|
||||||
|
AuditAction::Delete => {
|
||||||
|
println!(" DELETE {} → {}", link.source_key, link.target_key);
|
||||||
|
deletions.push(link.rel_idx);
|
||||||
|
stats.deleted += 1;
|
||||||
|
}
|
||||||
|
AuditAction::Retarget(new_target) => {
|
||||||
|
println!(" RETARGET {} → {} (was {})",
|
||||||
|
link.source_key, new_target, link.target_key);
|
||||||
|
retargets.push((link.rel_idx, new_target.clone()));
|
||||||
|
stats.retargeted += 1;
|
||||||
|
}
|
||||||
|
AuditAction::Weaken(s) => {
|
||||||
|
println!(" WEAKEN {} → {} (str {:.2} → {:.2})",
|
||||||
|
link.source_key, link.target_key, link.strength, s);
|
||||||
|
strength_changes.push((link.rel_idx, *s));
|
||||||
|
stats.weakened += 1;
|
||||||
|
}
|
||||||
|
AuditAction::Strengthen(s) => {
|
||||||
|
println!(" STRENGTHEN {} → {} (str {:.2} → {:.2})",
|
||||||
|
link.source_key, link.target_key, link.strength, s);
|
||||||
|
strength_changes.push((link.rel_idx, *s));
|
||||||
|
stats.strengthened += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count unresponded as kept
|
||||||
|
for i in 0..batch_infos.len() {
|
||||||
|
if !responded.contains(&i) {
|
||||||
|
stats.kept += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!(" Batch {}/{}: +{}kept +{}del +{}retarget +{}weak +{}strong",
|
||||||
|
batch_idx + 1, total_batches,
|
||||||
|
stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply changes
|
||||||
|
if apply && (stats.deleted > 0 || stats.retargeted > 0
|
||||||
|
|| stats.weakened > 0 || stats.strengthened > 0) {
|
||||||
|
println!("\nApplying changes...");
|
||||||
|
|
||||||
|
// Deletions: soft-delete
|
||||||
|
for rel_idx in &deletions {
|
||||||
|
store.relations[*rel_idx].deleted = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strength changes
|
||||||
|
for (rel_idx, new_strength) in &strength_changes {
|
||||||
|
store.relations[*rel_idx].strength = *new_strength;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retargets: soft-delete old, create new
|
||||||
|
for (rel_idx, new_target) in &retargets {
|
||||||
|
let source_key = store.relations[*rel_idx].source_key.clone();
|
||||||
|
let old_strength = store.relations[*rel_idx].strength;
|
||||||
|
let source_uuid = store.nodes.get(&source_key)
|
||||||
|
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||||
|
let target_uuid = store.nodes.get(new_target)
|
||||||
|
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||||
|
|
||||||
|
// Soft-delete old
|
||||||
|
store.relations[*rel_idx].deleted = true;
|
||||||
|
|
||||||
|
// Create new
|
||||||
|
if target_uuid != [0u8; 16] {
|
||||||
|
let new_rel = Store::new_relation(
|
||||||
|
source_uuid, target_uuid,
|
||||||
|
capnp_store::RelationType::Auto,
|
||||||
|
old_strength,
|
||||||
|
&source_key, new_target,
|
||||||
|
);
|
||||||
|
store.add_relation(new_rel).ok();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
store.save()?;
|
||||||
|
println!("Saved.");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(stats)
|
||||||
|
}
|
||||||
|
|
|
||||||
14
src/main.rs
14
src/main.rs
|
|
@ -93,6 +93,7 @@ fn main() {
|
||||||
"journal-enrich" => cmd_journal_enrich(&args[2..]),
|
"journal-enrich" => cmd_journal_enrich(&args[2..]),
|
||||||
"apply-consolidation" => cmd_apply_consolidation(&args[2..]),
|
"apply-consolidation" => cmd_apply_consolidation(&args[2..]),
|
||||||
"differentiate" => cmd_differentiate(&args[2..]),
|
"differentiate" => cmd_differentiate(&args[2..]),
|
||||||
|
"link-audit" => cmd_link_audit(&args[2..]),
|
||||||
"trace" => cmd_trace(&args[2..]),
|
"trace" => cmd_trace(&args[2..]),
|
||||||
"list-keys" => cmd_list_keys(),
|
"list-keys" => cmd_list_keys(),
|
||||||
"list-edges" => cmd_list_edges(),
|
"list-edges" => cmd_list_edges(),
|
||||||
|
|
@ -157,6 +158,7 @@ Commands:
|
||||||
Extract and apply actions from consolidation reports
|
Extract and apply actions from consolidation reports
|
||||||
differentiate [KEY] [--apply]
|
differentiate [KEY] [--apply]
|
||||||
Redistribute hub links to section-level children
|
Redistribute hub links to section-level children
|
||||||
|
link-audit [--apply] Walk every link, send to Sonnet for quality review
|
||||||
trace KEY Walk temporal links: semantic ↔ episodic ↔ conversation
|
trace KEY Walk temporal links: semantic ↔ episodic ↔ conversation
|
||||||
list-keys List all node keys (one per line)
|
list-keys List all node keys (one per line)
|
||||||
list-edges List all edges (tsv: source target strength type)
|
list-edges List all edges (tsv: source target strength type)
|
||||||
|
|
@ -783,6 +785,18 @@ fn cmd_differentiate(args: &[String]) -> Result<(), String> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn cmd_link_audit(args: &[String]) -> Result<(), String> {
|
||||||
|
let apply = args.iter().any(|a| a == "--apply");
|
||||||
|
let mut store = capnp_store::Store::load()?;
|
||||||
|
let stats = digest::link_audit(&mut store, apply)?;
|
||||||
|
println!("\n{}", "=".repeat(60));
|
||||||
|
println!("Link audit complete:");
|
||||||
|
println!(" Kept: {} Deleted: {} Retargeted: {} Weakened: {} Strengthened: {} Errors: {}",
|
||||||
|
stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened, stats.errors);
|
||||||
|
println!("{}", "=".repeat(60));
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn cmd_trace(args: &[String]) -> Result<(), String> {
|
fn cmd_trace(args: &[String]) -> Result<(), String> {
|
||||||
if args.is_empty() {
|
if args.is_empty() {
|
||||||
return Err("Usage: poc-memory trace KEY".into());
|
return Err("Usage: poc-memory trace KEY".into());
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue