graph: normalize link strengths from Jaccard neighborhood similarity

Add jaccard() and jaccard_strengths() to Graph. Jaccard similarity
measures neighborhood overlap between linked nodes — nodes sharing
many neighbors get stronger links, nodes with no shared neighbors
get weak links.

New subcommand: `poc-memory graph normalize-strengths [--apply]`

Scales raw Jaccard (typically 0.0-0.3) to a useful range via j*3,
clamped to [0.1, 1.0]. Skips implicit temporal edges (Auto relations
with strength=1.0).

Applied to 64,969 edges. Distribution is bimodal: large cluster at
0.1-0.2 (weak) and spike at 0.9-1.0 (strong), with smooth gradient
between. Replaces the meaningless 0.3/0.8 split from manual/agent
creation methods.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-14 11:13:58 -04:00
parent 420a777eba
commit dccc18b205
2 changed files with 120 additions and 0 deletions

View file

@ -362,6 +362,13 @@ enum GraphCmd {
#[arg(default_value_t = 50)]
max_degree: usize,
},
/// Set link strengths from neighborhood overlap (Jaccard similarity)
#[command(name = "normalize-strengths")]
NormalizeStrengths {
/// Apply changes (default: dry run)
#[arg(long)]
apply: bool,
},
/// Redistribute hub links to section-level children
Differentiate {
/// Specific hub key (omit to list all differentiable hubs)
@ -742,6 +749,7 @@ fn main() {
GraphCmd::TriangleClose { min_degree, sim_threshold, max_per_hub }
=> cmd_triangle_close(min_degree, sim_threshold, max_per_hub),
GraphCmd::CapDegree { max_degree } => cmd_cap_degree(max_degree),
GraphCmd::NormalizeStrengths { apply } => cmd_normalize_strengths(apply),
GraphCmd::Differentiate { key, apply }
=> cmd_differentiate(key.as_deref(), apply),
GraphCmd::Trace { key } => cmd_trace(&key),
@ -1377,6 +1385,81 @@ fn cmd_cap_degree(max_deg: usize) -> Result<(), String> {
Ok(())
}
/// Recompute link strengths from Jaccard neighborhood similarity.
///
/// Loads the store, builds its graph and asks the graph for
/// `jaccard_strengths()`, which yields `(key_a, key_b, strength)` triples.
/// Every non-deleted relation whose endpoints appear in that list (in either
/// direction) is compared against the new value: a difference larger than
/// 0.001 counts as a change and, when `apply` is true, is written into the
/// relation. Relations of type `Auto` with a strength of exactly 1.0 are
/// treated as implicit temporal edges and are never touched.
///
/// A summary and a ten-bucket histogram of the new strengths are printed in
/// both modes. The store is saved only when `apply` is true; otherwise this
/// is a dry run.
///
/// # Errors
/// Propagates any error returned by `Store::load` or `Store::save`.
fn cmd_normalize_strengths(apply: bool) -> Result<(), String> {
    let mut store = store::Store::load()?;
    let graph = store.build_graph();
    let strengths = graph.jaccard_strengths();

    // Lookup from (source_key, target_key) to new strength. The keys borrow
    // from `strengths`, so neither building the map nor probing it has to
    // allocate. Both directions are stored so a relation matches regardless
    // of which endpoint it records as its source.
    let mut updates: std::collections::HashMap<(&str, &str), f32> =
        std::collections::HashMap::with_capacity(strengths.len() * 2);
    for (a, b, s) in &strengths {
        updates.insert((a.as_str(), b.as_str()), *s);
        updates.insert((b.as_str(), a.as_str()), *s);
    }

    // Stats
    let mut changed = 0usize;
    let mut unchanged = 0usize;
    let mut temporal_skipped = 0usize;
    let mut delta_sum: f64 = 0.0;

    // Histogram of new strengths: 0.0-0.1, 0.1-0.2, ...
    let mut buckets = [0usize; 10];

    for rel in &mut store.relations {
        if rel.deleted {
            continue;
        }

        // Skip implicit temporal edges (strength 1.0, Auto type)
        if rel.strength == 1.0 && rel.rel_type == store::RelationType::Auto {
            temporal_skipped += 1;
            continue;
        }

        // Copy the value out so no borrow of `rel` outlives the lookup.
        let key = (rel.source_key.as_str(), rel.target_key.as_str());
        let new_s = match updates.get(&key).copied() {
            Some(s) => s,
            None => continue,
        };

        let delta = (new_s - rel.strength).abs();
        if delta > 0.001 {
            delta_sum += f64::from(delta);
            if apply {
                rel.strength = new_s;
            }
            changed += 1;
        } else {
            unchanged += 1;
        }

        // A strength of exactly 1.0 would index bucket 10; fold it into the
        // last bucket.
        let bucket = ((new_s * 10.0) as usize).min(buckets.len() - 1);
        buckets[bucket] += 1;
    }

    // In apply mode the edits really happen, so do not call them hypothetical.
    let change_label = if apply { "Changed" } else { "Would change" };

    println!("Normalize link strengths (Jaccard similarity)");
    println!(" Total edges in graph: {}", strengths.len());
    println!(" {}: {}", change_label, changed);
    println!(" Unchanged: {}", unchanged);
    println!(" Temporal (skipped): {}", temporal_skipped);
    if changed > 0 {
        println!(" Avg delta: {:.3}", delta_sum / changed as f64);
    }
    println!();
    println!(" Strength distribution:");
    for (i, &count) in buckets.iter().enumerate() {
        let lo = i as f32 / 10.0;
        let hi = lo + 0.1;
        // One '#' per 50 edges, plus one so any non-empty bucket is visible.
        let bar = "#".repeat(count / 50 + usize::from(count > 0));
        println!(" {:.1}-{:.1}: {:5} {}", lo, hi, count, bar);
    }

    if apply {
        store.save()?;
        println!("\nApplied {} strength updates.", changed);
    } else {
        println!("\nDry run. Use --apply to write changes.");
    }
    Ok(())
}
fn cmd_consolidate_batch(count: usize, auto: bool, agent: Option<String>) -> Result<(), String> {
let store = store::Store::load()?;