graph: normalize link strengths from Jaccard neighborhood similarity

Add jaccard() and jaccard_strengths() to Graph. Jaccard similarity
measures neighborhood overlap between linked nodes — nodes sharing
many neighbors get stronger links, nodes with no shared neighbors
get weak links.

New subcommand: `poc-memory graph normalize-strengths [--apply]`

Scales raw Jaccard (typically 0.0-0.3) to useful range via j*3
clamped to [0.1, 1.0]. Skips implicit temporal edges (strength=1.0).

Applied to 64,969 edges. Distribution is bimodal: large cluster at
0.1-0.2 (weak) and spike at 0.9-1.0 (strong), with smooth gradient
between. Replaces the meaningless 0.3/0.8 split from manual/agent
creation methods.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-14 11:13:58 -04:00
parent 420a777eba
commit dccc18b205
2 changed files with 120 additions and 0 deletions

View file

@ -64,6 +64,43 @@ impl Graph {
.unwrap_or_default()
}
/// Jaccard similarity between two nodes' neighborhoods.
/// Measures overlap: |intersection| / |union| of their neighbor sets.
///
/// Returns 0.0 when both neighborhoods are empty (avoids 0/0).
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
    let na = self.neighbor_keys(a);
    let nb = self.neighbor_keys(b);
    let intersection = na.intersection(&nb).count();
    // Inclusion–exclusion: |A ∪ B| = |A| + |B| - |A ∩ B|.
    // Avoids a second full set traversal via `union()` — this runs once per
    // edge in jaccard_strengths(), so the saved walk adds up on large graphs.
    let union = na.len() + nb.len() - intersection;
    if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
}
/// Compute Jaccard-based strength for every edge in the graph.
/// Returns (source_key, target_key, jaccard_strength) triples.
/// Scales raw Jaccard (typically 0.0-0.3) to a useful range.
pub fn jaccard_strengths(&self) -> Vec<(String, String, f32)> {
    let mut result = Vec::new();
    // Deduplicate undirected edges with borrowed keys. The previous owned-key
    // set allocated two fresh Strings per edge (even for already-seen pairs);
    // (&str, &str) tuples borrowed from self.adj need no allocation at all.
    let mut seen: HashSet<(&str, &str)> = HashSet::new();
    for (key, edges) in &self.adj {
        for edge in edges {
            // Canonical (lexicographically ordered) pair identifies the
            // undirected edge regardless of which endpoint we reached first.
            let pair = if key.as_str() < edge.target.as_str() {
                (key.as_str(), edge.target.as_str())
            } else {
                (edge.target.as_str(), key.as_str())
            };
            if !seen.insert(pair) {
                continue;
            }
            let j = self.jaccard(key, &edge.target);
            // Scale: raw Jaccard 0.05 → 0.15, 0.15 → 0.45, 0.30 → 0.90
            // Formula: clamp(j * 3, 0.1, 1.0)
            let strength = (j * 3.0).clamp(0.1, 1.0);
            result.push((key.clone(), edge.target.clone(), strength));
        }
    }
    result
}
pub fn community_count(&self) -> usize {
let labels: HashSet<_> = self.communities.values().collect();
labels.len()

View file

@ -362,6 +362,13 @@ enum GraphCmd {
#[arg(default_value_t = 50)]
max_degree: usize,
},
/// Set link strengths from neighborhood overlap (Jaccard similarity)
#[command(name = "normalize-strengths")]
NormalizeStrengths {
/// Apply changes (default: dry run)
#[arg(long)]
apply: bool,
},
/// Redistribute hub links to section-level children
Differentiate {
/// Specific hub key (omit to list all differentiable hubs)
@ -742,6 +749,7 @@ fn main() {
GraphCmd::TriangleClose { min_degree, sim_threshold, max_per_hub }
=> cmd_triangle_close(min_degree, sim_threshold, max_per_hub),
GraphCmd::CapDegree { max_degree } => cmd_cap_degree(max_degree),
GraphCmd::NormalizeStrengths { apply } => cmd_normalize_strengths(apply),
GraphCmd::Differentiate { key, apply }
=> cmd_differentiate(key.as_deref(), apply),
GraphCmd::Trace { key } => cmd_trace(&key),
@ -1377,6 +1385,81 @@ fn cmd_cap_degree(max_deg: usize) -> Result<(), String> {
Ok(())
}
/// Recompute every relation's strength from Jaccard neighborhood similarity.
/// Dry run by default; `apply` writes the new strengths back to the store.
fn cmd_normalize_strengths(apply: bool) -> Result<(), String> {
    let mut store = store::Store::load()?;
    let graph = store.build_graph();
    let strengths = graph.jaccard_strengths();
    // Nested lookup: source_key → (target_key → new_strength). Borrowing &str
    // keys from the owned `strengths` vec lets the per-relation probe below
    // run without cloning two Strings per relation, which the flat
    // HashMap<(String, String), f32> approach forced on every lookup.
    let mut updates: std::collections::HashMap<&str, std::collections::HashMap<&str, f32>> =
        std::collections::HashMap::new();
    for (a, b, s) in &strengths {
        // Store both directions so relation orientation doesn't matter.
        updates.entry(a.as_str()).or_default().insert(b.as_str(), *s);
        updates.entry(b.as_str()).or_default().insert(a.as_str(), *s);
    }
    // Stats
    let mut changed = 0usize;
    let mut unchanged = 0usize;
    let mut temporal_skipped = 0usize;
    let mut delta_sum: f64 = 0.0;
    // Histogram of new strengths
    let mut buckets = [0usize; 10]; // 0.0-0.1, 0.1-0.2, ...
    for rel in &mut store.relations {
        if rel.deleted { continue; }
        // Skip implicit temporal edges (strength 1.0, Auto type). The exact
        // float compare is intentional: 1.0 here is a sentinel stored
        // verbatim, never the result of arithmetic.
        if rel.strength == 1.0 && rel.rel_type == store::RelationType::Auto {
            temporal_skipped += 1;
            continue;
        }
        let new_strength = updates
            .get(rel.source_key.as_str())
            .and_then(|targets| targets.get(rel.target_key.as_str()));
        if let Some(&new_s) = new_strength {
            let old_s = rel.strength;
            let delta = (new_s - old_s).abs();
            if delta > 0.001 {
                delta_sum += delta as f64;
                if apply {
                    rel.strength = new_s;
                }
                changed += 1;
            } else {
                unchanged += 1;
            }
            let bucket = ((new_s * 10.0) as usize).min(9);
            buckets[bucket] += 1;
        }
    }
    println!("Normalize link strengths (Jaccard similarity)");
    println!(" Total edges in graph: {}", strengths.len());
    // Label honestly: in --apply mode these edges really did change;
    // "Would change" is only accurate for the dry run.
    println!(" {}: {}", if apply { "Changed" } else { "Would change" }, changed);
    println!(" Unchanged: {}", unchanged);
    println!(" Temporal (skipped): {}", temporal_skipped);
    if changed > 0 {
        println!(" Avg delta: {:.3}", delta_sum / changed as f64);
    }
    println!();
    println!(" Strength distribution:");
    for (i, &count) in buckets.iter().enumerate() {
        let lo = i as f32 / 10.0;
        let hi = lo + 0.1;
        // One '#' per 50 edges, with a minimum of one for non-empty buckets.
        let bar = "#".repeat(count / 50 + if count > 0 { 1 } else { 0 });
        println!(" {:.1}-{:.1}: {:5} {}", lo, hi, count, bar);
    }
    if apply {
        store.save()?;
        println!("\nApplied {} strength updates.", changed);
    } else {
        println!("\nDry run. Use --apply to write changes.");
    }
    Ok(())
}
fn cmd_consolidate_batch(count: usize, auto: bool, agent: Option<String>) -> Result<(), String> {
let store = store::Store::load()?;