From dccc18b2056e812d36d0c2d69994de6a5d765b1c Mon Sep 17 00:00:00 2001
From: ProofOfConcept
Date: Sat, 14 Mar 2026 11:13:58 -0400
Subject: [PATCH] graph: normalize link strengths from Jaccard neighborhood
 similarity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add jaccard() and jaccard_strengths() to Graph. Jaccard similarity
measures neighborhood overlap between linked nodes — nodes sharing many
neighbors get stronger links, nodes with no shared neighbors get weak
links.

New subcommand: `poc-memory graph normalize-strengths [--apply]`

Scales raw Jaccard (typically 0.0-0.3) to useful range via j*3 clamped
to [0.1, 1.0]. Skips implicit temporal edges (strength=1.0).

Applied to 64,969 edges. Distribution is bimodal: large cluster at
0.1-0.2 (weak) and spike at 0.9-1.0 (strong), with smooth gradient
between. Replaces the meaningless 0.3/0.8 split from manual/agent
creation methods.

Co-Authored-By: Kent Overstreet
---
 poc-memory/src/graph.rs | 37 ++++++++++++++++++
 poc-memory/src/main.rs  | 83 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/poc-memory/src/graph.rs b/poc-memory/src/graph.rs
index 818b7f4..6867473 100644
--- a/poc-memory/src/graph.rs
+++ b/poc-memory/src/graph.rs
@@ -64,6 +64,43 @@ impl Graph {
             .unwrap_or_default()
     }
 
+    /// Jaccard similarity between two nodes' neighborhoods.
+    /// Measures overlap: |intersection| / |union| of their neighbor sets.
+    pub fn jaccard(&self, a: &str, b: &str) -> f32 {
+        let na = self.neighbor_keys(a);
+        let nb = self.neighbor_keys(b);
+        let intersection = na.intersection(&nb).count();
+        let union = na.union(&nb).count();
+        if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
+    }
+
+    /// Compute Jaccard-based strength for every edge in the graph.
+    /// Returns (source_key, target_key, jaccard_strength) triples.
+    /// Scales raw Jaccard (typically 0.0-0.3) to a useful range.
+    pub fn jaccard_strengths(&self) -> Vec<(String, String, f32)> {
+        let mut result = Vec::new();
+        let mut seen: HashSet<(&str, &str)> = HashSet::new();
+        for (key, edges) in &self.adj {
+            for edge in edges {
+                // Deduplicate undirected edges via an order-normalized key pair;
+                // borrowed &strs avoid allocating two Strings per edge.
+                let pair = if key < &edge.target {
+                    (key.as_str(), edge.target.as_str())
+                } else {
+                    (edge.target.as_str(), key.as_str())
+                };
+                if !seen.insert(pair) {
+                    continue;
+                }
+                let j = self.jaccard(key, &edge.target);
+                // Scale: raw Jaccard 0.05 → 0.15, 0.15 → 0.45, 0.30 → 0.90
+                // Formula: clamp(j * 3, 0.1, 1.0)
+                let strength = (j * 3.0).clamp(0.1, 1.0);
+                result.push((key.clone(), edge.target.clone(), strength));
+            }
+        }
+        result
+    }
+
     pub fn community_count(&self) -> usize {
         let labels: HashSet<_> = self.communities.values().collect();
         labels.len()
diff --git a/poc-memory/src/main.rs b/poc-memory/src/main.rs
index e1c1849..bbe2384 100644
--- a/poc-memory/src/main.rs
+++ b/poc-memory/src/main.rs
@@ -362,6 +362,13 @@ enum GraphCmd {
         #[arg(default_value_t = 50)]
         max_degree: usize,
     },
+    /// Set link strengths from neighborhood overlap (Jaccard similarity)
+    #[command(name = "normalize-strengths")]
+    NormalizeStrengths {
+        /// Apply changes (default: dry run)
+        #[arg(long)]
+        apply: bool,
+    },
     /// Redistribute hub links to section-level children
     Differentiate {
         /// Specific hub key (omit to list all differentiable hubs)
@@ -742,6 +749,7 @@ fn main() {
         GraphCmd::TriangleClose { min_degree, sim_threshold, max_per_hub } =>
             cmd_triangle_close(min_degree, sim_threshold, max_per_hub),
         GraphCmd::CapDegree { max_degree } => cmd_cap_degree(max_degree),
+        GraphCmd::NormalizeStrengths { apply } => cmd_normalize_strengths(apply),
         GraphCmd::Differentiate { key, apply } =>
             cmd_differentiate(key.as_deref(), apply),
         GraphCmd::Trace { key } => cmd_trace(&key),
@@ -1377,6 +1385,81 @@ fn cmd_cap_degree(max_deg: usize) -> Result<(), String> {
     Ok(())
 }
 
+fn cmd_normalize_strengths(apply: bool) -> Result<(), String> {
+    let mut store = store::Store::load()?;
+    let graph = store.build_graph();
+    let strengths = graph.jaccard_strengths();
+
+    // Build a lookup from (source_key, target_key) → new_strength
+    let mut updates: std::collections::HashMap<(String, String), f32> = std::collections::HashMap::new();
+    for (a, b, s) in &strengths {
+        // Store both directions for easy lookup
+        updates.insert((a.clone(), b.clone()), *s);
+        updates.insert((b.clone(), a.clone()), *s);
+    }
+
+    // Stats
+    let mut changed = 0usize;
+    let mut unchanged = 0usize;
+    let mut temporal_skipped = 0usize;
+    let mut delta_sum: f64 = 0.0;
+
+    // Histogram of new strengths
+    let mut buckets = [0usize; 10]; // 0.0-0.1, 0.1-0.2, ...
+
+    for rel in &mut store.relations {
+        if rel.deleted { continue; }
+
+        // Skip implicit temporal edges (strength 1.0, Auto type)
+        if rel.strength == 1.0 && rel.rel_type == store::RelationType::Auto {
+            temporal_skipped += 1;
+            continue;
+        }
+
+        if let Some(&new_s) = updates.get(&(rel.source_key.clone(), rel.target_key.clone())) {
+            let old_s = rel.strength;
+            let delta = (new_s - old_s).abs();
+            if delta > 0.001 {
+                delta_sum += delta as f64;
+                if apply {
+                    rel.strength = new_s;
+                }
+                changed += 1;
+            } else {
+                unchanged += 1;
+            }
+            let bucket = ((new_s * 10.0) as usize).min(9);
+            buckets[bucket] += 1;
+        }
+    }
+
+    println!("Normalize link strengths (Jaccard similarity)");
+    println!("  Total edges in graph: {}", strengths.len());
+    println!("  {}: {}", if apply { "Changed" } else { "Would change" }, changed);
+    println!("  Unchanged: {}", unchanged);
+    println!("  Temporal (skipped): {}", temporal_skipped);
+    if changed > 0 {
+        println!("  Avg delta: {:.3}", delta_sum / changed as f64);
+    }
+    println!();
+    println!("  Strength distribution:");
+    for (i, &count) in buckets.iter().enumerate() {
+        let lo = i as f32 / 10.0;
+        let hi = lo + 0.1;
+        let bar = "#".repeat(count / 50 + if count > 0 { 1 } else { 0 });
+        println!("    {:.1}-{:.1}: {:5} {}", lo, hi, count, bar);
+    }
+
+    if apply {
+        store.save()?;
+        println!("\nApplied {} strength updates.", changed);
+    } else {
+        println!("\nDry run. Use --apply to write changes.");
+    }
+
+    Ok(())
+}
+
 fn cmd_consolidate_batch(count: usize, auto: bool, agent: Option<String>) -> Result<(), String> {
     let store = store::Store::load()?;