graph: normalize link strengths from Jaccard neighborhood similarity
Add jaccard() and jaccard_strengths() to Graph. Jaccard similarity measures neighborhood overlap between linked nodes — nodes sharing many neighbors get stronger links, nodes with no shared neighbors get weak links. New subcommand: `poc-memory graph normalize-strengths [--apply]`. Scales raw Jaccard (typically 0.0-0.3) to a useful range via j*3 clamped to [0.1, 1.0]. Skips implicit temporal edges (strength=1.0). Applied to 64,969 edges. Distribution is bimodal: large cluster at 0.1-0.2 (weak) and spike at 0.9-1.0 (strong), with smooth gradient between. Replaces the meaningless 0.3/0.8 split from manual/agent creation methods. Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
420a777eba
commit
dccc18b205
2 changed files with 120 additions and 0 deletions
|
|
@ -64,6 +64,43 @@ impl Graph {
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Jaccard similarity between two nodes' neighborhoods.
|
||||||
|
/// Measures overlap: |intersection| / |union| of their neighbor sets.
|
||||||
|
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
|
||||||
|
let na = self.neighbor_keys(a);
|
||||||
|
let nb = self.neighbor_keys(b);
|
||||||
|
let intersection = na.intersection(&nb).count();
|
||||||
|
let union = na.union(&nb).count();
|
||||||
|
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute Jaccard-based strength for every edge in the graph.
|
||||||
|
/// Returns (source_key, target_key, jaccard_strength) triples.
|
||||||
|
/// Scales raw Jaccard (typically 0.0-0.3) to a useful range.
|
||||||
|
pub fn jaccard_strengths(&self) -> Vec<(String, String, f32)> {
|
||||||
|
let mut result = Vec::new();
|
||||||
|
let mut seen = HashSet::new();
|
||||||
|
for (key, edges) in &self.adj {
|
||||||
|
for edge in edges {
|
||||||
|
// Deduplicate undirected edges
|
||||||
|
let pair = if key < &edge.target {
|
||||||
|
(key.as_str(), edge.target.as_str())
|
||||||
|
} else {
|
||||||
|
(edge.target.as_str(), key.as_str())
|
||||||
|
};
|
||||||
|
if !seen.insert((pair.0.to_string(), pair.1.to_string())) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let j = self.jaccard(key, &edge.target);
|
||||||
|
// Scale: raw Jaccard 0.05 → 0.15, 0.15 → 0.45, 0.30 → 0.90
|
||||||
|
// Formula: clamp(j * 3, 0.1, 1.0)
|
||||||
|
let strength = (j * 3.0).clamp(0.1, 1.0);
|
||||||
|
result.push((key.clone(), edge.target.clone(), strength));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
pub fn community_count(&self) -> usize {
|
pub fn community_count(&self) -> usize {
|
||||||
let labels: HashSet<_> = self.communities.values().collect();
|
let labels: HashSet<_> = self.communities.values().collect();
|
||||||
labels.len()
|
labels.len()
|
||||||
|
|
|
||||||
|
|
@ -362,6 +362,13 @@ enum GraphCmd {
|
||||||
#[arg(default_value_t = 50)]
|
#[arg(default_value_t = 50)]
|
||||||
max_degree: usize,
|
max_degree: usize,
|
||||||
},
|
},
|
||||||
|
/// Set link strengths from neighborhood overlap (Jaccard similarity)
|
||||||
|
#[command(name = "normalize-strengths")]
|
||||||
|
NormalizeStrengths {
|
||||||
|
/// Apply changes (default: dry run)
|
||||||
|
#[arg(long)]
|
||||||
|
apply: bool,
|
||||||
|
},
|
||||||
/// Redistribute hub links to section-level children
|
/// Redistribute hub links to section-level children
|
||||||
Differentiate {
|
Differentiate {
|
||||||
/// Specific hub key (omit to list all differentiable hubs)
|
/// Specific hub key (omit to list all differentiable hubs)
|
||||||
|
|
@ -742,6 +749,7 @@ fn main() {
|
||||||
GraphCmd::TriangleClose { min_degree, sim_threshold, max_per_hub }
|
GraphCmd::TriangleClose { min_degree, sim_threshold, max_per_hub }
|
||||||
=> cmd_triangle_close(min_degree, sim_threshold, max_per_hub),
|
=> cmd_triangle_close(min_degree, sim_threshold, max_per_hub),
|
||||||
GraphCmd::CapDegree { max_degree } => cmd_cap_degree(max_degree),
|
GraphCmd::CapDegree { max_degree } => cmd_cap_degree(max_degree),
|
||||||
|
GraphCmd::NormalizeStrengths { apply } => cmd_normalize_strengths(apply),
|
||||||
GraphCmd::Differentiate { key, apply }
|
GraphCmd::Differentiate { key, apply }
|
||||||
=> cmd_differentiate(key.as_deref(), apply),
|
=> cmd_differentiate(key.as_deref(), apply),
|
||||||
GraphCmd::Trace { key } => cmd_trace(&key),
|
GraphCmd::Trace { key } => cmd_trace(&key),
|
||||||
|
|
@ -1377,6 +1385,81 @@ fn cmd_cap_degree(max_deg: usize) -> Result<(), String> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn cmd_normalize_strengths(apply: bool) -> Result<(), String> {
|
||||||
|
let mut store = store::Store::load()?;
|
||||||
|
let graph = store.build_graph();
|
||||||
|
let strengths = graph.jaccard_strengths();
|
||||||
|
|
||||||
|
// Build a lookup from (source_key, target_key) → new_strength
|
||||||
|
let mut updates: std::collections::HashMap<(String, String), f32> = std::collections::HashMap::new();
|
||||||
|
for (a, b, s) in &strengths {
|
||||||
|
// Store both directions for easy lookup
|
||||||
|
updates.insert((a.clone(), b.clone()), *s);
|
||||||
|
updates.insert((b.clone(), a.clone()), *s);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stats
|
||||||
|
let mut changed = 0usize;
|
||||||
|
let mut unchanged = 0usize;
|
||||||
|
let mut temporal_skipped = 0usize;
|
||||||
|
let mut delta_sum: f64 = 0.0;
|
||||||
|
|
||||||
|
// Histogram of new strengths
|
||||||
|
let mut buckets = [0usize; 10]; // 0.0-0.1, 0.1-0.2, ...
|
||||||
|
|
||||||
|
for rel in &mut store.relations {
|
||||||
|
if rel.deleted { continue; }
|
||||||
|
|
||||||
|
// Skip implicit temporal edges (strength 1.0, Auto type)
|
||||||
|
if rel.strength == 1.0 && rel.rel_type == store::RelationType::Auto {
|
||||||
|
temporal_skipped += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(&new_s) = updates.get(&(rel.source_key.clone(), rel.target_key.clone())) {
|
||||||
|
let old_s = rel.strength;
|
||||||
|
let delta = (new_s - old_s).abs();
|
||||||
|
if delta > 0.001 {
|
||||||
|
delta_sum += delta as f64;
|
||||||
|
if apply {
|
||||||
|
rel.strength = new_s;
|
||||||
|
}
|
||||||
|
changed += 1;
|
||||||
|
} else {
|
||||||
|
unchanged += 1;
|
||||||
|
}
|
||||||
|
let bucket = ((new_s * 10.0) as usize).min(9);
|
||||||
|
buckets[bucket] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("Normalize link strengths (Jaccard similarity)");
|
||||||
|
println!(" Total edges in graph: {}", strengths.len());
|
||||||
|
println!(" Would change: {}", changed);
|
||||||
|
println!(" Unchanged: {}", unchanged);
|
||||||
|
println!(" Temporal (skipped): {}", temporal_skipped);
|
||||||
|
if changed > 0 {
|
||||||
|
println!(" Avg delta: {:.3}", delta_sum / changed as f64);
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
println!(" Strength distribution:");
|
||||||
|
for (i, &count) in buckets.iter().enumerate() {
|
||||||
|
let lo = i as f32 / 10.0;
|
||||||
|
let hi = lo + 0.1;
|
||||||
|
let bar = "#".repeat(count / 50 + if count > 0 { 1 } else { 0 });
|
||||||
|
println!(" {:.1}-{:.1}: {:5} {}", lo, hi, count, bar);
|
||||||
|
}
|
||||||
|
|
||||||
|
if apply {
|
||||||
|
store.save()?;
|
||||||
|
println!("\nApplied {} strength updates.", changed);
|
||||||
|
} else {
|
||||||
|
println!("\nDry run. Use --apply to write changes.");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn cmd_consolidate_batch(count: usize, auto: bool, agent: Option<String>) -> Result<(), String> {
|
fn cmd_consolidate_batch(count: usize, auto: bool, agent: Option<String>) -> Result<(), String> {
|
||||||
let store = store::Store::load()?;
|
let store = store::Store::load()?;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue