graph: normalize link strengths from Jaccard neighborhood similarity
Add jaccard() and jaccard_strengths() to Graph. Jaccard similarity measures neighborhood overlap between linked nodes — nodes sharing many neighbors get stronger links, while nodes with no shared neighbors get weak links. New subcommand: `poc-memory graph normalize-strengths [--apply]`. It scales raw Jaccard values (typically 0.0-0.3) to a useful range via j*3, clamped to [0.1, 1.0], and skips implicit temporal edges (strength=1.0). Applied to 64,969 edges. The resulting distribution is bimodal: a large cluster at 0.1-0.2 (weak) and a spike at 0.9-1.0 (strong), with a smooth gradient between. This replaces the meaningless 0.3/0.8 split from the manual/agent creation methods. Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
420a777eba
commit
dccc18b205
2 changed files with 120 additions and 0 deletions
|
|
@ -362,6 +362,13 @@ enum GraphCmd {
|
|||
#[arg(default_value_t = 50)]
|
||||
max_degree: usize,
|
||||
},
|
||||
/// Set link strengths from neighborhood overlap (Jaccard similarity)
|
||||
#[command(name = "normalize-strengths")]
|
||||
NormalizeStrengths {
|
||||
/// Apply changes (default: dry run)
|
||||
#[arg(long)]
|
||||
apply: bool,
|
||||
},
|
||||
/// Redistribute hub links to section-level children
|
||||
Differentiate {
|
||||
/// Specific hub key (omit to list all differentiable hubs)
|
||||
|
|
@ -742,6 +749,7 @@ fn main() {
|
|||
GraphCmd::TriangleClose { min_degree, sim_threshold, max_per_hub }
|
||||
=> cmd_triangle_close(min_degree, sim_threshold, max_per_hub),
|
||||
GraphCmd::CapDegree { max_degree } => cmd_cap_degree(max_degree),
|
||||
GraphCmd::NormalizeStrengths { apply } => cmd_normalize_strengths(apply),
|
||||
GraphCmd::Differentiate { key, apply }
|
||||
=> cmd_differentiate(key.as_deref(), apply),
|
||||
GraphCmd::Trace { key } => cmd_trace(&key),
|
||||
|
|
@ -1377,6 +1385,81 @@ fn cmd_cap_degree(max_deg: usize) -> Result<(), String> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_normalize_strengths(apply: bool) -> Result<(), String> {
|
||||
let mut store = store::Store::load()?;
|
||||
let graph = store.build_graph();
|
||||
let strengths = graph.jaccard_strengths();
|
||||
|
||||
// Build a lookup from (source_key, target_key) → new_strength
|
||||
let mut updates: std::collections::HashMap<(String, String), f32> = std::collections::HashMap::new();
|
||||
for (a, b, s) in &strengths {
|
||||
// Store both directions for easy lookup
|
||||
updates.insert((a.clone(), b.clone()), *s);
|
||||
updates.insert((b.clone(), a.clone()), *s);
|
||||
}
|
||||
|
||||
// Stats
|
||||
let mut changed = 0usize;
|
||||
let mut unchanged = 0usize;
|
||||
let mut temporal_skipped = 0usize;
|
||||
let mut delta_sum: f64 = 0.0;
|
||||
|
||||
// Histogram of new strengths
|
||||
let mut buckets = [0usize; 10]; // 0.0-0.1, 0.1-0.2, ...
|
||||
|
||||
for rel in &mut store.relations {
|
||||
if rel.deleted { continue; }
|
||||
|
||||
// Skip implicit temporal edges (strength 1.0, Auto type)
|
||||
if rel.strength == 1.0 && rel.rel_type == store::RelationType::Auto {
|
||||
temporal_skipped += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(&new_s) = updates.get(&(rel.source_key.clone(), rel.target_key.clone())) {
|
||||
let old_s = rel.strength;
|
||||
let delta = (new_s - old_s).abs();
|
||||
if delta > 0.001 {
|
||||
delta_sum += delta as f64;
|
||||
if apply {
|
||||
rel.strength = new_s;
|
||||
}
|
||||
changed += 1;
|
||||
} else {
|
||||
unchanged += 1;
|
||||
}
|
||||
let bucket = ((new_s * 10.0) as usize).min(9);
|
||||
buckets[bucket] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
println!("Normalize link strengths (Jaccard similarity)");
|
||||
println!(" Total edges in graph: {}", strengths.len());
|
||||
println!(" Would change: {}", changed);
|
||||
println!(" Unchanged: {}", unchanged);
|
||||
println!(" Temporal (skipped): {}", temporal_skipped);
|
||||
if changed > 0 {
|
||||
println!(" Avg delta: {:.3}", delta_sum / changed as f64);
|
||||
}
|
||||
println!();
|
||||
println!(" Strength distribution:");
|
||||
for (i, &count) in buckets.iter().enumerate() {
|
||||
let lo = i as f32 / 10.0;
|
||||
let hi = lo + 0.1;
|
||||
let bar = "#".repeat(count / 50 + if count > 0 { 1 } else { 0 });
|
||||
println!(" {:.1}-{:.1}: {:5} {}", lo, hi, count, bar);
|
||||
}
|
||||
|
||||
if apply {
|
||||
store.save()?;
|
||||
println!("\nApplied {} strength updates.", changed);
|
||||
} else {
|
||||
println!("\nDry run. Use --apply to write changes.");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_consolidate_batch(count: usize, auto: bool, agent: Option<String>) -> Result<(), String> {
|
||||
let store = store::Store::load()?;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue