// Graph algorithms: clustering coefficient, community detection (label // propagation), schema fit scoring, small-world metrics, consolidation // priority scoring. // // The Graph is built from the Store's nodes + relations. Edges are // undirected for clustering/community (even causal edges count as // connections), but relation type and direction are preserved for // specific queries. use crate::store::{Store, RelationType, StoreView}; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet, VecDeque}; /// Weighted edge in the graph #[derive(Clone, Debug)] pub struct Edge { pub target: String, pub strength: f32, pub rel_type: RelationType, } /// The in-memory graph built from store nodes + relations pub struct Graph { /// Adjacency list: node key → list of edges adj: HashMap>, /// All node keys keys: HashSet, /// Community labels (from label propagation) communities: HashMap, } impl Graph { pub fn nodes(&self) -> &HashSet { &self.keys } pub fn degree(&self, key: &str) -> usize { self.adj.get(key).map(|e| e.len()).unwrap_or(0) } pub fn edge_count(&self) -> usize { self.adj.values().map(|e| e.len()).sum::() / 2 } /// All edges for a node (full Edge data including rel_type) pub fn edges_of(&self, key: &str) -> &[Edge] { self.adj.get(key) .map(|v| v.as_slice()) .unwrap_or(&[]) } /// All neighbor keys with strengths pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> { self.adj.get(key) .map(|edges| edges.iter().map(|e| (&e.target, e.strength)).collect()) .unwrap_or_default() } /// Just neighbor keys pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> { self.adj.get(key) .map(|edges| edges.iter().map(|e| e.target.as_str()).collect()) .unwrap_or_default() } pub fn community_count(&self) -> usize { let labels: HashSet<_> = self.communities.values().collect(); labels.len() } pub fn communities(&self) -> &HashMap { &self.communities } /// Hub degree threshold: top 5% by degree pub fn hub_threshold(&self) -> usize { let mut degrees: Vec = 
self.keys.iter() .map(|k| self.degree(k)) .collect(); degrees.sort_unstable(); if degrees.len() >= 20 { degrees[degrees.len() * 95 / 100] } else { usize::MAX } } /// Local clustering coefficient: fraction of a node's neighbors /// that are also neighbors of each other. /// cc(v) = 2E / (deg * (deg - 1)) pub fn clustering_coefficient(&self, key: &str) -> f32 { let neighbors = self.neighbor_keys(key); let deg = neighbors.len(); if deg < 2 { return 0.0; } let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect(); let mut triangles = 0u32; for i in 0..neighbor_vec.len() { for j in (i + 1)..neighbor_vec.len() { let ni_neighbors = self.neighbor_keys(neighbor_vec[i]); if ni_neighbors.contains(neighbor_vec[j]) { triangles += 1; } } } (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0)) } /// Average clustering coefficient across all nodes with deg >= 2 pub fn avg_clustering_coefficient(&self) -> f32 { let mut sum = 0.0f32; let mut count = 0u32; for key in &self.keys { if self.degree(key) >= 2 { sum += self.clustering_coefficient(key); count += 1; } } if count == 0 { 0.0 } else { sum / count as f32 } } /// Average shortest path length (sampled BFS from up to 100 nodes) pub fn avg_path_length(&self) -> f32 { let sample: Vec<&String> = self.keys.iter().take(100).collect(); if sample.is_empty() { return 0.0; } let mut total_dist = 0u64; let mut total_pairs = 0u64; for &start in &sample { let dists = self.bfs_distances(start); for d in dists.values() { if *d > 0 { total_dist += *d as u64; total_pairs += 1; } } } if total_pairs == 0 { 0.0 } else { total_dist as f32 / total_pairs as f32 } } fn bfs_distances(&self, start: &str) -> HashMap { let mut dist = HashMap::new(); let mut queue = VecDeque::new(); dist.insert(start.to_string(), 0u32); queue.push_back(start.to_string()); while let Some(node) = queue.pop_front() { let d = dist[&node]; for neighbor in self.neighbor_keys(&node) { if !dist.contains_key(neighbor) { dist.insert(neighbor.to_string(), d + 1); 
queue.push_back(neighbor.to_string()); } } } dist } /// Power-law exponent α of the degree distribution. /// /// Estimated via MLE: α = 1 + n / Σ ln(k_i / (k_min - 0.5)) /// α ≈ 2: extreme hub dominance (fragile) /// α ≈ 3: healthy scale-free /// α > 3: approaching random graph (egalitarian) pub fn degree_power_law_exponent(&self) -> f32 { let mut degrees: Vec = self.keys.iter() .map(|k| self.degree(k)) .filter(|&d| d > 0) // exclude isolates .collect(); if degrees.len() < 10 { return 0.0; } // not enough data degrees.sort_unstable(); let k_min = degrees[0] as f64; if k_min < 1.0 { return 0.0; } let n = degrees.len() as f64; let sum_ln: f64 = degrees.iter() .map(|&k| (k as f64 / (k_min - 0.5)).ln()) .sum(); if sum_ln <= 0.0 { return 0.0; } (1.0 + n / sum_ln) as f32 } /// Gini coefficient of the degree distribution. /// /// 0 = perfectly egalitarian (all nodes same degree) /// 1 = maximally unequal (one node has all edges) /// Measures hub concentration independent of distribution shape. 
pub fn degree_gini(&self) -> f32 { let mut degrees: Vec = self.keys.iter() .map(|k| self.degree(k) as f64) .collect(); let n = degrees.len(); if n < 2 { return 0.0; } degrees.sort_by(|a, b| a.total_cmp(b)); let mean = degrees.iter().sum::() / n as f64; if mean < 1e-10 { return 0.0; } // Gini = (2 Σ i·x_i) / (n Σ x_i) - (n+1)/n let weighted_sum: f64 = degrees.iter().enumerate() .map(|(i, &d)| (i as f64 + 1.0) * d) .sum(); let total = degrees.iter().sum::(); let gini = (2.0 * weighted_sum) / (n as f64 * total) - (n as f64 + 1.0) / n as f64; gini.max(0.0) as f32 } /// Small-world coefficient σ = (C/C_rand) / (L/L_rand) /// C_rand ≈ /n, L_rand ≈ ln(n)/ln() pub fn small_world_sigma(&self) -> f32 { let n = self.keys.len() as f32; if n < 10.0 { return 0.0; } let avg_degree = self.adj.values() .map(|e| e.len() as f32) .sum::() / n; if avg_degree < 1.0 { return 0.0; } let c = self.avg_clustering_coefficient(); let l = self.avg_path_length(); let c_rand = avg_degree / n; let l_rand = n.ln() / avg_degree.ln(); if c_rand < 1e-10 || l_rand < 1e-10 || l < 1e-10 { return 0.0; } (c / c_rand) / (l / l_rand) } } /// Impact of adding a hypothetical edge #[derive(Debug)] pub struct LinkImpact { pub source: String, pub target: String, pub source_deg: usize, pub target_deg: usize, /// Is this a hub link? (either endpoint in top 5% by degree) pub is_hub_link: bool, /// Are both endpoints in the same community? pub same_community: bool, /// Change in clustering coefficient for source pub delta_cc_source: f32, /// Change in clustering coefficient for target pub delta_cc_target: f32, /// Change in degree Gini (positive = more hub-dominated) pub delta_gini: f32, /// Qualitative assessment pub assessment: &'static str, } impl Graph { /// Simulate adding an edge and report impact on topology metrics. /// /// Doesn't modify the graph — computes what would change if the /// edge were added. 
pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact { let source_deg = self.degree(source); let target_deg = self.degree(target); let hub_threshold = self.hub_threshold(); let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold; // Community check let sc = self.communities.get(source); let tc = self.communities.get(target); let same_community = match (sc, tc) { (Some(a), Some(b)) => a == b, _ => false, }; // CC change for source: adding target as neighbor changes the // triangle count. New triangles form for each node that's a // neighbor of BOTH source and target. let source_neighbors = self.neighbor_keys(source); let target_neighbors = self.neighbor_keys(target); let shared_neighbors = source_neighbors.intersection(&target_neighbors).count(); let cc_before_source = self.clustering_coefficient(source); let cc_before_target = self.clustering_coefficient(target); // Estimate new CC for source after adding edge let new_source_deg = source_deg + 1; let new_source_triangles = if source_deg >= 2 { // Current triangles + new ones from shared neighbors let current_triangles = (cc_before_source * source_deg as f32 * (source_deg as f32 - 1.0) / 2.0) as u32; current_triangles + shared_neighbors as u32 } else { shared_neighbors as u32 }; let cc_after_source = if new_source_deg >= 2 { (2.0 * new_source_triangles as f32) / (new_source_deg as f32 * (new_source_deg as f32 - 1.0)) } else { 0.0 }; let new_target_deg = target_deg + 1; let new_target_triangles = if target_deg >= 2 { let current_triangles = (cc_before_target * target_deg as f32 * (target_deg as f32 - 1.0) / 2.0) as u32; current_triangles + shared_neighbors as u32 } else { shared_neighbors as u32 }; let cc_after_target = if new_target_deg >= 2 { (2.0 * new_target_triangles as f32) / (new_target_deg as f32 * (new_target_deg as f32 - 1.0)) } else { 0.0 }; // Gini change via influence function: // IF(x; Gini, F) = (2F(x) - 1) * x/μ - Gini - 1 // Adding an edge increments two degrees. 
The net ΔGini is the sum // of influence contributions from both endpoints shifting up by 1. let gini_before = self.degree_gini(); let n = self.keys.len(); let total_degree: f64 = self.keys.iter() .map(|k| self.degree(k) as f64) .sum(); let mean_deg = if n > 0 { total_degree / n as f64 } else { 1.0 }; // CDF at each endpoint's degree: fraction of nodes with degree ≤ d let delta_gini = if mean_deg > 1e-10 && n >= 2 { // Count nodes with degree ≤ source_deg and ≤ target_deg let f_source = self.keys.iter() .filter(|k| self.degree(k) <= source_deg) .count() as f64 / n as f64; let f_target = self.keys.iter() .filter(|k| self.degree(k) <= target_deg) .count() as f64 / n as f64; // Influence of incrementing source's degree by 1 let new_source = (source_deg + 1) as f64; let if_source = (2.0 * f_source - 1.0) * new_source / mean_deg - gini_before as f64 - 1.0; // Influence of incrementing target's degree by 1 let new_target = (target_deg + 1) as f64; let if_target = (2.0 * f_target - 1.0) * new_target / mean_deg - gini_before as f64 - 1.0; // Scale: each point contributes 1/n to the distribution ((if_source + if_target) / n as f64) as f32 } else { 0.0f32 }; // Qualitative assessment let assessment = if is_hub_link && same_community { "hub-reinforcing: strengthens existing star topology" } else if is_hub_link && !same_community { "hub-bridging: cross-community but through a hub" } else if !is_hub_link && same_community && shared_neighbors > 0 { "lateral-clustering: strengthens local mesh topology" } else if !is_hub_link && !same_community { "lateral-bridging: best kind — cross-community lateral link" } else if !is_hub_link && same_community { "lateral-local: connects peripheral nodes in same community" } else { "neutral" }; LinkImpact { source: source.to_string(), target: target.to_string(), source_deg, target_deg, is_hub_link, same_community, delta_cc_source: cc_after_source - cc_before_source, delta_cc_target: cc_after_target - cc_before_target, delta_gini, assessment, } } 
}

/// Build graph from store data (with community detection)
pub fn build_graph(store: &impl StoreView) -> Graph {
    let (adj, keys) = build_adjacency(store);
    let communities = label_propagation(&keys, &adj, 20);
    Graph { adj, keys, communities }
}

/// Build graph without community detection — for spreading activation
/// searches where we only need the adjacency list.
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
    let (adj, keys) = build_adjacency(store);
    Graph { adj, keys, communities: HashMap::new() }
}

/// Collect node keys and an undirected adjacency list from the store.
/// Relations referencing unknown nodes are skipped; each relation is
/// mirrored onto both endpoints.
fn build_adjacency(store: &impl StoreView)
    -> (HashMap<String, Vec<Edge>>, HashSet<String>)
{
    let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
    let mut keys: HashSet<String> = HashSet::new();
    store.for_each_node(|key, _, _| {
        keys.insert(key.to_owned());
    });
    store.for_each_relation(|source_key, target_key, strength, rel_type| {
        if !keys.contains(source_key) || !keys.contains(target_key) {
            return;
        }
        adj.entry(source_key.to_owned()).or_default().push(Edge {
            target: target_key.to_owned(),
            strength,
            rel_type,
        });
        adj.entry(target_key.to_owned()).or_default().push(Edge {
            target: source_key.to_owned(),
            strength,
            rel_type,
        });
    });
    (adj, keys)
}

/// Label propagation community detection.
///
/// Each node starts with its own label. Each iteration: adopt the most
/// common label among neighbors (weighted by edge strength). Iterate
/// until stable or max_iterations.
///
/// Iteration order and tie-breaking are deterministic (sorted keys,
/// lowest label wins a vote tie), so the same store yields the same
/// partition on every run.
fn label_propagation(
    keys: &HashSet<String>,
    adj: &HashMap<String, Vec<Edge>>,
    max_iterations: u32,
) -> HashMap<String, u32> {
    // Only consider edges above this strength for community votes.
    // Weak auto-links from triangle closure (0.15-0.35) bridge
    // unrelated clusters — filtering them lets natural communities emerge.
    let min_strength: f32 = 0.3;

    // Initialize: each node gets its own label.
    // Sort so initial labels and sweep order don't depend on HashSet order.
    let mut key_vec: Vec<String> = keys.iter().cloned().collect();
    key_vec.sort_unstable();
    let mut labels: HashMap<String, u32> = key_vec.iter()
        .enumerate()
        .map(|(i, k)| (k.clone(), i as u32))
        .collect();

    for _iter in 0..max_iterations {
        let mut changed = false;
        for key in &key_vec {
            let edges = match adj.get(key) {
                Some(e) => e,
                None => continue,
            };
            if edges.is_empty() {
                continue;
            }
            // Count weighted votes for each label (skip weak edges)
            let mut votes: HashMap<u32, f32> = HashMap::new();
            for edge in edges {
                if edge.strength < min_strength {
                    continue;
                }
                if let Some(&label) = labels.get(&edge.target) {
                    *votes.entry(label).or_default() += edge.strength;
                }
            }
            // Adopt the label with most votes; on a tie prefer the
            // smallest label id (deterministic, unlike HashMap order).
            if let Some((&best_label, _)) = votes.iter()
                .max_by(|a, b| a.1.total_cmp(b.1).then_with(|| b.0.cmp(a.0)))
            {
                let current = labels[key];
                if best_label != current {
                    labels.insert(key.clone(), best_label);
                    changed = true;
                }
            }
        }
        if !changed {
            break;
        }
    }

    // Compact labels to 0..n
    let mut label_map: HashMap<u32, u32> = HashMap::new();
    let mut next_id = 0u32;
    for label in labels.values_mut() {
        let new_label = *label_map.entry(*label).or_insert_with(|| {
            let id = next_id;
            next_id += 1;
            id
        });
        *label = new_label;
    }
    labels
}

/// A snapshot of graph topology metrics, for tracking evolution over time
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetricsSnapshot {
    pub timestamp: f64,
    pub date: String,
    pub nodes: usize,
    pub edges: usize,
    pub communities: usize,
    pub sigma: f32,
    pub alpha: f32,
    pub gini: f32,
    pub avg_cc: f32,
    pub avg_path_length: f32,
    // Removed: avg_schema_fit was identical to avg_cc.
    // Old snapshots with the field still deserialize (serde ignores unknown fields by default).
}

/// Path of the append-only JSONL metrics log (`~/.claude/memory/metrics.jsonl`).
fn metrics_log_path() -> std::path::PathBuf {
    let home = std::env::var("HOME").unwrap_or_default();
    std::path::PathBuf::from(home).join(".claude/memory/metrics.jsonl")
}

/// Load previous metrics snapshots.
/// Missing file or unparseable lines are silently skipped (best-effort).
pub fn load_metrics_history() -> Vec<MetricsSnapshot> {
    let path = metrics_log_path();
    let content = match std::fs::read_to_string(&path) {
        Ok(c) => c,
        Err(_) => return Vec::new(),
    };
    content.lines()
        .filter_map(|line| serde_json::from_str(line).ok())
        .collect()
}

/// Append a metrics snapshot to the log (best-effort; errors ignored).
pub fn save_metrics_snapshot(snap: &MetricsSnapshot) {
    let path = metrics_log_path();
    // Make sure the log directory exists — otherwise the open below
    // silently fails on a fresh machine.
    if let Some(dir) = path.parent() {
        let _ = std::fs::create_dir_all(dir);
    }
    if let Ok(json) = serde_json::to_string(snap) {
        use std::io::Write;
        if let Ok(mut f) = std::fs::OpenOptions::new()
            .create(true).append(true).open(&path)
        {
            let _ = writeln!(f, "{}", json);
        }
    }
}

/// Health report: summary of graph metrics.
///
/// Also appends a snapshot to the metrics log as a side effect, then
/// shows deltas against the previous snapshot when one exists.
pub fn health_report(graph: &Graph, store: &Store) -> String {
    let n = graph.nodes().len();
    let e = graph.edge_count();
    let avg_cc = graph.avg_clustering_coefficient();
    let avg_pl = graph.avg_path_length();
    let sigma = graph.small_world_sigma();
    let communities = graph.community_count();

    // Community sizes
    let mut comm_sizes: HashMap<u32, usize> = HashMap::new();
    for label in graph.communities().values() {
        *comm_sizes.entry(*label).or_default() += 1;
    }
    let mut sizes: Vec<usize> = comm_sizes.values().copied().collect();
    sizes.sort_unstable_by(|a, b| b.cmp(a));

    // Degree distribution
    let mut degrees: Vec<usize> = graph.nodes().iter()
        .map(|k| graph.degree(k))
        .collect();
    degrees.sort_unstable();
    let max_deg = degrees.last().copied().unwrap_or(0);
    let median_deg = if degrees.is_empty() { 0 } else { degrees[degrees.len() / 2] };
    let avg_deg = if n == 0 {
        0.0
    } else {
        degrees.iter().sum::<usize>() as f64 / n as f64
    };

    // Topology metrics
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();

    // Low-CC nodes: poorly integrated
    let low_cc = graph.nodes().iter()
        .filter(|k| graph.clustering_coefficient(k) < 0.1)
        .count();

    // Category breakdown
    let cats = store.category_counts();

    // Snapshot current metrics and log
    let now = crate::store::now_epoch();
    let date = crate::store::format_datetime_space(now);
    let snap = MetricsSnapshot {
        timestamp: now,
        date: date.clone(),
        nodes: n,
        edges: e,
        communities,
        sigma,
        alpha,
        gini,
        avg_cc,
        avg_path_length: avg_pl,
    };
    save_metrics_snapshot(&snap);

    // Load history for deltas
    let history = load_metrics_history();
    let prev = if history.len() >= 2 {
        Some(&history[history.len() - 2]) // second-to-last (last is the one we just wrote)
    } else {
        None
    };

    // Format a "(Δ±x.xxx)" suffix when a previous value exists and differs.
    fn delta(current: f32, prev: Option<f32>) -> String {
        match prev {
            Some(p) => {
                let d = current - p;
                if d.abs() < 0.001 {
                    String::new()
                } else {
                    format!(" (Δ{:+.3})", d)
                }
            }
            None => String::new(),
        }
    }
    let sigma_d = delta(sigma, prev.map(|p| p.sigma));
    let alpha_d = delta(alpha, prev.map(|p| p.alpha));
    let gini_d = delta(gini, prev.map(|p| p.gini));
    let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));

    let mut report = format!(
        "Memory Health Report
====================
Nodes: {n}
Relations: {e}
Communities: {communities}
Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
Clustering coefficient (avg): {avg_cc:.4}{cc_d}
  low-CC (<0.1): {low_cc} nodes
Average path length: {avg_pl:.2}
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)

Community sizes (top 5): {top5}

Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
        top5 = sizes.iter().take(5)
            .map(|s| s.to_string())
            .collect::<Vec<_>>()
            .join(", "),
        core = cats.get("core").unwrap_or(&0),
        tech = cats.get("tech").unwrap_or(&0),
        gen = cats.get("gen").unwrap_or(&0),
        obs = cats.get("obs").unwrap_or(&0),
        task = cats.get("task").unwrap_or(&0),
    );

    // Show history trend if we have enough data points
    if history.len() >= 3 {
        report.push_str("\n\nMetrics history (last 5):\n");
        for snap in &history[history.len().saturating_sub(5)..] {
            report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
                snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
        }
    }

    report
}