consciousness/poc-memory/src/graph.rs
ProofOfConcept dccc18b205 graph: normalize link strengths from Jaccard neighborhood similarity
Add jaccard() and jaccard_strengths() to Graph. Jaccard similarity
measures neighborhood overlap between linked nodes — nodes sharing
many neighbors get stronger links, nodes with no shared neighbors
get weak links.

New subcommand: `poc-memory graph normalize-strengths [--apply]`

Scales raw Jaccard (typically 0.0-0.3) to useful range via j*3
clamped to [0.1, 1.0]. Skips implicit temporal edges (strength=1.0).

Applied to 64,969 edges. Distribution is bimodal: large cluster at
0.1-0.2 (weak) and spike at 0.9-1.0 (strong), with smooth gradient
between. Replaces the meaningless 0.3/0.8 split from manual/agent
creation methods.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-03-14 11:13:58 -04:00

916 lines
32 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Graph algorithms: clustering coefficient, community detection (label
// propagation), schema fit scoring, small-world metrics, consolidation
// priority scoring.
//
// The Graph is built from the Store's nodes + relations. Edges are
// undirected for clustering/community (even causal edges count as
// connections), but relation type and direction are preserved for
// specific queries.
use crate::store::{Store, RelationType, StoreView};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
/// Weighted edge in the graph.
///
/// An `Edge` lives in the adjacency list of its source node, so only the
/// far endpoint is recorded here.
#[derive(Clone, Debug)]
pub struct Edge {
/// Key of the node this edge leads to.
pub target: String,
/// Edge weight. Implicit temporal edges are created with 1.0.
pub strength: f32,
/// Relation type of the edge (`RelationType::Auto` for implicit temporal edges).
pub rel_type: RelationType,
}
/// The in-memory graph built from store nodes + relations.
///
/// `build_adjacency` inserts every relation once per endpoint, so `adj`
/// is a symmetric (undirected) adjacency list.
pub struct Graph {
/// Adjacency list: node key → list of edges
adj: HashMap<String, Vec<Edge>>,
/// All node keys
keys: HashSet<String>,
/// Community labels (from label propagation); left empty by `build_graph_fast`
communities: HashMap<String, u32>,
}
impl Graph {
    /// All node keys in the graph.
    pub fn nodes(&self) -> &HashSet<String> {
        &self.keys
    }

    /// Number of adjacency entries stored for `key` (0 if the key has none).
    pub fn degree(&self, key: &str) -> usize {
        self.adj.get(key).map_or(0, Vec::len)
    }

    /// Number of undirected edges.
    ///
    /// Every edge is stored once per endpoint, hence the division by two.
    pub fn edge_count(&self) -> usize {
        self.adj.values().map(Vec::len).sum::<usize>() / 2
    }

    /// All edges for a node (full Edge data including rel_type).
    /// Returns an empty slice for keys without adjacency entries.
    pub fn edges_of(&self, key: &str) -> &[Edge] {
        self.adj.get(key)
            .map(|v| v.as_slice())
            .unwrap_or(&[])
    }

    /// All neighbor keys with strengths, one entry per stored edge.
    pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> {
        self.adj.get(key)
            .map(|edges| edges.iter().map(|e| (&e.target, e.strength)).collect())
            .unwrap_or_default()
    }

    /// Just neighbor keys, deduplicated into a set.
    pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
        self.adj.get(key)
            .map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
            .unwrap_or_default()
    }

    /// Jaccard similarity between two nodes' neighborhoods.
    ///
    /// Measures overlap: |intersection| / |union| of their neighbor sets.
    /// Returns 0.0 when both neighbor sets are empty.
    pub fn jaccard(&self, a: &str, b: &str) -> f32 {
        let na = self.neighbor_keys(a);
        let nb = self.neighbor_keys(b);
        let intersection = na.intersection(&nb).count();
        let union = na.union(&nb).count();
        if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
    }

    /// Compute Jaccard-based strength for every edge in the graph.
    ///
    /// Returns (source_key, target_key, jaccard_strength) triples, one per
    /// undirected node pair. Raw Jaccard (typically 0.0-0.3) is scaled to a
    /// useful range via `clamp(j * 3, 0.1, 1.0)`.
    ///
    /// Every pair present in the adjacency list is reported; no filtering
    /// by relation type or existing strength happens here.
    pub fn jaccard_strengths(&self) -> Vec<(String, String, f32)> {
        let mut result = Vec::new();
        // Borrowed keys are enough to deduplicate undirected edges, so no
        // Strings need to be allocated just for the `seen` set.
        let mut seen: HashSet<(&str, &str)> = HashSet::new();
        for (key, edges) in &self.adj {
            for edge in edges {
                let a = key.as_str();
                let b = edge.target.as_str();
                // Canonical (smaller, larger) ordering identifies the pair
                // regardless of which endpoint we reached it from.
                let pair = if a < b { (a, b) } else { (b, a) };
                if !seen.insert(pair) {
                    continue;
                }
                let j = self.jaccard(a, b);
                // Scale: raw Jaccard 0.05 → 0.15, 0.15 → 0.45, 0.30 → 0.90
                let strength = (j * 3.0).clamp(0.1, 1.0);
                result.push((key.clone(), edge.target.clone(), strength));
            }
        }
        result
    }

    /// Number of distinct community labels.
    pub fn community_count(&self) -> usize {
        let labels: HashSet<_> = self.communities.values().collect();
        labels.len()
    }

    /// Community label of every node that has one.
    pub fn communities(&self) -> &HashMap<String, u32> {
        &self.communities
    }

    /// Hub degree threshold: top 5% by degree.
    ///
    /// With fewer than 20 nodes there is no meaningful percentile, so
    /// `usize::MAX` is returned and no node qualifies as a hub.
    pub fn hub_threshold(&self) -> usize {
        let mut degrees: Vec<usize> = self.keys.iter()
            .map(|k| self.degree(k))
            .collect();
        degrees.sort_unstable();
        if degrees.len() >= 20 {
            degrees[degrees.len() * 95 / 100]
        } else {
            usize::MAX
        }
    }

    /// Local clustering coefficient: fraction of a node's neighbors
    /// that are also neighbors of each other.
    /// cc(v) = 2E / (deg * (deg - 1))
    ///
    /// `deg` is the number of distinct neighbors; nodes with fewer than
    /// two neighbors score 0.0.
    pub fn clustering_coefficient(&self, key: &str) -> f32 {
        let neighbors = self.neighbor_keys(key);
        let deg = neighbors.len();
        if deg < 2 {
            return 0.0;
        }
        let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
        let mut triangles = 0u32;
        for (i, &ni) in neighbor_vec.iter().enumerate() {
            // Build ni's neighbor set once per outer iteration instead of
            // once per (i, j) pair.
            let ni_neighbors = self.neighbor_keys(ni);
            for &nj in &neighbor_vec[i + 1..] {
                if ni_neighbors.contains(nj) {
                    triangles += 1;
                }
            }
        }
        (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
    }

    /// Average clustering coefficient across all nodes with deg >= 2.
    /// Returns 0.0 when no node qualifies.
    pub fn avg_clustering_coefficient(&self) -> f32 {
        let mut sum = 0.0f32;
        let mut count = 0u32;
        for key in &self.keys {
            if self.degree(key) >= 2 {
                sum += self.clustering_coefficient(key);
                count += 1;
            }
        }
        if count == 0 { 0.0 } else { sum / count as f32 }
    }

    /// Average shortest path length (sampled BFS from up to 100 nodes).
    ///
    /// The sample is simply the first 100 keys yielded by the key set.
    /// Only reachable pairs contribute; returns 0.0 if there are none.
    pub fn avg_path_length(&self) -> f32 {
        let sample: Vec<&String> = self.keys.iter().take(100).collect();
        if sample.is_empty() { return 0.0; }
        let mut total_dist = 0u64;
        let mut total_pairs = 0u64;
        for &start in &sample {
            let dists = self.bfs_distances(start);
            for d in dists.values() {
                // Distance 0 is the start node itself.
                if *d > 0 {
                    total_dist += *d as u64;
                    total_pairs += 1;
                }
            }
        }
        if total_pairs == 0 { 0.0 } else { total_dist as f32 / total_pairs as f32 }
    }

    /// Hop distance from `start` to every node reachable from it
    /// (including `start` itself at distance 0).
    fn bfs_distances(&self, start: &str) -> HashMap<String, u32> {
        let mut dist = HashMap::new();
        let mut queue = VecDeque::new();
        dist.insert(start.to_string(), 0u32);
        queue.push_back(start.to_string());
        while let Some(node) = queue.pop_front() {
            let d = dist[&node];
            for neighbor in self.neighbor_keys(&node) {
                if !dist.contains_key(neighbor) {
                    dist.insert(neighbor.to_string(), d + 1);
                    queue.push_back(neighbor.to_string());
                }
            }
        }
        dist
    }

    /// Power-law exponent α of the degree distribution.
    ///
    /// Estimated via MLE: α = 1 + n / Σ ln(k_i / (k_min - 0.5))
    /// α ≈ 2: extreme hub dominance (fragile)
    /// α ≈ 3: healthy scale-free
    /// α > 3: approaching random graph (egalitarian)
    ///
    /// Returns 0.0 when fewer than 10 nodes have a non-zero degree.
    pub fn degree_power_law_exponent(&self) -> f32 {
        let mut degrees: Vec<usize> = self.keys.iter()
            .map(|k| self.degree(k))
            .filter(|&d| d > 0) // exclude isolates
            .collect();
        if degrees.len() < 10 { return 0.0; } // not enough data
        degrees.sort_unstable();
        let k_min = degrees[0] as f64;
        if k_min < 1.0 { return 0.0; }
        let n = degrees.len() as f64;
        let sum_ln: f64 = degrees.iter()
            .map(|&k| (k as f64 / (k_min - 0.5)).ln())
            .sum();
        if sum_ln <= 0.0 { return 0.0; }
        (1.0 + n / sum_ln) as f32
    }

    /// Gini coefficient of the degree distribution.
    ///
    /// 0 = perfectly egalitarian (all nodes same degree)
    /// 1 = maximally unequal (one node has all edges)
    /// Measures hub concentration independent of distribution shape.
    pub fn degree_gini(&self) -> f32 {
        let mut degrees: Vec<f64> = self.keys.iter()
            .map(|k| self.degree(k) as f64)
            .collect();
        let n = degrees.len();
        if n < 2 { return 0.0; }
        degrees.sort_by(|a, b| a.total_cmp(b));
        let mean = degrees.iter().sum::<f64>() / n as f64;
        if mean < 1e-10 { return 0.0; }
        // Gini = (2 Σ i·x_i) / (n Σ x_i) - (n+1)/n, with 1-based rank i
        let weighted_sum: f64 = degrees.iter().enumerate()
            .map(|(i, &d)| (i as f64 + 1.0) * d)
            .sum();
        let total = degrees.iter().sum::<f64>();
        let gini = (2.0 * weighted_sum) / (n as f64 * total) - (n as f64 + 1.0) / n as f64;
        gini.max(0.0) as f32
    }

    /// Small-world coefficient σ = (C/C_rand) / (L/L_rand)
    /// C_rand ≈ <k>/n, L_rand ≈ ln(n)/ln(<k>)
    ///
    /// Returns 0.0 when the graph is too small (< 10 nodes), too sparse
    /// (<k> ≤ 1), or when any term degenerates, so the result is always
    /// finite.
    pub fn small_world_sigma(&self) -> f32 {
        let n = self.keys.len() as f32;
        if n < 10.0 { return 0.0; }
        let avg_degree = self.adj.values()
            .map(|e| e.len() as f32)
            .sum::<f32>() / n;
        // ln(<k>) must be strictly positive: at <k> == 1 it is zero and
        // L_rand would be infinite, turning σ into inf or NaN.
        if avg_degree <= 1.0 { return 0.0; }
        let c = self.avg_clustering_coefficient();
        let l = self.avg_path_length();
        let c_rand = avg_degree / n;
        let l_rand = n.ln() / avg_degree.ln();
        if c_rand < 1e-10 || l_rand < 1e-10 || l < 1e-10 {
            return 0.0;
        }
        let sigma = (c / c_rand) / (l / l_rand);
        // Guard against overflow for <k> barely above 1; a non-finite
        // value would otherwise end up in the metrics snapshot.
        if sigma.is_finite() { sigma } else { 0.0 }
    }
}
/// Impact of adding a hypothetical edge, as estimated by `Graph::link_impact`.
#[derive(Debug)]
pub struct LinkImpact {
/// First endpoint of the hypothetical edge
pub source: String,
/// Second endpoint of the hypothetical edge
pub target: String,
/// Current degree of `source` (before the edge is added)
pub source_deg: usize,
/// Current degree of `target` (before the edge is added)
pub target_deg: usize,
/// Is this a hub link? (either endpoint in top 5% by degree)
pub is_hub_link: bool,
/// Are both endpoints in the same community?
pub same_community: bool,
/// Change in clustering coefficient for source
pub delta_cc_source: f32,
/// Change in clustering coefficient for target
pub delta_cc_target: f32,
/// Change in degree Gini (positive = more hub-dominated)
pub delta_gini: f32,
/// Qualitative assessment
pub assessment: &'static str,
}
impl Graph {
    /// Simulate adding an edge and report impact on topology metrics.
    ///
    /// Doesn't modify the graph — computes what would change if the
    /// edge were added. The clustering-coefficient and Gini deltas are
    /// estimates, not exact recomputations.
    pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact {
        let source_deg = self.degree(source);
        let target_deg = self.degree(target);
        let hub_threshold = self.hub_threshold();
        let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold;

        // Community check: nodes without a label never count as "same".
        let same_community = match (self.communities.get(source), self.communities.get(target)) {
            (Some(a), Some(b)) => a == b,
            _ => false,
        };

        // CC change: adding the edge creates one new triangle for each
        // node that's a neighbor of BOTH source and target.
        let source_neighbors = self.neighbor_keys(source);
        let target_neighbors = self.neighbor_keys(target);
        let shared_neighbors = source_neighbors.intersection(&target_neighbors).count();
        let cc_before_source = self.clustering_coefficient(source);
        let cc_before_target = self.clustering_coefficient(target);

        // Estimate an endpoint's CC after the edge is added.
        // NOTE(review): `degree()` counts adjacency entries while
        // `clustering_coefficient` works on the distinct neighbor set;
        // the two agree only when there are no parallel edges.
        let estimate_cc_after = |deg: usize, cc_before: f32| -> f32 {
            let existing_triangles = if deg >= 2 {
                // Invert cc = 2T / (deg * (deg - 1)). Round rather than
                // truncate: the f32 round trip can land just below the
                // true integer count.
                (cc_before * deg as f32 * (deg as f32 - 1.0) / 2.0).round() as u32
            } else {
                0
            };
            let triangles = existing_triangles + shared_neighbors as u32;
            let new_deg = deg + 1;
            if new_deg >= 2 {
                (2.0 * triangles as f32) / (new_deg as f32 * (new_deg as f32 - 1.0))
            } else {
                0.0
            }
        };
        let cc_after_source = estimate_cc_after(source_deg, cc_before_source);
        let cc_after_target = estimate_cc_after(target_deg, cc_before_target);

        // Gini change via influence function:
        // IF(x; Gini, F) = (2F(x) - 1) * x/μ - Gini - 1
        // Adding an edge increments two degrees. The net ΔGini is the sum
        // of influence contributions from both endpoints shifting up by 1.
        let gini_before = self.degree_gini();
        let n = self.keys.len();
        let total_degree: f64 = self.keys.iter()
            .map(|k| self.degree(k) as f64)
            .sum();
        let mean_deg = if n > 0 { total_degree / n as f64 } else { 1.0 };
        let delta_gini = if mean_deg > 1e-10 && n >= 2 {
            // CDF at each endpoint's degree: fraction of nodes with degree ≤ d
            let f_source = self.keys.iter()
                .filter(|k| self.degree(k) <= source_deg)
                .count() as f64 / n as f64;
            let f_target = self.keys.iter()
                .filter(|k| self.degree(k) <= target_deg)
                .count() as f64 / n as f64;
            // Influence of incrementing source's degree by 1
            let new_source = (source_deg + 1) as f64;
            let if_source = (2.0 * f_source - 1.0) * new_source / mean_deg
                - gini_before as f64 - 1.0;
            // Influence of incrementing target's degree by 1
            let new_target = (target_deg + 1) as f64;
            let if_target = (2.0 * f_target - 1.0) * new_target / mean_deg
                - gini_before as f64 - 1.0;
            // Scale: each point contributes 1/n to the distribution
            ((if_source + if_target) / n as f64) as f32
        } else {
            0.0f32
        };

        // Qualitative assessment. The match is exhaustive, so every
        // (hub, community) combination gets a specific label.
        let assessment = match (is_hub_link, same_community) {
            (true, true) => "hub-reinforcing: strengthens existing star topology",
            (true, false) => "hub-bridging: cross-community but through a hub",
            (false, true) if shared_neighbors > 0 => {
                "lateral-clustering: strengthens local mesh topology"
            }
            (false, true) => "lateral-local: connects peripheral nodes in same community",
            (false, false) => "lateral-bridging: best kind — cross-community lateral link",
        };

        LinkImpact {
            source: source.to_string(),
            target: target.to_string(),
            source_deg,
            target_deg,
            is_hub_link,
            same_community,
            delta_cc_source: cc_after_source - cc_before_source,
            delta_cc_target: cc_after_target - cc_before_target,
            delta_gini,
            assessment,
        }
    }
}
/// Construct a [`Graph`] from the store, including community labels.
///
/// Communities come from label propagation capped at 20 iterations.
pub fn build_graph(store: &impl StoreView) -> Graph {
    let (adjacency, node_keys) = build_adjacency(store);
    Graph {
        // Computed first: it only borrows what the next two fields move.
        communities: label_propagation(&node_keys, &adjacency, 20),
        adj: adjacency,
        keys: node_keys,
    }
}
/// Construct a [`Graph`] with an empty community map.
///
/// Intended for spreading activation searches, which only need the
/// adjacency list and can skip community detection.
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
    let (adjacency, node_keys) = build_adjacency(store);
    let communities = HashMap::new();
    Graph { adj: adjacency, keys: node_keys, communities }
}
/// Collect node keys and a symmetric adjacency list from the store.
///
/// Relations with an endpoint that is not a known node are dropped.
/// Each remaining relation is recorded once per endpoint, after which
/// the implicit temporal edges are added.
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
    let mut keys: HashSet<String> = HashSet::new();
    store.for_each_node(|key, _, _| {
        keys.insert(key.to_owned());
    });

    let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
    store.for_each_relation(|source_key, target_key, strength, rel_type| {
        let both_known = keys.contains(source_key) && keys.contains(target_key);
        if both_known {
            // Forward direction first, then the mirrored edge.
            for (from, to) in [(source_key, target_key), (target_key, source_key)] {
                adj.entry(from.to_owned()).or_default().push(Edge {
                    target: to.to_owned(),
                    strength,
                    rel_type,
                });
            }
        }
    });

    add_implicit_temporal_edges(store, &keys, &mut adj);
    (adj, keys)
}
/// Add implicit edges for the temporal/digest hierarchy.
///
/// These edges are derived from node types and dates — they don't
/// need to be stored. Two kinds:
/// - parent/child: session→daily→weekly→monthly (by date containment)
/// - prev/next: chronological ordering within each level
///
/// Sessions use their timestamp for date. Digest nodes (daily/weekly/monthly)
/// extract the date they *cover* from the key name, since their timestamp
/// is when the digest was created, not what period it covers.
///
/// Every implicit edge is added in both directions with strength 1.0 and
/// `RelationType::Auto`, and is skipped when the pair is already linked.
fn add_implicit_temporal_edges(
    store: &impl StoreView,
    keys: &HashSet<String>,
    adj: &mut HashMap<String, Vec<Edge>>,
) {
    use crate::store::NodeType::*;
    use chrono::{Datelike, DateTime, NaiveDate};

    // Extract the covered date from a key name.
    // Patterns: "daily-2026-03-06", "daily-2026-03-06-identity",
    //           "journal#j-2026-03-13t...", "journal#2026-03-13-..."
    fn date_from_key(key: &str) -> Option<NaiveDate> {
        // Try extracting YYYY-MM-DD after known prefixes
        for prefix in ["daily-", "journal#j-", "journal#"] {
            // `get(..10)` yields None both for short remainders and when
            // byte 10 falls inside a multi-byte character, where plain
            // slicing would panic.
            let head = key.strip_prefix(prefix).and_then(|rest| rest.get(..10));
            if let Some(head) = head {
                if let Ok(d) = NaiveDate::parse_from_str(head, "%Y-%m-%d") {
                    return Some(d);
                }
            }
        }
        None
    }

    fn week_from_key(key: &str) -> Option<(i32, u32)> {
        // "weekly-2026-W09" → (2026, 9)
        let rest = key.strip_prefix("weekly-")?;
        let (year_str, w_str) = rest.split_once("-W")?;
        let year: i32 = year_str.parse().ok()?;
        // Week string might have a suffix like "-foo"
        let week_str = w_str.split('-').next()?;
        let week: u32 = week_str.parse().ok()?;
        Some((year, week))
    }

    fn month_from_key(key: &str) -> Option<(i32, u32)> {
        // "monthly-2026-02" → (2026, 2)
        let rest = key.strip_prefix("monthly-")?;
        let (year_str, month_str) = rest.split_once('-')?;
        let year: i32 = year_str.parse().ok()?;
        // Month string might have a suffix as well
        let month_str = month_str.split('-').next()?;
        let month: u32 = month_str.parse().ok()?;
        Some((year, month))
    }

    // Collect episodic nodes by type
    struct Dated { key: String, ts: i64, date: NaiveDate }
    let mut sessions: Vec<Dated> = Vec::new();
    let mut dailies: Vec<(String, NaiveDate)> = Vec::new();
    let mut weeklies: Vec<(String, (i32, u32))> = Vec::new();
    let mut monthlies: Vec<(String, (i32, u32))> = Vec::new();
    store.for_each_node_meta(|key, node_type, ts| {
        if !keys.contains(key) { return; }
        match node_type {
            EpisodicSession => {
                // Prefer date from key (local time) over timestamp (UTC)
                // to avoid timezone mismatches
                let date = date_from_key(key).or_else(|| {
                    DateTime::from_timestamp(ts, 0).map(|dt| dt.date_naive())
                });
                if let Some(date) = date {
                    sessions.push(Dated { key: key.to_owned(), ts, date });
                }
            }
            EpisodicDaily => {
                if let Some(date) = date_from_key(key) {
                    dailies.push((key.to_owned(), date));
                }
            }
            EpisodicWeekly => {
                if let Some(yw) = week_from_key(key) {
                    weeklies.push((key.to_owned(), yw));
                }
            }
            EpisodicMonthly => {
                if let Some(ym) = month_from_key(key) {
                    monthlies.push((key.to_owned(), ym));
                }
            }
            _ => {}
        }
    });

    // Chronological order within each level (needed for prev/next below).
    sessions.sort_by_key(|d| d.ts);
    dailies.sort_by_key(|(_, d)| *d);
    weeklies.sort_by_key(|(_, yw)| *yw);
    monthlies.sort_by_key(|(_, ym)| *ym);

    // Insert a symmetric implicit edge unless a→b already exists.
    let add_edge = |adj: &mut HashMap<String, Vec<Edge>>, a: &str, b: &str| {
        if let Some(edges) = adj.get(a) {
            if edges.iter().any(|e| e.target == b) { return; }
        }
        adj.entry(a.to_owned()).or_default().push(Edge {
            target: b.to_owned(),
            strength: 1.0,
            rel_type: RelationType::Auto,
        });
        adj.entry(b.to_owned()).or_default().push(Edge {
            target: a.to_owned(),
            strength: 1.0,
            rel_type: RelationType::Auto,
        });
    };

    // Build indexes: date→dailies, (year,week)→weekly, (year,month)→monthly
    // Note: multiple dailies can share a date (e.g. daily-2026-03-06-identity,
    // daily-2026-03-06-technical), so we collect all of them.
    let mut date_to_dailies: HashMap<NaiveDate, Vec<String>> = HashMap::new();
    for (key, date) in &dailies {
        date_to_dailies.entry(*date).or_default().push(key.clone());
    }
    // For weeklies/monthlies a later key with the same period replaces
    // an earlier one.
    let mut yw_to_weekly: HashMap<(i32, u32), String> = HashMap::new();
    for (key, yw) in &weeklies {
        yw_to_weekly.insert(*yw, key.clone());
    }
    let mut ym_to_monthly: HashMap<(i32, u32), String> = HashMap::new();
    for (key, ym) in &monthlies {
        ym_to_monthly.insert(*ym, key.clone());
    }

    // Session → Daily (parent): each session links to all dailies for its date
    for sess in &sessions {
        if let Some(daily_keys) = date_to_dailies.get(&sess.date) {
            for daily in daily_keys {
                add_edge(adj, &sess.key, daily);
            }
        }
    }

    // Daily → Weekly (parent)
    for (key, date) in &dailies {
        let yw = (date.iso_week().year(), date.iso_week().week());
        if let Some(weekly) = yw_to_weekly.get(&yw) {
            add_edge(adj, key, weekly);
        }
    }

    // Weekly → Monthly (parent)
    for (key, yw) in &weeklies {
        // A week can span two months; use the Thursday date (ISO week convention)
        let thursday = NaiveDate::from_isoywd_opt(yw.0, yw.1, chrono::Weekday::Thu);
        if let Some(d) = thursday {
            let ym = (d.year(), d.month());
            if let Some(monthly) = ym_to_monthly.get(&ym) {
                add_edge(adj, key, monthly);
            }
        }
    }

    // Prev/next within each level
    for pair in sessions.windows(2) {
        add_edge(adj, &pair[0].key, &pair[1].key);
    }
    for pair in dailies.windows(2) {
        add_edge(adj, &pair[0].0, &pair[1].0);
    }
    for pair in weeklies.windows(2) {
        add_edge(adj, &pair[0].0, &pair[1].0);
    }
    for pair in monthlies.windows(2) {
        add_edge(adj, &pair[0].0, &pair[1].0);
    }
}
/// Label propagation community detection.
///
/// Each node starts with its own label. Each iteration: adopt the most
/// common label among neighbors (weighted by edge strength). Iterate
/// until stable or max_iterations.
///
/// The result is deterministic for a given `keys`/`adj`: nodes are
/// visited in sorted key order, vote ties keep the node's current label
/// (otherwise the lowest tied label wins), and the final labels are
/// compacted to 0..n in sorted key order.
fn label_propagation(
    keys: &HashSet<String>,
    adj: &HashMap<String, Vec<Edge>>,
    max_iterations: u32,
) -> HashMap<String, u32> {
    // Only consider edges above this strength for community votes.
    // Weak auto-links from triangle closure (0.15-0.35) bridge
    // unrelated clusters — filtering them lets natural communities emerge.
    let min_strength: f32 = 0.3;

    // Initialize: each node gets its own label. Sorting fixes the visit
    // order, which would otherwise follow HashSet iteration order.
    let mut key_vec: Vec<String> = keys.iter().cloned().collect();
    key_vec.sort_unstable();
    let mut labels: HashMap<String, u32> = key_vec.iter()
        .enumerate()
        .map(|(i, k)| (k.clone(), i as u32))
        .collect();

    for _iter in 0..max_iterations {
        let mut changed = false;
        for key in &key_vec {
            let edges = match adj.get(key) {
                Some(e) if !e.is_empty() => e,
                _ => continue,
            };

            // Count weighted votes for each label (skip weak edges)
            let mut votes: HashMap<u32, f32> = HashMap::new();
            for edge in edges {
                if edge.strength < min_strength { continue; }
                if let Some(&label) = labels.get(&edge.target) {
                    *votes.entry(label).or_default() += edge.strength;
                }
            }

            // Adopt the label with most votes. Ties are resolved without
            // relying on HashMap order: staying put beats switching, and
            // among other labels the smallest wins.
            let current = labels[key];
            let mut best: Option<(u32, f32)> = None;
            for (&label, &weight) in &votes {
                let better = match best {
                    None => true,
                    Some((best_label, best_weight)) => match weight.total_cmp(&best_weight) {
                        std::cmp::Ordering::Greater => true,
                        std::cmp::Ordering::Less => false,
                        std::cmp::Ordering::Equal => {
                            label == current
                                || (best_label != current && label < best_label)
                        }
                    },
                };
                if better {
                    best = Some((label, weight));
                }
            }
            if let Some((best_label, _)) = best {
                if best_label != current {
                    labels.insert(key.clone(), best_label);
                    changed = true;
                }
            }
        }
        if !changed { break; }
    }

    // Compact labels to 0..n, assigning ids in sorted key order
    let mut label_map: HashMap<u32, u32> = HashMap::new();
    let mut next_id = 0u32;
    for key in &key_vec {
        if let Some(label) = labels.get_mut(key) {
            let new_label = *label_map.entry(*label).or_insert_with(|| {
                let id = next_id;
                next_id += 1;
                id
            });
            *label = new_label;
        }
    }
    labels
}
/// A snapshot of graph topology metrics, for tracking evolution over time.
///
/// Produced by `current_metrics` and appended to the metrics log by
/// `save_metrics_snapshot`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetricsSnapshot {
/// Time the snapshot was taken, from `crate::store::now_epoch()`
pub timestamp: i64,
/// `timestamp` formatted via `crate::store::format_datetime_space`
pub date: String,
/// Number of nodes in the graph
pub nodes: usize,
/// Number of undirected edges
pub edges: usize,
/// Number of distinct community labels
pub communities: usize,
/// Small-world coefficient σ
pub sigma: f32,
/// Power-law exponent α of the degree distribution
pub alpha: f32,
/// Gini coefficient of the degree distribution
pub gini: f32,
/// Average clustering coefficient
pub avg_cc: f32,
/// Average shortest path length (sampled)
pub avg_path_length: f32,
// Removed: avg_schema_fit was identical to avg_cc.
// Old snapshots with the field still deserialize (serde ignores unknown fields by default).
}
/// Location of the metrics log: `metrics.jsonl` inside the memory directory.
fn metrics_log_path() -> std::path::PathBuf {
    let dir = crate::store::memory_dir();
    dir.join("metrics.jsonl")
}
/// Read every snapshot recorded in the metrics log.
pub fn load_metrics_history() -> Vec<MetricsSnapshot> {
    let path = metrics_log_path();
    crate::util::jsonl_load(&path)
}
/// Append one snapshot to the metrics log.
///
/// The outcome of the append is deliberately discarded.
pub fn save_metrics_snapshot(snap: &MetricsSnapshot) {
    let path = metrics_log_path();
    let _ = crate::util::jsonl_append(&path, snap);
}
/// Measure the graph right now and package the result as a snapshot.
///
/// Nothing is written anywhere; pair with `save_metrics_snapshot` to
/// persist the result.
pub fn current_metrics(graph: &Graph) -> MetricsSnapshot {
    let timestamp = crate::store::now_epoch();
    let date = crate::store::format_datetime_space(timestamp);

    // Size metrics
    let nodes = graph.nodes().len();
    let edges = graph.edge_count();
    let communities = graph.community_count();

    // Topology metrics
    let sigma = graph.small_world_sigma();
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let avg_cc = graph.avg_clustering_coefficient();
    let avg_path_length = graph.avg_path_length();

    MetricsSnapshot {
        timestamp,
        date,
        nodes,
        edges,
        communities,
        sigma,
        alpha,
        gini,
        avg_cc,
        avg_path_length,
    }
}
/// Health report: summary of graph metrics.
///
/// Saves a metrics snapshot as a side effect (callers who want pure
/// computation should use `current_metrics` + `save_metrics_snapshot`).
///
/// The report covers size, degree and topology metrics (with deltas
/// against the previous snapshot), community sizes, node type counts,
/// relations that reference missing nodes, and a short metrics history.
pub fn health_report(graph: &Graph, store: &Store) -> String {
    let snap = current_metrics(graph);
    save_metrics_snapshot(&snap);
    let n = snap.nodes;
    let e = snap.edges;
    let avg_cc = snap.avg_cc;
    let avg_pl = snap.avg_path_length;
    let sigma = snap.sigma;
    let alpha = snap.alpha;
    let gini = snap.gini;
    let communities = snap.communities;

    // Community sizes, largest first
    let mut comm_sizes: HashMap<u32, usize> = HashMap::new();
    for label in graph.communities().values() {
        *comm_sizes.entry(*label).or_default() += 1;
    }
    let mut sizes: Vec<usize> = comm_sizes.values().copied().collect();
    sizes.sort_unstable_by(|a, b| b.cmp(a));

    // Degree distribution
    let mut degrees: Vec<usize> = graph.nodes().iter()
        .map(|k| graph.degree(k))
        .collect();
    degrees.sort_unstable();
    let max_deg = degrees.last().copied().unwrap_or(0);
    let median_deg = if degrees.is_empty() { 0 } else { degrees[degrees.len() / 2] };
    let avg_deg = if n == 0 { 0.0 } else {
        degrees.iter().sum::<usize>() as f64 / n as f64
    };

    // Low-CC nodes: poorly integrated
    let low_cc = graph.nodes().iter()
        .filter(|k| graph.clustering_coefficient(k) < 0.1)
        .count();

    // Orphan edges: relations referencing non-existent nodes
    let mut orphan_edges = 0usize;
    let mut missing_nodes: HashSet<String> = HashSet::new();
    for rel in &store.relations {
        if rel.deleted { continue; }
        let s_missing = !store.nodes.contains_key(&rel.source_key);
        let t_missing = !store.nodes.contains_key(&rel.target_key);
        if s_missing || t_missing {
            orphan_edges += 1;
            if s_missing { missing_nodes.insert(rel.source_key.clone()); }
            if t_missing { missing_nodes.insert(rel.target_key.clone()); }
        }
    }

    // NodeType breakdown
    let mut type_counts: HashMap<&str, usize> = HashMap::new();
    for node in store.nodes.values() {
        let label = match node.node_type {
            crate::store::NodeType::EpisodicSession => "episodic",
            crate::store::NodeType::EpisodicDaily => "daily",
            crate::store::NodeType::EpisodicWeekly => "weekly",
            crate::store::NodeType::EpisodicMonthly => "monthly",
            crate::store::NodeType::Semantic => "semantic",
        };
        *type_counts.entry(label).or_default() += 1;
    }

    // Load history for deltas
    let history = load_metrics_history();
    let prev = if history.len() >= 2 {
        Some(&history[history.len() - 2]) // second-to-last (last is the one we just wrote)
    } else {
        None
    };

    // Render the change against the previous snapshot as " (+0.012)";
    // empty when there is no previous value or the change is negligible.
    fn delta(current: f32, prev: Option<f32>) -> String {
        match prev {
            Some(p) => {
                let d = current - p;
                if d.abs() < 0.001 { String::new() }
                else { format!(" ({:+.3})", d) }
            }
            None => String::new(),
        }
    }
    let sigma_d = delta(sigma, prev.map(|p| p.sigma));
    let alpha_d = delta(alpha, prev.map(|p| p.alpha));
    let gini_d = delta(gini, prev.map(|p| p.gini));
    let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));

    // The literal's continuation lines start at column 0 on purpose:
    // leading whitespace would become part of the report.
    let mut report = format!(
"Memory Health Report
====================
Nodes: {n} Relations: {e} Communities: {communities}
Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
Clustering coefficient (avg): {avg_cc:.4}{cc_d} low-CC (<0.1): {low_cc} nodes
Average path length: {avg_pl:.2}
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)
Community sizes (top 5): {top5}
Types: semantic={semantic} episodic={episodic} daily={daily} weekly={weekly} monthly={monthly}",
        top5 = sizes.iter().take(5)
            .map(|s| s.to_string())
            .collect::<Vec<_>>()
            .join(", "),
        semantic = type_counts.get("semantic").unwrap_or(&0),
        episodic = type_counts.get("episodic").unwrap_or(&0),
        daily = type_counts.get("daily").unwrap_or(&0),
        weekly = type_counts.get("weekly").unwrap_or(&0),
        monthly = type_counts.get("monthly").unwrap_or(&0),
    );

    // Orphan edges
    if orphan_edges == 0 {
        report.push_str("\n\nBroken links: 0");
    } else {
        report.push_str(&format!(
            "\n\nBroken links: {} edges reference {} missing nodes",
            orphan_edges, missing_nodes.len()));
        // Sorted so the listing is stable; only the first 10 are shown.
        let mut sorted: Vec<_> = missing_nodes.iter().collect();
        sorted.sort();
        for key in sorted.iter().take(10) {
            report.push_str(&format!("\n - {}", key));
        }
        if sorted.len() > 10 {
            report.push_str(&format!("\n ... and {} more", sorted.len() - 10));
        }
    }

    // Show history trend if we have enough data points
    if history.len() >= 3 {
        report.push_str("\n\nMetrics history (last 5):\n");
        for snap in &history[history.len().saturating_sub(5)..] {
            report.push_str(&format!(" {} σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
                snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
        }
    }
    report
}