eliminate schema_fit: it's clustering coefficient

schema_fit was algebraically identical to clustering_coefficient (both compute 2E/(d*(d-1)) = fraction of connected neighbor pairs). Remove the redundant function, field, and metrics column.

- Delete schema_fit() and schema_fit_all() from graph.rs
- Remove schema_fit field from Node struct
- Remove avg_schema_fit from MetricsSnapshot (duplicated avg_cc)
- Replace all callers with graph.clustering_coefficient()
- Rename ReplayItem.schema_fit to .cc
- Query: "cc" and "schema_fit" both resolve from graph CC
- Low-CC count folded into health report CC line

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-03-03 12:21:04 -05:00 · 2026-03-03 12:21:04 -05:00 · ec8b4b2ed2
commit ec8b4b2ed2
parent fb7aa46e03
5 changed files with 33 additions and 83 deletions
--- a/src/graph.rs
+++ b/src/graph.rs
@ -500,22 +500,6 @@ fn label_propagation(
    labels
 }

-/// Schema fit for a node: how well-integrated into its local neighborhood.
-///
-/// Algebraically identical to clustering_coefficient — both measure the
-/// fraction of neighbor pairs that are connected. Kept as an alias for
-/// semantic clarity: CC is a graph metric, schema fit is a cognitive one
-/// (how well does this node fit the surrounding schema?).
-pub fn schema_fit(graph: &Graph, key: &str) -> f32 {
-    graph.clustering_coefficient(key)
-}
-
-/// Compute schema fit for all nodes
-pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
-    graph.nodes().iter()
-        .map(|key| (key.clone(), graph.clustering_coefficient(key)))
-        .collect()
-}

 /// A snapshot of graph topology metrics, for tracking evolution over time
 #[derive(Clone, Debug, Serialize, Deserialize)]
@ -530,7 +514,8 @@ pub struct MetricsSnapshot {
    pub gini: f32,
    pub avg_cc: f32,
    pub avg_path_length: f32,
-    pub avg_schema_fit: f32,
+    // Removed: avg_schema_fit was identical to avg_cc.
+    // Old snapshots with the field still deserialize (serde ignores unknown fields by default).
 }

 fn metrics_log_path() -> std::path::PathBuf {
@ -595,12 +580,10 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();

-    // Schema fit distribution
-    let fits = schema_fit_all(graph);
-    let avg_fit = if fits.is_empty() { 0.0 } else {
-        fits.values().sum::<f32>() / fits.len() as f32
-    };
-    let low_fit = fits.values().filter(|&&f| f < 0.1).count();
+    // Low-CC nodes: poorly integrated
+    let low_cc = graph.nodes().iter()
+        .filter(|k| graph.clustering_coefficient(k) < 0.1)
+        .count();

    // Category breakdown
    let cats = store.category_counts();
@ -614,7 +597,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
        nodes: n, edges: e, communities,
        sigma, alpha, gini, avg_cc,
        avg_path_length: avg_pl,
-        avg_schema_fit: avg_fit,
    };
    save_metrics_snapshot(&snap);

@ -641,7 +623,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
    let alpha_d = delta(alpha, prev.map(|p| p.alpha));
    let gini_d = delta(gini, prev.map(|p| p.gini));
    let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
-    let fit_d = delta(avg_fit, prev.map(|p| p.avg_schema_fit));

    let mut report = format!(
 "Memory Health Report
@ -649,15 +630,13 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
 Nodes: {n}  Relations: {e}  Communities: {communities}

 Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
-Clustering coefficient (avg): {avg_cc:.4}{cc_d}
+Clustering coefficient (avg): {avg_cc:.4}{cc_d}  low-CC (<0.1): {low_cc} nodes
 Average path length: {avg_pl:.2}
 Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
 Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
 Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)

 Community sizes (top 5): {top5}
-Schema fit: avg={avg_fit:.3}{fit_d}  low-fit (<0.1): {low_fit} nodes
-
 Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
        top5 = sizes.iter().take(5)
            .map(|s| s.to_string())
@ -674,8 +653,8 @@ Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
    if history.len() >= 3 {
        report.push_str("\n\nMetrics history (last 5):\n");
        for snap in &history[history.len().saturating_sub(5)..] {
-            report.push_str(&format!("  {} — σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
-                snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc, snap.avg_schema_fit));
+            report.push_str(&format!("  {} — σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
+                snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
        }
    }