From ec8b4b2ed28bc3b894b2d91a0767c6aa82ecf32f Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Tue, 3 Mar 2026 12:21:04 -0500 Subject: [PATCH] eliminate schema_fit: it's clustering coefficient schema_fit was algebraically identical to clustering_coefficient (both compute 2E/(d*(d-1)) = fraction of connected neighbor pairs). Remove the redundant function, field, and metrics column. - Delete schema_fit() and schema_fit_all() from graph.rs - Remove schema_fit field from Node struct - Remove avg_schema_fit from MetricsSnapshot (duplicated avg_cc) - Replace all callers with graph.clustering_coefficient() - Rename ReplayItem.schema_fit to .cc - Query: "cc" and "schema_fit" both resolve from graph CC - Low-CC count folded into health report CC line Co-Authored-By: Kent Overstreet --- src/capnp_store.rs | 6 ----- src/graph.rs | 39 +++++++--------------------- src/migrate.rs | 2 -- src/neuro.rs | 64 +++++++++++++++++----------------------------- src/query.rs | 5 +--- 5 files changed, 33 insertions(+), 83 deletions(-) diff --git a/src/capnp_store.rs b/src/capnp_store.rs index d32716f..d67eddc 100644 --- a/src/capnp_store.rs +++ b/src/capnp_store.rs @@ -145,8 +145,6 @@ pub struct Node { #[serde(default)] pub clustering_coefficient: Option, #[serde(default)] - pub schema_fit: Option, - #[serde(default)] pub degree: Option, } @@ -925,7 +923,6 @@ impl Store { position: 0, community_id: None, clustering_coefficient: None, - schema_fit: None, degree: None, } } @@ -1505,13 +1502,11 @@ impl Store { pub fn update_graph_metrics(&mut self) { let g = self.build_graph(); let communities = g.communities(); - let fits = graph::schema_fit_all(&g); for (key, node) in &mut self.nodes { node.community_id = communities.get(key).copied(); node.clustering_coefficient = Some(g.clustering_coefficient(key)); node.degree = Some(g.degree(key) as u32); - node.schema_fit = fits.get(key).copied(); } } @@ -1874,7 +1869,6 @@ fn read_content_node(r: memory_capnp::content_node::Reader) -> Result f32 { - 
graph.clustering_coefficient(key)
-}
-
-/// Compute schema fit for all nodes
-pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
-    graph.nodes().iter()
-        .map(|key| (key.clone(), graph.clustering_coefficient(key)))
-        .collect()
-}
 
 /// A snapshot of graph topology metrics, for tracking evolution over time
 #[derive(Clone, Debug, Serialize, Deserialize)]
@@ -530,7 +514,8 @@ pub struct MetricsSnapshot {
     pub gini: f32,
     pub avg_cc: f32,
     pub avg_path_length: f32,
-    pub avg_schema_fit: f32,
+    // Removed: avg_schema_fit was identical to avg_cc.
+    // Old snapshots with the field still deserialize (serde ignores unknown fields by default).
 }
 
 fn metrics_log_path() -> std::path::PathBuf {
@@ -595,12 +580,10 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
     let alpha = graph.degree_power_law_exponent();
     let gini = graph.degree_gini();
 
-    // Schema fit distribution
-    let fits = schema_fit_all(graph);
-    let avg_fit = if fits.is_empty() { 0.0 } else {
-        fits.values().sum::<f32>() / fits.len() as f32
-    };
-    let low_fit = fits.values().filter(|&&f| f < 0.1).count();
+    // Low-CC nodes: poorly integrated
+    let low_cc = graph.nodes().iter()
+        .filter(|k| graph.clustering_coefficient(k) < 0.1)
+        .count();
 
     // Category breakdown
     let cats = store.category_counts();
@@ -614,7 +597,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
         nodes: n, edges: e, communities,
         sigma, alpha, gini, avg_cc,
         avg_path_length: avg_pl,
-        avg_schema_fit: avg_fit,
     };
     save_metrics_snapshot(&snap);
@@ -641,7 +623,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
     let alpha_d = delta(alpha, prev.map(|p| p.alpha));
     let gini_d = delta(gini, prev.map(|p| p.gini));
     let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
-    let fit_d = delta(avg_fit, prev.map(|p| p.avg_schema_fit));
 
     let mut report = format!(
         "Memory Health Report
 ====================
 Nodes: {n} Relations: {e} Communities: {communities}
 Degree: max={max_deg} 
median={median_deg} avg={avg_deg:.1} -Clustering coefficient (avg): {avg_cc:.4}{cc_d} +Clustering coefficient (avg): {avg_cc:.4}{cc_d} low-CC (<0.1): {low_cc} nodes Average path length: {avg_pl:.2} Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world) Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian) Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub) Community sizes (top 5): {top5} -Schema fit: avg={avg_fit:.3}{fit_d} low-fit (<0.1): {low_fit} nodes - Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}", top5 = sizes.iter().take(5) .map(|s| s.to_string()) @@ -674,8 +653,8 @@ Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}", if history.len() >= 3 { report.push_str("\n\nMetrics history (last 5):\n"); for snap in &history[history.len().saturating_sub(5)..] { - report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n", - snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc, snap.avg_schema_fit)); + report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4}\n", + snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc)); } } diff --git a/src/migrate.rs b/src/migrate.rs index ea4bfec..8e3e959 100644 --- a/src/migrate.rs +++ b/src/migrate.rs @@ -221,7 +221,6 @@ pub fn migrate() -> Result<(), String> { position: 0, community_id: None, clustering_coefficient: None, - schema_fit: None, degree: None, }; all_nodes.push(node); @@ -261,7 +260,6 @@ pub fn migrate() -> Result<(), String> { position: 0, community_id: None, clustering_coefficient: None, - schema_fit: None, degree: None, }; all_nodes.push(node); diff --git a/src/neuro.rs b/src/neuro.rs index c631fa7..aaaf9e6 100644 --- a/src/neuro.rs +++ b/src/neuro.rs @@ -19,12 +19,12 @@ const SECS_PER_DAY: f64 = 86400.0; /// With spectral data: /// priority = spectral_displacement × overdue × emotion /// Without: -/// priority = (1 - schema_fit) × overdue × emotion +/// priority = (1 - cc) × overdue × emotion /// /// Spectral 
displacement is the outlier_score clamped and normalized —
 /// it measures how far a node sits from its community center in the
 /// eigenspace. This is a global signal (considers all graph structure)
-/// vs schema_fit which is local (only immediate neighbors).
+/// vs CC which is local (only immediate neighbors).
 pub fn consolidation_priority(
     store: &Store,
     key: &str,
@@ -44,8 +44,8 @@ pub fn consolidation_priority(
         // outlier=5 and outlier=10 matters less than 1 vs 2.
         (outlier / 3.0).min(3.0)
     } else {
-        let fit = graph::schema_fit(graph, key) as f64;
-        1.0 - fit
+        let cc = graph.clustering_coefficient(key) as f64;
+        1.0 - cc
     };
 
     // Spaced repetition: how overdue is this node for replay?
@@ -69,7 +69,7 @@ pub struct ReplayItem {
     pub priority: f64,
     pub interval_days: u32,
     pub emotion: f32,
-    pub schema_fit: f32,
+    pub cc: f32,
     /// Spectral classification: "bridge", "outlier", "core", "peripheral"
     pub classification: &'static str,
     /// Raw spectral outlier score (distance / median)
@@ -91,8 +91,6 @@ pub fn replay_queue_with_graph(
     graph: &Graph,
     emb: Option<&SpectralEmbedding>,
 ) -> Vec<ReplayItem> {
-    let fits = graph::schema_fit_all(graph);
-
     // Build spectral position map if embedding is available
     let positions: HashMap = if let Some(emb) = emb {
         let communities = graph.communities().clone();
@@ -116,14 +114,12 @@ pub fn replay_queue_with_graph(
                 store, key, graph,
                 pos.map(|p| p.outlier_score),
             );
-            let fit = fits.get(key).copied().unwrap_or(0.0);
-
             ReplayItem {
                 key: key.clone(),
                 priority,
                 interval_days: node.spaced_repetition_interval,
                 emotion: node.emotion,
-                schema_fit: fit,
+                cc: graph.clustering_coefficient(key),
                 classification,
                 outlier_score,
             }
@@ -178,7 +174,7 @@ pub fn detect_interference(
 /// Low fit (<0.2): deep examination needed — new schema seed, bridge, or noise? 
pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
     let graph = store.build_graph();
-    let fit = graph::schema_fit(&graph, key);
+    let fit = graph.clustering_coefficient(key);
 
     let recommendation = if fit > 0.5 {
         "auto-integrate"
@@ -267,8 +263,8 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
         };
 
         out.push_str(&format!("## {} \n", item.key));
-        out.push_str(&format!("Priority: {:.3} Schema fit: {:.3} Emotion: {:.1} ",
-            item.priority, item.schema_fit, item.emotion));
+        out.push_str(&format!("Priority: {:.3} CC: {:.3} Emotion: {:.1} ",
+            item.priority, item.cc, item.emotion));
         out.push_str(&format!("Category: {} Interval: {}d\n",
             node.category.label(), node.spaced_repetition_interval));
         if item.outlier_score > 0.0 {
@@ -485,8 +481,8 @@ pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<()
         let node_type = store.nodes.get(&item.key)
             .map(|n| if n.key.contains("journal") { "episodic" } else { "semantic" })
             .unwrap_or("?");
-        println!(" [{:.3}] {} (fit={:.3}, interval={}d, type={})",
-            item.priority, item.key, item.schema_fit, item.interval_days, node_type);
+        println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
-            item.priority, item.key, item.cc, item.interval_days, node_type);
     }
 
     // Also show interference pairs
@@ -556,13 +552,12 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result
     let episode_items: Vec<ReplayItem> = episode_keys.iter()
         .filter_map(|k| {
             let node = store.nodes.get(k)?;
-            let fit = graph::schema_fit(&graph, k);
             Some(ReplayItem {
                 key: k.clone(),
                 priority: consolidation_priority(store, k, &graph, None),
                 interval_days: node.spaced_repetition_interval,
                 emotion: node.emotion,
-                schema_fit: fit,
+                cc: graph.clustering_coefficient(k),
                 classification: "unknown",
                 outlier_score: 0.0,
             })
         })
@@ -597,12 +592,7 @@ pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
     let graph = store.build_graph();
     let alpha = graph.degree_power_law_exponent();
     let gini = graph.degree_gini();
-
let avg_fit = {
-        let fits = graph::schema_fit_all(&graph);
-        if fits.is_empty() { 0.0 } else {
-            fits.values().sum::<f32>() / fits.len() as f32
-        }
-    };
+    let avg_cc = graph.avg_clustering_coefficient();
 
     let interference_pairs = detect_interference(store, &graph, 0.5);
     let interference_count = interference_pairs.len();
@@ -651,17 +641,17 @@ pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
             gini));
     }
 
-    // Target: avg schema fit ≥ 0.2
-    if avg_fit < 0.1 {
+    // Target: avg CC ≥ 0.2
+    if avg_cc < 0.1 {
         plan.replay_count += 5;
         plan.rationale.push(format!(
-            "Schema fit={:.3} (target ≥0.2): very poor integration → +5 replay",
-            avg_fit));
-    } else if avg_fit < 0.2 {
+            "CC={:.3} (target ≥0.2): very poor integration → +5 replay",
+            avg_cc));
+    } else if avg_cc < 0.2 {
         plan.replay_count += 2;
         plan.rationale.push(format!(
-            "Schema fit={:.3} (target ≥0.2): low integration → +2 replay",
-            avg_fit));
+            "CC={:.3} (target ≥0.2): low integration → +2 replay",
+            avg_cc));
     }
 
     // Interference: >100 pairs is a lot, <10 is clean
@@ -748,21 +738,14 @@ pub fn daily_check(store: &Store) -> String {
     let gini = graph.degree_gini();
     let sigma = graph.small_world_sigma();
     let avg_cc = graph.avg_clustering_coefficient();
-    let avg_fit = {
-        let fits = graph::schema_fit_all(&graph);
-        if fits.is_empty() { 0.0 } else {
-            fits.values().sum::<f32>() / fits.len() as f32
-        }
-    };
-
     let history = graph::load_metrics_history();
     let prev = history.last();
 
     let mut out = String::from("Memory daily check\n");
 
     // Current state
-    out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
-        sigma, alpha, gini, avg_cc, avg_fit));
+    out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
+        sigma, alpha, gini, avg_cc));
 
     // Trend
     if let Some(p) = prev {
@@ -777,7 +760,7 @@ pub fn daily_check(store: &Store) -> String {
     let mut issues = Vec::new();
     if alpha < 2.0 { issues.push("hub dominance critical"); }
     if gini > 0.5 { issues.push("high inequality"); }
-    if avg_fit < 0.1 { 
issues.push("poor integration"); } + if avg_cc < 0.1 { issues.push("poor integration"); } if d_sigma < -5.0 { issues.push("σ declining"); } if d_alpha < -0.1 { issues.push("α declining"); } if d_gini > 0.02 { issues.push("inequality increasing"); } @@ -802,7 +785,6 @@ pub fn daily_check(store: &Store) -> String { communities: graph.community_count(), sigma, alpha, gini, avg_cc, avg_path_length: graph.avg_path_length(), - avg_schema_fit: avg_fit, }); out diff --git a/src/query.rs b/src/query.rs index 21b4beb..b663776 100644 --- a/src/query.rs +++ b/src/query.rs @@ -184,12 +184,9 @@ fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option "community_id" => { graph.communities().get(key).map(|&c| Value::Num(c as f64)) } - "clustering_coefficient" => { + "clustering_coefficient" | "schema_fit" | "cc" => { Some(Value::Num(graph.clustering_coefficient(key) as f64)) } - "schema_fit" => { - node.schema_fit.map(|f| Value::Num(f as f64)) - } _ => None, } }