eliminate schema_fit: it's clustering coefficient

schema_fit was algebraically identical to clustering_coefficient:
both compute 2E/(d*(d-1)), where d is a node's degree and E is the
number of edges among its d neighbors — i.e. the fraction of
neighbor pairs that are connected.
Remove the redundant function, field, and metrics column.

- Delete schema_fit() and schema_fit_all() from graph.rs
- Remove schema_fit field from Node struct
- Remove avg_schema_fit from MetricsSnapshot (duplicated avg_cc)
- Replace all callers with graph.clustering_coefficient()
- Rename ReplayItem.schema_fit to .cc
- Query: "clustering_coefficient", "schema_fit", and "cc" all resolve from graph CC
- Low-CC count folded into health report CC line

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-03 12:21:04 -05:00
parent fb7aa46e03
commit ec8b4b2ed2
5 changed files with 33 additions and 83 deletions

View file

@ -145,8 +145,6 @@ pub struct Node {
#[serde(default)] #[serde(default)]
pub clustering_coefficient: Option<f32>, pub clustering_coefficient: Option<f32>,
#[serde(default)] #[serde(default)]
pub schema_fit: Option<f32>,
#[serde(default)]
pub degree: Option<u32>, pub degree: Option<u32>,
} }
@ -925,7 +923,6 @@ impl Store {
position: 0, position: 0,
community_id: None, community_id: None,
clustering_coefficient: None, clustering_coefficient: None,
schema_fit: None,
degree: None, degree: None,
} }
} }
@ -1505,13 +1502,11 @@ impl Store {
pub fn update_graph_metrics(&mut self) { pub fn update_graph_metrics(&mut self) {
let g = self.build_graph(); let g = self.build_graph();
let communities = g.communities(); let communities = g.communities();
let fits = graph::schema_fit_all(&g);
for (key, node) in &mut self.nodes { for (key, node) in &mut self.nodes {
node.community_id = communities.get(key).copied(); node.community_id = communities.get(key).copied();
node.clustering_coefficient = Some(g.clustering_coefficient(key)); node.clustering_coefficient = Some(g.clustering_coefficient(key));
node.degree = Some(g.degree(key) as u32); node.degree = Some(g.degree(key) as u32);
node.schema_fit = fits.get(key).copied();
} }
} }
@ -1874,7 +1869,6 @@ fn read_content_node(r: memory_capnp::content_node::Reader) -> Result<Node, Stri
position: r.get_position(), position: r.get_position(),
community_id: None, community_id: None,
clustering_coefficient: None, clustering_coefficient: None,
schema_fit: None,
degree: None, degree: None,
}) })
} }

View file

@ -500,22 +500,6 @@ fn label_propagation(
labels labels
} }
/// Schema fit for a node: how well-integrated into its local neighborhood.
///
/// Algebraically identical to clustering_coefficient — both measure the
/// fraction of neighbor pairs that are connected. Kept as an alias for
/// semantic clarity: CC is a graph metric, schema fit is a cognitive one
/// (how well does this node fit the surrounding schema?).
pub fn schema_fit(graph: &Graph, key: &str) -> f32 {
graph.clustering_coefficient(key)
}
/// Compute schema fit for all nodes
pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
graph.nodes().iter()
.map(|key| (key.clone(), graph.clustering_coefficient(key)))
.collect()
}
/// A snapshot of graph topology metrics, for tracking evolution over time /// A snapshot of graph topology metrics, for tracking evolution over time
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize)]
@ -530,7 +514,8 @@ pub struct MetricsSnapshot {
pub gini: f32, pub gini: f32,
pub avg_cc: f32, pub avg_cc: f32,
pub avg_path_length: f32, pub avg_path_length: f32,
pub avg_schema_fit: f32, // Removed: avg_schema_fit was identical to avg_cc.
// Old snapshots with the field still deserialize (serde ignores unknown fields by default).
} }
fn metrics_log_path() -> std::path::PathBuf { fn metrics_log_path() -> std::path::PathBuf {
@ -595,12 +580,10 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
let alpha = graph.degree_power_law_exponent(); let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini(); let gini = graph.degree_gini();
// Schema fit distribution // Low-CC nodes: poorly integrated
let fits = schema_fit_all(graph); let low_cc = graph.nodes().iter()
let avg_fit = if fits.is_empty() { 0.0 } else { .filter(|k| graph.clustering_coefficient(k) < 0.1)
fits.values().sum::<f32>() / fits.len() as f32 .count();
};
let low_fit = fits.values().filter(|&&f| f < 0.1).count();
// Category breakdown // Category breakdown
let cats = store.category_counts(); let cats = store.category_counts();
@ -614,7 +597,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
nodes: n, edges: e, communities, nodes: n, edges: e, communities,
sigma, alpha, gini, avg_cc, sigma, alpha, gini, avg_cc,
avg_path_length: avg_pl, avg_path_length: avg_pl,
avg_schema_fit: avg_fit,
}; };
save_metrics_snapshot(&snap); save_metrics_snapshot(&snap);
@ -641,7 +623,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
let alpha_d = delta(alpha, prev.map(|p| p.alpha)); let alpha_d = delta(alpha, prev.map(|p| p.alpha));
let gini_d = delta(gini, prev.map(|p| p.gini)); let gini_d = delta(gini, prev.map(|p| p.gini));
let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc)); let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
let fit_d = delta(avg_fit, prev.map(|p| p.avg_schema_fit));
let mut report = format!( let mut report = format!(
"Memory Health Report "Memory Health Report
@ -649,15 +630,13 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
Nodes: {n} Relations: {e} Communities: {communities} Nodes: {n} Relations: {e} Communities: {communities}
Degree: max={max_deg} median={median_deg} avg={avg_deg:.1} Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
Clustering coefficient (avg): {avg_cc:.4}{cc_d} Clustering coefficient (avg): {avg_cc:.4}{cc_d} low-CC (<0.1): {low_cc} nodes
Average path length: {avg_pl:.2} Average path length: {avg_pl:.2}
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world) Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian) Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub) Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)
Community sizes (top 5): {top5} Community sizes (top 5): {top5}
Schema fit: avg={avg_fit:.3}{fit_d} low-fit (<0.1): {low_fit} nodes
Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}", Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
top5 = sizes.iter().take(5) top5 = sizes.iter().take(5)
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -674,8 +653,8 @@ Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
if history.len() >= 3 { if history.len() >= 3 {
report.push_str("\n\nMetrics history (last 5):\n"); report.push_str("\n\nMetrics history (last 5):\n");
for snap in &history[history.len().saturating_sub(5)..] { for snap in &history[history.len().saturating_sub(5)..] {
report.push_str(&format!(" {}σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n", report.push_str(&format!(" {}σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc, snap.avg_schema_fit)); snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
} }
} }

View file

@ -221,7 +221,6 @@ pub fn migrate() -> Result<(), String> {
position: 0, position: 0,
community_id: None, community_id: None,
clustering_coefficient: None, clustering_coefficient: None,
schema_fit: None,
degree: None, degree: None,
}; };
all_nodes.push(node); all_nodes.push(node);
@ -261,7 +260,6 @@ pub fn migrate() -> Result<(), String> {
position: 0, position: 0,
community_id: None, community_id: None,
clustering_coefficient: None, clustering_coefficient: None,
schema_fit: None,
degree: None, degree: None,
}; };
all_nodes.push(node); all_nodes.push(node);

View file

@ -19,12 +19,12 @@ const SECS_PER_DAY: f64 = 86400.0;
/// With spectral data: /// With spectral data:
/// priority = spectral_displacement × overdue × emotion /// priority = spectral_displacement × overdue × emotion
/// Without: /// Without:
/// priority = (1 - schema_fit) × overdue × emotion /// priority = (1 - cc) × overdue × emotion
/// ///
/// Spectral displacement is the outlier_score clamped and normalized — /// Spectral displacement is the outlier_score clamped and normalized —
/// it measures how far a node sits from its community center in the /// it measures how far a node sits from its community center in the
/// eigenspace. This is a global signal (considers all graph structure) /// eigenspace. This is a global signal (considers all graph structure)
/// vs schema_fit which is local (only immediate neighbors). /// vs CC which is local (only immediate neighbors).
pub fn consolidation_priority( pub fn consolidation_priority(
store: &Store, store: &Store,
key: &str, key: &str,
@ -44,8 +44,8 @@ pub fn consolidation_priority(
// outlier=5 and outlier=10 matters less than 1 vs 2. // outlier=5 and outlier=10 matters less than 1 vs 2.
(outlier / 3.0).min(3.0) (outlier / 3.0).min(3.0)
} else { } else {
let fit = graph::schema_fit(graph, key) as f64; let cc = graph.clustering_coefficient(key) as f64;
1.0 - fit 1.0 - cc
}; };
// Spaced repetition: how overdue is this node for replay? // Spaced repetition: how overdue is this node for replay?
@ -69,7 +69,7 @@ pub struct ReplayItem {
pub priority: f64, pub priority: f64,
pub interval_days: u32, pub interval_days: u32,
pub emotion: f32, pub emotion: f32,
pub schema_fit: f32, pub cc: f32,
/// Spectral classification: "bridge", "outlier", "core", "peripheral" /// Spectral classification: "bridge", "outlier", "core", "peripheral"
pub classification: &'static str, pub classification: &'static str,
/// Raw spectral outlier score (distance / median) /// Raw spectral outlier score (distance / median)
@ -91,8 +91,6 @@ pub fn replay_queue_with_graph(
graph: &Graph, graph: &Graph,
emb: Option<&SpectralEmbedding>, emb: Option<&SpectralEmbedding>,
) -> Vec<ReplayItem> { ) -> Vec<ReplayItem> {
let fits = graph::schema_fit_all(graph);
// Build spectral position map if embedding is available // Build spectral position map if embedding is available
let positions: HashMap<String, SpectralPosition> = if let Some(emb) = emb { let positions: HashMap<String, SpectralPosition> = if let Some(emb) = emb {
let communities = graph.communities().clone(); let communities = graph.communities().clone();
@ -116,14 +114,12 @@ pub fn replay_queue_with_graph(
store, key, graph, store, key, graph,
pos.map(|p| p.outlier_score), pos.map(|p| p.outlier_score),
); );
let fit = fits.get(key).copied().unwrap_or(0.0);
ReplayItem { ReplayItem {
key: key.clone(), key: key.clone(),
priority, priority,
interval_days: node.spaced_repetition_interval, interval_days: node.spaced_repetition_interval,
emotion: node.emotion, emotion: node.emotion,
schema_fit: fit, cc: graph.clustering_coefficient(key),
classification, classification,
outlier_score, outlier_score,
} }
@ -178,7 +174,7 @@ pub fn detect_interference(
/// Low fit (<0.2): deep examination needed — new schema seed, bridge, or noise? /// Low fit (<0.2): deep examination needed — new schema seed, bridge, or noise?
pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) { pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
let graph = store.build_graph(); let graph = store.build_graph();
let fit = graph::schema_fit(&graph, key); let fit = graph.clustering_coefficient(key);
let recommendation = if fit > 0.5 { let recommendation = if fit > 0.5 {
"auto-integrate" "auto-integrate"
@ -267,8 +263,8 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
}; };
out.push_str(&format!("## {} \n", item.key)); out.push_str(&format!("## {} \n", item.key));
out.push_str(&format!("Priority: {:.3} Schema fit: {:.3} Emotion: {:.1} ", out.push_str(&format!("Priority: {:.3} CC: {:.3} Emotion: {:.1} ",
item.priority, item.schema_fit, item.emotion)); item.priority, item.cc, item.emotion));
out.push_str(&format!("Category: {} Interval: {}d\n", out.push_str(&format!("Category: {} Interval: {}d\n",
node.category.label(), node.spaced_repetition_interval)); node.category.label(), node.spaced_repetition_interval));
if item.outlier_score > 0.0 { if item.outlier_score > 0.0 {
@ -485,8 +481,8 @@ pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<()
let node_type = store.nodes.get(&item.key) let node_type = store.nodes.get(&item.key)
.map(|n| if n.key.contains("journal") { "episodic" } else { "semantic" }) .map(|n| if n.key.contains("journal") { "episodic" } else { "semantic" })
.unwrap_or("?"); .unwrap_or("?");
println!(" [{:.3}] {} (fit={:.3}, interval={}d, type={})", println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
item.priority, item.key, item.schema_fit, item.interval_days, node_type); item.priority, item.key, item.cc, item.interval_days, node_type);
} }
// Also show interference pairs // Also show interference pairs
@ -556,13 +552,12 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
let items: Vec<ReplayItem> = episode_keys.iter() let items: Vec<ReplayItem> = episode_keys.iter()
.filter_map(|k| { .filter_map(|k| {
let node = store.nodes.get(k)?; let node = store.nodes.get(k)?;
let fit = graph::schema_fit(&graph, k);
Some(ReplayItem { Some(ReplayItem {
key: k.clone(), key: k.clone(),
priority: consolidation_priority(store, k, &graph, None), priority: consolidation_priority(store, k, &graph, None),
interval_days: node.spaced_repetition_interval, interval_days: node.spaced_repetition_interval,
emotion: node.emotion, emotion: node.emotion,
schema_fit: fit, cc: graph.clustering_coefficient(k),
classification: "unknown", classification: "unknown",
outlier_score: 0.0, outlier_score: 0.0,
}) })
@ -597,12 +592,7 @@ pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
let graph = store.build_graph(); let graph = store.build_graph();
let alpha = graph.degree_power_law_exponent(); let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini(); let gini = graph.degree_gini();
let avg_fit = { let avg_cc = graph.avg_clustering_coefficient();
let fits = graph::schema_fit_all(&graph);
if fits.is_empty() { 0.0 } else {
fits.values().sum::<f32>() / fits.len() as f32
}
};
let interference_pairs = detect_interference(store, &graph, 0.5); let interference_pairs = detect_interference(store, &graph, 0.5);
let interference_count = interference_pairs.len(); let interference_count = interference_pairs.len();
@ -651,17 +641,17 @@ pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
gini)); gini));
} }
// Target: avg schema fit ≥ 0.2 // Target: avg CC ≥ 0.2
if avg_fit < 0.1 { if avg_cc < 0.1 {
plan.replay_count += 5; plan.replay_count += 5;
plan.rationale.push(format!( plan.rationale.push(format!(
"Schema fit={:.3} (target ≥0.2): very poor integration → +5 replay", "CC={:.3} (target ≥0.2): very poor integration → +5 replay",
avg_fit)); avg_cc));
} else if avg_fit < 0.2 { } else if avg_cc < 0.2 {
plan.replay_count += 2; plan.replay_count += 2;
plan.rationale.push(format!( plan.rationale.push(format!(
"Schema fit={:.3} (target ≥0.2): low integration → +2 replay", "CC={:.3} (target ≥0.2): low integration → +2 replay",
avg_fit)); avg_cc));
} }
// Interference: >100 pairs is a lot, <10 is clean // Interference: >100 pairs is a lot, <10 is clean
@ -748,21 +738,14 @@ pub fn daily_check(store: &Store) -> String {
let gini = graph.degree_gini(); let gini = graph.degree_gini();
let sigma = graph.small_world_sigma(); let sigma = graph.small_world_sigma();
let avg_cc = graph.avg_clustering_coefficient(); let avg_cc = graph.avg_clustering_coefficient();
let avg_fit = {
let fits = graph::schema_fit_all(&graph);
if fits.is_empty() { 0.0 } else {
fits.values().sum::<f32>() / fits.len() as f32
}
};
let history = graph::load_metrics_history(); let history = graph::load_metrics_history();
let prev = history.last(); let prev = history.last();
let mut out = String::from("Memory daily check\n"); let mut out = String::from("Memory daily check\n");
// Current state // Current state
out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n", out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
sigma, alpha, gini, avg_cc, avg_fit)); sigma, alpha, gini, avg_cc));
// Trend // Trend
if let Some(p) = prev { if let Some(p) = prev {
@ -777,7 +760,7 @@ pub fn daily_check(store: &Store) -> String {
let mut issues = Vec::new(); let mut issues = Vec::new();
if alpha < 2.0 { issues.push("hub dominance critical"); } if alpha < 2.0 { issues.push("hub dominance critical"); }
if gini > 0.5 { issues.push("high inequality"); } if gini > 0.5 { issues.push("high inequality"); }
if avg_fit < 0.1 { issues.push("poor integration"); } if avg_cc < 0.1 { issues.push("poor integration"); }
if d_sigma < -5.0 { issues.push("σ declining"); } if d_sigma < -5.0 { issues.push("σ declining"); }
if d_alpha < -0.1 { issues.push("α declining"); } if d_alpha < -0.1 { issues.push("α declining"); }
if d_gini > 0.02 { issues.push("inequality increasing"); } if d_gini > 0.02 { issues.push("inequality increasing"); }
@ -802,7 +785,6 @@ pub fn daily_check(store: &Store) -> String {
communities: graph.community_count(), communities: graph.community_count(),
sigma, alpha, gini, avg_cc, sigma, alpha, gini, avg_cc,
avg_path_length: graph.avg_path_length(), avg_path_length: graph.avg_path_length(),
avg_schema_fit: avg_fit,
}); });
out out

View file

@ -184,12 +184,9 @@ fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option
"community_id" => { "community_id" => {
graph.communities().get(key).map(|&c| Value::Num(c as f64)) graph.communities().get(key).map(|&c| Value::Num(c as f64))
} }
"clustering_coefficient" => { "clustering_coefficient" | "schema_fit" | "cc" => {
Some(Value::Num(graph.clustering_coefficient(key) as f64)) Some(Value::Num(graph.clustering_coefficient(key) as f64))
} }
"schema_fit" => {
node.schema_fit.map(|f| Value::Num(f as f64))
}
_ => None, _ => None,
} }
} }