From ec8b4b2ed28bc3b894b2d91a0767c6aa82ecf32f Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Tue, 3 Mar 2026 12:21:04 -0500 Subject: [PATCH] eliminate schema_fit: it's clustering coefficient schema_fit was algebraically identical to clustering_coefficient (both compute 2E/(d*(d-1)) = fraction of connected neighbor pairs). Remove the redundant function, field, and metrics column. - Delete schema_fit() and schema_fit_all() from graph.rs - Remove schema_fit field from Node struct - Remove avg_schema_fit from MetricsSnapshot (duplicated avg_cc) - Replace all callers with graph.clustering_coefficient() - Rename ReplayItem.schema_fit to .cc - Query: "cc" and "schema_fit" both resolve from graph CC - Low-CC count folded into health report CC line Co-Authored-By: Kent Overstreet --- src/capnp_store.rs | 6 ----- src/graph.rs | 39 +++++++--------------------- src/migrate.rs | 2 -- src/neuro.rs | 64 +++++++++++++++++----------------------------- src/query.rs | 5 +--- 5 files changed, 33 insertions(+), 83 deletions(-) diff --git a/src/capnp_store.rs b/src/capnp_store.rs index d32716f..d67eddc 100644 --- a/src/capnp_store.rs +++ b/src/capnp_store.rs @@ -145,8 +145,6 @@ pub struct Node { #[serde(default)] pub clustering_coefficient: Option, #[serde(default)] - pub schema_fit: Option, - #[serde(default)] pub degree: Option, } @@ -925,7 +923,6 @@ impl Store { position: 0, community_id: None, clustering_coefficient: None, - schema_fit: None, degree: None, } } @@ -1505,13 +1502,11 @@ impl Store { pub fn update_graph_metrics(&mut self) { let g = self.build_graph(); let communities = g.communities(); - let fits = graph::schema_fit_all(&g); for (key, node) in &mut self.nodes { node.community_id = communities.get(key).copied(); node.clustering_coefficient = Some(g.clustering_coefficient(key)); node.degree = Some(g.degree(key) as u32); - node.schema_fit = fits.get(key).copied(); } } @@ -1874,7 +1869,6 @@ fn read_content_node(r: memory_capnp::content_node::Reader) -> Result f32 { - 
graph.clustering_coefficient(key)
-}
-
-/// Compute schema fit for all nodes
-pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
-    graph.nodes().iter()
-        .map(|key| (key.clone(), graph.clustering_coefficient(key)))
-        .collect()
-}
 
 /// A snapshot of graph topology metrics, for tracking evolution over time
 #[derive(Clone, Debug, Serialize, Deserialize)]
@@ -530,7 +514,8 @@ pub struct MetricsSnapshot {
     pub gini: f32,
     pub avg_cc: f32,
     pub avg_path_length: f32,
-    pub avg_schema_fit: f32,
+    // Removed: avg_schema_fit was identical to avg_cc.
+    // Old snapshots with the field still deserialize (serde ignores unknown fields by default).
 }
 
 fn metrics_log_path() -> std::path::PathBuf {
@@ -595,12 +580,10 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
     let alpha = graph.degree_power_law_exponent();
     let gini = graph.degree_gini();
 
-    // Schema fit distribution
-    let fits = schema_fit_all(graph);
-    let avg_fit = if fits.is_empty() { 0.0 } else {
-        fits.values().sum::<f32>() / fits.len() as f32
-    };
-    let low_fit = fits.values().filter(|&&f| f < 0.1).count();
+    // Low-CC nodes: poorly integrated
+    let low_cc = graph.nodes().iter()
+        .filter(|k| graph.clustering_coefficient(k) < 0.1)
+        .count();
 
     // Category breakdown
     let cats = store.category_counts();
@@ -614,7 +597,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
         nodes: n, edges: e, communities,
         sigma, alpha, gini, avg_cc,
         avg_path_length: avg_pl,
-        avg_schema_fit: avg_fit,
     };
     save_metrics_snapshot(&snap);
@@ -641,7 +623,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
     let alpha_d = delta(alpha, prev.map(|p| p.alpha));
     let gini_d = delta(gini, prev.map(|p| p.gini));
     let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
-    let fit_d = delta(avg_fit, prev.map(|p| p.avg_schema_fit));
 
     let mut report = format!(
         "Memory Health Report
 ====================
 Nodes: {n} Relations: {e} Communities: {communities}
 Degree: max={max_deg} 
median={median_deg} avg={avg_deg:.1} -Clustering coefficient (avg): {avg_cc:.4}{cc_d} +Clustering coefficient (avg): {avg_cc:.4}{cc_d} low-CC (<0.1): {low_cc} nodes Average path length: {avg_pl:.2} Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world) Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian) Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub) Community sizes (top 5): {top5} -Schema fit: avg={avg_fit:.3}{fit_d} low-fit (<0.1): {low_fit} nodes - Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}", top5 = sizes.iter().take(5) .map(|s| s.to_string()) @@ -674,8 +653,8 @@ Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}", if history.len() >= 3 { report.push_str("\n\nMetrics history (last 5):\n"); for snap in &history[history.len().saturating_sub(5)..] { - report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n", - snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc, snap.avg_schema_fit)); + report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4}\n", + snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc)); } } diff --git a/src/migrate.rs b/src/migrate.rs index ea4bfec..8e3e959 100644 --- a/src/migrate.rs +++ b/src/migrate.rs @@ -221,7 +221,6 @@ pub fn migrate() -> Result<(), String> { position: 0, community_id: None, clustering_coefficient: None, - schema_fit: None, degree: None, }; all_nodes.push(node); @@ -261,7 +260,6 @@ pub fn migrate() -> Result<(), String> { position: 0, community_id: None, clustering_coefficient: None, - schema_fit: None, degree: None, }; all_nodes.push(node); diff --git a/src/neuro.rs b/src/neuro.rs index c631fa7..aaaf9e6 100644 --- a/src/neuro.rs +++ b/src/neuro.rs @@ -19,12 +19,12 @@ const SECS_PER_DAY: f64 = 86400.0; /// With spectral data: /// priority = spectral_displacement × overdue × emotion /// Without: -/// priority = (1 - schema_fit) × overdue × emotion +/// priority = (1 - cc) × overdue × emotion /// /// Spectral 
displacement is the outlier_score clamped and normalized —
 /// it measures how far a node sits from its community center in the
 /// eigenspace. This is a global signal (considers all graph structure)
-/// vs schema_fit which is local (only immediate neighbors).
+/// vs CC which is local (only immediate neighbors).
 pub fn consolidation_priority(
     store: &Store,
     key: &str,
@@ -44,8 +44,8 @@ pub fn consolidation_priority(
         // outlier=5 and outlier=10 matters less than 1 vs 2.
         (outlier / 3.0).min(3.0)
     } else {
-        let fit = graph::schema_fit(graph, key) as f64;
-        1.0 - fit
+        let cc = graph.clustering_coefficient(key) as f64;
+        1.0 - cc
     };
 
     // Spaced repetition: how overdue is this node for replay?
@@ -69,7 +69,7 @@ pub struct ReplayItem {
     pub priority: f64,
     pub interval_days: u32,
     pub emotion: f32,
-    pub schema_fit: f32,
+    pub cc: f32,
     /// Spectral classification: "bridge", "outlier", "core", "peripheral"
     pub classification: &'static str,
     /// Raw spectral outlier score (distance / median)
@@ -91,8 +91,6 @@ pub fn replay_queue_with_graph(
     graph: &Graph,
     emb: Option<&SpectralEmbedding>,
 ) -> Vec<ReplayItem> {
-    let fits = graph::schema_fit_all(graph);
-
     // Build spectral position map if embedding is available
     let positions: HashMap = if let Some(emb) = emb {
         let communities = graph.communities().clone();
@@ -116,14 +114,12 @@ pub fn replay_queue_with_graph(
                 store, key, graph,
                 pos.map(|p| p.outlier_score),
             );
-            let fit = fits.get(key).copied().unwrap_or(0.0);
-
             ReplayItem {
                 key: key.clone(),
                 priority,
                 interval_days: node.spaced_repetition_interval,
                 emotion: node.emotion,
-                schema_fit: fit,
+                cc: graph.clustering_coefficient(key),
                 classification,
                 outlier_score,
             }
@@ -178,7 +174,7 @@ pub fn detect_interference(
 /// Low fit (<0.2): deep examination needed — new schema seed, bridge, or noise? 
pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
     let graph = store.build_graph();
-    let fit = graph::schema_fit(&graph, key);
+    let fit = graph.clustering_coefficient(key);
 
     let recommendation = if fit > 0.5 {
         "auto-integrate"
@@ -267,8 +263,8 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
         };
 
         out.push_str(&format!("## {} \n", item.key));
-        out.push_str(&format!("Priority: {:.3} Schema fit: {:.3} Emotion: {:.1} ",
-            item.priority, item.schema_fit, item.emotion));
+        out.push_str(&format!("Priority: {:.3} CC: {:.3} Emotion: {:.1} ",
+            item.priority, item.cc, item.emotion));
         out.push_str(&format!("Category: {} Interval: {}d\n",
             node.category.label(), node.spaced_repetition_interval));
         if item.outlier_score > 0.0 {
@@ -485,8 +481,8 @@ pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<()
         let node_type = store.nodes.get(&item.key)
             .map(|n| if n.key.contains("journal") { "episodic" } else { "semantic" })
             .unwrap_or("?");
-        println!(" [{:.3}] {} (fit={:.3}, interval={}d, type={})",
-            item.priority, item.key, item.schema_fit, item.interval_days, node_type);
+        println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
-            item.priority, item.key, item.cc, item.interval_days, node_type);
     }
 
     // Also show interference pairs
@@ -556,13 +552,12 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result
     let episode_items: Vec<ReplayItem> = episode_keys.iter()
         .filter_map(|k| {
             let node = store.nodes.get(k)?;
-            let fit = graph::schema_fit(&graph, k);
             Some(ReplayItem {
                 key: k.clone(),
                 priority: consolidation_priority(store, k, &graph, None),
                 interval_days: node.spaced_repetition_interval,
                 emotion: node.emotion,
-                schema_fit: fit,
+                cc: graph.clustering_coefficient(k),
                 classification: "unknown",
                 outlier_score: 0.0,
             })
         })
@@ -597,12 +592,7 @@ pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
     let graph = store.build_graph();
     let alpha = graph.degree_power_law_exponent();
     let gini = graph.degree_gini();
-
let avg_fit = {
-        let fits = graph::schema_fit_all(&graph);
-        if fits.is_empty() { 0.0 } else {
-            fits.values().sum::<f32>() / fits.len() as f32
-        }
-    };
+    let avg_cc = graph.avg_clustering_coefficient();
 
     let interference_pairs = detect_interference(store, &graph, 0.5);
     let interference_count = interference_pairs.len();
@@ -651,17 +641,17 @@ pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
             gini));
     }
 
-    // Target: avg schema fit ≥ 0.2
-    if avg_fit < 0.1 {
+    // Target: avg CC ≥ 0.2
+    if avg_cc < 0.1 {
         plan.replay_count += 5;
         plan.rationale.push(format!(
-            "Schema fit={:.3} (target ≥0.2): very poor integration → +5 replay",
-            avg_fit));
-    } else if avg_fit < 0.2 {
+            "CC={:.3} (target ≥0.2): very poor integration → +5 replay",
+            avg_cc));
+    } else if avg_cc < 0.2 {
         plan.replay_count += 2;
         plan.rationale.push(format!(
-            "Schema fit={:.3} (target ≥0.2): low integration → +2 replay",
-            avg_fit));
+            "CC={:.3} (target ≥0.2): low integration → +2 replay",
+            avg_cc));
     }
 
     // Interference: >100 pairs is a lot, <10 is clean
@@ -748,21 +738,14 @@ pub fn daily_check(store: &Store) -> String {
     let gini = graph.degree_gini();
     let sigma = graph.small_world_sigma();
     let avg_cc = graph.avg_clustering_coefficient();
-    let avg_fit = {
-        let fits = graph::schema_fit_all(&graph);
-        if fits.is_empty() { 0.0 } else {
-            fits.values().sum::<f32>() / fits.len() as f32
-        }
-    };
-
     let history = graph::load_metrics_history();
     let prev = history.last();
 
     let mut out = String::from("Memory daily check\n");
 
     // Current state
-    out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
-        sigma, alpha, gini, avg_cc, avg_fit));
+    out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
+        sigma, alpha, gini, avg_cc));
 
     // Trend
     if let Some(p) = prev {
@@ -777,7 +760,7 @@ pub fn daily_check(store: &Store) -> String {
     let mut issues = Vec::new();
     if alpha < 2.0 { issues.push("hub dominance critical"); }
     if gini > 0.5 { issues.push("high inequality"); }
-    if avg_fit < 0.1 { 
issues.push("poor integration"); } + if avg_cc < 0.1 { issues.push("poor integration"); } if d_sigma < -5.0 { issues.push("σ declining"); } if d_alpha < -0.1 { issues.push("α declining"); } if d_gini > 0.02 { issues.push("inequality increasing"); } @@ -802,7 +785,6 @@ pub fn daily_check(store: &Store) -> String { communities: graph.community_count(), sigma, alpha, gini, avg_cc, avg_path_length: graph.avg_path_length(), - avg_schema_fit: avg_fit, }); out diff --git a/src/query.rs b/src/query.rs index 21b4beb..b663776 100644 --- a/src/query.rs +++ b/src/query.rs @@ -184,12 +184,9 @@ fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option "community_id" => { graph.communities().get(key).map(|&c| Value::Num(c as f64)) } - "clustering_coefficient" => { + "clustering_coefficient" | "schema_fit" | "cc" => { Some(Value::Num(graph.clustering_coefficient(key) as f64)) } - "schema_fit" => { - node.schema_fit.map(|f| Value::Num(f as f64)) - } _ => None, } }