eliminate schema_fit: it's clustering coefficient
schema_fit was algebraically identical to clustering_coefficient: both compute 2E/(d*(d-1)), i.e. the fraction of connected neighbor pairs. Remove the redundant function, field, and metrics column.

- Delete schema_fit() and schema_fit_all() from graph.rs
- Remove the schema_fit field from the Node struct
- Remove avg_schema_fit from MetricsSnapshot (it duplicated avg_cc)
- Replace all callers with graph.clustering_coefficient()
- Rename ReplayItem.schema_fit to .cc
- Query: "cc" and "schema_fit" both resolve from graph CC
- Low-CC count is folded into the health report's CC line

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
fb7aa46e03
commit
ec8b4b2ed2
5 changed files with 33 additions and 83 deletions
39
src/graph.rs
39
src/graph.rs
|
|
@ -500,22 +500,6 @@ fn label_propagation(
|
|||
labels
|
||||
}
|
||||
|
||||
/// Schema fit for a node: how well-integrated into its local neighborhood.
|
||||
///
|
||||
/// Algebraically identical to clustering_coefficient — both measure the
|
||||
/// fraction of neighbor pairs that are connected. Kept as an alias for
|
||||
/// semantic clarity: CC is a graph metric, schema fit is a cognitive one
|
||||
/// (how well does this node fit the surrounding schema?).
|
||||
pub fn schema_fit(graph: &Graph, key: &str) -> f32 {
|
||||
graph.clustering_coefficient(key)
|
||||
}
|
||||
|
||||
/// Compute schema fit for all nodes
|
||||
pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
|
||||
graph.nodes().iter()
|
||||
.map(|key| (key.clone(), graph.clustering_coefficient(key)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// A snapshot of graph topology metrics, for tracking evolution over time
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
|
|
@ -530,7 +514,8 @@ pub struct MetricsSnapshot {
|
|||
pub gini: f32,
|
||||
pub avg_cc: f32,
|
||||
pub avg_path_length: f32,
|
||||
pub avg_schema_fit: f32,
|
||||
// Removed: avg_schema_fit was identical to avg_cc.
|
||||
// Old snapshots with the field still deserialize (serde ignores unknown fields by default).
|
||||
}
|
||||
|
||||
fn metrics_log_path() -> std::path::PathBuf {
|
||||
|
|
@ -595,12 +580,10 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
|
|||
let alpha = graph.degree_power_law_exponent();
|
||||
let gini = graph.degree_gini();
|
||||
|
||||
// Schema fit distribution
|
||||
let fits = schema_fit_all(graph);
|
||||
let avg_fit = if fits.is_empty() { 0.0 } else {
|
||||
fits.values().sum::<f32>() / fits.len() as f32
|
||||
};
|
||||
let low_fit = fits.values().filter(|&&f| f < 0.1).count();
|
||||
// Low-CC nodes: poorly integrated
|
||||
let low_cc = graph.nodes().iter()
|
||||
.filter(|k| graph.clustering_coefficient(k) < 0.1)
|
||||
.count();
|
||||
|
||||
// Category breakdown
|
||||
let cats = store.category_counts();
|
||||
|
|
@ -614,7 +597,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
|
|||
nodes: n, edges: e, communities,
|
||||
sigma, alpha, gini, avg_cc,
|
||||
avg_path_length: avg_pl,
|
||||
avg_schema_fit: avg_fit,
|
||||
};
|
||||
save_metrics_snapshot(&snap);
|
||||
|
||||
|
|
@ -641,7 +623,6 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
|
|||
let alpha_d = delta(alpha, prev.map(|p| p.alpha));
|
||||
let gini_d = delta(gini, prev.map(|p| p.gini));
|
||||
let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
|
||||
let fit_d = delta(avg_fit, prev.map(|p| p.avg_schema_fit));
|
||||
|
||||
let mut report = format!(
|
||||
"Memory Health Report
|
||||
|
|
@ -649,15 +630,13 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
|
|||
Nodes: {n} Relations: {e} Communities: {communities}
|
||||
|
||||
Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
|
||||
Clustering coefficient (avg): {avg_cc:.4}{cc_d}
|
||||
Clustering coefficient (avg): {avg_cc:.4}{cc_d} low-CC (<0.1): {low_cc} nodes
|
||||
Average path length: {avg_pl:.2}
|
||||
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
|
||||
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
|
||||
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)
|
||||
|
||||
Community sizes (top 5): {top5}
|
||||
Schema fit: avg={avg_fit:.3}{fit_d} low-fit (<0.1): {low_fit} nodes
|
||||
|
||||
Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
|
||||
top5 = sizes.iter().take(5)
|
||||
.map(|s| s.to_string())
|
||||
|
|
@ -674,8 +653,8 @@ Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
|
|||
if history.len() >= 3 {
|
||||
report.push_str("\n\nMetrics history (last 5):\n");
|
||||
for snap in &history[history.len().saturating_sub(5)..] {
|
||||
report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
|
||||
snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc, snap.avg_schema_fit));
|
||||
report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
|
||||
snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue