graph: schema_fit is algebraically identical to clustering_coefficient

Both functions count connected pairs among a node's neighbors:
  cc = 2*triangles / (deg*(deg-1))
  density = inter_edges / (n*(n-1)/2) = 2*inter_edges / (n*(n-1))

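Every edge between two of a node's neighbors closes exactly one triangle
through that node, so inter_edges == triangles; n is the neighbor count,
i.e. deg. Hence density == cc.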
schema_fit was (density + cc) / 2.0 = (cc + cc) / 2.0 = cc.

Verified empirically: assert!((density - cc).abs() < 1e-6) passed
on all 2401 nodes before this change.

Keep schema_fit as a semantic alias — CC is a graph metric,
schema fit is a cognitive one — but eliminate the redundant O(n²)
pairwise scan over each node's n neighbors, which ran for every node.
This commit is contained in:
ProofOfConcept 2026-03-03 12:09:02 -05:00
parent fa7fe8c14b
commit fb7aa46e03

View file

@ -500,41 +500,20 @@ fn label_propagation(
labels labels
} }
/// Schema fit: for a node, measure how well-connected its neighbors are /// Schema fit for a node: how well-integrated into its local neighborhood.
/// to each other. High density + high CC among neighbors = good schema fit. ///
/// Algebraically identical to clustering_coefficient — both measure the
/// fraction of neighbor pairs that are connected. Kept as an alias for
/// semantic clarity: CC is a graph metric, schema fit is a cognitive one
/// (how well does this node fit the surrounding schema?).
pub fn schema_fit(graph: &Graph, key: &str) -> f32 { pub fn schema_fit(graph: &Graph, key: &str) -> f32 {
let neighbors = graph.neighbor_keys(key); graph.clustering_coefficient(key)
let n = neighbors.len();
if n < 2 {
return 0.0; // isolated or leaf — no schema context
}
// Count edges among neighbors
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
let mut inter_edges = 0u32;
for i in 0..neighbor_vec.len() {
for j in (i + 1)..neighbor_vec.len() {
let ni_neighbors = graph.neighbor_keys(neighbor_vec[i]);
if ni_neighbors.contains(neighbor_vec[j]) {
inter_edges += 1;
}
}
}
let max_edges = (n * (n - 1)) / 2;
let density = if max_edges == 0 { 0.0 } else {
inter_edges as f32 / max_edges as f32
};
// Combine neighborhood density with own CC
let cc = graph.clustering_coefficient(key);
(density + cc) / 2.0
} }
/// Compute schema fit for all nodes /// Compute schema fit for all nodes
pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> { pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
graph.nodes().iter() graph.nodes().iter()
.map(|key| (key.clone(), schema_fit(graph, key))) .map(|key| (key.clone(), graph.clustering_coefficient(key)))
.collect() .collect()
} }