From e6613f97bb250b471df98ec41208582eec3ce4a7 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Fri, 20 Mar 2026 12:55:14 -0400
Subject: [PATCH] graph: community isolation scoring + sort:isolation query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add community_isolation() to Graph — computes per-community ratio of
internal vs total edge weight. 1.0 = fully isolated, 0.0 = all edges
external.

New query: sort:isolation — sorts nodes by their community's isolation
score, most isolated first. Useful for aiming organize agents at
poorly-integrated knowledge clusters.

New CLI: poc-memory graph communities [N] [--min-size M] — lists
communities sorted by isolation with member preview. Reveals islands
like the Shannon theory cluster (3 nodes, 100% isolated, 0 cross-edges)
and large agent-journal clusters (20-30 nodes, 95% isolated).

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 poc-memory/src/cli/graph.rs    | 37 ++++++++++++++++
 poc-memory/src/graph.rs        | 79 ++++++++++++++++++++++++++++++++++
 poc-memory/src/main.rs         | 10 +++++
 poc-memory/src/query/engine.rs | 15 +++++++
 4 files changed, 141 insertions(+)

diff --git a/poc-memory/src/cli/graph.rs b/poc-memory/src/cli/graph.rs
index 268de17..df60704 100644
--- a/poc-memory/src/cli/graph.rs
+++ b/poc-memory/src/cli/graph.rs
@@ -656,3 +656,40 @@ pub fn cmd_interference(threshold: f32) -> Result<(), String> {
     Ok(())
 }
 
+/// Show communities sorted by isolation (most isolated first).
+/// Useful for finding poorly-integrated knowledge clusters that need
+/// organize agents aimed at them.
+pub fn cmd_communities(top_n: usize, min_size: usize) -> Result<(), String> {
+    let store = store::Store::load()?;
+    let infos = store.build_graph().community_info();
+    let total = infos.len();
+
+    // Filter first and count before truncating: the summary line reports how
+    // many communities meet `min_size`, not how many rows `top_n` lets through.
+    let eligible: Vec<_> = infos.into_iter()
+        .filter(|c| c.size >= min_size)
+        .collect();
+    let matching = eligible.len();
+
+    println!("{} communities total ({} with size >= {})\n",
+        total, matching, min_size);
+    println!("{:<6} {:>5} {:>7} {:>7} members", "id", "size", "iso", "cross");
+    println!("{}", "-".repeat(70));
+
+    // `community_info()` is already ordered, so `take` keeps the leading rows.
+    for c in eligible.iter().take(top_n) {
+        // Preview at most five member keys; summarise the remainder as " +N".
+        let preview: Vec<&str> = c.members.iter().take(5).map(|s| s.as_str()).collect();
+        let more = if c.size > 5 {
+            format!(" +{}", c.size - 5)
+        } else {
+            String::new()
+        };
+        println!("{:<6} {:>5} {:>6.0}% {:>7} {}{}",
+            c.id, c.size, c.isolation * 100.0, c.cross_edges,
+            preview.join(", "), more);
+    }
+
+    Ok(())
+}
+
diff --git a/poc-memory/src/graph.rs b/poc-memory/src/graph.rs
index 6867473..3f47fec 100644
--- a/poc-memory/src/graph.rs
+++ b/poc-memory/src/graph.rs
@@ -12,6 +12,16 @@ use crate::store::{Store, RelationType, StoreView};
 use serde::{Deserialize, Serialize};
 use std::collections::{HashMap, HashSet, VecDeque};
 
+/// Community info for reporting
+#[derive(Clone, Debug)]
+pub struct CommunityInfo {
+    pub id: u32,              // community id, as stored in the graph's community map
+    pub members: Vec<String>, // node keys assigned to this community
+    pub size: usize,          // number of entries in `members`
+    pub isolation: f32,       // internal / total edge weight; 1.0 = fully isolated
+    pub cross_edges: usize,   // edges whose other endpoint lies outside the community
+}
+
 /// Weighted edge in the graph
 #[derive(Clone, Debug)]
 pub struct Edge {
@@ -110,6 +120,75 @@ impl Graph {
         &self.communities
     }
 
+    /// Community isolation scores: for each community, what fraction of its
+    /// total edge weight is internal (vs cross-community). Returns community_id → score
+    /// where 1.0 = fully isolated (no external edges), 0.0 = all edges external.
+    /// Singleton communities (1 node, no edges) get score 1.0.
+    pub fn community_isolation(&self) -> HashMap<u32, f32> {
+        // Accumulate internal and total edge weight per community
+        let mut internal: HashMap<u32, f32> = HashMap::new();
+        let mut total: HashMap<u32, f32> = HashMap::new();
+
+        for (key, edges) in &self.adj {
+            let Some(&my_comm) = self.communities.get(key) else { continue };
+            for edge in edges {
+                *total.entry(my_comm).or_default() += edge.strength;
+                // A neighbour with no community assignment counts as external.
+                if self.communities.get(&edge.target) == Some(&my_comm) {
+                    *internal.entry(my_comm).or_default() += edge.strength;
+                }
+            }
+        }
+
+        // Score every known community, including ones that own no edges.
+        let mut scores = HashMap::new();
+        for &comm in self.communities.values() {
+            let t = total.get(&comm).copied().unwrap_or(0.0);
+            let i = internal.get(&comm).copied().unwrap_or(0.0);
+            // Negligible total weight = no edges = fully isolated.
+            scores.insert(comm, if t < 0.001 { 1.0 } else { i / t });
+        }
+        scores
+    }
+
+    /// Per-community report (sorted member keys, size, isolation, cross-edge count),
+    /// most isolated first; ties go to larger size, then lower id, for stable output.
+    pub fn community_info(&self) -> Vec<CommunityInfo> {
+        let isolation = self.community_isolation();
+
+        // Group members by community
+        let mut members: HashMap<u32, Vec<String>> = HashMap::new();
+        for (key, &comm) in &self.communities {
+            members.entry(comm).or_default().push(key.clone());
+        }
+
+        // Count edges that leave each community. Every adjacency entry is
+        // visited once, from its source node, so the tally is not halved.
+        let mut cross_edges: HashMap<u32, usize> = HashMap::new();
+        for (key, edges) in &self.adj {
+            let Some(&my_comm) = self.communities.get(key) else { continue };
+            for edge in edges {
+                if self.communities.get(&edge.target) != Some(&my_comm) {
+                    *cross_edges.entry(my_comm).or_default() += 1;
+                }
+            }
+        }
+
+        let mut result: Vec<CommunityInfo> = members.into_iter()
+            .map(|(id, mut keys)| {
+                keys.sort();
+                let size = keys.len();
+                let iso = isolation.get(&id).copied().unwrap_or(1.0);
+                let cross = cross_edges.get(&id).copied().unwrap_or(0);
+                CommunityInfo { id, members: keys, size, isolation: iso, cross_edges: cross }
+            })
+            .collect();
+        result.sort_by(|a, b| b.isolation.total_cmp(&a.isolation)
+            .then_with(|| b.size.cmp(&a.size))
+            .then_with(|| a.id.cmp(&b.id)));
+        result
+    }
+
     /// Hub degree threshold: top 5% by degree
     pub fn hub_threshold(&self) -> usize {
         let mut degrees: Vec<usize> = self.keys.iter()
diff --git a/poc-memory/src/main.rs b/poc-memory/src/main.rs
index f68af92..9c473f2 100644
--- a/poc-memory/src/main.rs
+++ b/poc-memory/src/main.rs
@@ -391,6 +391,15 @@ enum GraphCmd {
         #[arg(long, default_value_t = 0.4)]
         threshold: f32,
     },
+    /// Show communities sorted by isolation (most isolated first)
+    Communities {
+        /// Number of communities to show
+        #[arg(default_value_t = 20)]
+        top_n: usize,
+        /// Minimum community size to show
+        #[arg(long, default_value_t = 2)]
+        min_size: usize,
+    },
     /// Show graph structure overview
     Overview,
     /// Spectral decomposition of the memory graph
@@ -817,6 +826,7 @@ fn main() {
                 => cli::graph::cmd_differentiate(key.as_deref(), apply),
             GraphCmd::Trace { key } => cli::graph::cmd_trace(&key),
             GraphCmd::Interference { threshold } => cli::graph::cmd_interference(threshold),
+            GraphCmd::Communities { top_n, min_size } => cli::graph::cmd_communities(top_n, min_size),
             GraphCmd::Overview => cli::graph::cmd_graph(),
             GraphCmd::Spectral { k } => cli::graph::cmd_spectral(k),
             GraphCmd::SpectralSave { k } => cli::graph::cmd_spectral_save(k),
diff --git a/poc-memory/src/query/engine.rs b/poc-memory/src/query/engine.rs
index d12fe7f..4543ab6 100644
--- a/poc-memory/src/query/engine.rs
+++ b/poc-memory/src/query/engine.rs
@@ -167,6 +167,7 @@ pub enum SortField {
     ContentLen,
     Degree,
     Weight,
+    Isolation,
 }
 
 /// Numeric comparison operator.
@@ -307,6 +308,7 @@ impl Stage {
                     "content-len" => SortField::ContentLen,
                     "degree" => SortField::Degree,
                     "weight" => SortField::Weight,
+                    "isolation" => SortField::Isolation,
                     _ => return Err(format!("unknown sort field: {}", value)),
                 };
                 Stage::Transform(Transform::Sort(field))
@@ -548,6 +550,19 @@ pub fn run_transform(
                         db.cmp(&da) // desc
                     });
                 }
+                SortField::Isolation => {
+                    // Sort nodes by their community's isolation score, most
+                    // isolated (highest internal edge ratio) first. A node with
+                    // no community, or a community with no score, counts as 1.0
+                    // rather than borrowing community 0's score.
+                    let iso = graph.community_isolation();
+                    let comms = graph.communities();
+                    items.sort_by(|a, b| {
+                        let sa = comms.get(&a.0).and_then(|c| iso.get(c)).copied().unwrap_or(1.0);
+                        let sb = comms.get(&b.0).and_then(|c| iso.get(c)).copied().unwrap_or(1.0);
+                        sb.total_cmp(&sa) // most isolated first
+                    });
+                }
                 SortField::Priority => {
                     // Pre-compute priorities to avoid O(n log n) calls
                     // inside the sort comparator.