graph: community isolation scoring + sort:isolation query
Add community_isolation() to Graph — computes per-community ratio of internal vs total edge weight. 1.0 = fully isolated, 0.0 = all edges external. New query: sort:isolation — sorts nodes by their community's isolation score, most isolated first. Useful for aiming organize agents at poorly-integrated knowledge clusters. New CLI: poc-memory graph communities [N] [--min-size M] — lists communities sorted by isolation with member preview. Reveals islands like the Shannon theory cluster (3 nodes, 100% isolated, 0 cross-edges) and large agent-journal clusters (20-30 nodes, 95% isolated). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d0f126b709
commit
e6613f97bb
4 changed files with 141 additions and 0 deletions
|
|
@ -656,3 +656,40 @@ pub fn cmd_interference(threshold: f32) -> Result<(), String> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Show communities sorted by isolation (most isolated first).
|
||||||
|
/// Useful for finding poorly-integrated knowledge clusters that need
|
||||||
|
/// organize agents aimed at them.
|
||||||
|
pub fn cmd_communities(top_n: usize, min_size: usize) -> Result<(), String> {
|
||||||
|
let store = store::Store::load()?;
|
||||||
|
let g = store.build_graph();
|
||||||
|
let infos = g.community_info();
|
||||||
|
|
||||||
|
let total = infos.len();
|
||||||
|
let shown: Vec<_> = infos.into_iter()
|
||||||
|
.filter(|c| c.size >= min_size)
|
||||||
|
.take(top_n)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
println!("{} communities total ({} with size >= {})\n",
|
||||||
|
total, shown.len(), min_size);
|
||||||
|
println!("{:<6} {:>5} {:>7} {:>7} members", "id", "size", "iso", "cross");
|
||||||
|
println!("{}", "-".repeat(70));
|
||||||
|
|
||||||
|
for c in &shown {
|
||||||
|
let preview: Vec<&str> = c.members.iter()
|
||||||
|
.take(5)
|
||||||
|
.map(|s| s.as_str())
|
||||||
|
.collect();
|
||||||
|
let more = if c.size > 5 {
|
||||||
|
format!(" +{}", c.size - 5)
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
println!("{:<6} {:>5} {:>6.0}% {:>7} {}{}",
|
||||||
|
c.id, c.size, c.isolation * 100.0, c.cross_edges,
|
||||||
|
preview.join(", "), more);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,16 @@ use crate::store::{Store, RelationType, StoreView};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::{HashMap, HashSet, VecDeque};
|
use std::collections::{HashMap, HashSet, VecDeque};
|
||||||
|
|
||||||
|
/// Per-community summary record used for reporting.
///
/// Values are produced by `Graph::community_info`.
#[derive(Debug, Clone)]
pub struct CommunityInfo {
    /// Community identifier.
    pub id: u32,
    /// Keys of the nodes that belong to this community.
    pub members: Vec<String>,
    /// Number of member nodes.
    pub size: usize,
    /// Share of the community's edge weight that is internal:
    /// 1.0 = fully isolated, 0.0 = all edges external.
    pub isolation: f32,
    /// Count of edges that connect this community to anything outside it.
    pub cross_edges: usize,
}
|
||||||
|
|
||||||
/// Weighted edge in the graph
|
/// Weighted edge in the graph
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct Edge {
|
pub struct Edge {
|
||||||
|
|
@ -110,6 +120,75 @@ impl Graph {
|
||||||
&self.communities
|
&self.communities
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Community isolation scores: for each community, what fraction of its
|
||||||
|
/// total edge weight is internal (vs cross-community). Returns community_id → score
|
||||||
|
/// where 1.0 = fully isolated (no external edges), 0.0 = all edges external.
|
||||||
|
/// Singleton communities (1 node, no edges) get score 1.0.
|
||||||
|
pub fn community_isolation(&self) -> HashMap<u32, f32> {
|
||||||
|
// Accumulate internal and total edge weight per community
|
||||||
|
let mut internal: HashMap<u32, f32> = HashMap::new();
|
||||||
|
let mut total: HashMap<u32, f32> = HashMap::new();
|
||||||
|
|
||||||
|
for (key, edges) in &self.adj {
|
||||||
|
let Some(&my_comm) = self.communities.get(key) else { continue };
|
||||||
|
for edge in edges {
|
||||||
|
let nbr_comm = self.communities.get(&edge.target).copied().unwrap_or(u32::MAX);
|
||||||
|
*total.entry(my_comm).or_default() += edge.strength;
|
||||||
|
if my_comm == nbr_comm {
|
||||||
|
*internal.entry(my_comm).or_default() += edge.strength;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut scores = HashMap::new();
|
||||||
|
let all_communities: HashSet<u32> = self.communities.values().copied().collect();
|
||||||
|
for &comm in &all_communities {
|
||||||
|
let t = total.get(&comm).copied().unwrap_or(0.0);
|
||||||
|
if t < 0.001 {
|
||||||
|
scores.insert(comm, 1.0); // no edges = fully isolated
|
||||||
|
} else {
|
||||||
|
let i = internal.get(&comm).copied().unwrap_or(0.0);
|
||||||
|
scores.insert(comm, i / t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
scores
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Community info: id → (member keys, size, isolation score, cross-community edge count)
|
||||||
|
pub fn community_info(&self) -> Vec<CommunityInfo> {
|
||||||
|
let isolation = self.community_isolation();
|
||||||
|
|
||||||
|
// Group members by community
|
||||||
|
let mut members: HashMap<u32, Vec<String>> = HashMap::new();
|
||||||
|
for (key, &comm) in &self.communities {
|
||||||
|
members.entry(comm).or_default().push(key.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count cross-community edges per community
|
||||||
|
let mut cross_edges: HashMap<u32, usize> = HashMap::new();
|
||||||
|
for (key, edges) in &self.adj {
|
||||||
|
let Some(&my_comm) = self.communities.get(key) else { continue };
|
||||||
|
for edge in edges {
|
||||||
|
let nbr_comm = self.communities.get(&edge.target).copied().unwrap_or(u32::MAX);
|
||||||
|
if my_comm != nbr_comm {
|
||||||
|
*cross_edges.entry(my_comm).or_default() += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut result: Vec<CommunityInfo> = members.into_iter()
|
||||||
|
.map(|(id, mut keys)| {
|
||||||
|
keys.sort();
|
||||||
|
let size = keys.len();
|
||||||
|
let iso = isolation.get(&id).copied().unwrap_or(1.0);
|
||||||
|
let cross = cross_edges.get(&id).copied().unwrap_or(0) / 2; // undirected
|
||||||
|
CommunityInfo { id, members: keys, size, isolation: iso, cross_edges: cross }
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
result.sort_by(|a, b| b.isolation.total_cmp(&a.isolation));
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
/// Hub degree threshold: top 5% by degree
|
/// Hub degree threshold: top 5% by degree
|
||||||
pub fn hub_threshold(&self) -> usize {
|
pub fn hub_threshold(&self) -> usize {
|
||||||
let mut degrees: Vec<usize> = self.keys.iter()
|
let mut degrees: Vec<usize> = self.keys.iter()
|
||||||
|
|
|
||||||
|
|
@ -391,6 +391,15 @@ enum GraphCmd {
|
||||||
#[arg(long, default_value_t = 0.4)]
|
#[arg(long, default_value_t = 0.4)]
|
||||||
threshold: f32,
|
threshold: f32,
|
||||||
},
|
},
|
||||||
|
/// Show communities sorted by isolation (most isolated first)
|
||||||
|
Communities {
|
||||||
|
/// Number of communities to show
|
||||||
|
#[arg(default_value_t = 20)]
|
||||||
|
top_n: usize,
|
||||||
|
/// Minimum community size to show
|
||||||
|
#[arg(long, default_value_t = 2)]
|
||||||
|
min_size: usize,
|
||||||
|
},
|
||||||
/// Show graph structure overview
|
/// Show graph structure overview
|
||||||
Overview,
|
Overview,
|
||||||
/// Spectral decomposition of the memory graph
|
/// Spectral decomposition of the memory graph
|
||||||
|
|
@ -817,6 +826,7 @@ fn main() {
|
||||||
=> cli::graph::cmd_differentiate(key.as_deref(), apply),
|
=> cli::graph::cmd_differentiate(key.as_deref(), apply),
|
||||||
GraphCmd::Trace { key } => cli::graph::cmd_trace(&key),
|
GraphCmd::Trace { key } => cli::graph::cmd_trace(&key),
|
||||||
GraphCmd::Interference { threshold } => cli::graph::cmd_interference(threshold),
|
GraphCmd::Interference { threshold } => cli::graph::cmd_interference(threshold),
|
||||||
|
GraphCmd::Communities { top_n, min_size } => cli::graph::cmd_communities(top_n, min_size),
|
||||||
GraphCmd::Overview => cli::graph::cmd_graph(),
|
GraphCmd::Overview => cli::graph::cmd_graph(),
|
||||||
GraphCmd::Spectral { k } => cli::graph::cmd_spectral(k),
|
GraphCmd::Spectral { k } => cli::graph::cmd_spectral(k),
|
||||||
GraphCmd::SpectralSave { k } => cli::graph::cmd_spectral_save(k),
|
GraphCmd::SpectralSave { k } => cli::graph::cmd_spectral_save(k),
|
||||||
|
|
|
||||||
|
|
@ -167,6 +167,7 @@ pub enum SortField {
|
||||||
ContentLen,
|
ContentLen,
|
||||||
Degree,
|
Degree,
|
||||||
Weight,
|
Weight,
|
||||||
|
Isolation,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Numeric comparison operator.
|
/// Numeric comparison operator.
|
||||||
|
|
@ -307,6 +308,7 @@ impl Stage {
|
||||||
"content-len" => SortField::ContentLen,
|
"content-len" => SortField::ContentLen,
|
||||||
"degree" => SortField::Degree,
|
"degree" => SortField::Degree,
|
||||||
"weight" => SortField::Weight,
|
"weight" => SortField::Weight,
|
||||||
|
"isolation" => SortField::Isolation,
|
||||||
_ => return Err(format!("unknown sort field: {}", value)),
|
_ => return Err(format!("unknown sort field: {}", value)),
|
||||||
};
|
};
|
||||||
Stage::Transform(Transform::Sort(field))
|
Stage::Transform(Transform::Sort(field))
|
||||||
|
|
@ -548,6 +550,19 @@ pub fn run_transform(
|
||||||
db.cmp(&da) // desc
|
db.cmp(&da) // desc
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
SortField::Isolation => {
|
||||||
|
// Score nodes by their community's isolation.
|
||||||
|
// Most isolated communities first (highest internal edge ratio).
|
||||||
|
let iso = graph.community_isolation();
|
||||||
|
let comms = graph.communities();
|
||||||
|
items.sort_by(|a, b| {
|
||||||
|
let ca = comms.get(&a.0).copied().unwrap_or(0);
|
||||||
|
let cb = comms.get(&b.0).copied().unwrap_or(0);
|
||||||
|
let sa = iso.get(&ca).copied().unwrap_or(1.0);
|
||||||
|
let sb = iso.get(&cb).copied().unwrap_or(1.0);
|
||||||
|
sb.total_cmp(&sa) // most isolated first
|
||||||
|
});
|
||||||
|
}
|
||||||
SortField::Priority => {
|
SortField::Priority => {
|
||||||
// Pre-compute priorities to avoid O(n log n) calls
|
// Pre-compute priorities to avoid O(n log n) calls
|
||||||
// inside the sort comparator.
|
// inside the sort comparator.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue