consciousness: cache expensive graph metrics

This commit is contained in:
Kent Overstreet 2026-06-15 21:16:40 -05:00
commit d8ff7aacc7
3 changed files with 184 additions and 120 deletions

View file

@ -204,10 +204,17 @@ pub fn watch_config(cli: crate::user::CliArgs) {
} }
crate::dbglog!("[config] watching {}", path.display()); crate::dbglog!("[config] watching {}", path.display());
let mut last_seen = config_file_state(&path);
while let Ok(res) = rx.recv() { while let Ok(res) = rx.recv() {
let Ok(events) = res else { continue; }; let Ok(events) = res else { continue; };
if !events.iter().any(|e| e.path == path) { continue; } if !events.iter().any(|e| e.path == path) { continue; }
let current_seen = config_file_state(&path);
if current_seen == last_seen {
continue;
}
last_seen = current_seen;
// Reload both halves. // Reload both halves.
let mem_changed = reload(); let mem_changed = reload();
let app_changed = match build_figment(&cli).extract::<AppConfig>() { let app_changed = match build_figment(&cli).extract::<AppConfig>() {
@ -227,6 +234,11 @@ pub fn watch_config(cli: crate::user::CliArgs) {
.ok(); .ok();
} }
/// Take a cheap fingerprint of the file at `path`: its last-modified
/// timestamp paired with its length in bytes.
///
/// Returns `None` when the file's metadata cannot be read or when the
/// modification time is unavailable, so callers can compare two snapshots
/// with `==` to decide whether the file changed between them.
fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> {
    std::fs::metadata(path).ok().and_then(|info| {
        let mtime = info.modified().ok()?;
        Some((mtime, info.len()))
    })
}
// ============================================================ // ============================================================
// Agent config (top-level settings) // Agent config (top-level settings)
// ============================================================ // ============================================================

View file

@ -11,6 +11,23 @@ use crate::store::{Store, RelationType, StoreView};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque}; use std::collections::{HashMap, HashSet, VecDeque};
use std::sync::{OnceLock, RwLock};
/// Largest neighbor count for which the clustering coefficient is computed
/// exactly by testing every neighbor pair; nodes with more neighbors than
/// this use a sampled estimate instead.
const EXACT_CC_MAX_DEG: usize = 512;
/// Upper bound on the number of neighbor pairs examined when the clustering
/// coefficient is estimated by sampling.
const APPROX_CC_PAIRS: u64 = 4096;
/// Maximum age of a cached clustering coefficient before it is recomputed.
/// NOTE(review): the name and the `15 * 60` value suggest seconds
/// (15 minutes); confirm this matches the unit of the timestamps compared
/// against it.
const CC_CACHE_TTL_SECS: i64 = 15 * 60;
/// One memoized clustering-coefficient result.
#[derive(Clone, Copy)]
struct CachedCc {
    /// The clustering coefficient that was computed.
    value: f32,
    /// Timestamp recorded when `value` was computed; compared against the
    /// cache TTL on lookup. Presumably epoch seconds; confirm with the
    /// clock helper that produces it.
    computed_at: i64,
}

/// Lock-protected map from node key to its memoized result.
type CcCacheMap = RwLock<HashMap<String, CachedCc>>;

// NOTE(review): this cache is process-wide and keyed only by node key, so
// entries are shared by every graph instance in the process. Confirm that
// is intended if graphs can be rebuilt while entries are still fresh.
static CC_CACHE: OnceLock<CcCacheMap> = OnceLock::new();

/// Return the process-wide clustering-coefficient cache, creating an empty
/// map on first access.
fn cc_cache() -> &'static CcCacheMap {
    CC_CACHE.get_or_init(Default::default)
}
/// Community info for reporting /// Community info for reporting
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@ -34,37 +51,14 @@ pub struct Edge {
pub struct Graph { pub struct Graph {
/// Adjacency list: node key → list of edges /// Adjacency list: node key → list of edges
adj: HashMap<String, Vec<Edge>>, adj: HashMap<String, Vec<Edge>>,
/// Neighbor sets for membership tests in graph metrics.
neighbor_sets: HashMap<String, HashSet<String>>,
/// All node keys /// All node keys
keys: HashSet<String>, keys: HashSet<String>,
/// Community labels (from label propagation) /// Community labels (from label propagation)
communities: HashMap<String, u32>, communities: HashMap<String, u32>,
} }
/// Clustering coefficient of a node, given its neighbor set `nbrs`.
///
/// `cache` maps a node key to that node's neighbor set, so each pair test
/// is a hash lookup rather than a freshly built set. For every unordered
/// pair of neighbors (taken in the iteration order of `nbrs`), the pair
/// counts as linked when the earlier neighbor's set in `cache` contains the
/// later one. Neighbors absent from `cache` contribute no links.
///
/// Returns `2 * links / (deg * (deg - 1))`, or `0.0` when the node has
/// fewer than two neighbors.
fn cc_cached<'a>(
    nbrs: &HashSet<&'a str>,
    cache: &HashMap<&'a str, HashSet<&'a str>>,
) -> f32 {
    let deg = nbrs.len();
    if deg < 2 {
        return 0.0;
    }

    // Fix an ordering so each unordered pair is visited exactly once.
    let ordered: Vec<&str> = nbrs.iter().copied().collect();

    let triangles = ordered
        .iter()
        .enumerate()
        .filter_map(|(idx, &first)| cache.get(first).map(|adjacent| (idx, adjacent)))
        .map(|(idx, adjacent)| {
            ordered[idx + 1..]
                .iter()
                .filter(|&&second| adjacent.contains(second))
                .count() as u32
        })
        .sum::<u32>();

    (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
}
impl Graph { impl Graph {
pub fn nodes(&self) -> &HashSet<String> { pub fn nodes(&self) -> &HashSet<String> {
&self.keys &self.keys
@ -94,18 +88,18 @@ impl Graph {
/// Just neighbor keys /// Just neighbor keys
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> { pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
self.adj.get(key) self.neighbor_sets.get(key)
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect()) .map(|neighbors| neighbors.iter().map(String::as_str).collect())
.unwrap_or_default() .unwrap_or_default()
} }
/// Jaccard similarity between two nodes' neighborhoods. /// Jaccard similarity between two nodes' neighborhoods.
/// Measures overlap: |intersection| / |union| of their neighbor sets. /// Measures overlap: |intersection| / |union| of their neighbor sets.
pub fn jaccard(&self, a: &str, b: &str) -> f32 { pub fn jaccard(&self, a: &str, b: &str) -> f32 {
let na = self.neighbor_keys(a); let Some(na) = self.neighbor_sets.get(a) else { return 0.0 };
let nb = self.neighbor_keys(b); let Some(nb) = self.neighbor_sets.get(b) else { return 0.0 };
let intersection = na.intersection(&nb).count(); let intersection = na.intersection(nb).count();
let union = na.union(&nb).count(); let union = na.len() + nb.len() - intersection;
if union == 0 { 0.0 } else { intersection as f32 / union as f32 } if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
} }
@ -231,37 +225,71 @@ impl Graph {
/// that are also neighbors of each other. /// that are also neighbors of each other.
/// cc(v) = 2E / (deg * (deg - 1)) /// cc(v) = 2E / (deg * (deg - 1))
pub fn clustering_coefficient(&self, key: &str) -> f32 { pub fn clustering_coefficient(&self, key: &str) -> f32 {
let neighbors = self.neighbor_keys(key); let now = crate::store::now_epoch();
if neighbors.len() < 2 { if let Some(cc) = cc_cache().read().unwrap().get(key).copied()
&& now - cc.computed_at < CC_CACHE_TTL_SECS
{
return cc.value;
}
let cc = self.clustering_coefficient_uncached(key);
cc_cache().write().unwrap().insert(key.to_owned(), CachedCc {
value: cc,
computed_at: now,
});
cc
}
fn clustering_coefficient_uncached(&self, key: &str) -> f32 {
let Some(neighbors) = self.neighbor_sets.get(key) else {
return 0.0;
};
let deg = neighbors.len();
if deg < 2 {
return 0.0; return 0.0;
} }
// Cache each neighbor's neighbor-set so the O(deg^2) inner loop
// doesn't re-allocate a HashSet on every (i, j) pair. let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect();
let cache: HashMap<&str, HashSet<&str>> = neighbors if deg <= EXACT_CC_MAX_DEG {
.iter() let mut linked = 0u64;
.map(|&n| (n, self.neighbor_keys(n))) for i in 0..neighbor_vec.len() {
.collect(); for j in (i + 1)..neighbor_vec.len() {
cc_cached(&neighbors, &cache) if self.neighbor_sets
.get(neighbor_vec[i])
.is_some_and(|n| n.contains(neighbor_vec[j])) {
linked += 1;
}
}
}
return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0));
}
let mut linked = 0u64;
let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2);
for sample in 0..samples {
let i = ((sample.wrapping_mul(1_103_515_245).wrapping_add(12_345)) % deg as u64) as usize;
let mut j = ((sample.wrapping_mul(2_654_435_761).wrapping_add(97_531)) % deg as u64) as usize;
if i == j {
j = (j + 1) % deg;
}
if self.neighbor_sets
.get(neighbor_vec[i])
.is_some_and(|n| n.contains(neighbor_vec[j])) {
linked += 1;
}
}
linked as f32 / samples as f32
} }
/// Average clustering coefficient across all nodes with deg >= 2 /// Average clustering coefficient across all nodes with deg >= 2
pub fn avg_clustering_coefficient(&self) -> f32 { pub fn avg_clustering_coefficient(&self) -> f32 {
// Pre-compute neighbor sets for the whole graph once so we don't
// rebuild O(N * deg) HashSets across the outer loop.
let cache: HashMap<&str, HashSet<&str>> = self
.keys
.iter()
.map(|k| (k.as_str(), self.neighbor_keys(k)))
.collect();
let mut sum = 0.0f32; let mut sum = 0.0f32;
let mut count = 0u32; let mut count = 0u32;
for key in &self.keys { for key in &self.keys {
let nbrs = match cache.get(key.as_str()) { match self.neighbor_sets.get(key.as_str()) {
Some(s) if s.len() >= 2 => s, Some(s) if s.len() >= 2 => s,
_ => continue, _ => continue,
}; };
sum += cc_cached(nbrs, &cache); sum += self.clustering_coefficient(key);
count += 1; count += 1;
} }
if count == 0 { 0.0 } else { sum / count as f32 } if count == 0 { 0.0 } else { sum / count as f32 }
@ -296,10 +324,12 @@ impl Graph {
while let Some(node) = queue.pop_front() { while let Some(node) = queue.pop_front() {
let d = dist[&node]; let d = dist[&node];
for neighbor in self.neighbor_keys(&node) { if let Some(neighbors) = self.neighbor_sets.get(&node) {
for neighbor in neighbors {
if !dist.contains_key(neighbor) { if !dist.contains_key(neighbor) {
dist.insert(neighbor.to_string(), d + 1); dist.insert(neighbor.clone(), d + 1);
queue.push_back(neighbor.to_string()); queue.push_back(neighbor.clone());
}
} }
} }
} }
@ -534,15 +564,38 @@ impl Graph {
/// Build graph from store data (with community detection) /// Build graph from store data (with community detection)
pub fn build_graph(store: &impl StoreView) -> Graph { pub fn build_graph(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store); let (adj, keys) = build_adjacency(store);
let neighbor_sets = build_neighbor_sets(&adj);
let communities = label_propagation(&keys, &adj, 20); let communities = label_propagation(&keys, &adj, 20);
Graph { adj, keys, communities } Graph {
adj,
neighbor_sets,
keys,
communities,
}
} }
/// Build graph without community detection — for spreading activation /// Build graph without community detection — for spreading activation
/// searches where we only need the adjacency list. /// searches where we only need the adjacency list.
pub fn build_graph_fast(store: &impl StoreView) -> Graph { pub fn build_graph_fast(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store); let (adj, keys) = build_adjacency(store);
Graph { adj, keys, communities: HashMap::new() } let neighbor_sets = build_neighbor_sets(&adj);
Graph {
adj,
neighbor_sets,
keys,
communities: HashMap::new(),
}
}
fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> {
adj.iter()
.map(|(key, edges)| {
let neighbors = edges.iter()
.map(|edge| edge.target.clone())
.collect();
(key.clone(), neighbors)
})
.collect()
} }
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) { fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {

View file

@ -108,7 +108,6 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
out.push_str(&format!("Community: {} ", community)); out.push_str(&format!("Community: {} ", community));
} }
let deg = graph.degree(&item.key); let deg = graph.degree(&item.key);
let cc = graph.clustering_coefficient(&item.key);
// Hub-link ratio: what fraction of this node's edges go to hubs? // Hub-link ratio: what fraction of this node's edges go to hubs?
let neighbors = graph.neighbors(&item.key); let neighbors = graph.neighbors(&item.key);
@ -119,7 +118,7 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
let is_hub = deg >= hub_thresh; let is_hub = deg >= hub_thresh;
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})", out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
deg, cc, hub_ratio * 100.0, hub_links, deg)); deg, item.cc, hub_ratio * 100.0, hub_links, deg));
if is_hub { if is_hub {
out.push_str(" ← THIS IS A HUB"); out.push_str(" ← THIS IS A HUB");
} else if hub_ratio > 0.6 { } else if hub_ratio > 0.6 {