consciousness: cache expensive graph metrics

This commit is contained in:
Kent Overstreet 2026-06-15 21:16:40 -05:00
commit d8ff7aacc7
3 changed files with 184 additions and 120 deletions

View file

@ -201,16 +201,23 @@ pub fn watch_config(cli: crate::user::CliArgs) {
{ {
crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e); crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e);
return; return;
} }
crate::dbglog!("[config] watching {}", path.display()); crate::dbglog!("[config] watching {}", path.display());
while let Ok(res) = rx.recv() { let mut last_seen = config_file_state(&path);
let Ok(events) = res else { continue; }; while let Ok(res) = rx.recv() {
if !events.iter().any(|e| e.path == path) { continue; } let Ok(events) = res else { continue; };
if !events.iter().any(|e| e.path == path) { continue; }
// Reload both halves. let current_seen = config_file_state(&path);
let mem_changed = reload(); if current_seen == last_seen {
let app_changed = match build_figment(&cli).extract::<AppConfig>() { continue;
}
last_seen = current_seen;
// Reload both halves.
let mem_changed = reload();
let app_changed = match build_figment(&cli).extract::<AppConfig>() {
Ok(app) => { Ok(app) => {
install_app(app); install_app(app);
true true
@ -223,8 +230,13 @@ pub fn watch_config(cli: crate::user::CliArgs) {
crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})", crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})",
mem_changed, app_changed); mem_changed, app_changed);
} }
}) })
.ok(); .ok();
}
fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> {
let meta = std::fs::metadata(path).ok()?;
Some((meta.modified().ok()?, meta.len()))
} }
// ============================================================ // ============================================================

View file

@ -11,6 +11,23 @@ use crate::store::{Store, RelationType, StoreView};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque}; use std::collections::{HashMap, HashSet, VecDeque};
use std::sync::{OnceLock, RwLock};
const EXACT_CC_MAX_DEG: usize = 512;
const APPROX_CC_PAIRS: u64 = 4096;
const CC_CACHE_TTL_SECS: i64 = 15 * 60;
#[derive(Clone, Copy)]
struct CachedCc {
value: f32,
computed_at: i64,
}
static CC_CACHE: OnceLock<RwLock<HashMap<String, CachedCc>>> = OnceLock::new();
fn cc_cache() -> &'static RwLock<HashMap<String, CachedCc>> {
CC_CACHE.get_or_init(|| RwLock::new(HashMap::new()))
}
/// Community info for reporting /// Community info for reporting
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@ -32,39 +49,16 @@ pub struct Edge {
/// The in-memory graph built from store nodes + relations /// The in-memory graph built from store nodes + relations
pub struct Graph { pub struct Graph {
/// Adjacency list: node key → list of edges /// Adjacency list: node key → list of edges
adj: HashMap<String, Vec<Edge>>, adj: HashMap<String, Vec<Edge>>,
/// All node keys /// Neighbor sets for membership tests in graph metrics.
keys: HashSet<String>, neighbor_sets: HashMap<String, HashSet<String>>,
/// Community labels (from label propagation) /// All node keys
keys: HashSet<String>,
/// Community labels (from label propagation)
communities: HashMap<String, u32>, communities: HashMap<String, u32>,
} }
/// Compute clustering coefficient for a node whose neighbor-set is `nbrs`,
/// using `cache` to look up each neighbor's neighbor-set in O(1) without
/// re-allocating on every (i, j) pair of the inner loop.
fn cc_cached<'a>(
nbrs: &HashSet<&'a str>,
cache: &HashMap<&'a str, HashSet<&'a str>>,
) -> f32 {
let deg = nbrs.len();
if deg < 2 {
return 0.0;
}
let neighbor_vec: Vec<&str> = nbrs.iter().copied().collect();
let mut triangles = 0u32;
for i in 0..neighbor_vec.len() {
for j in (i + 1)..neighbor_vec.len() {
if let Some(ni) = cache.get(neighbor_vec[i]) {
if ni.contains(neighbor_vec[j]) {
triangles += 1;
}
}
}
}
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
}
impl Graph { impl Graph {
pub fn nodes(&self) -> &HashSet<String> { pub fn nodes(&self) -> &HashSet<String> {
&self.keys &self.keys
@ -92,22 +86,22 @@ impl Graph {
.unwrap_or_default() .unwrap_or_default()
} }
/// Just neighbor keys /// Just neighbor keys
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> { pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
self.adj.get(key) self.neighbor_sets.get(key)
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect()) .map(|neighbors| neighbors.iter().map(String::as_str).collect())
.unwrap_or_default() .unwrap_or_default()
} }
/// Jaccard similarity between two nodes' neighborhoods. /// Jaccard similarity between two nodes' neighborhoods.
/// Measures overlap: |intersection| / |union| of their neighbor sets. /// Measures overlap: |intersection| / |union| of their neighbor sets.
pub fn jaccard(&self, a: &str, b: &str) -> f32 { pub fn jaccard(&self, a: &str, b: &str) -> f32 {
let na = self.neighbor_keys(a); let Some(na) = self.neighbor_sets.get(a) else { return 0.0 };
let nb = self.neighbor_keys(b); let Some(nb) = self.neighbor_sets.get(b) else { return 0.0 };
let intersection = na.intersection(&nb).count(); let intersection = na.intersection(nb).count();
let union = na.union(&nb).count(); let union = na.len() + nb.len() - intersection;
if union == 0 { 0.0 } else { intersection as f32 / union as f32 } if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
} }
/// Compute Jaccard-based strength for every edge in the graph. /// Compute Jaccard-based strength for every edge in the graph.
/// Returns (source_key, target_key, jaccard_strength) triples. /// Returns (source_key, target_key, jaccard_strength) triples.
@ -227,44 +221,78 @@ impl Graph {
} }
} }
/// Local clustering coefficient: fraction of a node's neighbors /// Local clustering coefficient: fraction of a node's neighbors
/// that are also neighbors of each other. /// that are also neighbors of each other.
/// cc(v) = 2E / (deg * (deg - 1)) /// cc(v) = 2E / (deg * (deg - 1))
pub fn clustering_coefficient(&self, key: &str) -> f32 { pub fn clustering_coefficient(&self, key: &str) -> f32 {
let neighbors = self.neighbor_keys(key); let now = crate::store::now_epoch();
if neighbors.len() < 2 { if let Some(cc) = cc_cache().read().unwrap().get(key).copied()
return 0.0; && now - cc.computed_at < CC_CACHE_TTL_SECS
} {
// Cache each neighbor's neighbor-set so the O(deg^2) inner loop return cc.value;
// doesn't re-allocate a HashSet on every (i, j) pair. }
let cache: HashMap<&str, HashSet<&str>> = neighbors let cc = self.clustering_coefficient_uncached(key);
.iter() cc_cache().write().unwrap().insert(key.to_owned(), CachedCc {
.map(|&n| (n, self.neighbor_keys(n))) value: cc,
.collect(); computed_at: now,
cc_cached(&neighbors, &cache) });
} cc
}
/// Average clustering coefficient across all nodes with deg >= 2 fn clustering_coefficient_uncached(&self, key: &str) -> f32 {
pub fn avg_clustering_coefficient(&self) -> f32 { let Some(neighbors) = self.neighbor_sets.get(key) else {
// Pre-compute neighbor sets for the whole graph once so we don't return 0.0;
// rebuild O(N * deg) HashSets across the outer loop. };
let cache: HashMap<&str, HashSet<&str>> = self let deg = neighbors.len();
.keys if deg < 2 {
.iter() return 0.0;
.map(|k| (k.as_str(), self.neighbor_keys(k))) }
.collect();
let mut sum = 0.0f32; let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect();
let mut count = 0u32; if deg <= EXACT_CC_MAX_DEG {
for key in &self.keys { let mut linked = 0u64;
let nbrs = match cache.get(key.as_str()) { for i in 0..neighbor_vec.len() {
Some(s) if s.len() >= 2 => s, for j in (i + 1)..neighbor_vec.len() {
_ => continue, if self.neighbor_sets
}; .get(neighbor_vec[i])
sum += cc_cached(nbrs, &cache); .is_some_and(|n| n.contains(neighbor_vec[j])) {
count += 1; linked += 1;
} }
if count == 0 { 0.0 } else { sum / count as f32 } }
}
return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0));
}
let mut linked = 0u64;
let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2);
for sample in 0..samples {
let i = ((sample.wrapping_mul(1_103_515_245).wrapping_add(12_345)) % deg as u64) as usize;
let mut j = ((sample.wrapping_mul(2_654_435_761).wrapping_add(97_531)) % deg as u64) as usize;
if i == j {
j = (j + 1) % deg;
}
if self.neighbor_sets
.get(neighbor_vec[i])
.is_some_and(|n| n.contains(neighbor_vec[j])) {
linked += 1;
}
}
linked as f32 / samples as f32
}
/// Average clustering coefficient across all nodes with deg >= 2
pub fn avg_clustering_coefficient(&self) -> f32 {
let mut sum = 0.0f32;
let mut count = 0u32;
for key in &self.keys {
match self.neighbor_sets.get(key.as_str()) {
Some(s) if s.len() >= 2 => s,
_ => continue,
};
sum += self.clustering_coefficient(key);
count += 1;
}
if count == 0 { 0.0 } else { sum / count as f32 }
} }
/// Average shortest path length (sampled BFS from up to 100 nodes) /// Average shortest path length (sampled BFS from up to 100 nodes)
@ -294,15 +322,17 @@ impl Graph {
dist.insert(start.to_string(), 0u32); dist.insert(start.to_string(), 0u32);
queue.push_back(start.to_string()); queue.push_back(start.to_string());
while let Some(node) = queue.pop_front() { while let Some(node) = queue.pop_front() {
let d = dist[&node]; let d = dist[&node];
for neighbor in self.neighbor_keys(&node) { if let Some(neighbors) = self.neighbor_sets.get(&node) {
if !dist.contains_key(neighbor) { for neighbor in neighbors {
dist.insert(neighbor.to_string(), d + 1); if !dist.contains_key(neighbor) {
queue.push_back(neighbor.to_string()); dist.insert(neighbor.clone(), d + 1);
} queue.push_back(neighbor.clone());
} }
} }
}
}
dist dist
} }
@ -533,16 +563,39 @@ impl Graph {
/// Build graph from store data (with community detection) /// Build graph from store data (with community detection)
pub fn build_graph(store: &impl StoreView) -> Graph { pub fn build_graph(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store); let (adj, keys) = build_adjacency(store);
let communities = label_propagation(&keys, &adj, 20); let neighbor_sets = build_neighbor_sets(&adj);
Graph { adj, keys, communities } let communities = label_propagation(&keys, &adj, 20);
Graph {
adj,
neighbor_sets,
keys,
communities,
}
} }
/// Build graph without community detection — for spreading activation /// Build graph without community detection — for spreading activation
/// searches where we only need the adjacency list. /// searches where we only need the adjacency list.
pub fn build_graph_fast(store: &impl StoreView) -> Graph { pub fn build_graph_fast(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store); let (adj, keys) = build_adjacency(store);
Graph { adj, keys, communities: HashMap::new() } let neighbor_sets = build_neighbor_sets(&adj);
Graph {
adj,
neighbor_sets,
keys,
communities: HashMap::new(),
}
}
fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> {
adj.iter()
.map(|(key, edges)| {
let neighbors = edges.iter()
.map(|edge| edge.target.clone())
.collect();
(key.clone(), neighbors)
})
.collect()
} }
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) { fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {

View file

@ -104,22 +104,21 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
item.classification, item.outlier_score)); item.classification, item.outlier_score));
} }
if let Some(community) = node.community_id { if let Some(community) = node.community_id {
out.push_str(&format!("Community: {} ", community)); out.push_str(&format!("Community: {} ", community));
} }
let deg = graph.degree(&item.key); let deg = graph.degree(&item.key);
let cc = graph.clustering_coefficient(&item.key);
// Hub-link ratio: what fraction of this node's edges go to hubs? // Hub-link ratio: what fraction of this node's edges go to hubs?
let neighbors = graph.neighbors(&item.key); let neighbors = graph.neighbors(&item.key);
let hub_links = neighbors.iter() let hub_links = neighbors.iter()
.filter(|(n, _)| graph.degree(n) >= hub_thresh) .filter(|(n, _)| graph.degree(n) >= hub_thresh)
.count(); .count();
let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 }; let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
let is_hub = deg >= hub_thresh; let is_hub = deg >= hub_thresh;
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})", out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
deg, cc, hub_ratio * 100.0, hub_links, deg)); deg, item.cc, hub_ratio * 100.0, hub_links, deg));
if is_hub { if is_hub {
out.push_str(" ← THIS IS A HUB"); out.push_str(" ← THIS IS A HUB");
} else if hub_ratio > 0.6 { } else if hub_ratio > 0.6 {