From d8ff7aacc7252acf14d0a018481fba83a8c58fba Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 15 Jun 2026 21:16:40 -0500 Subject: [PATCH] consciousness: cache expensive graph metrics --- src/config.rs | 32 +++-- src/hippocampus/graph.rs | 243 ++++++++++++++++++++++-------------- src/subconscious/prompts.rs | 19 ++- 3 files changed, 179 insertions(+), 115 deletions(-) diff --git a/src/config.rs b/src/config.rs index 209bdc1..e385c4b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -201,16 +201,23 @@ pub fn watch_config(cli: crate::user::CliArgs) { { crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e); return; - } - crate::dbglog!("[config] watching {}", path.display()); + } + crate::dbglog!("[config] watching {}", path.display()); - while let Ok(res) = rx.recv() { - let Ok(events) = res else { continue; }; - if !events.iter().any(|e| e.path == path) { continue; } + let mut last_seen = config_file_state(&path); + while let Ok(res) = rx.recv() { + let Ok(events) = res else { continue; }; + if !events.iter().any(|e| e.path == path) { continue; } - // Reload both halves. - let mem_changed = reload(); - let app_changed = match build_figment(&cli).extract::() { + let current_seen = config_file_state(&path); + if current_seen == last_seen { + continue; + } + last_seen = current_seen; + + // Reload both halves. 
+ let mem_changed = reload(); + let app_changed = match build_figment(&cli).extract::() { Ok(app) => { install_app(app); true @@ -223,8 +230,13 @@ pub fn watch_config(cli: crate::user::CliArgs) { crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})", mem_changed, app_changed); } - }) - .ok(); + }) + .ok(); +} + +fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> { + let meta = std::fs::metadata(path).ok()?; + Some((meta.modified().ok()?, meta.len())) } // ============================================================ diff --git a/src/hippocampus/graph.rs b/src/hippocampus/graph.rs index 6c07fe5..b8f3736 100644 --- a/src/hippocampus/graph.rs +++ b/src/hippocampus/graph.rs @@ -11,6 +11,23 @@ use crate::store::{Store, RelationType, StoreView}; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet, VecDeque}; +use std::sync::{OnceLock, RwLock}; + +const EXACT_CC_MAX_DEG: usize = 512; +const APPROX_CC_PAIRS: u64 = 4096; +const CC_CACHE_TTL_SECS: i64 = 15 * 60; + +#[derive(Clone, Copy)] +struct CachedCc { + value: f32, + computed_at: i64, +} + +static CC_CACHE: OnceLock>> = OnceLock::new(); + +fn cc_cache() -> &'static RwLock> { + CC_CACHE.get_or_init(|| RwLock::new(HashMap::new())) +} /// Community info for reporting #[derive(Clone, Debug)] @@ -32,39 +49,16 @@ pub struct Edge { /// The in-memory graph built from store nodes + relations pub struct Graph { - /// Adjacency list: node key → list of edges - adj: HashMap>, - /// All node keys - keys: HashSet, - /// Community labels (from label propagation) + /// Adjacency list: node key → list of edges + adj: HashMap>, + /// Neighbor sets for membership tests in graph metrics. 
+ neighbor_sets: HashMap>, + /// All node keys + keys: HashSet, + /// Community labels (from label propagation) communities: HashMap, } -/// Compute clustering coefficient for a node whose neighbor-set is `nbrs`, -/// using `cache` to look up each neighbor's neighbor-set in O(1) without -/// re-allocating on every (i, j) pair of the inner loop. -fn cc_cached<'a>( - nbrs: &HashSet<&'a str>, - cache: &HashMap<&'a str, HashSet<&'a str>>, -) -> f32 { - let deg = nbrs.len(); - if deg < 2 { - return 0.0; - } - let neighbor_vec: Vec<&str> = nbrs.iter().copied().collect(); - let mut triangles = 0u32; - for i in 0..neighbor_vec.len() { - for j in (i + 1)..neighbor_vec.len() { - if let Some(ni) = cache.get(neighbor_vec[i]) { - if ni.contains(neighbor_vec[j]) { - triangles += 1; - } - } - } - } - (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0)) -} - impl Graph { pub fn nodes(&self) -> &HashSet { &self.keys @@ -92,22 +86,22 @@ impl Graph { .unwrap_or_default() } - /// Just neighbor keys - pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> { - self.adj.get(key) - .map(|edges| edges.iter().map(|e| e.target.as_str()).collect()) - .unwrap_or_default() - } + /// Just neighbor keys + pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> { + self.neighbor_sets.get(key) + .map(|neighbors| neighbors.iter().map(String::as_str).collect()) + .unwrap_or_default() + } - /// Jaccard similarity between two nodes' neighborhoods. - /// Measures overlap: |intersection| / |union| of their neighbor sets. - pub fn jaccard(&self, a: &str, b: &str) -> f32 { - let na = self.neighbor_keys(a); - let nb = self.neighbor_keys(b); - let intersection = na.intersection(&nb).count(); - let union = na.union(&nb).count(); - if union == 0 { 0.0 } else { intersection as f32 / union as f32 } - } + /// Jaccard similarity between two nodes' neighborhoods. + /// Measures overlap: |intersection| / |union| of their neighbor sets. 
+ pub fn jaccard(&self, a: &str, b: &str) -> f32 { + let Some(na) = self.neighbor_sets.get(a) else { return 0.0 }; + let Some(nb) = self.neighbor_sets.get(b) else { return 0.0 }; + let intersection = na.intersection(nb).count(); + let union = na.len() + nb.len() - intersection; + if union == 0 { 0.0 } else { intersection as f32 / union as f32 } + } /// Compute Jaccard-based strength for every edge in the graph. /// Returns (source_key, target_key, jaccard_strength) triples. @@ -227,44 +221,78 @@ impl Graph { } } - /// Local clustering coefficient: fraction of a node's neighbors - /// that are also neighbors of each other. - /// cc(v) = 2E / (deg * (deg - 1)) - pub fn clustering_coefficient(&self, key: &str) -> f32 { - let neighbors = self.neighbor_keys(key); - if neighbors.len() < 2 { - return 0.0; - } - // Cache each neighbor's neighbor-set so the O(deg^2) inner loop - // doesn't re-allocate a HashSet on every (i, j) pair. - let cache: HashMap<&str, HashSet<&str>> = neighbors - .iter() - .map(|&n| (n, self.neighbor_keys(n))) - .collect(); - cc_cached(&neighbors, &cache) - } + /// Local clustering coefficient: fraction of a node's neighbors + /// that are also neighbors of each other. + /// cc(v) = 2E / (deg * (deg - 1)) + pub fn clustering_coefficient(&self, key: &str) -> f32 { + let now = crate::store::now_epoch(); + if let Some(cc) = cc_cache().read().unwrap().get(key).copied() + && now - cc.computed_at < CC_CACHE_TTL_SECS + { + return cc.value; + } + let cc = self.clustering_coefficient_uncached(key); + cc_cache().write().unwrap().insert(key.to_owned(), CachedCc { + value: cc, + computed_at: now, + }); + cc + } - /// Average clustering coefficient across all nodes with deg >= 2 - pub fn avg_clustering_coefficient(&self) -> f32 { - // Pre-compute neighbor sets for the whole graph once so we don't - // rebuild O(N * deg) HashSets across the outer loop. 
- let cache: HashMap<&str, HashSet<&str>> = self - .keys - .iter() - .map(|k| (k.as_str(), self.neighbor_keys(k))) - .collect(); + fn clustering_coefficient_uncached(&self, key: &str) -> f32 { + let Some(neighbors) = self.neighbor_sets.get(key) else { + return 0.0; + }; + let deg = neighbors.len(); + if deg < 2 { + return 0.0; + } - let mut sum = 0.0f32; - let mut count = 0u32; - for key in &self.keys { - let nbrs = match cache.get(key.as_str()) { - Some(s) if s.len() >= 2 => s, - _ => continue, - }; - sum += cc_cached(nbrs, &cache); - count += 1; - } - if count == 0 { 0.0 } else { sum / count as f32 } + let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect(); + if deg <= EXACT_CC_MAX_DEG { + let mut linked = 0u64; + for i in 0..neighbor_vec.len() { + for j in (i + 1)..neighbor_vec.len() { + if self.neighbor_sets + .get(neighbor_vec[i]) + .is_some_and(|n| n.contains(neighbor_vec[j])) { + linked += 1; + } + } + } + return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0)); + } + + let mut linked = 0u64; + let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2); + for sample in 0..samples { + let i = ((sample.wrapping_mul(1_103_515_245).wrapping_add(12_345)) % deg as u64) as usize; + let mut j = ((sample.wrapping_mul(2_654_435_761).wrapping_add(97_531)) % deg as u64) as usize; + if i == j { + j = (j + 1) % deg; + } + if self.neighbor_sets + .get(neighbor_vec[i]) + .is_some_and(|n| n.contains(neighbor_vec[j])) { + linked += 1; + } + } + linked as f32 / samples as f32 + } + + /// Average clustering coefficient across all nodes with deg >= 2 + pub fn avg_clustering_coefficient(&self) -> f32 { + let mut sum = 0.0f32; + let mut count = 0u32; + for key in &self.keys { + match self.neighbor_sets.get(key.as_str()) { + Some(s) if s.len() >= 2 => s, + _ => continue, + }; + sum += self.clustering_coefficient(key); + count += 1; + } + if count == 0 { 0.0 } else { sum / count as f32 } } /// Average shortest path length (sampled BFS from 
up to 100 nodes) @@ -294,15 +322,17 @@ impl Graph { dist.insert(start.to_string(), 0u32); queue.push_back(start.to_string()); - while let Some(node) = queue.pop_front() { - let d = dist[&node]; - for neighbor in self.neighbor_keys(&node) { - if !dist.contains_key(neighbor) { - dist.insert(neighbor.to_string(), d + 1); - queue.push_back(neighbor.to_string()); - } - } - } + while let Some(node) = queue.pop_front() { + let d = dist[&node]; + if let Some(neighbors) = self.neighbor_sets.get(&node) { + for neighbor in neighbors { + if !dist.contains_key(neighbor) { + dist.insert(neighbor.clone(), d + 1); + queue.push_back(neighbor.clone()); + } + } + } + } dist } @@ -533,16 +563,39 @@ impl Graph { /// Build graph from store data (with community detection) pub fn build_graph(store: &impl StoreView) -> Graph { - let (adj, keys) = build_adjacency(store); - let communities = label_propagation(&keys, &adj, 20); - Graph { adj, keys, communities } + let (adj, keys) = build_adjacency(store); + let neighbor_sets = build_neighbor_sets(&adj); + let communities = label_propagation(&keys, &adj, 20); + Graph { + adj, + neighbor_sets, + keys, + communities, + } } /// Build graph without community detection — for spreading activation /// searches where we only need the adjacency list. 
pub fn build_graph_fast(store: &impl StoreView) -> Graph { - let (adj, keys) = build_adjacency(store); - Graph { adj, keys, communities: HashMap::new() } + let (adj, keys) = build_adjacency(store); + let neighbor_sets = build_neighbor_sets(&adj); + Graph { + adj, + neighbor_sets, + keys, + communities: HashMap::new(), + } +} + +fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> { + adj.iter() + .map(|(key, edges)| { + let neighbors = edges.iter() + .map(|edge| edge.target.clone()) + .collect(); + (key.clone(), neighbors) + }) + .collect() } fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) { diff --git a/src/subconscious/prompts.rs b/src/subconscious/prompts.rs index c98b887..b9a0146 100644 --- a/src/subconscious/prompts.rs +++ b/src/subconscious/prompts.rs @@ -104,22 +104,21 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) item.classification, item.outlier_score)); } - if let Some(community) = node.community_id { - out.push_str(&format!("Community: {} ", community)); - } - let deg = graph.degree(&item.key); - let cc = graph.clustering_coefficient(&item.key); + if let Some(community) = node.community_id { + out.push_str(&format!("Community: {} ", community)); + } + let deg = graph.degree(&item.key); - // Hub-link ratio: what fraction of this node's edges go to hubs?
+ let neighbors = graph.neighbors(&item.key); let hub_links = neighbors.iter() .filter(|(n, _)| graph.degree(n) >= hub_thresh) .count(); let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 }; - let is_hub = deg >= hub_thresh; + let is_hub = deg >= hub_thresh; - out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})", - deg, cc, hub_ratio * 100.0, hub_links, deg)); + out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})", + deg, item.cc, hub_ratio * 100.0, hub_links, deg)); if is_hub { out.push_str(" ← THIS IS A HUB"); } else if hub_ratio > 0.6 {