forked from kent/consciousness
consciousness: cache expensive graph metrics
This commit is contained in:
parent
b9f093247d
commit
d8ff7aacc7
3 changed files with 184 additions and 120 deletions
|
|
@ -201,16 +201,23 @@ pub fn watch_config(cli: crate::user::CliArgs) {
|
|||
{
|
||||
crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e);
|
||||
return;
|
||||
}
|
||||
crate::dbglog!("[config] watching {}", path.display());
|
||||
}
|
||||
crate::dbglog!("[config] watching {}", path.display());
|
||||
|
||||
while let Ok(res) = rx.recv() {
|
||||
let Ok(events) = res else { continue; };
|
||||
if !events.iter().any(|e| e.path == path) { continue; }
|
||||
let mut last_seen = config_file_state(&path);
|
||||
while let Ok(res) = rx.recv() {
|
||||
let Ok(events) = res else { continue; };
|
||||
if !events.iter().any(|e| e.path == path) { continue; }
|
||||
|
||||
// Reload both halves.
|
||||
let mem_changed = reload();
|
||||
let app_changed = match build_figment(&cli).extract::<AppConfig>() {
|
||||
let current_seen = config_file_state(&path);
|
||||
if current_seen == last_seen {
|
||||
continue;
|
||||
}
|
||||
last_seen = current_seen;
|
||||
|
||||
// Reload both halves.
|
||||
let mem_changed = reload();
|
||||
let app_changed = match build_figment(&cli).extract::<AppConfig>() {
|
||||
Ok(app) => {
|
||||
install_app(app);
|
||||
true
|
||||
|
|
@ -223,8 +230,13 @@ pub fn watch_config(cli: crate::user::CliArgs) {
|
|||
crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})",
|
||||
mem_changed, app_changed);
|
||||
}
|
||||
})
|
||||
.ok();
|
||||
})
|
||||
.ok();
|
||||
}
|
||||
|
||||
/// Snapshot of the config file used for change detection: `(mtime, byte length)`.
///
/// Yields `None` when the file's metadata cannot be read or when the
/// modification time is unavailable.
fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> {
    std::fs::metadata(path)
        .ok()
        .and_then(|meta| meta.modified().ok().map(|mtime| (mtime, meta.len())))
}
|
||||
|
||||
// ============================================================
|
||||
|
|
|
|||
|
|
@ -11,6 +11,23 @@ use crate::store::{Store, RelationType, StoreView};
|
|||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::sync::{OnceLock, RwLock};
|
||||
|
||||
/// Largest neighbor count for which the clustering coefficient is computed
/// exactly, by checking every pair of neighbors.
const EXACT_CC_MAX_DEG: usize = 512;
/// Upper bound on the number of neighbor pairs sampled once a node's degree
/// exceeds `EXACT_CC_MAX_DEG`.
const APPROX_CC_PAIRS: u64 = 4_096;
/// How long a cached clustering coefficient stays valid, in seconds (15 minutes).
const CC_CACHE_TTL_SECS: i64 = 15 * 60;
|
||||
|
||||
/// One memoized clustering-coefficient result.
#[derive(Copy, Clone)]
struct CachedCc {
    /// The coefficient that was computed.
    value: f32,
    /// Timestamp taken when `value` was computed; it is compared against
    /// `CC_CACHE_TTL_SECS`, so presumably epoch seconds.
    computed_at: i64,
}
|
||||
|
||||
static CC_CACHE: OnceLock<RwLock<HashMap<String, CachedCc>>> = OnceLock::new();
|
||||
|
||||
fn cc_cache() -> &'static RwLock<HashMap<String, CachedCc>> {
|
||||
CC_CACHE.get_or_init(|| RwLock::new(HashMap::new()))
|
||||
}
|
||||
|
||||
/// Community info for reporting
|
||||
#[derive(Clone, Debug)]
|
||||
|
|
@ -32,39 +49,16 @@ pub struct Edge {
|
|||
|
||||
/// The in-memory graph built from store nodes + relations
|
||||
pub struct Graph {
|
||||
/// Adjacency list: node key → list of edges
|
||||
adj: HashMap<String, Vec<Edge>>,
|
||||
/// All node keys
|
||||
keys: HashSet<String>,
|
||||
/// Community labels (from label propagation)
|
||||
/// Adjacency list: node key → list of edges
|
||||
adj: HashMap<String, Vec<Edge>>,
|
||||
/// Neighbor sets for membership tests in graph metrics.
|
||||
neighbor_sets: HashMap<String, HashSet<String>>,
|
||||
/// All node keys
|
||||
keys: HashSet<String>,
|
||||
/// Community labels (from label propagation)
|
||||
communities: HashMap<String, u32>,
|
||||
}
|
||||
|
||||
/// Compute the clustering coefficient for a node whose neighbor set is `nbrs`.
///
/// `cache` maps each node to its own neighbor set, so every pair test is a
/// hash lookup instead of a freshly allocated set. A pair `(a, b)` counts as
/// linked when `b` appears in `a`'s cached set; neighbors missing from
/// `cache` contribute no links.
///
/// Returns `2 * linked / (deg * (deg - 1))`, or `0.0` when the node has fewer
/// than two neighbors.
fn cc_cached<'a>(
    nbrs: &HashSet<&'a str>,
    cache: &HashMap<&'a str, HashSet<&'a str>>,
) -> f32 {
    let deg = nbrs.len();
    if deg < 2 {
        return 0.0;
    }
    let neighbor_vec: Vec<&str> = nbrs.iter().copied().collect();
    // u64: the number of pairs is deg * (deg - 1) / 2, which exceeds u32::MAX
    // once deg passes roughly 92k.
    let mut triangles = 0u64;
    for (i, &a) in neighbor_vec.iter().enumerate() {
        // Resolve `a`'s neighbor set once per row rather than once per pair.
        let Some(a_nbrs) = cache.get(a) else { continue };
        triangles += neighbor_vec[i + 1..]
            .iter()
            .filter(|&&b| a_nbrs.contains(b))
            .count() as u64;
    }
    (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
}
|
||||
|
||||
impl Graph {
|
||||
pub fn nodes(&self) -> &HashSet<String> {
|
||||
&self.keys
|
||||
|
|
@ -92,22 +86,22 @@ impl Graph {
|
|||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Just neighbor keys
|
||||
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
|
||||
self.adj.get(key)
|
||||
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
/// Just neighbor keys
|
||||
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
|
||||
self.neighbor_sets.get(key)
|
||||
.map(|neighbors| neighbors.iter().map(String::as_str).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Jaccard similarity between two nodes' neighborhoods.
|
||||
/// Measures overlap: |intersection| / |union| of their neighbor sets.
|
||||
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
|
||||
let na = self.neighbor_keys(a);
|
||||
let nb = self.neighbor_keys(b);
|
||||
let intersection = na.intersection(&nb).count();
|
||||
let union = na.union(&nb).count();
|
||||
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
|
||||
}
|
||||
/// Jaccard similarity between two nodes' neighborhoods.
|
||||
/// Measures overlap: |intersection| / |union| of their neighbor sets.
|
||||
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
|
||||
let Some(na) = self.neighbor_sets.get(a) else { return 0.0 };
|
||||
let Some(nb) = self.neighbor_sets.get(b) else { return 0.0 };
|
||||
let intersection = na.intersection(nb).count();
|
||||
let union = na.len() + nb.len() - intersection;
|
||||
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
|
||||
}
|
||||
|
||||
/// Compute Jaccard-based strength for every edge in the graph.
|
||||
/// Returns (source_key, target_key, jaccard_strength) triples.
|
||||
|
|
@ -227,44 +221,78 @@ impl Graph {
|
|||
}
|
||||
}
|
||||
|
||||
/// Local clustering coefficient: fraction of a node's neighbors
|
||||
/// that are also neighbors of each other.
|
||||
/// cc(v) = 2E / (deg * (deg - 1))
|
||||
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
||||
let neighbors = self.neighbor_keys(key);
|
||||
if neighbors.len() < 2 {
|
||||
return 0.0;
|
||||
}
|
||||
// Cache each neighbor's neighbor-set so the O(deg^2) inner loop
|
||||
// doesn't re-allocate a HashSet on every (i, j) pair.
|
||||
let cache: HashMap<&str, HashSet<&str>> = neighbors
|
||||
.iter()
|
||||
.map(|&n| (n, self.neighbor_keys(n)))
|
||||
.collect();
|
||||
cc_cached(&neighbors, &cache)
|
||||
}
|
||||
/// Local clustering coefficient: fraction of a node's neighbors
|
||||
/// that are also neighbors of each other.
|
||||
/// cc(v) = 2E / (deg * (deg - 1))
|
||||
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
||||
let now = crate::store::now_epoch();
|
||||
if let Some(cc) = cc_cache().read().unwrap().get(key).copied()
|
||||
&& now - cc.computed_at < CC_CACHE_TTL_SECS
|
||||
{
|
||||
return cc.value;
|
||||
}
|
||||
let cc = self.clustering_coefficient_uncached(key);
|
||||
cc_cache().write().unwrap().insert(key.to_owned(), CachedCc {
|
||||
value: cc,
|
||||
computed_at: now,
|
||||
});
|
||||
cc
|
||||
}
|
||||
|
||||
/// Average clustering coefficient across all nodes with deg >= 2
|
||||
pub fn avg_clustering_coefficient(&self) -> f32 {
|
||||
// Pre-compute neighbor sets for the whole graph once so we don't
|
||||
// rebuild O(N * deg) HashSets across the outer loop.
|
||||
let cache: HashMap<&str, HashSet<&str>> = self
|
||||
.keys
|
||||
.iter()
|
||||
.map(|k| (k.as_str(), self.neighbor_keys(k)))
|
||||
.collect();
|
||||
fn clustering_coefficient_uncached(&self, key: &str) -> f32 {
|
||||
let Some(neighbors) = self.neighbor_sets.get(key) else {
|
||||
return 0.0;
|
||||
};
|
||||
let deg = neighbors.len();
|
||||
if deg < 2 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let mut sum = 0.0f32;
|
||||
let mut count = 0u32;
|
||||
for key in &self.keys {
|
||||
let nbrs = match cache.get(key.as_str()) {
|
||||
Some(s) if s.len() >= 2 => s,
|
||||
_ => continue,
|
||||
};
|
||||
sum += cc_cached(nbrs, &cache);
|
||||
count += 1;
|
||||
}
|
||||
if count == 0 { 0.0 } else { sum / count as f32 }
|
||||
let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect();
|
||||
if deg <= EXACT_CC_MAX_DEG {
|
||||
let mut linked = 0u64;
|
||||
for i in 0..neighbor_vec.len() {
|
||||
for j in (i + 1)..neighbor_vec.len() {
|
||||
if self.neighbor_sets
|
||||
.get(neighbor_vec[i])
|
||||
.is_some_and(|n| n.contains(neighbor_vec[j])) {
|
||||
linked += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0));
|
||||
}
|
||||
|
||||
let mut linked = 0u64;
|
||||
let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2);
|
||||
for sample in 0..samples {
|
||||
let i = ((sample.wrapping_mul(1_103_515_245).wrapping_add(12_345)) % deg as u64) as usize;
|
||||
let mut j = ((sample.wrapping_mul(2_654_435_761).wrapping_add(97_531)) % deg as u64) as usize;
|
||||
if i == j {
|
||||
j = (j + 1) % deg;
|
||||
}
|
||||
if self.neighbor_sets
|
||||
.get(neighbor_vec[i])
|
||||
.is_some_and(|n| n.contains(neighbor_vec[j])) {
|
||||
linked += 1;
|
||||
}
|
||||
}
|
||||
linked as f32 / samples as f32
|
||||
}
|
||||
|
||||
/// Average clustering coefficient across all nodes with deg >= 2
|
||||
pub fn avg_clustering_coefficient(&self) -> f32 {
|
||||
let mut sum = 0.0f32;
|
||||
let mut count = 0u32;
|
||||
for key in &self.keys {
|
||||
match self.neighbor_sets.get(key.as_str()) {
|
||||
Some(s) if s.len() >= 2 => s,
|
||||
_ => continue,
|
||||
};
|
||||
sum += self.clustering_coefficient(key);
|
||||
count += 1;
|
||||
}
|
||||
if count == 0 { 0.0 } else { sum / count as f32 }
|
||||
}
|
||||
|
||||
/// Average shortest path length (sampled BFS from up to 100 nodes)
|
||||
|
|
@ -294,15 +322,17 @@ impl Graph {
|
|||
dist.insert(start.to_string(), 0u32);
|
||||
queue.push_back(start.to_string());
|
||||
|
||||
while let Some(node) = queue.pop_front() {
|
||||
let d = dist[&node];
|
||||
for neighbor in self.neighbor_keys(&node) {
|
||||
if !dist.contains_key(neighbor) {
|
||||
dist.insert(neighbor.to_string(), d + 1);
|
||||
queue.push_back(neighbor.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
while let Some(node) = queue.pop_front() {
|
||||
let d = dist[&node];
|
||||
if let Some(neighbors) = self.neighbor_sets.get(&node) {
|
||||
for neighbor in neighbors {
|
||||
if !dist.contains_key(neighbor) {
|
||||
dist.insert(neighbor.clone(), d + 1);
|
||||
queue.push_back(neighbor.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
dist
|
||||
}
|
||||
|
||||
|
|
@ -533,16 +563,39 @@ impl Graph {
|
|||
|
||||
/// Build graph from store data (with community detection)
|
||||
pub fn build_graph(store: &impl StoreView) -> Graph {
|
||||
let (adj, keys) = build_adjacency(store);
|
||||
let communities = label_propagation(&keys, &adj, 20);
|
||||
Graph { adj, keys, communities }
|
||||
let (adj, keys) = build_adjacency(store);
|
||||
let neighbor_sets = build_neighbor_sets(&adj);
|
||||
let communities = label_propagation(&keys, &adj, 20);
|
||||
Graph {
|
||||
adj,
|
||||
neighbor_sets,
|
||||
keys,
|
||||
communities,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build graph without community detection — for spreading activation
|
||||
/// searches where we only need the adjacency list.
|
||||
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
|
||||
let (adj, keys) = build_adjacency(store);
|
||||
Graph { adj, keys, communities: HashMap::new() }
|
||||
let (adj, keys) = build_adjacency(store);
|
||||
let neighbor_sets = build_neighbor_sets(&adj);
|
||||
Graph {
|
||||
adj,
|
||||
neighbor_sets,
|
||||
keys,
|
||||
communities: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> {
|
||||
adj.iter()
|
||||
.map(|(key, edges)| {
|
||||
let neighbors = edges.iter()
|
||||
.map(|edge| edge.target.clone())
|
||||
.collect();
|
||||
(key.clone(), neighbors)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
|
||||
|
|
|
|||
|
|
@ -104,22 +104,21 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
|
|||
item.classification, item.outlier_score));
|
||||
}
|
||||
|
||||
if let Some(community) = node.community_id {
|
||||
out.push_str(&format!("Community: {} ", community));
|
||||
}
|
||||
let deg = graph.degree(&item.key);
|
||||
let cc = graph.clustering_coefficient(&item.key);
|
||||
if let Some(community) = node.community_id {
|
||||
out.push_str(&format!("Community: {} ", community));
|
||||
}
|
||||
let deg = graph.degree(&item.key);
|
||||
|
||||
// Hub-link ratio: what fraction of this node's edges go to hubs?
|
||||
let neighbors = graph.neighbors(&item.key);
|
||||
// Hub-link ratio: what fraction of this node's edges go to hubs?
|
||||
let neighbors = graph.neighbors(&item.key);
|
||||
let hub_links = neighbors.iter()
|
||||
.filter(|(n, _)| graph.degree(n) >= hub_thresh)
|
||||
.count();
|
||||
let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
|
||||
let is_hub = deg >= hub_thresh;
|
||||
let is_hub = deg >= hub_thresh;
|
||||
|
||||
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
|
||||
deg, cc, hub_ratio * 100.0, hub_links, deg));
|
||||
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
|
||||
deg, item.cc, hub_ratio * 100.0, hub_links, deg));
|
||||
if is_hub {
|
||||
out.push_str(" ← THIS IS A HUB");
|
||||
} else if hub_ratio > 0.6 {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue