forked from kent/consciousness
consciousness: cache expensive graph metrics
This commit is contained in:
parent
b9f093247d
commit
d8ff7aacc7
3 changed files with 184 additions and 120 deletions
|
|
@ -201,16 +201,23 @@ pub fn watch_config(cli: crate::user::CliArgs) {
|
||||||
{
|
{
|
||||||
crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e);
|
crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
crate::dbglog!("[config] watching {}", path.display());
|
crate::dbglog!("[config] watching {}", path.display());
|
||||||
|
|
||||||
while let Ok(res) = rx.recv() {
|
let mut last_seen = config_file_state(&path);
|
||||||
let Ok(events) = res else { continue; };
|
while let Ok(res) = rx.recv() {
|
||||||
if !events.iter().any(|e| e.path == path) { continue; }
|
let Ok(events) = res else { continue; };
|
||||||
|
if !events.iter().any(|e| e.path == path) { continue; }
|
||||||
|
|
||||||
// Reload both halves.
|
let current_seen = config_file_state(&path);
|
||||||
let mem_changed = reload();
|
if current_seen == last_seen {
|
||||||
let app_changed = match build_figment(&cli).extract::<AppConfig>() {
|
continue;
|
||||||
|
}
|
||||||
|
last_seen = current_seen;
|
||||||
|
|
||||||
|
// Reload both halves.
|
||||||
|
let mem_changed = reload();
|
||||||
|
let app_changed = match build_figment(&cli).extract::<AppConfig>() {
|
||||||
Ok(app) => {
|
Ok(app) => {
|
||||||
install_app(app);
|
install_app(app);
|
||||||
true
|
true
|
||||||
|
|
@ -223,8 +230,13 @@ pub fn watch_config(cli: crate::user::CliArgs) {
|
||||||
crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})",
|
crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})",
|
||||||
mem_changed, app_changed);
|
mem_changed, app_changed);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.ok();
|
.ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> {
|
||||||
|
let meta = std::fs::metadata(path).ok()?;
|
||||||
|
Some((meta.modified().ok()?, meta.len()))
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,23 @@ use crate::store::{Store, RelationType, StoreView};
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::{HashMap, HashSet, VecDeque};
|
use std::collections::{HashMap, HashSet, VecDeque};
|
||||||
|
use std::sync::{OnceLock, RwLock};
|
||||||
|
|
||||||
|
const EXACT_CC_MAX_DEG: usize = 512;
|
||||||
|
const APPROX_CC_PAIRS: u64 = 4096;
|
||||||
|
const CC_CACHE_TTL_SECS: i64 = 15 * 60;
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
struct CachedCc {
|
||||||
|
value: f32,
|
||||||
|
computed_at: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
static CC_CACHE: OnceLock<RwLock<HashMap<String, CachedCc>>> = OnceLock::new();
|
||||||
|
|
||||||
|
fn cc_cache() -> &'static RwLock<HashMap<String, CachedCc>> {
|
||||||
|
CC_CACHE.get_or_init(|| RwLock::new(HashMap::new()))
|
||||||
|
}
|
||||||
|
|
||||||
/// Community info for reporting
|
/// Community info for reporting
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
|
|
@ -32,39 +49,16 @@ pub struct Edge {
|
||||||
|
|
||||||
/// The in-memory graph built from store nodes + relations
|
/// The in-memory graph built from store nodes + relations
|
||||||
pub struct Graph {
|
pub struct Graph {
|
||||||
/// Adjacency list: node key → list of edges
|
/// Adjacency list: node key → list of edges
|
||||||
adj: HashMap<String, Vec<Edge>>,
|
adj: HashMap<String, Vec<Edge>>,
|
||||||
/// All node keys
|
/// Neighbor sets for membership tests in graph metrics.
|
||||||
keys: HashSet<String>,
|
neighbor_sets: HashMap<String, HashSet<String>>,
|
||||||
/// Community labels (from label propagation)
|
/// All node keys
|
||||||
|
keys: HashSet<String>,
|
||||||
|
/// Community labels (from label propagation)
|
||||||
communities: HashMap<String, u32>,
|
communities: HashMap<String, u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute clustering coefficient for a node whose neighbor-set is `nbrs`,
|
|
||||||
/// using `cache` to look up each neighbor's neighbor-set in O(1) without
|
|
||||||
/// re-allocating on every (i, j) pair of the inner loop.
|
|
||||||
fn cc_cached<'a>(
|
|
||||||
nbrs: &HashSet<&'a str>,
|
|
||||||
cache: &HashMap<&'a str, HashSet<&'a str>>,
|
|
||||||
) -> f32 {
|
|
||||||
let deg = nbrs.len();
|
|
||||||
if deg < 2 {
|
|
||||||
return 0.0;
|
|
||||||
}
|
|
||||||
let neighbor_vec: Vec<&str> = nbrs.iter().copied().collect();
|
|
||||||
let mut triangles = 0u32;
|
|
||||||
for i in 0..neighbor_vec.len() {
|
|
||||||
for j in (i + 1)..neighbor_vec.len() {
|
|
||||||
if let Some(ni) = cache.get(neighbor_vec[i]) {
|
|
||||||
if ni.contains(neighbor_vec[j]) {
|
|
||||||
triangles += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Graph {
|
impl Graph {
|
||||||
pub fn nodes(&self) -> &HashSet<String> {
|
pub fn nodes(&self) -> &HashSet<String> {
|
||||||
&self.keys
|
&self.keys
|
||||||
|
|
@ -92,22 +86,22 @@ impl Graph {
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Just neighbor keys
|
/// Just neighbor keys
|
||||||
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
|
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
|
||||||
self.adj.get(key)
|
self.neighbor_sets.get(key)
|
||||||
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
|
.map(|neighbors| neighbors.iter().map(String::as_str).collect())
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Jaccard similarity between two nodes' neighborhoods.
|
/// Jaccard similarity between two nodes' neighborhoods.
|
||||||
/// Measures overlap: |intersection| / |union| of their neighbor sets.
|
/// Measures overlap: |intersection| / |union| of their neighbor sets.
|
||||||
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
|
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
|
||||||
let na = self.neighbor_keys(a);
|
let Some(na) = self.neighbor_sets.get(a) else { return 0.0 };
|
||||||
let nb = self.neighbor_keys(b);
|
let Some(nb) = self.neighbor_sets.get(b) else { return 0.0 };
|
||||||
let intersection = na.intersection(&nb).count();
|
let intersection = na.intersection(nb).count();
|
||||||
let union = na.union(&nb).count();
|
let union = na.len() + nb.len() - intersection;
|
||||||
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
|
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute Jaccard-based strength for every edge in the graph.
|
/// Compute Jaccard-based strength for every edge in the graph.
|
||||||
/// Returns (source_key, target_key, jaccard_strength) triples.
|
/// Returns (source_key, target_key, jaccard_strength) triples.
|
||||||
|
|
@ -227,44 +221,78 @@ impl Graph {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Local clustering coefficient: fraction of a node's neighbors
|
/// Local clustering coefficient: fraction of a node's neighbors
|
||||||
/// that are also neighbors of each other.
|
/// that are also neighbors of each other.
|
||||||
/// cc(v) = 2E / (deg * (deg - 1))
|
/// cc(v) = 2E / (deg * (deg - 1))
|
||||||
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
||||||
let neighbors = self.neighbor_keys(key);
|
let now = crate::store::now_epoch();
|
||||||
if neighbors.len() < 2 {
|
if let Some(cc) = cc_cache().read().unwrap().get(key).copied()
|
||||||
return 0.0;
|
&& now - cc.computed_at < CC_CACHE_TTL_SECS
|
||||||
}
|
{
|
||||||
// Cache each neighbor's neighbor-set so the O(deg^2) inner loop
|
return cc.value;
|
||||||
// doesn't re-allocate a HashSet on every (i, j) pair.
|
}
|
||||||
let cache: HashMap<&str, HashSet<&str>> = neighbors
|
let cc = self.clustering_coefficient_uncached(key);
|
||||||
.iter()
|
cc_cache().write().unwrap().insert(key.to_owned(), CachedCc {
|
||||||
.map(|&n| (n, self.neighbor_keys(n)))
|
value: cc,
|
||||||
.collect();
|
computed_at: now,
|
||||||
cc_cached(&neighbors, &cache)
|
});
|
||||||
}
|
cc
|
||||||
|
}
|
||||||
|
|
||||||
/// Average clustering coefficient across all nodes with deg >= 2
|
fn clustering_coefficient_uncached(&self, key: &str) -> f32 {
|
||||||
pub fn avg_clustering_coefficient(&self) -> f32 {
|
let Some(neighbors) = self.neighbor_sets.get(key) else {
|
||||||
// Pre-compute neighbor sets for the whole graph once so we don't
|
return 0.0;
|
||||||
// rebuild O(N * deg) HashSets across the outer loop.
|
};
|
||||||
let cache: HashMap<&str, HashSet<&str>> = self
|
let deg = neighbors.len();
|
||||||
.keys
|
if deg < 2 {
|
||||||
.iter()
|
return 0.0;
|
||||||
.map(|k| (k.as_str(), self.neighbor_keys(k)))
|
}
|
||||||
.collect();
|
|
||||||
|
|
||||||
let mut sum = 0.0f32;
|
let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect();
|
||||||
let mut count = 0u32;
|
if deg <= EXACT_CC_MAX_DEG {
|
||||||
for key in &self.keys {
|
let mut linked = 0u64;
|
||||||
let nbrs = match cache.get(key.as_str()) {
|
for i in 0..neighbor_vec.len() {
|
||||||
Some(s) if s.len() >= 2 => s,
|
for j in (i + 1)..neighbor_vec.len() {
|
||||||
_ => continue,
|
if self.neighbor_sets
|
||||||
};
|
.get(neighbor_vec[i])
|
||||||
sum += cc_cached(nbrs, &cache);
|
.is_some_and(|n| n.contains(neighbor_vec[j])) {
|
||||||
count += 1;
|
linked += 1;
|
||||||
}
|
}
|
||||||
if count == 0 { 0.0 } else { sum / count as f32 }
|
}
|
||||||
|
}
|
||||||
|
return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut linked = 0u64;
|
||||||
|
let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2);
|
||||||
|
for sample in 0..samples {
|
||||||
|
let i = ((sample.wrapping_mul(1_103_515_245).wrapping_add(12_345)) % deg as u64) as usize;
|
||||||
|
let mut j = ((sample.wrapping_mul(2_654_435_761).wrapping_add(97_531)) % deg as u64) as usize;
|
||||||
|
if i == j {
|
||||||
|
j = (j + 1) % deg;
|
||||||
|
}
|
||||||
|
if self.neighbor_sets
|
||||||
|
.get(neighbor_vec[i])
|
||||||
|
.is_some_and(|n| n.contains(neighbor_vec[j])) {
|
||||||
|
linked += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
linked as f32 / samples as f32
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Average clustering coefficient across all nodes with deg >= 2
|
||||||
|
pub fn avg_clustering_coefficient(&self) -> f32 {
|
||||||
|
let mut sum = 0.0f32;
|
||||||
|
let mut count = 0u32;
|
||||||
|
for key in &self.keys {
|
||||||
|
match self.neighbor_sets.get(key.as_str()) {
|
||||||
|
Some(s) if s.len() >= 2 => s,
|
||||||
|
_ => continue,
|
||||||
|
};
|
||||||
|
sum += self.clustering_coefficient(key);
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
if count == 0 { 0.0 } else { sum / count as f32 }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Average shortest path length (sampled BFS from up to 100 nodes)
|
/// Average shortest path length (sampled BFS from up to 100 nodes)
|
||||||
|
|
@ -294,15 +322,17 @@ impl Graph {
|
||||||
dist.insert(start.to_string(), 0u32);
|
dist.insert(start.to_string(), 0u32);
|
||||||
queue.push_back(start.to_string());
|
queue.push_back(start.to_string());
|
||||||
|
|
||||||
while let Some(node) = queue.pop_front() {
|
while let Some(node) = queue.pop_front() {
|
||||||
let d = dist[&node];
|
let d = dist[&node];
|
||||||
for neighbor in self.neighbor_keys(&node) {
|
if let Some(neighbors) = self.neighbor_sets.get(&node) {
|
||||||
if !dist.contains_key(neighbor) {
|
for neighbor in neighbors {
|
||||||
dist.insert(neighbor.to_string(), d + 1);
|
if !dist.contains_key(neighbor) {
|
||||||
queue.push_back(neighbor.to_string());
|
dist.insert(neighbor.clone(), d + 1);
|
||||||
}
|
queue.push_back(neighbor.clone());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
dist
|
dist
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -533,16 +563,39 @@ impl Graph {
|
||||||
|
|
||||||
/// Build graph from store data (with community detection)
|
/// Build graph from store data (with community detection)
|
||||||
pub fn build_graph(store: &impl StoreView) -> Graph {
|
pub fn build_graph(store: &impl StoreView) -> Graph {
|
||||||
let (adj, keys) = build_adjacency(store);
|
let (adj, keys) = build_adjacency(store);
|
||||||
let communities = label_propagation(&keys, &adj, 20);
|
let neighbor_sets = build_neighbor_sets(&adj);
|
||||||
Graph { adj, keys, communities }
|
let communities = label_propagation(&keys, &adj, 20);
|
||||||
|
Graph {
|
||||||
|
adj,
|
||||||
|
neighbor_sets,
|
||||||
|
keys,
|
||||||
|
communities,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Build graph without community detection — for spreading activation
|
/// Build graph without community detection — for spreading activation
|
||||||
/// searches where we only need the adjacency list.
|
/// searches where we only need the adjacency list.
|
||||||
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
|
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
|
||||||
let (adj, keys) = build_adjacency(store);
|
let (adj, keys) = build_adjacency(store);
|
||||||
Graph { adj, keys, communities: HashMap::new() }
|
let neighbor_sets = build_neighbor_sets(&adj);
|
||||||
|
Graph {
|
||||||
|
adj,
|
||||||
|
neighbor_sets,
|
||||||
|
keys,
|
||||||
|
communities: HashMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> {
|
||||||
|
adj.iter()
|
||||||
|
.map(|(key, edges)| {
|
||||||
|
let neighbors = edges.iter()
|
||||||
|
.map(|edge| edge.target.clone())
|
||||||
|
.collect();
|
||||||
|
(key.clone(), neighbors)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
|
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
|
||||||
|
|
|
||||||
|
|
@ -104,22 +104,21 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
|
||||||
item.classification, item.outlier_score));
|
item.classification, item.outlier_score));
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(community) = node.community_id {
|
if let Some(community) = node.community_id {
|
||||||
out.push_str(&format!("Community: {} ", community));
|
out.push_str(&format!("Community: {} ", community));
|
||||||
}
|
}
|
||||||
let deg = graph.degree(&item.key);
|
let deg = graph.degree(&item.key);
|
||||||
let cc = graph.clustering_coefficient(&item.key);
|
|
||||||
|
|
||||||
// Hub-link ratio: what fraction of this node's edges go to hubs?
|
// Hub-link ratio: what fraction of this node's edges go to hubs?
|
||||||
let neighbors = graph.neighbors(&item.key);
|
let neighbors = graph.neighbors(&item.key);
|
||||||
let hub_links = neighbors.iter()
|
let hub_links = neighbors.iter()
|
||||||
.filter(|(n, _)| graph.degree(n) >= hub_thresh)
|
.filter(|(n, _)| graph.degree(n) >= hub_thresh)
|
||||||
.count();
|
.count();
|
||||||
let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
|
let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
|
||||||
let is_hub = deg >= hub_thresh;
|
let is_hub = deg >= hub_thresh;
|
||||||
|
|
||||||
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
|
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
|
||||||
deg, cc, hub_ratio * 100.0, hub_links, deg));
|
deg, item.cc, hub_ratio * 100.0, hub_links, deg));
|
||||||
if is_hub {
|
if is_hub {
|
||||||
out.push_str(" ← THIS IS A HUB");
|
out.push_str(" ← THIS IS A HUB");
|
||||||
} else if hub_ratio > 0.6 {
|
} else if hub_ratio > 0.6 {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue