From 5210f7dd66217e579f3cf6b3643a810dee43ae27 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 25 Apr 2026 15:15:21 -0400 Subject: [PATCH] context: heal pre-refactor image logs with token_count=0 Recompute image token counts from persisted dimensions when loading old logs that stored count=0 (server-authoritative count was applied after AppendImage before client-side pad expansion). graph: cache neighbor sets for clustering coefficient Pre-compute neighbor HashSets so the O(deg^2) triangle-counting inner loop doesn't re-allocate on every (i,j) pair. avg_clustering_coefficient() now builds the cache once instead of O(N*deg) times. --- src/agent/context.rs | 14 ++++++++- src/hippocampus/graph.rs | 66 ++++++++++++++++++++++++++++------------ 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/src/agent/context.rs b/src/agent/context.rs index a42beeb..a10afb8 100644 --- a/src/agent/context.rs +++ b/src/agent/context.rs @@ -125,7 +125,19 @@ impl<'de> Deserialize<'de> for NodeLeaf { body: NodeBody, timestamp: DateTime, } - let raw = Raw::deserialize(deserializer)?; + let mut raw = Raw::deserialize(deserializer)?; + // Heal pre-refactor logs: Image leaves used to be deserialized + // with token_count=0 (server-authoritative count was applied + // after AppendImage). With pads now expanded client-side at + // construction, recompute from the persisted dimensions if + // the stored count is 0. + if let NodeBody::Image { orig_height, orig_width, token_count, .. 
} + = &mut raw.body + { + if *token_count == 0 { + *token_count = qwen3_image_token_count(*orig_height, *orig_width); + } + } let token_ids = raw.body.compute_token_ids(); Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp }) } diff --git a/src/hippocampus/graph.rs b/src/hippocampus/graph.rs index 0e7a20d..6c07fe5 100644 --- a/src/hippocampus/graph.rs +++ b/src/hippocampus/graph.rs @@ -40,6 +40,31 @@ pub struct Graph { communities: HashMap, } +/// Compute clustering coefficient for a node whose neighbor-set is `nbrs`, +/// using `cache` to look up each neighbor's neighbor-set in O(1) without +/// re-allocating on every (i, j) pair of the inner loop. +fn cc_cached<'a>( + nbrs: &HashSet<&'a str>, + cache: &HashMap<&'a str, HashSet<&'a str>>, +) -> f32 { + let deg = nbrs.len(); + if deg < 2 { + return 0.0; + } + let neighbor_vec: Vec<&str> = nbrs.iter().copied().collect(); + let mut triangles = 0u32; + for i in 0..neighbor_vec.len() { + for j in (i + 1)..neighbor_vec.len() { + if let Some(ni) = cache.get(neighbor_vec[i]) { + if ni.contains(neighbor_vec[j]) { + triangles += 1; + } + } + } + } + (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0)) +} + impl Graph { pub fn nodes(&self) -> &HashSet { &self.keys @@ -207,34 +232,37 @@ impl Graph { /// cc(v) = 2E / (deg * (deg - 1)) pub fn clustering_coefficient(&self, key: &str) -> f32 { let neighbors = self.neighbor_keys(key); - let deg = neighbors.len(); - if deg < 2 { + if neighbors.len() < 2 { return 0.0; } - - let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect(); - let mut triangles = 0u32; - for i in 0..neighbor_vec.len() { - for j in (i + 1)..neighbor_vec.len() { - let ni_neighbors = self.neighbor_keys(neighbor_vec[i]); - if ni_neighbors.contains(neighbor_vec[j]) { - triangles += 1; - } - } - } - - (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0)) + // Cache each neighbor's neighbor-set so the O(deg^2) inner loop + // doesn't re-allocate a HashSet on every (i, j) pair. 
+ let cache: HashMap<&str, HashSet<&str>> = neighbors + .iter() + .map(|&n| (n, self.neighbor_keys(n))) + .collect(); + cc_cached(&neighbors, &cache) } /// Average clustering coefficient across all nodes with deg >= 2 pub fn avg_clustering_coefficient(&self) -> f32 { + // Pre-compute neighbor sets for the whole graph once so we don't + // rebuild O(N * deg) HashSets across the outer loop. + let cache: HashMap<&str, HashSet<&str>> = self + .keys + .iter() + .map(|k| (k.as_str(), self.neighbor_keys(k))) + .collect(); + let mut sum = 0.0f32; let mut count = 0u32; for key in &self.keys { - if self.degree(key) >= 2 { - sum += self.clustering_coefficient(key); - count += 1; - } + let nbrs = match cache.get(key.as_str()) { + Some(s) if s.len() >= 2 => s, + _ => continue, + }; + sum += cc_cached(nbrs, &cache); + count += 1; } if count == 0 { 0.0 } else { sum / count as f32 } }