forked from kent/consciousness
context: heal pre-refactor image logs with token_count=0
Recompute image token counts from persisted dimensions when loading old logs that stored count=0 (back then the server-authoritative count was applied after AppendImage; pads are now expanded client-side at construction).
graph: cache neighbor sets for clustering coefficient.
Pre-compute neighbor HashSets so the O(deg^2) triangle-counting inner loop doesn't re-allocate on every (i, j) pair. avg_clustering_coefficient() now builds the cache once instead of O(N*deg) times.
This commit is contained in:
parent
371b40078d
commit
5210f7dd66
2 changed files with 60 additions and 20 deletions
|
|
@ -125,7 +125,19 @@ impl<'de> Deserialize<'de> for NodeLeaf {
|
||||||
body: NodeBody,
|
body: NodeBody,
|
||||||
timestamp: DateTime<Utc>,
|
timestamp: DateTime<Utc>,
|
||||||
}
|
}
|
||||||
let raw = Raw::deserialize(deserializer)?;
|
let mut raw = Raw::deserialize(deserializer)?;
|
||||||
|
// Heal pre-refactor logs: Image leaves used to be deserialized
|
||||||
|
// with token_count=0 (server-authoritative count was applied
|
||||||
|
// after AppendImage). With pads now expanded client-side at
|
||||||
|
// construction, recompute from the persisted dimensions if
|
||||||
|
// the stored count is 0.
|
||||||
|
if let NodeBody::Image { orig_height, orig_width, token_count, .. }
|
||||||
|
= &mut raw.body
|
||||||
|
{
|
||||||
|
if *token_count == 0 {
|
||||||
|
*token_count = qwen3_image_token_count(*orig_height, *orig_width);
|
||||||
|
}
|
||||||
|
}
|
||||||
let token_ids = raw.body.compute_token_ids();
|
let token_ids = raw.body.compute_token_ids();
|
||||||
Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
|
Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,31 @@ pub struct Graph {
|
||||||
communities: HashMap<String, u32>,
|
communities: HashMap<String, u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Compute the clustering coefficient of a node from pre-built neighbor sets.
///
/// * `nbrs`  - the neighbor set of the node being measured.
/// * `cache` - maps a node key to that node's own neighbor set, so each
///   adjacency test is a hash lookup instead of a freshly allocated set.
///
/// Returns `2 * T / (deg * (deg - 1))`, where `deg = nbrs.len()` and `T` is
/// the number of unordered neighbor pairs `{a, b}` for which `cache[a]`
/// contains `b`. Returns `0.0` when `deg < 2`. A neighbor that has no entry
/// in `cache` contributes no pairs.
///
/// NOTE(review): each pair is tested in one direction only, so the result is
/// order-independent only if the cached neighbor sets are symmetric —
/// presumably true for an undirected graph; confirm against `neighbor_keys`.
fn cc_cached<'a>(
    nbrs: &HashSet<&'a str>,
    cache: &HashMap<&'a str, HashSet<&'a str>>,
) -> f32 {
    let deg = nbrs.len();
    if deg < 2 {
        return 0.0;
    }

    // Snapshot the set into a Vec so every unordered pair is visited exactly
    // once via "element i against the tail after i".
    let neighbor_vec: Vec<&str> = nbrs.iter().copied().collect();

    let mut triangles = 0usize;
    for (i, &a) in neighbor_vec.iter().enumerate() {
        // The lookup depends only on `a`; do it once per outer iteration
        // rather than once per (i, j) pair.
        let a_nbrs = match cache.get(a) {
            Some(set) => set,
            None => continue,
        };
        triangles += neighbor_vec[i + 1..]
            .iter()
            .filter(|&&b| a_nbrs.contains(b))
            .count();
    }

    (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
}
|
||||||
|
|
||||||
impl Graph {
|
impl Graph {
|
||||||
pub fn nodes(&self) -> &HashSet<String> {
|
pub fn nodes(&self) -> &HashSet<String> {
|
||||||
&self.keys
|
&self.keys
|
||||||
|
|
@ -207,34 +232,37 @@ impl Graph {
|
||||||
/// cc(v) = 2E / (deg * (deg - 1))
|
/// cc(v) = 2E / (deg * (deg - 1))
|
||||||
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
||||||
let neighbors = self.neighbor_keys(key);
|
let neighbors = self.neighbor_keys(key);
|
||||||
let deg = neighbors.len();
|
if neighbors.len() < 2 {
|
||||||
if deg < 2 {
|
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
// Cache each neighbor's neighbor-set so the O(deg^2) inner loop
|
||||||
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
|
// doesn't re-allocate a HashSet on every (i, j) pair.
|
||||||
let mut triangles = 0u32;
|
let cache: HashMap<&str, HashSet<&str>> = neighbors
|
||||||
for i in 0..neighbor_vec.len() {
|
.iter()
|
||||||
for j in (i + 1)..neighbor_vec.len() {
|
.map(|&n| (n, self.neighbor_keys(n)))
|
||||||
let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
|
.collect();
|
||||||
if ni_neighbors.contains(neighbor_vec[j]) {
|
cc_cached(&neighbors, &cache)
|
||||||
triangles += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Average clustering coefficient across all nodes with deg >= 2
|
/// Average clustering coefficient across all nodes with deg >= 2
|
||||||
pub fn avg_clustering_coefficient(&self) -> f32 {
|
pub fn avg_clustering_coefficient(&self) -> f32 {
|
||||||
|
// Pre-compute neighbor sets for the whole graph once so we don't
|
||||||
|
// rebuild O(N * deg) HashSets across the outer loop.
|
||||||
|
let cache: HashMap<&str, HashSet<&str>> = self
|
||||||
|
.keys
|
||||||
|
.iter()
|
||||||
|
.map(|k| (k.as_str(), self.neighbor_keys(k)))
|
||||||
|
.collect();
|
||||||
|
|
||||||
let mut sum = 0.0f32;
|
let mut sum = 0.0f32;
|
||||||
let mut count = 0u32;
|
let mut count = 0u32;
|
||||||
for key in &self.keys {
|
for key in &self.keys {
|
||||||
if self.degree(key) >= 2 {
|
let nbrs = match cache.get(key.as_str()) {
|
||||||
sum += self.clustering_coefficient(key);
|
Some(s) if s.len() >= 2 => s,
|
||||||
count += 1;
|
_ => continue,
|
||||||
}
|
};
|
||||||
|
sum += cc_cached(nbrs, &cache);
|
||||||
|
count += 1;
|
||||||
}
|
}
|
||||||
if count == 0 { 0.0 } else { sum / count as f32 }
|
if count == 0 { 0.0 } else { sum / count as f32 }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue