forked from kent/consciousness
context: heal pre-refactor image logs with token_count=0
Recompute image token counts from persisted dimensions when loading old logs that stored count=0 (server-authoritative count was applied after AppendImage before client-side pad expansion).

graph: cache neighbor sets for clustering coefficient

Pre-compute neighbor HashSets so the O(deg^2) triangle-counting inner loop doesn't re-allocate on every (i, j) pair. avg_clustering_coefficient() now builds the cache once instead of O(N*deg) times.
This commit is contained in:
parent
371b40078d
commit
5210f7dd66
2 changed files with 60 additions and 20 deletions
|
|
@ -125,7 +125,19 @@ impl<'de> Deserialize<'de> for NodeLeaf {
|
|||
body: NodeBody,
|
||||
timestamp: DateTime<Utc>,
|
||||
}
|
||||
let raw = Raw::deserialize(deserializer)?;
|
||||
let mut raw = Raw::deserialize(deserializer)?;
|
||||
// Heal pre-refactor logs: Image leaves used to be deserialized
|
||||
// with token_count=0 (server-authoritative count was applied
|
||||
// after AppendImage). With pads now expanded client-side at
|
||||
// construction, recompute from the persisted dimensions if
|
||||
// the stored count is 0.
|
||||
if let NodeBody::Image { orig_height, orig_width, token_count, .. }
|
||||
= &mut raw.body
|
||||
{
|
||||
if *token_count == 0 {
|
||||
*token_count = qwen3_image_token_count(*orig_height, *orig_width);
|
||||
}
|
||||
}
|
||||
let token_ids = raw.body.compute_token_ids();
|
||||
Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,31 @@ pub struct Graph {
|
|||
communities: HashMap<String, u32>,
|
||||
}
|
||||
|
||||
/// Clustering coefficient of a node, given that node's neighbor set `nbrs`.
///
/// `cache` maps a node key to that node's neighbor set. Every membership
/// test in the pair loop is answered from `cache`, so no set is built here.
/// A neighbor that has no entry in `cache` contributes no triangles.
///
/// Each unordered pair of neighbors is visited once; the pair counts as a
/// triangle when the later neighbor (in this call's iteration order) is
/// found in the earlier neighbor's cached set.
///
/// Returns `2 * triangles / (deg * (deg - 1))` with `deg = nbrs.len()`,
/// or `0.0` when `deg < 2`.
fn cc_cached<'a>(
    nbrs: &HashSet<&'a str>,
    cache: &HashMap<&'a str, HashSet<&'a str>>,
) -> f32 {
    let deg = nbrs.len();
    if deg < 2 {
        return 0.0;
    }

    // Snapshot the set into a slice so pairs can be enumerated positionally.
    let ordered: Vec<&str> = nbrs.iter().copied().collect();

    let mut triangles = 0u32;
    for (idx, &first) in ordered.iter().enumerate() {
        // One lookup per outer element; a missing entry means none of the
        // pairs starting at `first` can be counted.
        let first_nbrs = match cache.get(first) {
            Some(set) => set,
            None => continue,
        };
        for &second in &ordered[idx + 1..] {
            if first_nbrs.contains(second) {
                triangles += 1;
            }
        }
    }

    let d = deg as f32;
    (2.0 * triangles as f32) / (d * (d - 1.0))
}
|
||||
|
||||
impl Graph {
|
||||
pub fn nodes(&self) -> &HashSet<String> {
|
||||
&self.keys
|
||||
|
|
@ -207,34 +232,37 @@ impl Graph {
|
|||
/// cc(v) = 2E / (deg * (deg - 1))
|
||||
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
||||
let neighbors = self.neighbor_keys(key);
|
||||
let deg = neighbors.len();
|
||||
if deg < 2 {
|
||||
if neighbors.len() < 2 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
|
||||
let mut triangles = 0u32;
|
||||
for i in 0..neighbor_vec.len() {
|
||||
for j in (i + 1)..neighbor_vec.len() {
|
||||
let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
|
||||
if ni_neighbors.contains(neighbor_vec[j]) {
|
||||
triangles += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
|
||||
// Cache each neighbor's neighbor-set so the O(deg^2) inner loop
|
||||
// doesn't re-allocate a HashSet on every (i, j) pair.
|
||||
let cache: HashMap<&str, HashSet<&str>> = neighbors
|
||||
.iter()
|
||||
.map(|&n| (n, self.neighbor_keys(n)))
|
||||
.collect();
|
||||
cc_cached(&neighbors, &cache)
|
||||
}
|
||||
|
||||
/// Average clustering coefficient across all nodes with deg >= 2
|
||||
pub fn avg_clustering_coefficient(&self) -> f32 {
|
||||
// Pre-compute neighbor sets for the whole graph once so we don't
|
||||
// rebuild O(N * deg) HashSets across the outer loop.
|
||||
let cache: HashMap<&str, HashSet<&str>> = self
|
||||
.keys
|
||||
.iter()
|
||||
.map(|k| (k.as_str(), self.neighbor_keys(k)))
|
||||
.collect();
|
||||
|
||||
let mut sum = 0.0f32;
|
||||
let mut count = 0u32;
|
||||
for key in &self.keys {
|
||||
if self.degree(key) >= 2 {
|
||||
sum += self.clustering_coefficient(key);
|
||||
count += 1;
|
||||
}
|
||||
let nbrs = match cache.get(key.as_str()) {
|
||||
Some(s) if s.len() >= 2 => s,
|
||||
_ => continue,
|
||||
};
|
||||
sum += cc_cached(nbrs, &cache);
|
||||
count += 1;
|
||||
}
|
||||
if count == 0 { 0.0 } else { sum / count as f32 }
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue