fix unwrap-on-partial_cmp, dedup helpers, O(1) relation dedup
Replace all partial_cmp().unwrap() calls with total_cmp() in spectral.rs and knowledge.rs. This eliminates potential panics on NaN without changing the ordering of ordinary finite values (total_cmp differs only in that it orders -0.0 before +0.0 and gives NaN a defined position instead of failing). Use the existing weighted_distance() and eigenvalue_weights() helpers in nearest_neighbors() and nearest_to_seeds() instead of inlining the same distance computation. Move parse_timestamp_to_epoch() from enrich.rs to util.rs; the logic was duplicated and is now shared. Replace the O(n²) relation existence check in init_from_markdown() with a HashSet of (source, target) UUID pairs: with 26K relations, the old check was scanning linearly for every link in every markdown unit.
This commit is contained in:
parent
2f2c84e1c0
commit
3dddc40841
5 changed files with 55 additions and 63 deletions
|
|
@ -177,7 +177,7 @@ pub fn print_summary(result: &SpectralResult, graph: &Graph) {
|
|||
.enumerate()
|
||||
.map(|(i, &v)| (i, v))
|
||||
.collect();
|
||||
indexed.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
|
||||
indexed.sort_by(|a, b| a.1.total_cmp(&b.1));
|
||||
|
||||
// Compute the "spread" — how much this axis differentiates
|
||||
let min_val = indexed.first().map(|x| x.1).unwrap_or(0.0);
|
||||
|
|
@ -268,25 +268,14 @@ pub fn nearest_neighbors(
|
|||
None => return vec![],
|
||||
};
|
||||
|
||||
// Weight by inverse eigenvalue (coarser axes matter more)
|
||||
let weights: Vec<f64> = emb.eigenvalues.iter()
|
||||
.map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
|
||||
.collect();
|
||||
let weights = eigenvalue_weights(&emb.eigenvalues);
|
||||
|
||||
let mut distances: Vec<(String, f64)> = emb.coords.iter()
|
||||
.filter(|(k, _)| k.as_str() != key)
|
||||
.map(|(k, coords)| {
|
||||
let dist: f64 = target.iter()
|
||||
.zip(coords.iter())
|
||||
.zip(weights.iter())
|
||||
.map(|((&a, &b), &w)| w * (a - b) * (a - b))
|
||||
.sum::<f64>()
|
||||
.sqrt();
|
||||
(k.clone(), dist)
|
||||
})
|
||||
.map(|(k, coords)| (k.clone(), weighted_distance(target, coords, &weights)))
|
||||
.collect();
|
||||
|
||||
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
|
||||
distances.sort_by(|a, b| a.1.total_cmp(&b.1));
|
||||
distances.truncate(k);
|
||||
distances
|
||||
}
|
||||
|
|
@ -298,7 +287,7 @@ pub fn nearest_to_seeds(
|
|||
seeds: &[&str],
|
||||
k: usize,
|
||||
) -> Vec<(String, f64)> {
|
||||
let seed_set: std::collections::HashSet<&str> = seeds.iter().copied().collect();
|
||||
let seed_set: HashSet<&str> = seeds.iter().copied().collect();
|
||||
|
||||
let seed_coords: Vec<&Vec<f64>> = seeds.iter()
|
||||
.filter_map(|s| emb.coords.get(*s))
|
||||
|
|
@ -307,29 +296,19 @@ pub fn nearest_to_seeds(
|
|||
return vec![];
|
||||
}
|
||||
|
||||
let weights: Vec<f64> = emb.eigenvalues.iter()
|
||||
.map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
|
||||
.collect();
|
||||
let weights = eigenvalue_weights(&emb.eigenvalues);
|
||||
|
||||
let mut distances: Vec<(String, f64)> = emb.coords.iter()
|
||||
.filter(|(k, _)| !seed_set.contains(k.as_str()))
|
||||
.map(|(k, coords)| {
|
||||
// Distance to nearest seed
|
||||
let min_dist = seed_coords.iter()
|
||||
.map(|sc| {
|
||||
coords.iter()
|
||||
.zip(sc.iter())
|
||||
.zip(weights.iter())
|
||||
.map(|((&a, &b), &w)| w * (a - b) * (a - b))
|
||||
.sum::<f64>()
|
||||
.sqrt()
|
||||
})
|
||||
.map(|sc| weighted_distance(coords, sc, &weights))
|
||||
.fold(f64::MAX, f64::min);
|
||||
(k.clone(), min_dist)
|
||||
})
|
||||
.collect();
|
||||
|
||||
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
|
||||
distances.sort_by(|a, b| a.1.total_cmp(&b.1));
|
||||
distances.truncate(k);
|
||||
distances
|
||||
}
|
||||
|
|
@ -423,7 +402,7 @@ pub fn analyze_positions(
|
|||
// Median distance per community for outlier scoring
|
||||
let medians: HashMap<u32, f64> = by_community.into_iter()
|
||||
.map(|(comm, mut dists)| {
|
||||
dists.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
dists.sort_by(|a, b| a.total_cmp(b));
|
||||
let median = if dists.is_empty() {
|
||||
1.0
|
||||
} else if dists.len() % 2 == 0 {
|
||||
|
|
@ -442,7 +421,7 @@ pub fn analyze_positions(
|
|||
let (nearest_community, dist_to_nearest) = centers.iter()
|
||||
.filter(|(&c, _)| c != comm)
|
||||
.map(|(&c, center)| (c, weighted_distance(coords, center, &weights)))
|
||||
.min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
|
||||
.min_by(|a, b| a.1.total_cmp(&b.1))
|
||||
.unwrap_or((comm, f64::MAX));
|
||||
|
||||
let median = medians.get(&comm).copied().unwrap_or(1.0);
|
||||
|
|
@ -461,7 +440,7 @@ pub fn analyze_positions(
|
|||
})
|
||||
.collect();
|
||||
|
||||
positions.sort_by(|a, b| b.outlier_score.partial_cmp(&a.outlier_score).unwrap());
|
||||
positions.sort_by(|a, b| b.outlier_score.total_cmp(&a.outlier_score));
|
||||
positions
|
||||
}
|
||||
|
||||
|
|
@ -494,7 +473,7 @@ pub fn unlinked_neighbors(
|
|||
}
|
||||
}
|
||||
|
||||
pairs.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap());
|
||||
pairs.sort_by(|a, b| a.2.total_cmp(&b.2));
|
||||
pairs.truncate(max_pairs);
|
||||
pairs
|
||||
}
|
||||
|
|
@ -560,6 +539,6 @@ pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize
|
|||
})
|
||||
.collect();
|
||||
|
||||
dim_loading.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
dim_loading.sort_by(|a, b| b.1.total_cmp(&a.1));
|
||||
dim_loading
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue