fix unwrap-on-partial_cmp, dedup helpers, O(1) relation dedup

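Replace all partial_cmp().unwrap() calls with total_cmp() in spectral.rs
and knowledge.rs. This eliminates potential panics on NaN without
changing the ordering of ordinary finite values (total_cmp() differs
only in giving NaN a defined position and ordering -0.0 before +0.0).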

Use existing weighted_distance() and eigenvalue_weights() helpers in
nearest_neighbors() and nearest_to_seeds() instead of inlining the
same distance computation.

Move parse_timestamp_to_epoch() from enrich.rs to util.rs; the logic
was duplicated and is now shared.

Replace the O(n²) relation-existence check in init_from_markdown() with
a HashSet of (source, target) UUID pairs. With 26K relations, the old
check scanned the relation list linearly for every link in every
markdown unit.
This commit is contained in:
ProofOfConcept 2026-03-08 21:22:05 -04:00
parent 2f2c84e1c0
commit 3dddc40841
5 changed files with 55 additions and 63 deletions

View file

@@ -522,7 +522,7 @@ fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
 .map(|k| (spectral_distance(&embedding, seed, k), **k))
 .filter(|(d, _)| d.is_finite())
 .collect();
-distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
+distances.sort_by(|a, b| a.0.total_cmp(&b.0));
 let cluster: Vec<String> = std::iter::once((*seed).clone())
 .chain(distances.iter().take(cluster_size - 1).map(|(_, k)| (*k).clone()))
@@ -576,7 +576,7 @@ fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<St
 .map(|k| (spectral_distance(&embedding, seed, k), *k))
 .filter(|(d, _)| *d < 0.5 && d.is_finite())
 .collect();
-near.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
+near.sort_by(|a, b| a.0.total_cmp(&b.0));
 for (_, target) in near.iter().take(5) {
 if !has_edge(store, seed, target) {