fix unwrap-on-partial_cmp, dedup helpers, O(1) relation dedup
Replace all partial_cmp().unwrap() with total_cmp() in spectral.rs and knowledge.rs — eliminates potential panics on NaN without changing behavior for normal floats. Use existing weighted_distance() and eigenvalue_weights() helpers in nearest_neighbors() and nearest_to_seeds() instead of inlining the same distance computation. Move parse_timestamp_to_epoch() from enrich.rs to util.rs — was duplicated logic, now shared. Replace O(n²) relation existence check in init_from_markdown() with a HashSet of (source, target) UUID pairs. With 26K relations this was scanning linearly for every link in every markdown unit.
This commit is contained in:
parent
2f2c84e1c0
commit
3dddc40841
5 changed files with 55 additions and 63 deletions
|
|
@ -79,8 +79,6 @@ impl Store {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Resolve a link target to (key, uuid).
|
||||
fn resolve_node_uuid(&self, target: &str) -> Option<(String, [u8; 16])> {
|
||||
let bare = strip_md_suffix(target);
|
||||
|
|
@ -103,12 +101,28 @@ impl Store {
|
|||
let dir = memory_dir();
|
||||
let mut count = 0;
|
||||
if dir.exists() {
|
||||
count = self.scan_dir_for_init(&dir)?;
|
||||
// Build edge set for O(1) dedup during ingestion
|
||||
let mut edge_set = self.build_edge_set();
|
||||
count = self.scan_dir_for_init(&dir, &mut edge_set)?;
|
||||
}
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
fn scan_dir_for_init(&mut self, dir: &Path) -> Result<usize, String> {
|
||||
/// Build a HashSet of existing (source, target) UUID pairs for O(1) dedup.
|
||||
fn build_edge_set(&self) -> std::collections::HashSet<([u8; 16], [u8; 16])> {
|
||||
let mut set = std::collections::HashSet::with_capacity(self.relations.len() * 2);
|
||||
for r in &self.relations {
|
||||
set.insert((r.source, r.target));
|
||||
set.insert((r.target, r.source));
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn scan_dir_for_init(
|
||||
&mut self,
|
||||
dir: &Path,
|
||||
edge_set: &mut std::collections::HashSet<([u8; 16], [u8; 16])>,
|
||||
) -> Result<usize, String> {
|
||||
let mut count = 0;
|
||||
let entries = fs::read_dir(dir)
|
||||
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
|
||||
|
|
@ -116,7 +130,7 @@ impl Store {
|
|||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
count += self.scan_dir_for_init(&path)?;
|
||||
count += self.scan_dir_for_init(&path, edge_set)?;
|
||||
continue;
|
||||
}
|
||||
let Some(ext) = path.extension() else { continue };
|
||||
|
|
@ -140,10 +154,9 @@ impl Store {
|
|||
|
||||
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
|
||||
let Some((key, uuid)) = self.resolve_node_uuid(link) else { continue };
|
||||
let exists = self.relations.iter().any(|r|
|
||||
(r.source == source_uuid && r.target == uuid) ||
|
||||
(r.source == uuid && r.target == source_uuid));
|
||||
if !exists {
|
||||
if !edge_set.contains(&(source_uuid, uuid)) {
|
||||
edge_set.insert((source_uuid, uuid));
|
||||
edge_set.insert((uuid, source_uuid));
|
||||
new_relations.push(new_relation(
|
||||
source_uuid, uuid, RelationType::Link, 1.0,
|
||||
&unit.key, &key,
|
||||
|
|
@ -153,10 +166,8 @@ impl Store {
|
|||
|
||||
for cause in &unit.causes {
|
||||
let Some((key, uuid)) = self.resolve_node_uuid(cause) else { continue };
|
||||
let exists = self.relations.iter().any(|r|
|
||||
r.source == uuid && r.target == source_uuid
|
||||
&& r.rel_type == RelationType::Causal);
|
||||
if !exists {
|
||||
if !edge_set.contains(&(uuid, source_uuid)) {
|
||||
edge_set.insert((uuid, source_uuid));
|
||||
new_relations.push(new_relation(
|
||||
uuid, source_uuid, RelationType::Causal, 1.0,
|
||||
&key, &unit.key,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue