store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
// Mutation operations on the store
//
// CRUD (upsert, delete, modify), feedback tracking (mark_used, mark_wrong),
// maintenance (decay, fix_categories, cap_degree), and graph metrics.
|
|
|
|
|
|
|
|
|
|
use super::types::*;
|
|
|
|
|
|
|
|
|
|
use std::collections::{HashMap, HashSet};
|
|
|
|
|
|
|
|
|
|
impl Store {
|
|
|
|
|
/// Add or update a node (appends to log + updates cache)
|
|
|
|
|
pub fn upsert_node(&mut self, mut node: Node) -> Result<(), String> {
|
|
|
|
|
if let Some(existing) = self.nodes.get(&node.key) {
|
|
|
|
|
node.uuid = existing.uuid;
|
|
|
|
|
node.version = existing.version + 1;
|
|
|
|
|
}
|
|
|
|
|
self.append_nodes(&[node.clone()])?;
|
|
|
|
|
self.uuid_to_key.insert(node.uuid, node.key.clone());
|
|
|
|
|
self.nodes.insert(node.key.clone(), node);
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Add a relation (appends to log + updates cache)
///
/// Persists the relation to the append-only log first; the in-memory
/// `relations` cache is only updated if the append succeeds, so cache and
/// log cannot diverge on a failed write.
pub fn add_relation(&mut self, rel: Relation) -> Result<(), String> {
    // from_ref builds a one-element slice without cloning `rel`.
    self.append_relations(std::slice::from_ref(&rel))?;
    self.relations.push(rel);
    Ok(())
}
/// Upsert a node: update if exists (and content changed), create if not.
|
|
|
|
|
/// Returns: "created", "updated", or "unchanged".
|
2026-03-06 21:42:39 -05:00
|
|
|
///
|
|
|
|
|
/// Provenance is determined by the POC_PROVENANCE env var if set,
|
|
|
|
|
/// otherwise defaults to Manual.
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> {
|
2026-03-06 21:42:39 -05:00
|
|
|
let prov = Provenance::from_env().unwrap_or(Provenance::Manual);
|
|
|
|
|
self.upsert_provenance(key, content, prov)
|
2026-03-05 15:30:57 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Upsert with explicit provenance (for agent-created nodes).
|
|
|
|
|
pub fn upsert_provenance(&mut self, key: &str, content: &str, provenance: Provenance) -> Result<&'static str, String> {
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
if let Some(existing) = self.nodes.get(key) {
|
|
|
|
|
if existing.content == content {
|
|
|
|
|
return Ok("unchanged");
|
|
|
|
|
}
|
|
|
|
|
let mut node = existing.clone();
|
|
|
|
|
node.content = content.to_string();
|
2026-03-05 15:30:57 -05:00
|
|
|
node.provenance = provenance;
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
node.version += 1;
|
|
|
|
|
self.append_nodes(std::slice::from_ref(&node))?;
|
|
|
|
|
self.nodes.insert(key.to_string(), node);
|
|
|
|
|
Ok("updated")
|
|
|
|
|
} else {
|
2026-03-05 15:30:57 -05:00
|
|
|
let mut node = new_node(key, content);
|
|
|
|
|
node.provenance = provenance;
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
self.append_nodes(std::slice::from_ref(&node))?;
|
|
|
|
|
self.uuid_to_key.insert(node.uuid, node.key.clone());
|
|
|
|
|
self.nodes.insert(key.to_string(), node);
|
|
|
|
|
Ok("created")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Soft-delete a node (appends deleted version, removes from cache).
|
|
|
|
|
pub fn delete_node(&mut self, key: &str) -> Result<(), String> {
|
|
|
|
|
let node = self.nodes.get(key)
|
|
|
|
|
.ok_or_else(|| format!("No node '{}'", key))?;
|
|
|
|
|
let mut deleted = node.clone();
|
|
|
|
|
deleted.deleted = true;
|
|
|
|
|
deleted.version += 1;
|
|
|
|
|
self.append_nodes(std::slice::from_ref(&deleted))?;
|
|
|
|
|
self.nodes.remove(key);
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-05 10:24:18 -05:00
|
|
|
/// Rename a node: change its key, update debug strings on all edges.
///
/// Graph edges (source/target UUIDs) are unaffected — they're already
/// UUID-based. We update the human-readable source_key/target_key strings
/// on relations, and created_at is preserved untouched.
///
/// Appends: (new_key, v+1) + (old_key, deleted, v+1) + updated relations.
pub fn rename_node(&mut self, old_key: &str, new_key: &str) -> Result<(), String> {
    // No-op rename: succeed without writing anything.
    if old_key == new_key {
        return Ok(());
    }
    // Refuse to clobber an existing node under the target key.
    if self.nodes.contains_key(new_key) {
        return Err(format!("Key '{}' already exists", new_key));
    }
    let node = self.nodes.get(old_key)
        .ok_or_else(|| format!("No node '{}'", old_key))?
        .clone();

    // New version under the new key
    let mut renamed = node.clone();
    renamed.key = new_key.to_string();
    renamed.version += 1;

    // Deletion record for the old key (same UUID, independent version counter)
    let mut tombstone = node.clone();
    tombstone.deleted = true;
    tombstone.version += 1;

    // Collect affected relations and update their debug key strings
    // (an edge may touch old_key on either end — or both, for a self-loop).
    let updated_rels: Vec<_> = self.relations.iter()
        .filter(|r| r.source_key == old_key || r.target_key == old_key)
        .map(|r| {
            let mut r = r.clone();
            r.version += 1;
            if r.source_key == old_key { r.source_key = new_key.to_string(); }
            if r.target_key == old_key { r.target_key = new_key.to_string(); }
            r
        })
        .collect();

    // Persist (each append acquires its own file lock)
    self.append_nodes(&[renamed.clone(), tombstone])?;
    if !updated_rels.is_empty() {
        self.append_relations(&updated_rels)?;
    }

    // Update in-memory cache
    // (same UUID, so the uuid_to_key insert replaces the old mapping).
    self.nodes.remove(old_key);
    self.uuid_to_key.insert(renamed.uuid, new_key.to_string());
    self.nodes.insert(new_key.to_string(), renamed);
    // Mirror the persisted relation edits onto the cached relations, matched
    // by UUID.
    for updated in &updated_rels {
        if let Some(r) = self.relations.iter_mut().find(|r| r.uuid == updated.uuid) {
            r.source_key = updated.source_key.clone();
            r.target_key = updated.target_key.clone();
            r.version = updated.version;
        }
    }

    Ok(())
}
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
/// Modify a node in-place, bump version, and persist to capnp log.
|
|
|
|
|
fn modify_node(&mut self, key: &str, f: impl FnOnce(&mut Node)) -> Result<(), String> {
|
|
|
|
|
let node = self.nodes.get_mut(key)
|
|
|
|
|
.ok_or_else(|| format!("No node '{}'", key))?;
|
|
|
|
|
f(node);
|
|
|
|
|
node.version += 1;
|
|
|
|
|
let node = node.clone();
|
|
|
|
|
self.append_nodes(&[node])
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn mark_used(&mut self, key: &str) {
|
|
|
|
|
let boost = self.params.use_boost as f32;
|
|
|
|
|
let _ = self.modify_node(key, |n| {
|
|
|
|
|
n.uses += 1;
|
|
|
|
|
n.weight = (n.weight + boost).min(1.0);
|
|
|
|
|
if n.spaced_repetition_interval < 30 {
|
|
|
|
|
n.spaced_repetition_interval = match n.spaced_repetition_interval {
|
|
|
|
|
1 => 3, 3 => 7, 7 => 14, 14 => 30, _ => 30,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
n.last_replayed = now_epoch();
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn mark_wrong(&mut self, key: &str, _ctx: Option<&str>) {
|
|
|
|
|
let _ = self.modify_node(key, |n| {
|
|
|
|
|
n.wrongs += 1;
|
|
|
|
|
n.weight = (n.weight - 0.1).max(0.0);
|
|
|
|
|
n.spaced_repetition_interval = 1;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn record_gap(&mut self, desc: &str) {
|
|
|
|
|
self.gaps.push(GapRecord {
|
|
|
|
|
description: desc.to_string(),
|
|
|
|
|
timestamp: today(),
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn categorize(&mut self, key: &str, cat_str: &str) -> Result<(), String> {
|
|
|
|
|
let cat = Category::from_str(cat_str)
|
|
|
|
|
.ok_or_else(|| format!("Unknown category '{}'. Use: core/tech/gen/obs/task", cat_str))?;
|
|
|
|
|
self.modify_node(key, |n| { n.category = cat; })
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn decay(&mut self) -> (usize, usize) {
|
|
|
|
|
let base = self.params.decay_factor;
|
|
|
|
|
let threshold = self.params.prune_threshold as f32;
|
|
|
|
|
let mut decayed = 0;
|
|
|
|
|
let mut pruned = 0;
|
2026-03-05 10:24:18 -05:00
|
|
|
let mut updated = Vec::new();
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
|
2026-03-05 10:24:18 -05:00
|
|
|
for (_key, node) in &mut self.nodes {
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
let factor = node.category.decay_factor(base) as f32;
|
2026-03-05 10:24:18 -05:00
|
|
|
let old_weight = node.weight;
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
node.weight *= factor;
|
2026-03-05 10:24:18 -05:00
|
|
|
|
|
|
|
|
// Clamp near-prune nodes instead of removing
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
if node.weight < threshold {
|
2026-03-05 10:24:18 -05:00
|
|
|
node.weight = node.weight.max(0.01);
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
pruned += 1;
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-05 10:24:18 -05:00
|
|
|
// Only persist nodes whose weight actually changed
|
|
|
|
|
if (node.weight - old_weight).abs() > 1e-6 {
|
|
|
|
|
node.version += 1;
|
|
|
|
|
updated.push(node.clone());
|
|
|
|
|
decayed += 1;
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-05 10:24:18 -05:00
|
|
|
if !updated.is_empty() {
|
|
|
|
|
let _ = self.append_nodes(&updated);
|
|
|
|
|
}
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
|
|
|
|
|
(decayed, pruned)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Bulk recategorize nodes using rule-based logic.
|
|
|
|
|
/// Returns (changed, unchanged) counts.
|
|
|
|
|
pub fn fix_categories(&mut self) -> Result<(usize, usize), String> {
|
2026-03-05 15:41:35 -05:00
|
|
|
let cfg = crate::config::get();
|
|
|
|
|
let core_files: Vec<&str> = cfg.core_nodes.iter().map(|s| s.as_str()).collect();
|
store: split mod.rs into persist.rs and ops.rs
mod.rs was 937 lines with all Store methods in one block.
Split into three files by responsibility:
- persist.rs (318 lines): load, save, replay, append, snapshot
— all disk IO and cache management
- ops.rs (300 lines): upsert, delete, modify, mark_used/wrong,
decay, fix_categories, cap_degree — all mutations
- mod.rs (356 lines): re-exports, key resolution, ingestion,
rendering, search — read-only operations
No behavioral changes; cargo check + full smoke test pass.
2026-03-03 16:40:32 -05:00
|
|
|
let tech_files = [
|
|
|
|
|
"language-theory.md", "zoom-navigation.md",
|
|
|
|
|
"rust-conversion.md", "poc-architecture.md",
|
|
|
|
|
];
|
|
|
|
|
let tech_prefixes = ["design-"];
|
|
|
|
|
let obs_files = [
|
|
|
|
|
"reflections.md", "reflections-zoom.md", "differentiation.md",
|
|
|
|
|
"cognitive-modes.md", "paper-notes.md", "inner-life.md",
|
|
|
|
|
"conversation.md", "interests.md", "stuck-toolkit.md",
|
|
|
|
|
];
|
|
|
|
|
let obs_prefixes = ["skill-", "worked-example-"];
|
|
|
|
|
|
|
|
|
|
let mut changed_nodes = Vec::new();
|
|
|
|
|
let mut unchanged = 0;
|
|
|
|
|
|
|
|
|
|
let keys: Vec<String> = self.nodes.keys().cloned().collect();
|
|
|
|
|
for key in &keys {
|
|
|
|
|
let node = self.nodes.get(key).unwrap();
|
|
|
|
|
if node.category != Category::Core {
|
|
|
|
|
unchanged += 1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let file = key.split('#').next().unwrap_or(key);
|
|
|
|
|
|
|
|
|
|
let new_cat = if core_files.iter().any(|&f| file == f) {
|
|
|
|
|
None
|
|
|
|
|
} else if tech_files.iter().any(|&f| file == f)
|
|
|
|
|
|| tech_prefixes.iter().any(|p| file.starts_with(p))
|
|
|
|
|
{
|
|
|
|
|
Some(Category::Technical)
|
|
|
|
|
} else if obs_files.iter().any(|&f| file == f)
|
|
|
|
|
|| obs_prefixes.iter().any(|p| file.starts_with(p))
|
|
|
|
|
{
|
|
|
|
|
Some(Category::Observation)
|
|
|
|
|
} else {
|
|
|
|
|
Some(Category::General)
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if let Some(cat) = new_cat {
|
|
|
|
|
let node = self.nodes.get_mut(key).unwrap();
|
|
|
|
|
node.category = cat;
|
|
|
|
|
node.version += 1;
|
|
|
|
|
changed_nodes.push(node.clone());
|
|
|
|
|
} else {
|
|
|
|
|
unchanged += 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !changed_nodes.is_empty() {
|
|
|
|
|
self.append_nodes(&changed_nodes)?;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok((changed_nodes.len(), unchanged))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Cap node degree by soft-deleting edges from mega-hubs.
|
|
|
|
|
pub fn cap_degree(&mut self, max_degree: usize) -> Result<(usize, usize), String> {
|
|
|
|
|
let mut node_degree: HashMap<String, usize> = HashMap::new();
|
|
|
|
|
for rel in &self.relations {
|
|
|
|
|
if rel.deleted { continue; }
|
|
|
|
|
*node_degree.entry(rel.source_key.clone()).or_default() += 1;
|
|
|
|
|
*node_degree.entry(rel.target_key.clone()).or_default() += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut node_edges: HashMap<String, Vec<usize>> = HashMap::new();
|
|
|
|
|
for (i, rel) in self.relations.iter().enumerate() {
|
|
|
|
|
if rel.deleted { continue; }
|
|
|
|
|
node_edges.entry(rel.source_key.clone()).or_default().push(i);
|
|
|
|
|
node_edges.entry(rel.target_key.clone()).or_default().push(i);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut to_delete: HashSet<usize> = HashSet::new();
|
|
|
|
|
let mut hubs_capped = 0;
|
|
|
|
|
|
|
|
|
|
for (_key, edge_indices) in &node_edges {
|
|
|
|
|
let active: Vec<usize> = edge_indices.iter()
|
|
|
|
|
.filter(|&&i| !to_delete.contains(&i))
|
|
|
|
|
.copied()
|
|
|
|
|
.collect();
|
|
|
|
|
if active.len() <= max_degree { continue; }
|
|
|
|
|
|
|
|
|
|
let mut auto_indices: Vec<(usize, f32)> = Vec::new();
|
|
|
|
|
let mut link_indices: Vec<(usize, usize)> = Vec::new();
|
|
|
|
|
for &i in &active {
|
|
|
|
|
let rel = &self.relations[i];
|
|
|
|
|
if rel.rel_type == RelationType::Auto {
|
|
|
|
|
auto_indices.push((i, rel.strength));
|
|
|
|
|
} else {
|
|
|
|
|
let other = if &rel.source_key == _key {
|
|
|
|
|
&rel.target_key
|
|
|
|
|
} else {
|
|
|
|
|
&rel.source_key
|
|
|
|
|
};
|
|
|
|
|
let other_deg = node_degree.get(other).copied().unwrap_or(0);
|
|
|
|
|
link_indices.push((i, other_deg));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let excess = active.len() - max_degree;
|
|
|
|
|
|
|
|
|
|
auto_indices.sort_by(|a, b| a.1.total_cmp(&b.1));
|
|
|
|
|
let auto_prune = excess.min(auto_indices.len());
|
|
|
|
|
for &(i, _) in auto_indices.iter().take(auto_prune) {
|
|
|
|
|
to_delete.insert(i);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let remaining_excess = excess.saturating_sub(auto_prune);
|
|
|
|
|
if remaining_excess > 0 {
|
|
|
|
|
link_indices.sort_by(|a, b| b.1.cmp(&a.1));
|
|
|
|
|
let link_prune = remaining_excess.min(link_indices.len());
|
|
|
|
|
for &(i, _) in link_indices.iter().take(link_prune) {
|
|
|
|
|
to_delete.insert(i);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hubs_capped += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut pruned_rels = Vec::new();
|
|
|
|
|
for &i in &to_delete {
|
|
|
|
|
self.relations[i].deleted = true;
|
|
|
|
|
self.relations[i].version += 1;
|
|
|
|
|
pruned_rels.push(self.relations[i].clone());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !pruned_rels.is_empty() {
|
|
|
|
|
self.append_relations(&pruned_rels)?;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.relations.retain(|r| !r.deleted);
|
|
|
|
|
|
|
|
|
|
Ok((hubs_capped, to_delete.len()))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn category_counts(&self) -> HashMap<&str, usize> {
|
|
|
|
|
let mut counts = HashMap::new();
|
|
|
|
|
for node in self.nodes.values() {
|
|
|
|
|
*counts.entry(node.category.label()).or_insert(0) += 1;
|
|
|
|
|
}
|
|
|
|
|
counts
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Update graph-derived fields on all nodes
|
|
|
|
|
pub fn update_graph_metrics(&mut self) {
|
|
|
|
|
let g = self.build_graph();
|
|
|
|
|
let communities = g.communities();
|
|
|
|
|
|
|
|
|
|
for (key, node) in &mut self.nodes {
|
|
|
|
|
node.community_id = communities.get(key).copied();
|
|
|
|
|
node.clustering_coefficient = Some(g.clustering_coefficient(key));
|
|
|
|
|
node.degree = Some(g.degree(key) as u32);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|