2026-03-09 01:19:04 -04:00
|
|
|
|
// Memory search: composable algorithm pipeline.
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
//
|
2026-03-09 01:19:04 -04:00
|
|
|
|
// Each algorithm is a stage: takes seeds Vec<(String, f64)>, returns
|
|
|
|
|
|
// new/modified seeds. Stages compose left-to-right in a pipeline.
|
|
|
|
|
|
//
|
|
|
|
|
|
// Available algorithms:
|
|
|
|
|
|
// spread — spreading activation through graph edges
|
|
|
|
|
|
// spectral — nearest neighbors in spectral embedding space
|
|
|
|
|
|
// manifold — extrapolation along direction defined by seeds (TODO)
|
|
|
|
|
|
//
|
|
|
|
|
|
// Seed extraction (matching query terms to node keys) is shared
|
|
|
|
|
|
// infrastructure, not an algorithm stage.
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
2026-03-03 12:56:15 -05:00
|
|
|
|
use crate::store::StoreView;
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
use crate::graph::Graph;
|
2026-03-09 01:19:04 -04:00
|
|
|
|
use crate::spectral;
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
|
|
|
|
|
|
use std::fmt;
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
|
|
|
|
|
/// A single search hit produced by the pipeline.
//
// Derives added for consistency with `AlgoStage` / `Algorithm`, which are
// already `Clone + Debug`; results are plain data and callers may need to
// copy or log them.
#[derive(Clone, Debug)]
pub struct SearchResult {
    /// Node key this result refers to.
    pub key: String,
    /// Final activation/score after all pipeline stages.
    pub activation: f64,
    /// True when the node was matched directly from the query terms
    /// (as opposed to being discovered by an algorithm stage).
    pub is_direct: bool,
    /// Optional content snippet for display.
    pub snippet: Option<String>,
}
|
|
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
/// A parsed algorithm stage with its parameters.
///
/// Produced by [`AlgoStage::parse`] from a spec string like
/// `"spread,max_hops=4,edge_decay=0.5"`.
#[derive(Clone, Debug)]
pub struct AlgoStage {
    // Which pipeline algorithm this stage runs.
    pub algo: Algorithm,
    // Raw key=value parameters from the stage spec. Values stay as strings;
    // the param_* accessors parse them on demand with per-call defaults.
    pub params: HashMap<String, String>,
}
|
|
|
|
|
|
|
|
|
|
|
|
/// The algorithms available as pipeline stages.
//
// `Copy`, `PartialEq`, `Eq` added: the enum is all unit variants, so the
// derives are free, backward compatible, and let callers compare/copy
// without `clone()`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Algorithm {
    /// Spreading activation through graph edges.
    Spread,
    /// Nearest neighbors in spectral embedding space.
    Spectral,
    /// Extrapolation along the direction defined by the seeds.
    Manifold,
    /// Multi-source reachability scoring (rewards intersection of seeds).
    Confluence,
    /// Straightest paths between seed pairs in spectral space.
    Geodesic,
}
|
|
|
|
|
|
|
|
|
|
|
|
impl fmt::Display for Algorithm {
|
|
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
|
|
match self {
|
|
|
|
|
|
Algorithm::Spread => write!(f, "spread"),
|
|
|
|
|
|
Algorithm::Spectral => write!(f, "spectral"),
|
|
|
|
|
|
Algorithm::Manifold => write!(f, "manifold"),
|
2026-03-09 01:22:29 -04:00
|
|
|
|
Algorithm::Confluence => write!(f, "confluence"),
|
|
|
|
|
|
Algorithm::Geodesic => write!(f, "geodesic"),
|
2026-03-09 01:19:04 -04:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl AlgoStage {
|
|
|
|
|
|
/// Parse "spread,max_hops=4,edge_decay=0.5" into an AlgoStage.
|
|
|
|
|
|
pub fn parse(s: &str) -> Result<Self, String> {
|
|
|
|
|
|
let mut parts = s.split(',');
|
|
|
|
|
|
let name = parts.next().unwrap_or("");
|
|
|
|
|
|
let algo = match name {
|
|
|
|
|
|
"spread" => Algorithm::Spread,
|
|
|
|
|
|
"spectral" => Algorithm::Spectral,
|
|
|
|
|
|
"manifold" => Algorithm::Manifold,
|
2026-03-09 01:22:29 -04:00
|
|
|
|
"confluence" => Algorithm::Confluence,
|
|
|
|
|
|
"geodesic" => Algorithm::Geodesic,
|
2026-03-09 01:19:04 -04:00
|
|
|
|
_ => return Err(format!("unknown algorithm: {}", name)),
|
|
|
|
|
|
};
|
|
|
|
|
|
let mut params = HashMap::new();
|
|
|
|
|
|
for part in parts {
|
|
|
|
|
|
if let Some((k, v)) = part.split_once('=') {
|
|
|
|
|
|
params.insert(k.to_string(), v.to_string());
|
|
|
|
|
|
} else {
|
|
|
|
|
|
return Err(format!("bad param (expected key=val): {}", part));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
Ok(AlgoStage { algo, params })
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn param_f64(&self, key: &str, default: f64) -> f64 {
|
|
|
|
|
|
self.params.get(key)
|
|
|
|
|
|
.and_then(|v| v.parse().ok())
|
|
|
|
|
|
.unwrap_or(default)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn param_u32(&self, key: &str, default: u32) -> u32 {
|
|
|
|
|
|
self.params.get(key)
|
|
|
|
|
|
.and_then(|v| v.parse().ok())
|
|
|
|
|
|
.unwrap_or(default)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn param_usize(&self, key: &str, default: usize) -> usize {
|
|
|
|
|
|
self.params.get(key)
|
|
|
|
|
|
.and_then(|v| v.parse().ok())
|
|
|
|
|
|
.unwrap_or(default)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Extract seeds from weighted terms by matching against node keys.
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
///
|
2026-03-09 01:19:04 -04:00
|
|
|
|
/// Returns (seeds, direct_hits) where direct_hits tracks which keys
|
|
|
|
|
|
/// were matched directly (vs found by an algorithm stage).
|
|
|
|
|
|
pub fn match_seeds(
|
|
|
|
|
|
terms: &BTreeMap<String, f64>,
|
|
|
|
|
|
store: &impl StoreView,
|
|
|
|
|
|
) -> (Vec<(String, f64)>, HashSet<String>) {
|
|
|
|
|
|
let mut seeds: Vec<(String, f64)> = Vec::new();
|
|
|
|
|
|
let mut direct_hits: HashSet<String> = HashSet::new();
|
|
|
|
|
|
|
|
|
|
|
|
let mut key_map: HashMap<String, (String, f64)> = HashMap::new();
|
|
|
|
|
|
store.for_each_node(|key, _content, weight| {
|
|
|
|
|
|
key_map.insert(key.to_lowercase(), (key.to_owned(), weight as f64));
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
for (term, &term_weight) in terms {
|
|
|
|
|
|
if let Some((orig_key, node_weight)) = key_map.get(term) {
|
|
|
|
|
|
let score = term_weight * node_weight;
|
|
|
|
|
|
seeds.push((orig_key.clone(), score));
|
|
|
|
|
|
direct_hits.insert(orig_key.clone());
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
(seeds, direct_hits)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Run a pipeline of algorithm stages.
|
|
|
|
|
|
pub fn run_pipeline(
|
|
|
|
|
|
stages: &[AlgoStage],
|
|
|
|
|
|
seeds: Vec<(String, f64)>,
|
|
|
|
|
|
graph: &Graph,
|
|
|
|
|
|
store: &impl StoreView,
|
|
|
|
|
|
debug: bool,
|
|
|
|
|
|
max_results: usize,
|
|
|
|
|
|
) -> Vec<(String, f64)> {
|
|
|
|
|
|
let mut current = seeds;
|
|
|
|
|
|
|
|
|
|
|
|
for stage in stages {
|
|
|
|
|
|
if debug {
|
|
|
|
|
|
println!("\n[search] === {} ({} seeds in) ===", stage.algo, current.len());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
current = match stage.algo {
|
|
|
|
|
|
Algorithm::Spread => run_spread(¤t, graph, store, stage, debug),
|
|
|
|
|
|
Algorithm::Spectral => run_spectral(¤t, graph, stage, debug),
|
2026-03-09 01:22:29 -04:00
|
|
|
|
Algorithm::Manifold => run_manifold(¤t, graph, stage, debug),
|
|
|
|
|
|
Algorithm::Confluence => run_confluence(¤t, graph, store, stage, debug),
|
|
|
|
|
|
Algorithm::Geodesic => run_geodesic(¤t, graph, stage, debug),
|
2026-03-09 01:19:04 -04:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
if debug {
|
|
|
|
|
|
println!("[search] {} → {} results", stage.algo, current.len());
|
|
|
|
|
|
for (i, (key, score)) in current.iter().enumerate().take(15) {
|
|
|
|
|
|
let cutoff = if i + 1 == max_results { " <-- cutoff" } else { "" };
|
|
|
|
|
|
println!(" [{:.4}] {}{}", score, key, cutoff);
|
|
|
|
|
|
}
|
|
|
|
|
|
if current.len() > 15 {
|
|
|
|
|
|
println!(" ... ({} more)", current.len() - 15);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
current.truncate(max_results);
|
|
|
|
|
|
current
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Spreading activation: propagate scores through graph edges.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Tunable params: max_hops (default from store), edge_decay (default from store),
|
|
|
|
|
|
/// min_activation (default from store).
|
|
|
|
|
|
fn run_spread(
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
seeds: &[(String, f64)],
|
|
|
|
|
|
graph: &Graph,
|
2026-03-03 01:33:31 -05:00
|
|
|
|
store: &impl StoreView,
|
2026-03-09 01:19:04 -04:00
|
|
|
|
stage: &AlgoStage,
|
|
|
|
|
|
_debug: bool,
|
|
|
|
|
|
) -> Vec<(String, f64)> {
|
|
|
|
|
|
let store_params = store.params();
|
|
|
|
|
|
let max_hops = stage.param_u32("max_hops", store_params.max_hops);
|
|
|
|
|
|
let edge_decay = stage.param_f64("edge_decay", store_params.edge_decay);
|
|
|
|
|
|
let min_activation = stage.param_f64("min_activation", store_params.min_activation);
|
|
|
|
|
|
|
|
|
|
|
|
spreading_activation(seeds, graph, store, max_hops, edge_decay, min_activation)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Spectral projection: find nearest neighbors in spectral embedding space.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Tunable params: k (default 20, number of neighbors to find).
|
|
|
|
|
|
fn run_spectral(
|
|
|
|
|
|
seeds: &[(String, f64)],
|
|
|
|
|
|
graph: &Graph,
|
|
|
|
|
|
stage: &AlgoStage,
|
|
|
|
|
|
debug: bool,
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
) -> Vec<(String, f64)> {
|
2026-03-09 01:19:04 -04:00
|
|
|
|
let k = stage.param_usize("k", 20);
|
|
|
|
|
|
|
|
|
|
|
|
let emb = match spectral::load_embedding() {
|
|
|
|
|
|
Ok(e) => e,
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
if debug { println!(" no spectral embedding: {}", e); }
|
|
|
|
|
|
return seeds.to_vec();
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
let weighted_seeds: Vec<(&str, f64)> = seeds.iter()
|
|
|
|
|
|
.map(|(k, w)| (k.as_str(), *w))
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
let projected = spectral::nearest_to_seeds_weighted(
|
|
|
|
|
|
&emb, &weighted_seeds, Some(graph), k,
|
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
|
|
if debug {
|
|
|
|
|
|
for (key, dist) in &projected {
|
|
|
|
|
|
let score = 1.0 / (1.0 + dist);
|
|
|
|
|
|
println!(" dist={:.6} score={:.4} {}", dist, score, key);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Merge: keep original seeds, add spectral results as new seeds
|
|
|
|
|
|
let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();
|
|
|
|
|
|
let mut result = seeds.to_vec();
|
|
|
|
|
|
for (key, dist) in projected {
|
|
|
|
|
|
if !seed_set.contains(key.as_str()) {
|
|
|
|
|
|
let score = 1.0 / (1.0 + dist);
|
|
|
|
|
|
result.push((key, score));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
result
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-09 01:22:29 -04:00
|
|
|
|
/// Confluence: multi-source reachability scoring.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Unlike spreading activation (which takes max activation from any source),
|
|
|
|
|
|
/// confluence rewards nodes reachable from *multiple* seeds. For each candidate
|
|
|
|
|
|
/// node within k hops, score = sum of (seed_weight * edge_decay^distance) across
|
|
|
|
|
|
/// all seeds that can reach it. Nodes at the intersection of multiple seeds'
|
|
|
|
|
|
/// neighborhoods score highest.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// This naturally handles mixed seeds: unrelated seeds activate disjoint
|
|
|
|
|
|
/// neighborhoods that don't overlap, so their results separate naturally.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Tunable params: max_hops (default 3), edge_decay (default 0.5),
|
|
|
|
|
|
/// min_sources (default 2, minimum number of distinct seeds that must reach a node).
|
|
|
|
|
|
fn run_confluence(
|
|
|
|
|
|
seeds: &[(String, f64)],
|
|
|
|
|
|
graph: &Graph,
|
|
|
|
|
|
store: &impl StoreView,
|
|
|
|
|
|
stage: &AlgoStage,
|
|
|
|
|
|
debug: bool,
|
|
|
|
|
|
) -> Vec<(String, f64)> {
|
|
|
|
|
|
let max_hops = stage.param_u32("max_hops", 3);
|
|
|
|
|
|
let edge_decay = stage.param_f64("edge_decay", 0.5);
|
|
|
|
|
|
let min_sources = stage.param_usize("min_sources", 2);
|
|
|
|
|
|
|
|
|
|
|
|
// For each seed, BFS outward collecting (node → activation) at each distance
|
|
|
|
|
|
// Track which seeds contributed to each node's score
|
|
|
|
|
|
let mut node_scores: HashMap<String, f64> = HashMap::new();
|
|
|
|
|
|
let mut node_sources: HashMap<String, HashSet<usize>> = HashMap::new();
|
|
|
|
|
|
|
|
|
|
|
|
for (seed_idx, (seed_key, seed_weight)) in seeds.iter().enumerate() {
|
|
|
|
|
|
let mut visited: HashMap<String, f64> = HashMap::new();
|
|
|
|
|
|
let mut queue: VecDeque<(String, u32)> = VecDeque::new();
|
|
|
|
|
|
|
|
|
|
|
|
visited.insert(seed_key.clone(), *seed_weight);
|
|
|
|
|
|
queue.push_back((seed_key.clone(), 0));
|
|
|
|
|
|
|
|
|
|
|
|
while let Some((key, depth)) = queue.pop_front() {
|
|
|
|
|
|
if depth >= max_hops { continue; }
|
|
|
|
|
|
|
|
|
|
|
|
let act = visited[&key];
|
|
|
|
|
|
|
|
|
|
|
|
for (neighbor, strength) in graph.neighbors(&key) {
|
|
|
|
|
|
let neighbor_weight = store.node_weight(neighbor.as_str());
|
|
|
|
|
|
let propagated = act * edge_decay * neighbor_weight * strength as f64;
|
|
|
|
|
|
if propagated < 0.001 { continue; }
|
|
|
|
|
|
|
|
|
|
|
|
if !visited.contains_key(neighbor.as_str()) || visited[neighbor.as_str()] < propagated {
|
|
|
|
|
|
visited.insert(neighbor.clone(), propagated);
|
|
|
|
|
|
queue.push_back((neighbor.clone(), depth + 1));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Accumulate into global scores (additive across seeds)
|
|
|
|
|
|
for (key, act) in visited {
|
|
|
|
|
|
*node_scores.entry(key.clone()).or_insert(0.0) += act;
|
|
|
|
|
|
node_sources.entry(key).or_default().insert(seed_idx);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Filter to nodes reached by min_sources distinct seeds
|
|
|
|
|
|
let mut results: Vec<(String, f64)> = node_scores.into_iter()
|
|
|
|
|
|
.filter(|(key, _)| {
|
|
|
|
|
|
node_sources.get(key).map(|s| s.len()).unwrap_or(0) >= min_sources
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
|
|
if debug {
|
|
|
|
|
|
// Show source counts
|
|
|
|
|
|
for (key, score) in results.iter().take(15) {
|
|
|
|
|
|
let sources = node_sources.get(key).map(|s| s.len()).unwrap_or(0);
|
|
|
|
|
|
println!(" [{:.4}] {} (from {} seeds)", score, key, sources);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
results.sort_by(|a, b| b.1.total_cmp(&a.1));
|
|
|
|
|
|
results
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Geodesic: straightest paths between seed pairs in spectral space.
///
/// For each pair of seeds, walk the graph from one to the other, at each
/// step choosing the neighbor whose spectral direction most aligns with
/// the target direction. Nodes along these geodesic paths score higher
/// the more paths pass through them and the straighter those paths are.
///
/// Tunable params: max_path (default 6), k (default 20 results).
fn run_geodesic(
    seeds: &[(String, f64)],
    graph: &Graph,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let max_path = stage.param_usize("max_path", 6);
    let k = stage.param_usize("k", 20);

    // No embedding available: the stage degrades to a pass-through.
    let emb = match spectral::load_embedding() {
        Ok(e) => e,
        Err(e) => {
            if debug { println!(" no spectral embedding: {}", e); }
            return seeds.to_vec();
        }
    };

    // Filter seeds to those with valid spectral coords
    // (an all-zero coordinate vector is treated as "no embedding").
    let valid_seeds: Vec<(&str, f64, &Vec<f64>)> = seeds.iter()
        .filter_map(|(key, weight)| {
            emb.coords.get(key.as_str())
                .filter(|c| c.iter().any(|&v| v.abs() > 1e-12))
                .map(|c| (key.as_str(), *weight, c))
        })
        .collect();

    // Pairwise walks need at least two embeddable endpoints.
    if valid_seeds.len() < 2 {
        if debug { println!(" need ≥2 seeds with spectral coords, have {}", valid_seeds.len()); }
        return seeds.to_vec();
    }

    // For each pair of seeds, find the geodesic path
    let mut path_counts: HashMap<String, f64> = HashMap::new();
    let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();

    for i in 0..valid_seeds.len() {
        for j in (i + 1)..valid_seeds.len() {
            let (key_a, weight_a, coords_a) = &valid_seeds[i];
            let (key_b, weight_b, coords_b) = &valid_seeds[j];
            // Heavier seed pairs contribute more to path node scores.
            let pair_weight = weight_a * weight_b;

            // Walk from A toward B
            let path_ab = geodesic_walk(
                key_a, coords_a, coords_b, graph, &emb, max_path,
            );
            // Walk from B toward A
            let path_ba = geodesic_walk(
                key_b, coords_b, coords_a, graph, &emb, max_path,
            );

            // Score nodes on both paths (nodes found from both directions score double)
            for (node, alignment) in path_ab.iter().chain(path_ba.iter()) {
                if !seed_set.contains(node.as_str()) {
                    *path_counts.entry(node.clone()).or_insert(0.0) += pair_weight * alignment;
                }
            }
        }
    }

    if debug && !path_counts.is_empty() {
        println!(" {} pairs examined, {} distinct nodes on paths",
            valid_seeds.len() * (valid_seeds.len() - 1) / 2,
            path_counts.len());
    }

    // Merge with original seeds: keep all seeds, then append the top-k
    // path nodes by accumulated score.
    let mut results = seeds.to_vec();
    let mut path_results: Vec<(String, f64)> = path_counts.into_iter().collect();
    path_results.sort_by(|a, b| b.1.total_cmp(&a.1));
    path_results.truncate(k);

    for (key, score) in path_results {
        if !seed_set.contains(key.as_str()) {
            results.push((key, score));
        }
    }

    // Final ordering: highest score first (seeds and path nodes interleave).
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    results
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Walk from `start` toward `target_coords` in spectral space, choosing
/// the neighbor at each step whose direction most aligns with the target.
/// Returns (node_key, alignment_score) for each intermediate node.
///
/// The walk is greedy and bounded by `max_steps`; it may terminate before
/// reaching the target (no forward-facing neighbor, or step budget spent),
/// in which case the partial path is returned.
fn geodesic_walk(
    start: &str,
    start_coords: &[f64],
    target_coords: &[f64],
    graph: &Graph,
    emb: &spectral::SpectralEmbedding,
    max_steps: usize,
) -> Vec<(String, f64)> {
    let mut path = Vec::new();
    let mut current = start.to_string();
    let mut current_coords = start_coords.to_vec();
    // Visited set prevents the greedy walk from cycling.
    let mut visited: HashSet<String> = HashSet::new();
    visited.insert(current.clone());

    for _ in 0..max_steps {
        // Direction we want to travel: from current toward target
        let direction: Vec<f64> = target_coords.iter()
            .zip(current_coords.iter())
            .map(|(t, c)| t - c)
            .collect();

        let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();
        if dir_norm < 1e-12 { break; } // arrived

        // Among neighbors with spectral coords, find the one most aligned
        let mut best: Option<(String, Vec<f64>, f64)> = None;

        for (neighbor, _strength) in graph.neighbors(&current) {
            if visited.contains(neighbor.as_str()) { continue; }

            // Skip neighbors with no (or all-zero) embedding coordinates.
            let neighbor_coords = match emb.coords.get(neighbor.as_str()) {
                Some(c) if c.iter().any(|&v| v.abs() > 1e-12) => c,
                _ => continue,
            };

            // Direction to this neighbor
            let step: Vec<f64> = neighbor_coords.iter()
                .zip(current_coords.iter())
                .map(|(n, c)| n - c)
                .collect();

            let step_norm = step.iter().map(|s| s * s).sum::<f64>().sqrt();
            if step_norm < 1e-12 { continue; }

            // Cosine similarity between desired direction and step direction
            let dot: f64 = direction.iter().zip(step.iter()).map(|(d, s)| d * s).sum();
            let alignment = dot / (dir_norm * step_norm);

            if alignment > 0.0 { // only consider forward-facing neighbors
                if best.as_ref().map(|(_, _, a)| alignment > *a).unwrap_or(true) {
                    best = Some((neighbor.clone(), neighbor_coords.clone(), alignment));
                }
            }
        }

        match best {
            Some((next_key, next_coords, alignment)) => {
                // Record the hop and advance the walk.
                path.push((next_key.clone(), alignment));
                visited.insert(next_key.clone());
                current = next_key;
                current_coords = next_coords;
            }
            None => break, // no forward-facing neighbors
        }
    }

    path
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Manifold: extrapolation along the direction defined by seeds.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Instead of finding what's *near* the seeds in spectral space (proximity),
|
|
|
|
|
|
/// find what's in the *direction* the seeds define. Given a weighted centroid
|
|
|
|
|
|
/// of seeds and the principal direction they span, find nodes that continue
|
|
|
|
|
|
/// along that direction.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Tunable params: k (default 20 results).
|
|
|
|
|
|
fn run_manifold(
|
|
|
|
|
|
seeds: &[(String, f64)],
|
|
|
|
|
|
graph: &Graph,
|
|
|
|
|
|
stage: &AlgoStage,
|
|
|
|
|
|
debug: bool,
|
|
|
|
|
|
) -> Vec<(String, f64)> {
|
|
|
|
|
|
let k = stage.param_usize("k", 20);
|
|
|
|
|
|
|
|
|
|
|
|
let emb = match spectral::load_embedding() {
|
|
|
|
|
|
Ok(e) => e,
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
if debug { println!(" no spectral embedding: {}", e); }
|
|
|
|
|
|
return seeds.to_vec();
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Collect seeds with valid spectral coordinates
|
|
|
|
|
|
let seed_data: Vec<(&str, f64, &Vec<f64>)> = seeds.iter()
|
|
|
|
|
|
.filter_map(|(key, weight)| {
|
|
|
|
|
|
emb.coords.get(key.as_str())
|
|
|
|
|
|
.filter(|c| c.iter().any(|&v| v.abs() > 1e-12))
|
|
|
|
|
|
.map(|c| (key.as_str(), *weight, c))
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
|
|
if seed_data.is_empty() {
|
|
|
|
|
|
if debug { println!(" no seeds with spectral coords"); }
|
|
|
|
|
|
return seeds.to_vec();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let dims = emb.dims;
|
|
|
|
|
|
|
|
|
|
|
|
// Compute weighted centroid of seeds
|
|
|
|
|
|
let mut centroid = vec![0.0f64; dims];
|
|
|
|
|
|
let mut total_weight = 0.0;
|
|
|
|
|
|
for (_, weight, coords) in &seed_data {
|
|
|
|
|
|
for (i, &c) in coords.iter().enumerate() {
|
|
|
|
|
|
centroid[i] += c * weight;
|
|
|
|
|
|
}
|
|
|
|
|
|
total_weight += weight;
|
|
|
|
|
|
}
|
|
|
|
|
|
if total_weight > 0.0 {
|
|
|
|
|
|
for c in &mut centroid {
|
|
|
|
|
|
*c /= total_weight;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Compute principal direction: weighted PCA axis 1
|
|
|
|
|
|
// For each seed, its deviation from centroid contributes to the direction
|
|
|
|
|
|
let mut direction = vec![0.0f64; dims];
|
|
|
|
|
|
if seed_data.len() >= 2 {
|
|
|
|
|
|
// Use power iteration to find dominant direction of seed spread
|
|
|
|
|
|
// Initialize with the vector from first seed to last seed
|
|
|
|
|
|
let first = seed_data.first().unwrap().2;
|
|
|
|
|
|
let last = seed_data.last().unwrap().2;
|
|
|
|
|
|
for i in 0..dims {
|
|
|
|
|
|
direction[i] = last[i] - first[i];
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// One round of power iteration on the covariance matrix
|
|
|
|
|
|
let mut new_dir = vec![0.0f64; dims];
|
|
|
|
|
|
for (_, weight, coords) in &seed_data {
|
|
|
|
|
|
let dev: Vec<f64> = coords.iter().zip(centroid.iter()).map(|(c, m)| c - m).collect();
|
|
|
|
|
|
let dot: f64 = dev.iter().zip(direction.iter()).map(|(d, v)| d * v).sum();
|
|
|
|
|
|
for i in 0..dims {
|
|
|
|
|
|
new_dir[i] += weight * dot * dev[i];
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
direction = new_dir;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();
|
|
|
|
|
|
|
|
|
|
|
|
let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();
|
|
|
|
|
|
|
|
|
|
|
|
// Score each non-seed node by projection onto the direction from centroid
|
|
|
|
|
|
let mut candidates: Vec<(String, f64)> = emb.coords.iter()
|
|
|
|
|
|
.filter(|(key, coords)| {
|
|
|
|
|
|
!seed_set.contains(key.as_str())
|
|
|
|
|
|
&& coords.iter().any(|&v| v.abs() > 1e-12)
|
|
|
|
|
|
})
|
|
|
|
|
|
.map(|(key, coords)| {
|
|
|
|
|
|
let deviation: Vec<f64> = coords.iter().zip(centroid.iter())
|
|
|
|
|
|
.map(|(c, m)| c - m)
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
|
|
let score = if dir_norm > 1e-12 {
|
|
|
|
|
|
// Project onto direction: how far along the principal axis
|
|
|
|
|
|
let projection: f64 = deviation.iter().zip(direction.iter())
|
|
|
|
|
|
.map(|(d, v)| d * v)
|
|
|
|
|
|
.sum::<f64>() / dir_norm;
|
|
|
|
|
|
|
|
|
|
|
|
// Distance from the axis (perpendicular component)
|
|
|
|
|
|
let proj_vec: Vec<f64> = direction.iter()
|
|
|
|
|
|
.map(|&d| d * projection / dir_norm)
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
let perp_dist: f64 = deviation.iter().zip(proj_vec.iter())
|
|
|
|
|
|
.map(|(d, p)| (d - p).powi(2))
|
|
|
|
|
|
.sum::<f64>()
|
|
|
|
|
|
.sqrt();
|
|
|
|
|
|
|
|
|
|
|
|
// Score: prefer nodes far along the direction but close to the axis
|
|
|
|
|
|
// Use absolute projection (both directions from centroid are interesting)
|
|
|
|
|
|
let along = projection.abs();
|
|
|
|
|
|
if perp_dist < 1e-12 {
|
|
|
|
|
|
along
|
|
|
|
|
|
} else {
|
|
|
|
|
|
along / (1.0 + perp_dist)
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// No direction (single seed or all seeds coincide): use distance from centroid
|
|
|
|
|
|
let dist: f64 = deviation.iter().map(|d| d * d).sum::<f64>().sqrt();
|
|
|
|
|
|
1.0 / (1.0 + dist)
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Bonus for being connected to seeds in the graph
|
|
|
|
|
|
let graph_bonus: f64 = graph.neighbors(key).iter()
|
|
|
|
|
|
.filter(|(n, _)| seed_set.contains(n.as_str()))
|
|
|
|
|
|
.map(|(_, s)| *s as f64 * 0.1)
|
|
|
|
|
|
.sum();
|
|
|
|
|
|
|
|
|
|
|
|
(key.clone(), score + graph_bonus)
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
|
|
candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
|
|
|
|
|
|
candidates.truncate(k);
|
|
|
|
|
|
|
|
|
|
|
|
if debug {
|
|
|
|
|
|
for (key, score) in candidates.iter().take(15) {
|
|
|
|
|
|
println!(" [{:.4}] {}", score, key);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Merge with original seeds
|
|
|
|
|
|
let mut results = seeds.to_vec();
|
|
|
|
|
|
for (key, score) in candidates {
|
|
|
|
|
|
results.push((key, score));
|
|
|
|
|
|
}
|
|
|
|
|
|
results.sort_by(|a, b| b.1.total_cmp(&a.1));
|
|
|
|
|
|
results
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
fn spreading_activation(
|
|
|
|
|
|
seeds: &[(String, f64)],
|
|
|
|
|
|
graph: &Graph,
|
|
|
|
|
|
store: &impl StoreView,
|
|
|
|
|
|
max_hops: u32,
|
|
|
|
|
|
edge_decay: f64,
|
|
|
|
|
|
min_activation: f64,
|
|
|
|
|
|
) -> Vec<(String, f64)> {
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
let mut activation: HashMap<String, f64> = HashMap::new();
|
|
|
|
|
|
let mut queue: VecDeque<(String, f64, u32)> = VecDeque::new();
|
|
|
|
|
|
|
|
|
|
|
|
for (key, act) in seeds {
|
|
|
|
|
|
let current = activation.entry(key.clone()).or_insert(0.0);
|
|
|
|
|
|
if *act > *current {
|
|
|
|
|
|
*current = *act;
|
|
|
|
|
|
queue.push_back((key.clone(), *act, 0));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
while let Some((key, act, depth)) = queue.pop_front() {
|
2026-03-09 01:19:04 -04:00
|
|
|
|
if depth >= max_hops { continue; }
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
|
|
|
|
|
for (neighbor, strength) in graph.neighbors(&key) {
|
2026-03-03 01:33:31 -05:00
|
|
|
|
let neighbor_weight = store.node_weight(neighbor.as_str());
|
2026-03-09 01:19:04 -04:00
|
|
|
|
let propagated = act * edge_decay * neighbor_weight * strength as f64;
|
|
|
|
|
|
if propagated < min_activation { continue; }
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
|
|
|
|
|
let current = activation.entry(neighbor.clone()).or_insert(0.0);
|
|
|
|
|
|
if propagated > *current {
|
|
|
|
|
|
*current = propagated;
|
|
|
|
|
|
queue.push_back((neighbor.clone(), propagated, depth + 1));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let mut results: Vec<_> = activation.into_iter().collect();
|
2026-03-03 12:07:04 -05:00
|
|
|
|
results.sort_by(|a, b| b.1.total_cmp(&a.1));
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
results
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
/// Search with weighted terms: exact key matching + spectral projection.
///
/// Terms are matched against node keys. Matching nodes become seeds,
/// scored by term_weight × node_weight. Seeds are then projected into
/// spectral space to find nearby nodes, with link weights modulating distance.
///
/// Convenience wrapper: runs the shared inner search with debug output
/// disabled and the result count capped at 5. Use `search_weighted_debug`
/// to control both.
pub fn search_weighted(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
) -> Vec<SearchResult> {
    search_weighted_inner(terms, store, false, 5)
}
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
/// Like search_weighted but with debug output and configurable result count.
///
/// `max_results` caps how many results are returned; debug diagnostics
/// (seed listing, per-stage output) are printed to stdout.
pub fn search_weighted_debug(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
    max_results: usize,
) -> Vec<SearchResult> {
    search_weighted_inner(terms, store, true, max_results)
}
|
|
|
|
|
|
|
|
|
|
|
|
fn search_weighted_inner(
|
|
|
|
|
|
terms: &BTreeMap<String, f64>,
|
|
|
|
|
|
store: &impl StoreView,
|
|
|
|
|
|
debug: bool,
|
|
|
|
|
|
max_results: usize,
|
|
|
|
|
|
) -> Vec<SearchResult> {
|
|
|
|
|
|
let graph = crate::graph::build_graph_fast(store);
|
|
|
|
|
|
let (seeds, direct_hits) = match_seeds(terms, store);
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
|
|
|
|
|
if seeds.is_empty() {
|
|
|
|
|
|
return Vec::new();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
if debug {
|
|
|
|
|
|
println!("\n[search] === SEEDS ({}) ===", seeds.len());
|
|
|
|
|
|
let mut sorted_seeds = seeds.clone();
|
|
|
|
|
|
sorted_seeds.sort_by(|a, b| b.1.total_cmp(&a.1));
|
|
|
|
|
|
for (key, score) in sorted_seeds.iter().take(20) {
|
|
|
|
|
|
println!(" {:.4} {}", score, key);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
2026-03-09 01:19:04 -04:00
|
|
|
|
// Default pipeline: spectral → spread (legacy behavior)
|
|
|
|
|
|
let pipeline = vec![
|
|
|
|
|
|
AlgoStage { algo: Algorithm::Spectral, params: HashMap::new() },
|
|
|
|
|
|
AlgoStage { algo: Algorithm::Spread, params: HashMap::new() },
|
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
|
|
let raw_results = run_pipeline(&pipeline, seeds, &graph, store, debug, max_results);
|
|
|
|
|
|
|
|
|
|
|
|
raw_results.into_iter()
|
|
|
|
|
|
.take(max_results)
|
|
|
|
|
|
.map(|(key, activation)| {
|
|
|
|
|
|
let is_direct = direct_hits.contains(&key);
|
|
|
|
|
|
SearchResult { key, activation, is_direct, snippet: None }
|
|
|
|
|
|
}).collect()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Search with equal-weight terms (for interactive use).
|
|
|
|
|
|
pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
|
|
|
|
|
|
let terms: BTreeMap<String, f64> = query.split_whitespace()
|
|
|
|
|
|
.map(|t| (t.to_lowercase(), 1.0))
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
search_weighted(&terms, store)
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Extract meaningful search terms from natural language.
///
/// Lowercases the text, splits on non-alphanumeric characters, drops
/// common English stop words and anything 2 bytes or shorter, and returns
/// up to `max_terms` surviving words joined by single spaces.
pub fn extract_query_terms(text: &str, max_terms: usize) -> String {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
        "have", "has", "had", "will", "would", "could", "should", "can",
        "may", "might", "shall", "been", "being", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
        "no", "if", "then", "than", "that", "this", "it", "its", "my",
        "your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
        "what", "how", "why", "when", "where", "about", "just", "let",
        "want", "tell", "show", "think", "know", "see", "look", "make",
        "get", "go", "some", "any", "all", "very", "really", "also", "too",
        "so", "up", "out", "here", "there",
    ];

    let lowered = text.to_lowercase();
    let mut picked: Vec<&str> = Vec::new();
    for word in lowered.split(|c: char| !c.is_alphanumeric()) {
        if picked.len() == max_terms {
            break;
        }
        // Length check also discards the empty fragments that split()
        // produces between adjacent separators.
        if word.len() <= 2 || STOP_WORDS.contains(&word) {
            continue;
        }
        picked.push(word);
    }
    picked.join(" ")
}
|
2026-03-05 22:23:03 -05:00
|
|
|
|
|
|
|
|
|
|
/// Format search results as text lines (for hook consumption).
|
|
|
|
|
|
pub fn format_results(results: &[SearchResult]) -> String {
|
|
|
|
|
|
let mut out = String::new();
|
|
|
|
|
|
for (i, r) in results.iter().enumerate().take(5) {
|
|
|
|
|
|
let marker = if r.is_direct { "→" } else { " " };
|
|
|
|
|
|
out.push_str(&format!("{}{:2}. [{:.2}/{:.2}] {}",
|
|
|
|
|
|
marker, i + 1, r.activation, r.activation, r.key));
|
|
|
|
|
|
out.push('\n');
|
|
|
|
|
|
if let Some(ref snippet) = r.snippet {
|
|
|
|
|
|
out.push_str(&format!(" {}\n", snippet));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
out
|
|
|
|
|
|
}
|