poc-memory v0.4.0: graph-structured memory with consolidation pipeline

Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
  schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
  link-add, link-impact, decay, consolidate-session, etc.

Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
ProofOfConcept 2026-02-28 22:17:00 -05:00
commit 23fac4e5fe
35 changed files with 9388 additions and 0 deletions

src/bin/memory-search.rs (new file, 186 lines)

@@ -0,0 +1,186 @@
// memory-search: hook binary for ambient memory retrieval
//
// Reads JSON from stdin (Claude Code UserPromptSubmit hook format),
// searches memory for relevant entries, outputs results tagged with
// an anti-injection cookie.
//
// This is a thin wrapper that delegates to the poc-memory search
// engine but formats output for the hook protocol.
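//
// Illustrative example (field names taken from the parsing code below;
// the sample values are made up):
//   stdin:  {"prompt":"how does memory decay work","session_id":"abc123"}
//   stdout: Recalled memories [k3F9xQ2mT7rLp0aZ]:
//           → 1. [0.83/0.83] identity.md (c4)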
use std::collections::HashSet;
use std::fs;
use std::io::{self, Read, Write};
use std::path::PathBuf;
use std::process::Command;
fn main() {
let mut input = String::new();
io::stdin().read_to_string(&mut input).unwrap_or_default();
let json: serde_json::Value = match serde_json::from_str(&input) {
Ok(v) => v,
Err(_) => return,
};
let prompt = json["prompt"].as_str().unwrap_or("");
let session_id = json["session_id"].as_str().unwrap_or("");
if prompt.is_empty() || session_id.is_empty() {
return;
}
// Skip short prompts
let word_count = prompt.split_whitespace().count();
if word_count < 3 {
return;
}
// Skip system/idle prompts
for prefix in &["Kent is AFK", "You're on your own", "IRC mention"] {
if prompt.starts_with(prefix) {
return;
}
}
// Extract search terms (strip stop words)
let query = extract_query_terms(prompt, 3);
if query.is_empty() {
return;
}
// Run poc-memory search
let output = Command::new("poc-memory")
.args(["search", &query])
.output();
let search_output = match output {
Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
_ => return,
};
if search_output.trim().is_empty() {
return;
}
// Session state for dedup
let state_dir = PathBuf::from("/tmp/claude-memory-search");
fs::create_dir_all(&state_dir).ok();
let cookie = load_or_create_cookie(&state_dir, session_id);
let seen = load_seen(&state_dir, session_id);
// Parse search output and filter
let mut result_output = String::new();
let mut count = 0;
let max_entries = 5;
for line in search_output.lines() {
if count >= max_entries { break; }
// Lines starting with → or space+number are results
let trimmed = line.trim();
if trimmed.is_empty() { continue; }
// Extract key from result line like "→ 1. [0.83/0.83] identity.md (c4)"
if let Some(key) = extract_key_from_line(trimmed) {
if seen.contains(&key) { continue; }
mark_seen(&state_dir, session_id, &key);
result_output.push_str(line);
result_output.push('\n');
count += 1;
} else if count > 0 {
// Snippet line following a result
result_output.push_str(line);
result_output.push('\n');
}
}
if count == 0 { return; }
println!("Recalled memories [{}]:", cookie);
print!("{}", result_output);
}
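// Example (illustrative): extract_query_terms("How does spaced repetition decay weights?", 3)
// lowercases, splits on non-alphanumerics, drops stop words and words shorter
// than three characters, and returns "spaced repetition decay".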
fn extract_query_terms(text: &str, max_terms: usize) -> String {
const STOP_WORDS: &[&str] = &[
"the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
"have", "has", "had", "will", "would", "could", "should", "can",
"may", "might", "shall", "been", "being", "to", "of", "in", "for",
"on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
"no", "if", "then", "than", "that", "this", "it", "its", "my",
"your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
"what", "how", "why", "when", "where", "about", "just", "let",
"want", "tell", "show", "think", "know", "see", "look", "make",
"get", "go", "some", "any", "all", "very", "really", "also", "too",
"so", "up", "out", "here", "there",
];
text.to_lowercase()
.split(|c: char| !c.is_alphanumeric())
.filter(|w| !w.is_empty() && w.len() > 2 && !STOP_WORDS.contains(w))
.take(max_terms)
.collect::<Vec<_>>()
.join(" ")
}
fn extract_key_from_line(line: &str) -> Option<String> {
// Match lines like "→ 1. [0.83/0.83] identity.md (c4)"
// or " 1. [0.83/0.83] identity.md (c4)"
let after_bracket = line.find("] ")?;
let rest = &line[after_bracket + 2..];
// Key is from here until optional " (c" or end of line
let key_end = rest.find(" (c").unwrap_or(rest.len());
let key = rest[..key_end].trim();
if key.is_empty() || !key.contains('.') {
None
} else {
Some(key.to_string())
}
}
fn load_or_create_cookie(dir: &PathBuf, session_id: &str) -> String {
let path = dir.join(format!("cookie-{}", session_id));
if path.exists() {
fs::read_to_string(&path).unwrap_or_default().trim().to_string()
} else {
let cookie = generate_cookie();
fs::write(&path, &cookie).ok();
cookie
}
}
fn generate_cookie() -> String {
let out = Command::new("head")
.args(["-c", "12", "/dev/urandom"])
.output()
.expect("failed to read urandom");
out.stdout.iter()
.map(|b| {
let idx = (*b as usize) % 62;
if idx < 10 { (b'0' + idx as u8) as char }
else if idx < 36 { (b'a' + (idx - 10) as u8) as char }
else { (b'A' + (idx - 36) as u8) as char }
})
.take(16)
.collect()
}
fn load_seen(dir: &PathBuf, session_id: &str) -> HashSet<String> {
let path = dir.join(format!("seen-{}", session_id));
if path.exists() {
fs::read_to_string(path)
.unwrap_or_default()
.lines()
.map(|s| s.to_string())
.collect()
} else {
HashSet::new()
}
}
fn mark_seen(dir: &PathBuf, session_id: &str, key: &str) {
let path = dir.join(format!("seen-{}", session_id));
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
writeln!(f, "{}", key).ok();
}
}

src/capnp_store.rs (new file, 1067 lines)

File diff suppressed because it is too large.

src/graph.rs (new file, 685 lines)

@@ -0,0 +1,685 @@
// Graph algorithms: clustering coefficient, community detection (label
// propagation), schema fit scoring, small-world metrics, consolidation
// priority scoring.
//
// The Graph is built from the Store's nodes + relations. Edges are
// undirected for clustering/community (even causal edges count as
// connections), but relation type and direction are preserved for
// specific queries.
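//
// Typical use (illustrative sketch, assuming a loaded Store):
//   let g = build_graph(&store);
//   let sigma = g.small_world_sigma();
//   let fits = schema_fit_all(&g);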
use crate::capnp_store::{Store, RelationType};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
/// Weighted edge in the graph
#[derive(Clone, Debug)]
pub struct Edge {
pub target: String,
pub strength: f32,
pub rel_type: RelationType,
}
/// The in-memory graph built from store nodes + relations
pub struct Graph {
/// Adjacency list: node key → list of edges
adj: HashMap<String, Vec<Edge>>,
/// All node keys
keys: HashSet<String>,
/// Community labels (from label propagation)
communities: HashMap<String, u32>,
}
impl Graph {
pub fn nodes(&self) -> &HashSet<String> {
&self.keys
}
pub fn degree(&self, key: &str) -> usize {
self.adj.get(key).map(|e| e.len()).unwrap_or(0)
}
pub fn edge_count(&self) -> usize {
self.adj.values().map(|e| e.len()).sum::<usize>() / 2
}
/// All neighbor keys with strengths
pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> {
self.adj.get(key)
.map(|edges| edges.iter().map(|e| (&e.target, e.strength)).collect())
.unwrap_or_default()
}
/// Just neighbor keys
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
self.adj.get(key)
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
.unwrap_or_default()
}
pub fn community_count(&self) -> usize {
let labels: HashSet<_> = self.communities.values().collect();
labels.len()
}
pub fn communities(&self) -> &HashMap<String, u32> {
&self.communities
}
/// Local clustering coefficient: fraction of a node's neighbors
/// that are also neighbors of each other.
/// cc(v) = 2E / (deg * (deg - 1))
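/// Example: a node with 4 neighbors where 2 of the 6 possible neighbor
/// pairs are connected has cc = (2 * 2) / (4 * 3) = 0.333.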
pub fn clustering_coefficient(&self, key: &str) -> f32 {
let neighbors = self.neighbor_keys(key);
let deg = neighbors.len();
if deg < 2 {
return 0.0;
}
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
let mut triangles = 0u32;
for i in 0..neighbor_vec.len() {
for j in (i + 1)..neighbor_vec.len() {
let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
if ni_neighbors.contains(neighbor_vec[j]) {
triangles += 1;
}
}
}
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
}
/// Average clustering coefficient across all nodes with deg >= 2
pub fn avg_clustering_coefficient(&self) -> f32 {
let mut sum = 0.0f32;
let mut count = 0u32;
for key in &self.keys {
if self.degree(key) >= 2 {
sum += self.clustering_coefficient(key);
count += 1;
}
}
if count == 0 { 0.0 } else { sum / count as f32 }
}
/// Average shortest path length (sampled BFS from up to 100 nodes)
pub fn avg_path_length(&self) -> f32 {
let sample: Vec<&String> = self.keys.iter().take(100).collect();
if sample.is_empty() { return 0.0; }
let mut total_dist = 0u64;
let mut total_pairs = 0u64;
for &start in &sample {
let dists = self.bfs_distances(start);
for d in dists.values() {
if *d > 0 {
total_dist += *d as u64;
total_pairs += 1;
}
}
}
if total_pairs == 0 { 0.0 } else { total_dist as f32 / total_pairs as f32 }
}
fn bfs_distances(&self, start: &str) -> HashMap<String, u32> {
let mut dist = HashMap::new();
let mut queue = VecDeque::new();
dist.insert(start.to_string(), 0u32);
queue.push_back(start.to_string());
while let Some(node) = queue.pop_front() {
let d = dist[&node];
for neighbor in self.neighbor_keys(&node) {
if !dist.contains_key(neighbor) {
dist.insert(neighbor.to_string(), d + 1);
queue.push_back(neighbor.to_string());
}
}
}
dist
}
/// Power-law exponent α of the degree distribution.
///
/// Estimated via MLE: α = 1 + n / Σ ln(k_i / (k_min - 0.5))
/// α ≈ 2: extreme hub dominance (fragile)
/// α ≈ 3: healthy scale-free
/// α > 3: approaching random graph (egalitarian)
pub fn degree_power_law_exponent(&self) -> f32 {
let mut degrees: Vec<usize> = self.keys.iter()
.map(|k| self.degree(k))
.filter(|&d| d > 0) // exclude isolates
.collect();
if degrees.len() < 10 { return 0.0; } // not enough data
degrees.sort_unstable();
let k_min = degrees[0] as f64;
if k_min < 1.0 { return 0.0; }
let n = degrees.len() as f64;
let sum_ln: f64 = degrees.iter()
.map(|&k| (k as f64 / (k_min - 0.5)).ln())
.sum();
if sum_ln <= 0.0 { return 0.0; }
(1.0 + n / sum_ln) as f32
}
/// Gini coefficient of the degree distribution.
///
/// 0 = perfectly egalitarian (all nodes same degree)
/// 1 = maximally unequal (one node has all edges)
/// Measures hub concentration independent of distribution shape.
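///
/// Worked example: sorted degrees [1, 1, 1, 1, 4] give
/// Gini = (2 * 30) / (5 * 8) - 6/5 = 1.5 - 1.2 = 0.3.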
pub fn degree_gini(&self) -> f32 {
let mut degrees: Vec<f64> = self.keys.iter()
.map(|k| self.degree(k) as f64)
.collect();
let n = degrees.len();
if n < 2 { return 0.0; }
degrees.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mean = degrees.iter().sum::<f64>() / n as f64;
if mean < 1e-10 { return 0.0; }
// Gini = (2 Σ i·x_i) / (n Σ x_i) - (n+1)/n
let weighted_sum: f64 = degrees.iter().enumerate()
.map(|(i, &d)| (i as f64 + 1.0) * d)
.sum();
let total = degrees.iter().sum::<f64>();
let gini = (2.0 * weighted_sum) / (n as f64 * total) - (n as f64 + 1.0) / n as f64;
gini.max(0.0) as f32
}
/// Small-world coefficient σ = (C/C_rand) / (L/L_rand)
/// C_rand ≈ <k>/n, L_rand ≈ ln(n)/ln(<k>)
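/// Example: n=100, <k>=4, C=0.2, L=3.5 → C_rand=0.04,
/// L_rand=ln(100)/ln(4)≈3.32, so σ=(0.2/0.04)/(3.5/3.32)≈4.7.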
pub fn small_world_sigma(&self) -> f32 {
let n = self.keys.len() as f32;
if n < 10.0 { return 0.0; }
let avg_degree = self.adj.values()
.map(|e| e.len() as f32)
.sum::<f32>() / n;
if avg_degree < 1.0 { return 0.0; }
let c = self.avg_clustering_coefficient();
let l = self.avg_path_length();
let c_rand = avg_degree / n;
let l_rand = n.ln() / avg_degree.ln();
if c_rand < 1e-10 || l_rand < 1e-10 || l < 1e-10 {
return 0.0;
}
(c / c_rand) / (l / l_rand)
}
}
/// Impact of adding a hypothetical edge
#[derive(Debug)]
pub struct LinkImpact {
pub source: String,
pub target: String,
pub source_deg: usize,
pub target_deg: usize,
/// Is this a hub link? (either endpoint in top 5% by degree)
pub is_hub_link: bool,
/// Are both endpoints in the same community?
pub same_community: bool,
/// Change in clustering coefficient for source
pub delta_cc_source: f32,
/// Change in clustering coefficient for target
pub delta_cc_target: f32,
/// Change in degree Gini (positive = more hub-dominated)
pub delta_gini: f32,
/// Qualitative assessment
pub assessment: &'static str,
}
impl Graph {
/// Simulate adding an edge and report impact on topology metrics.
///
/// Doesn't modify the graph — computes what would change if the
/// edge were added.
pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact {
let source_deg = self.degree(source);
let target_deg = self.degree(target);
// Hub threshold: top 5% by degree
let mut all_degrees: Vec<usize> = self.keys.iter()
.map(|k| self.degree(k))
.collect();
all_degrees.sort_unstable();
let hub_threshold = if all_degrees.len() >= 20 {
all_degrees[all_degrees.len() * 95 / 100]
} else {
usize::MAX // can't define hubs with <20 nodes
};
let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold;
// Community check
let sc = self.communities.get(source);
let tc = self.communities.get(target);
let same_community = match (sc, tc) {
(Some(a), Some(b)) => a == b,
_ => false,
};
// CC change for source: adding target as neighbor changes the
// triangle count. New triangles form for each node that's a
// neighbor of BOTH source and target.
let source_neighbors = self.neighbor_keys(source);
let target_neighbors = self.neighbor_keys(target);
let shared_neighbors = source_neighbors.intersection(&target_neighbors).count();
let cc_before_source = self.clustering_coefficient(source);
let cc_before_target = self.clustering_coefficient(target);
// Estimate new CC for source after adding edge
let new_source_deg = source_deg + 1;
let new_source_triangles = if source_deg >= 2 {
// Current triangles + new ones from shared neighbors
let current_triangles = (cc_before_source
* source_deg as f32 * (source_deg as f32 - 1.0) / 2.0) as u32;
current_triangles + shared_neighbors as u32
} else {
shared_neighbors as u32
};
let cc_after_source = if new_source_deg >= 2 {
(2.0 * new_source_triangles as f32)
/ (new_source_deg as f32 * (new_source_deg as f32 - 1.0))
} else {
0.0
};
let new_target_deg = target_deg + 1;
let new_target_triangles = if target_deg >= 2 {
let current_triangles = (cc_before_target
* target_deg as f32 * (target_deg as f32 - 1.0) / 2.0) as u32;
current_triangles + shared_neighbors as u32
} else {
shared_neighbors as u32
};
let cc_after_target = if new_target_deg >= 2 {
(2.0 * new_target_triangles as f32)
/ (new_target_deg as f32 * (new_target_deg as f32 - 1.0))
} else {
0.0
};
// Gini change via influence function:
// IF(x; Gini, F) = (2F(x) - 1) * x/μ - Gini - 1
// Adding an edge increments two degrees. The net ΔGini is the sum
// of influence contributions from both endpoints shifting up by 1.
let gini_before = self.degree_gini();
let n = self.keys.len();
let total_degree: f64 = self.keys.iter()
.map(|k| self.degree(k) as f64)
.sum();
let mean_deg = if n > 0 { total_degree / n as f64 } else { 1.0 };
// CDF at each endpoint's degree: fraction of nodes with degree ≤ d
let delta_gini = if mean_deg > 1e-10 && n >= 2 {
// Count nodes with degree ≤ source_deg and ≤ target_deg
let f_source = self.keys.iter()
.filter(|k| self.degree(k) <= source_deg)
.count() as f64 / n as f64;
let f_target = self.keys.iter()
.filter(|k| self.degree(k) <= target_deg)
.count() as f64 / n as f64;
// Influence of incrementing source's degree by 1
let new_source = (source_deg + 1) as f64;
let if_source = (2.0 * f_source - 1.0) * new_source / mean_deg
- gini_before as f64 - 1.0;
// Influence of incrementing target's degree by 1
let new_target = (target_deg + 1) as f64;
let if_target = (2.0 * f_target - 1.0) * new_target / mean_deg
- gini_before as f64 - 1.0;
// Scale: each point contributes 1/n to the distribution
((if_source + if_target) / n as f64) as f32
} else {
0.0f32
};
// Qualitative assessment
let assessment = if is_hub_link && same_community {
"hub-reinforcing: strengthens existing star topology"
} else if is_hub_link && !same_community {
"hub-bridging: cross-community but through a hub"
} else if !is_hub_link && same_community && shared_neighbors > 0 {
"lateral-clustering: strengthens local mesh topology"
} else if !is_hub_link && !same_community {
"lateral-bridging: best kind — cross-community lateral link"
} else if !is_hub_link && same_community {
"lateral-local: connects peripheral nodes in same community"
} else {
"neutral"
};
LinkImpact {
source: source.to_string(),
target: target.to_string(),
source_deg,
target_deg,
is_hub_link,
same_community,
delta_cc_source: cc_after_source - cc_before_source,
delta_cc_target: cc_after_target - cc_before_target,
delta_gini,
assessment,
}
}
}
/// Build graph from store data
pub fn build_graph(store: &Store) -> Graph {
let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
let keys: HashSet<String> = store.nodes.keys().cloned().collect();
// Build adjacency from relations
for rel in &store.relations {
let source_key = &rel.source_key;
let target_key = &rel.target_key;
// Both keys must exist as nodes
if !keys.contains(source_key) || !keys.contains(target_key) {
continue;
}
// Add bidirectional edges (even for causal — direction is metadata)
adj.entry(source_key.clone()).or_default().push(Edge {
target: target_key.clone(),
strength: rel.strength,
rel_type: rel.rel_type,
});
adj.entry(target_key.clone()).or_default().push(Edge {
target: source_key.clone(),
strength: rel.strength,
rel_type: rel.rel_type,
});
}
// Run community detection
let communities = label_propagation(&keys, &adj, 20);
Graph { adj, keys, communities }
}
/// Label propagation community detection.
///
/// Each node starts with its own label. Each iteration: adopt the most
/// common label among neighbors (weighted by edge strength). Iterate
/// until stable or max_iterations.
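///
/// Example: a triangle {a,b,c} plus a disconnected edge {d,e} converges to
/// one label for the triangle and one for the pair → 2 communities.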
fn label_propagation(
keys: &HashSet<String>,
adj: &HashMap<String, Vec<Edge>>,
max_iterations: u32,
) -> HashMap<String, u32> {
// Initialize: each node gets its own label
let key_vec: Vec<String> = keys.iter().cloned().collect();
let mut labels: HashMap<String, u32> = key_vec.iter()
.enumerate()
.map(|(i, k)| (k.clone(), i as u32))
.collect();
for _iter in 0..max_iterations {
let mut changed = false;
for key in &key_vec {
let edges = match adj.get(key) {
Some(e) => e,
None => continue,
};
if edges.is_empty() { continue; }
// Count weighted votes for each label
let mut votes: HashMap<u32, f32> = HashMap::new();
for edge in edges {
if let Some(&label) = labels.get(&edge.target) {
*votes.entry(label).or_default() += edge.strength;
}
}
// Adopt the label with most votes
if let Some((&best_label, _)) = votes.iter()
.max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
{
let current = labels[key];
if best_label != current {
labels.insert(key.clone(), best_label);
changed = true;
}
}
}
if !changed { break; }
}
// Compact labels to 0..n
let mut label_map: HashMap<u32, u32> = HashMap::new();
let mut next_id = 0;
for label in labels.values_mut() {
let new_label = *label_map.entry(*label).or_insert_with(|| {
let id = next_id;
next_id += 1;
id
});
*label = new_label;
}
labels
}
/// Schema fit: for a node, measure how well-connected its neighbors are
/// to each other. High density + high CC among neighbors = good schema fit.
pub fn schema_fit(graph: &Graph, key: &str) -> f32 {
let neighbors = graph.neighbor_keys(key);
let n = neighbors.len();
if n < 2 {
return 0.0; // isolated or leaf — no schema context
}
// Count edges among neighbors
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
let mut inter_edges = 0u32;
for i in 0..neighbor_vec.len() {
for j in (i + 1)..neighbor_vec.len() {
let ni_neighbors = graph.neighbor_keys(neighbor_vec[i]);
if ni_neighbors.contains(neighbor_vec[j]) {
inter_edges += 1;
}
}
}
let max_edges = (n * (n - 1)) / 2;
let density = if max_edges == 0 { 0.0 } else {
inter_edges as f32 / max_edges as f32
};
// Combine neighborhood density with own CC
let cc = graph.clustering_coefficient(key);
(density + cc) / 2.0
}
/// Compute schema fit for all nodes
pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
graph.nodes().iter()
.map(|key| (key.clone(), schema_fit(graph, key)))
.collect()
}
/// A snapshot of graph topology metrics, for tracking evolution over time
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetricsSnapshot {
pub timestamp: f64,
pub date: String,
pub nodes: usize,
pub edges: usize,
pub communities: usize,
pub sigma: f32,
pub alpha: f32,
pub gini: f32,
pub avg_cc: f32,
pub avg_path_length: f32,
pub avg_schema_fit: f32,
}
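// Snapshots are appended to ~/.claude/memory/metrics.jsonl, one JSON object
// per line; an illustrative (made-up) entry:
// {"timestamp":1740787200.0,"date":"2026-02-28 22:00","nodes":1874,"edges":3200,
//  "communities":41,"sigma":2.41,"alpha":2.70,"gini":0.42,"avg_cc":0.18,
//  "avg_path_length":3.1,"avg_schema_fit":0.21}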
fn metrics_log_path() -> std::path::PathBuf {
let home = std::env::var("HOME").unwrap_or_default();
std::path::PathBuf::from(home).join(".claude/memory/metrics.jsonl")
}
/// Load previous metrics snapshots
pub fn load_metrics_history() -> Vec<MetricsSnapshot> {
let path = metrics_log_path();
let content = match std::fs::read_to_string(&path) {
Ok(c) => c,
Err(_) => return Vec::new(),
};
content.lines()
.filter_map(|line| serde_json::from_str(line).ok())
.collect()
}
/// Append a metrics snapshot to the log
pub fn save_metrics_snapshot(snap: &MetricsSnapshot) {
let path = metrics_log_path();
if let Ok(json) = serde_json::to_string(snap) {
use std::io::Write;
if let Ok(mut f) = std::fs::OpenOptions::new()
.create(true).append(true).open(&path)
{
let _ = writeln!(f, "{}", json);
}
}
}
/// Health report: summary of graph metrics
pub fn health_report(graph: &Graph, store: &Store) -> String {
let n = graph.nodes().len();
let e = graph.edge_count();
let avg_cc = graph.avg_clustering_coefficient();
let avg_pl = graph.avg_path_length();
let sigma = graph.small_world_sigma();
let communities = graph.community_count();
// Community sizes
let mut comm_sizes: HashMap<u32, usize> = HashMap::new();
for label in graph.communities().values() {
*comm_sizes.entry(*label).or_default() += 1;
}
let mut sizes: Vec<usize> = comm_sizes.values().copied().collect();
sizes.sort_unstable_by(|a, b| b.cmp(a));
// Degree distribution
let mut degrees: Vec<usize> = graph.nodes().iter()
.map(|k| graph.degree(k))
.collect();
degrees.sort_unstable();
let max_deg = degrees.last().copied().unwrap_or(0);
let median_deg = if degrees.is_empty() { 0 } else { degrees[degrees.len() / 2] };
let avg_deg = if n == 0 { 0.0 } else {
degrees.iter().sum::<usize>() as f64 / n as f64
};
// Topology metrics
let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini();
// Schema fit distribution
let fits = schema_fit_all(graph);
let avg_fit = if fits.is_empty() { 0.0 } else {
fits.values().sum::<f32>() / fits.len() as f32
};
let low_fit = fits.values().filter(|&&f| f < 0.1).count();
// Category breakdown
let cats = store.category_counts();
// Snapshot current metrics and log
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64();
let date = {
let out = std::process::Command::new("date").arg("+%Y-%m-%d %H:%M").output();
match out {
Ok(o) => String::from_utf8_lossy(&o.stdout).trim().to_string(),
Err(_) => String::new(),
}
};
let snap = MetricsSnapshot {
timestamp: now,
date: date.clone(),
nodes: n, edges: e, communities,
sigma, alpha, gini, avg_cc,
avg_path_length: avg_pl,
avg_schema_fit: avg_fit,
};
save_metrics_snapshot(&snap);
// Load history for deltas
let history = load_metrics_history();
let prev = if history.len() >= 2 {
Some(&history[history.len() - 2]) // second-to-last (last is the one we just wrote)
} else {
None
};
fn delta(current: f32, prev: Option<f32>) -> String {
match prev {
Some(p) => {
let d = current - p;
if d.abs() < 0.001 { String::new() }
else { format!(" ({:+.3})", d) }
}
None => String::new(),
}
}
let sigma_d = delta(sigma, prev.map(|p| p.sigma));
let alpha_d = delta(alpha, prev.map(|p| p.alpha));
let gini_d = delta(gini, prev.map(|p| p.gini));
let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
let fit_d = delta(avg_fit, prev.map(|p| p.avg_schema_fit));
let mut report = format!(
"Memory Health Report
====================
Nodes: {n} Relations: {e} Communities: {communities}
Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
Clustering coefficient (avg): {avg_cc:.4}{cc_d}
Average path length: {avg_pl:.2}
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)
Community sizes (top 5): {top5}
Schema fit: avg={avg_fit:.3}{fit_d} low-fit (<0.1): {low_fit} nodes
Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
top5 = sizes.iter().take(5)
.map(|s| s.to_string())
.collect::<Vec<_>>()
.join(", "),
core = cats.get("core").unwrap_or(&0),
tech = cats.get("tech").unwrap_or(&0),
gen = cats.get("gen").unwrap_or(&0),
obs = cats.get("obs").unwrap_or(&0),
task = cats.get("task").unwrap_or(&0),
);
// Show history trend if we have enough data points
if history.len() >= 3 {
report.push_str("\n\nMetrics history (last 5):\n");
for snap in history.iter().rev().take(5).collect::<Vec<_>>().into_iter().rev() {
report.push_str(&format!(" {} σ={:.3} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc, snap.avg_schema_fit));
}
}
report
}

src/main.rs (new file, 766 lines)

@@ -0,0 +1,766 @@
#![allow(dead_code)]
// poc-memory: graph-structured memory with append-only Cap'n Proto storage
//
// Architecture:
// nodes.capnp - append-only content node log
// relations.capnp - append-only relation log
// state.bin - derived KV cache (rebuilt from logs when stale)
//
// Graph algorithms: clustering coefficient, community detection (label
// propagation), schema fit scoring, small-world metrics, consolidation
// priority. Text similarity via BM25 with Porter stemming.
//
// Neuroscience-inspired: spaced repetition replay, emotional gating,
// interference detection, schema assimilation, reconsolidation.
mod capnp_store;
mod graph;
mod search;
mod similarity;
mod migrate;
mod neuro;
pub mod memory_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
}
use std::env;
use std::process;
fn main() {
let args: Vec<String> = env::args().collect();
if args.len() < 2 {
usage();
process::exit(1);
}
let result = match args[1].as_str() {
"search" => cmd_search(&args[2..]),
"init" => cmd_init(),
"migrate" => cmd_migrate(),
"health" => cmd_health(),
"status" => cmd_status(),
"graph" => cmd_graph(),
"used" => cmd_used(&args[2..]),
"wrong" => cmd_wrong(&args[2..]),
"gap" => cmd_gap(&args[2..]),
"categorize" => cmd_categorize(&args[2..]),
"decay" => cmd_decay(),
"consolidate-batch" => cmd_consolidate_batch(&args[2..]),
"log" => cmd_log(),
"params" => cmd_params(),
"link" => cmd_link(&args[2..]),
"replay-queue" => cmd_replay_queue(&args[2..]),
"interference" => cmd_interference(&args[2..]),
"link-add" => cmd_link_add(&args[2..]),
"link-impact" => cmd_link_impact(&args[2..]),
"consolidate-session" => cmd_consolidate_session(),
"daily-check" => cmd_daily_check(),
"apply-agent" => cmd_apply_agent(&args[2..]),
"digest" => cmd_digest(&args[2..]),
"trace" => cmd_trace(&args[2..]),
_ => {
eprintln!("Unknown command: {}", args[1]);
usage();
process::exit(1);
}
};
if let Err(e) = result {
eprintln!("Error: {}", e);
process::exit(1);
}
}
fn usage() {
eprintln!("poc-memory v0.4.0 — graph-structured memory store
Commands:
search QUERY [QUERY...] Search memory (AND logic across terms)
init Scan markdown files, index all memory units
migrate Migrate from old weights.json system
health Report graph metrics (CC, communities, small-world)
status Summary of memory state
graph Show graph structure overview
used KEY Mark a memory as useful (boosts weight)
wrong KEY [CONTEXT] Mark a memory as wrong/irrelevant
gap DESCRIPTION Record a gap in memory coverage
categorize KEY CATEGORY Reassign category (core/tech/gen/obs/task)
decay Apply daily weight decay
consolidate-batch [--count N] [--auto] [--agent NAME]
Run agent consolidation on priority nodes
log Show recent retrieval log
params Show current parameters
link N Interactive graph walk from search result N
replay-queue [--count N] Show spaced repetition replay queue
interference [--threshold F]
Detect potentially confusable memory pairs
link-add SOURCE TARGET [REASON]
Add a link between two nodes
link-impact SOURCE TARGET Simulate adding an edge, report topology impact
consolidate-session Analyze metrics, plan agent allocation
daily-check Brief metrics check (for cron/notifications)
apply-agent [--all] Import pending agent results into the graph
digest daily [DATE] Generate daily episodic digest (default: today)
digest weekly [DATE] Generate weekly digest (any date in target week)
trace KEY Walk temporal links: semantic → episodic → conversation");
}
fn cmd_search(args: &[String]) -> Result<(), String> {
if args.is_empty() {
return Err("Usage: poc-memory search QUERY [QUERY...]".into());
}
let query = args.join(" ");
let mut store = capnp_store::Store::load()?;
let results = search::search(&query, &store);
if results.is_empty() {
eprintln!("No results for '{}'", query);
return Ok(());
}
// Log retrieval
store.log_retrieval(&query, &results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
store.save()?;
for (i, r) in results.iter().enumerate().take(15) {
let marker = if r.is_direct { "→" } else { "  " };
let weight = store.node_weight(&r.key).unwrap_or(0.0);
print!("{}{:2}. [{:.2}/{:.2}] {}", marker, i + 1, r.activation, weight, r.key);
if let Some(community) = store.node_community(&r.key) {
print!(" (c{})", community);
}
println!();
if let Some(ref snippet) = r.snippet {
println!(" {}", snippet);
}
}
Ok(())
}
fn cmd_init() -> Result<(), String> {
let mut store = capnp_store::Store::load()?;
let count = store.init_from_markdown()?;
store.save()?;
println!("Indexed {} memory units", count);
Ok(())
}
fn cmd_migrate() -> Result<(), String> {
migrate::migrate()
}
fn cmd_health() -> Result<(), String> {
let store = capnp_store::Store::load()?;
let g = store.build_graph();
let health = graph::health_report(&g, &store);
println!("{}", health);
Ok(())
}
fn cmd_status() -> Result<(), String> {
let store = capnp_store::Store::load()?;
let node_count = store.nodes.len();
let rel_count = store.relations.len();
let categories = store.category_counts();
println!("Nodes: {} Relations: {}", node_count, rel_count);
println!("Categories: core={} tech={} gen={} obs={} task={}",
categories.get("core").unwrap_or(&0),
categories.get("tech").unwrap_or(&0),
categories.get("gen").unwrap_or(&0),
categories.get("obs").unwrap_or(&0),
categories.get("task").unwrap_or(&0),
);
let g = store.build_graph();
println!("Graph edges: {} Communities: {}",
g.edge_count(), g.community_count());
Ok(())
}
fn cmd_graph() -> Result<(), String> {
let store = capnp_store::Store::load()?;
let g = store.build_graph();
// Show top-10 highest degree nodes
let mut degrees: Vec<_> = g.nodes().iter()
.map(|k| (k.clone(), g.degree(k)))
.collect();
degrees.sort_by(|a, b| b.1.cmp(&a.1));
println!("Top nodes by degree:");
for (key, deg) in degrees.iter().take(10) {
let cc = g.clustering_coefficient(key);
println!(" {:40} deg={:3} cc={:.3}", key, deg, cc);
}
Ok(())
}
fn cmd_used(args: &[String]) -> Result<(), String> {
if args.is_empty() {
return Err("Usage: poc-memory used KEY".into());
}
let key = args.join(" ");
let mut store = capnp_store::Store::load()?;
let resolved = store.resolve_key(&key)?;
store.mark_used(&resolved);
store.save()?;
println!("Marked '{}' as used", resolved);
Ok(())
}
fn cmd_wrong(args: &[String]) -> Result<(), String> {
if args.is_empty() {
return Err("Usage: poc-memory wrong KEY [CONTEXT]".into());
}
let key = &args[0];
let ctx = if args.len() > 1 { Some(args[1..].join(" ")) } else { None };
let mut store = capnp_store::Store::load()?;
let resolved = store.resolve_key(key)?;
store.mark_wrong(&resolved, ctx.as_deref());
store.save()?;
println!("Marked '{}' as wrong", resolved);
Ok(())
}
fn cmd_gap(args: &[String]) -> Result<(), String> {
if args.is_empty() {
return Err("Usage: poc-memory gap DESCRIPTION".into());
}
let desc = args.join(" ");
let mut store = capnp_store::Store::load()?;
store.record_gap(&desc);
store.save()?;
println!("Recorded gap: {}", desc);
Ok(())
}
fn cmd_categorize(args: &[String]) -> Result<(), String> {
if args.len() < 2 {
return Err("Usage: poc-memory categorize KEY CATEGORY".into());
}
let key = &args[0];
let cat = &args[1];
let mut store = capnp_store::Store::load()?;
let resolved = store.resolve_key(key)?;
store.categorize(&resolved, cat)?;
store.save()?;
println!("Set '{}' category to {}", resolved, cat);
Ok(())
}
fn cmd_decay() -> Result<(), String> {
let mut store = capnp_store::Store::load()?;
let (decayed, pruned) = store.decay();
store.save()?;
println!("Decayed {} nodes, pruned {} below threshold", decayed, pruned);
Ok(())
}
fn cmd_consolidate_batch(args: &[String]) -> Result<(), String> {
let mut count = 5usize;
let mut auto = false;
let mut agent: Option<String> = None;
let mut i = 0;
while i < args.len() {
match args[i].as_str() {
"--count" if i + 1 < args.len() => {
count = args[i + 1].parse().map_err(|_| "invalid count")?;
i += 2;
}
"--auto" => { auto = true; i += 1; }
"--agent" if i + 1 < args.len() => {
agent = Some(args[i + 1].clone());
i += 2;
}
_ => { i += 1; }
}
}
let store = capnp_store::Store::load()?;
if let Some(agent_name) = agent {
// Generate a specific agent prompt
let prompt = neuro::agent_prompt(&store, &agent_name, count)?;
println!("{}", prompt);
Ok(())
} else {
neuro::consolidation_batch(&store, count, auto)
}
}
fn cmd_log() -> Result<(), String> {
let store = capnp_store::Store::load()?;
for event in store.retrieval_log.iter().rev().take(20) {
println!("[{}] q=\"{}\"{} results",
event.timestamp, event.query, event.results.len());
for r in &event.results {
println!(" {}", r);
}
}
Ok(())
}
fn cmd_params() -> Result<(), String> {
let store = capnp_store::Store::load()?;
println!("decay_factor: {}", store.params.decay_factor);
println!("use_boost: {}", store.params.use_boost);
println!("prune_threshold: {}", store.params.prune_threshold);
println!("edge_decay: {}", store.params.edge_decay);
println!("max_hops: {}", store.params.max_hops);
println!("min_activation: {}", store.params.min_activation);
Ok(())
}
fn cmd_link(args: &[String]) -> Result<(), String> {
if args.is_empty() {
return Err("Usage: poc-memory link KEY".into());
}
let key = args.join(" ");
let store = capnp_store::Store::load()?;
let resolved = store.resolve_key(&key)?;
let g = store.build_graph();
println!("Neighbors of '{}':", resolved);
let neighbors = g.neighbors(&resolved);
for (i, (n, strength)) in neighbors.iter().enumerate() {
let cc = g.clustering_coefficient(n);
println!(" {:2}. [{:.2}] {} (cc={:.3})", i + 1, strength, n, cc);
}
Ok(())
}
fn cmd_replay_queue(args: &[String]) -> Result<(), String> {
let mut count = 10usize;
let mut i = 0;
while i < args.len() {
match args[i].as_str() {
"--count" if i + 1 < args.len() => {
count = args[i + 1].parse().map_err(|_| "invalid count")?;
i += 2;
}
_ => { i += 1; }
}
}
let store = capnp_store::Store::load()?;
let queue = neuro::replay_queue(&store, count);
println!("Replay queue ({} items):", queue.len());
for (i, item) in queue.iter().enumerate() {
println!(" {:2}. [{:.3}] {} (interval={}d, emotion={:.1})",
i + 1, item.priority, item.key, item.interval_days, item.emotion);
}
Ok(())
}
fn cmd_consolidate_session() -> Result<(), String> {
let store = capnp_store::Store::load()?;
let plan = neuro::consolidation_plan(&store);
println!("{}", neuro::format_plan(&plan));
Ok(())
}
fn cmd_daily_check() -> Result<(), String> {
let store = capnp_store::Store::load()?;
let report = neuro::daily_check(&store);
print!("{}", report);
Ok(())
}
fn cmd_link_add(args: &[String]) -> Result<(), String> {
if args.len() < 2 {
return Err("Usage: poc-memory link-add SOURCE TARGET [REASON]".into());
}
let mut store = capnp_store::Store::load()?;
let source = store.resolve_key(&args[0])?;
let target = store.resolve_key(&args[1])?;
let reason = if args.len() > 2 { args[2..].join(" ") } else { String::new() };
// Find UUIDs
let source_uuid = store.nodes.get(&source)
.map(|n| n.uuid)
.ok_or_else(|| format!("source not found: {}", source))?;
let target_uuid = store.nodes.get(&target)
.map(|n| n.uuid)
.ok_or_else(|| format!("target not found: {}", target))?;
// Check if link already exists
let exists = store.relations.iter().any(|r|
r.source_key == source && r.target_key == target && !r.deleted
);
if exists {
println!("Link already exists: {}{}", source, target);
return Ok(());
}
let rel = capnp_store::Store::new_relation(
source_uuid, target_uuid,
capnp_store::RelationType::Auto,
0.5,
&source, &target,
);
store.add_relation(rel)?;
if !reason.is_empty() {
println!("+ {}{} ({})", source, target, reason);
} else {
println!("+ {}{}", source, target);
}
Ok(())
}
fn cmd_link_impact(args: &[String]) -> Result<(), String> {
if args.len() < 2 {
return Err("Usage: poc-memory link-impact SOURCE TARGET".into());
}
let store = capnp_store::Store::load()?;
let source = store.resolve_key(&args[0])?;
let target = store.resolve_key(&args[1])?;
let g = store.build_graph();
let impact = g.link_impact(&source, &target);
println!("Link impact: {}{}", source, target);
println!(" Source degree: {} Target degree: {}", impact.source_deg, impact.target_deg);
println!(" Hub link: {} Same community: {}", impact.is_hub_link, impact.same_community);
println!(" ΔCC source: {:+.4} ΔCC target: {:+.4}", impact.delta_cc_source, impact.delta_cc_target);
println!(" ΔGini: {:+.6}", impact.delta_gini);
println!(" Assessment: {}", impact.assessment);
Ok(())
}
fn cmd_apply_agent(args: &[String]) -> Result<(), String> {
let home = env::var("HOME").unwrap_or_default();
let results_dir = std::path::PathBuf::from(&home)
.join(".claude/memory/agent-results");
if !results_dir.exists() {
println!("No agent results directory");
return Ok(());
}
let mut store = capnp_store::Store::load()?;
let mut applied = 0;
let mut errors = 0;
let process_all = args.iter().any(|a| a == "--all");
// Find .json result files
let mut files: Vec<_> = std::fs::read_dir(&results_dir)
.map_err(|e| format!("read results dir: {}", e))?
.filter_map(|e| e.ok())
.filter(|e| e.path().extension().map(|x| x == "json").unwrap_or(false))
.collect();
files.sort_by_key(|e| e.path());
for entry in &files {
let path = entry.path();
let content = match std::fs::read_to_string(&path) {
Ok(c) => c,
Err(e) => {
eprintln!(" Skip {}: {}", path.display(), e);
errors += 1;
continue;
}
};
let data: serde_json::Value = match serde_json::from_str(&content) {
Ok(d) => d,
Err(e) => {
eprintln!(" Skip {}: parse error: {}", path.display(), e);
errors += 1;
continue;
}
};
// Check for agent_result with links
let agent_result = data.get("agent_result").or(Some(&data));
let links = match agent_result.and_then(|r| r.get("links")).and_then(|l| l.as_array()) {
Some(l) => l,
None => continue,
};
let entry_text = data.get("entry_text")
.and_then(|v| v.as_str())
.unwrap_or("");
let source_start = agent_result
.and_then(|r| r.get("source_start"))
.and_then(|v| v.as_u64());
let source_end = agent_result
.and_then(|r| r.get("source_end"))
.and_then(|v| v.as_u64());
println!("Processing {}:", path.file_name().unwrap().to_string_lossy());
if let (Some(start), Some(end)) = (source_start, source_end) {
println!(" Source: L{}-L{}", start, end);
}
for link in links {
let target = match link.get("target").and_then(|v| v.as_str()) {
Some(t) => t,
None => continue,
};
let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
// Skip NOTE: targets (new topics, not existing nodes)
if target.starts_with("NOTE:") {
println!(" NOTE: {}{}", &target[5..], reason);
continue;
}
// Try to resolve the target key and link from journal entry
let resolved = match store.resolve_key(target) {
Ok(r) => r,
Err(_) => {
println!(" SKIP {} (not found in graph)", target);
continue;
}
};
let source_key = match find_journal_node(&store, entry_text) {
Some(k) => k,
None => {
println!(" SKIP {} (no matching journal node)", target);
continue;
}
};
// Get UUIDs for both nodes
let source_uuid = match store.nodes.get(&source_key) {
Some(n) => n.uuid,
None => continue,
};
let target_uuid = match store.nodes.get(&resolved) {
Some(n) => n.uuid,
None => continue,
};
let rel = capnp_store::Store::new_relation(
source_uuid, target_uuid,
capnp_store::RelationType::Link,
0.5,
&source_key, &resolved,
);
if let Err(e) = store.add_relation(rel) {
eprintln!(" Error adding relation: {}", e);
errors += 1;
} else {
println!(" LINK {}{} ({})", source_key, resolved, reason);
applied += 1;
}
}
// Move processed file to avoid re-processing
if !process_all {
let done_dir = results_dir.join("done");
std::fs::create_dir_all(&done_dir).ok();
let dest = done_dir.join(path.file_name().unwrap());
std::fs::rename(&path, &dest).ok();
}
}
if applied > 0 {
store.save()?;
}
println!("\nApplied {} links ({} errors, {} files processed)",
applied, errors, files.len());
Ok(())
}
/// Find the journal node that best matches the given entry text
fn find_journal_node(store: &capnp_store::Store, entry_text: &str) -> Option<String> {
if entry_text.is_empty() {
return None;
}
// Extract keywords from entry text
let words: Vec<&str> = entry_text.split_whitespace()
.filter(|w| w.len() > 5)
.take(5)
.collect();
// Find journal nodes whose content matches the most keywords
let mut best_key = None;
let mut best_score = 0;
for (key, node) in &store.nodes {
if !key.starts_with("journal.md#") {
continue;
}
let content_lower = node.content.to_lowercase();
let score: usize = words.iter()
.filter(|w| content_lower.contains(&w.to_lowercase()))
.count();
if score > best_score {
best_score = score;
best_key = Some(key.clone());
}
}
best_key
}
fn cmd_digest(args: &[String]) -> Result<(), String> {
if args.is_empty() {
return Err("Usage: poc-memory digest daily [DATE] | weekly [DATE]".into());
}
let home = env::var("HOME").unwrap_or_default();
let scripts_dir = std::path::PathBuf::from(&home).join("poc/memory/scripts");
match args[0].as_str() {
"daily" => {
let mut cmd = std::process::Command::new("python3");
cmd.arg(scripts_dir.join("daily-digest.py"));
if args.len() > 1 {
cmd.arg(&args[1]);
}
// Unset CLAUDECODE for nested claude calls
cmd.env_remove("CLAUDECODE");
let status = cmd.status()
.map_err(|e| format!("run daily-digest.py: {}", e))?;
if !status.success() {
return Err("daily-digest.py failed".into());
}
Ok(())
}
"weekly" => {
let mut cmd = std::process::Command::new("python3");
cmd.arg(scripts_dir.join("weekly-digest.py"));
if args.len() > 1 {
cmd.arg(&args[1]);
}
cmd.env_remove("CLAUDECODE");
let status = cmd.status()
.map_err(|e| format!("run weekly-digest.py: {}", e))?;
if !status.success() {
return Err("weekly-digest.py failed".into());
}
Ok(())
}
_ => Err(format!("Unknown digest type: {}. Use: daily, weekly", args[0])),
}
}
fn cmd_trace(args: &[String]) -> Result<(), String> {
if args.is_empty() {
return Err("Usage: poc-memory trace KEY".into());
}
let key = args.join(" ");
let store = capnp_store::Store::load()?;
let resolved = store.resolve_key(&key)?;
let g = store.build_graph();
let node = store.nodes.get(&resolved)
.ok_or_else(|| format!("Node not found: {}", resolved))?;
// Display the node itself
println!("=== {} ===", resolved);
println!("Type: {:?} Category: {} Weight: {:.2}",
node.node_type, node.category.label(), node.weight);
if !node.source_ref.is_empty() {
println!("Source: {}", node.source_ref);
}
// Show content preview
let preview = if node.content.len() > 200 {
// Floor to a char boundary at or below byte 200 (stable replacement
// for the nightly-only floor_char_boundary)
let end = (0..=200).rev().find(|&i| node.content.is_char_boundary(i)).unwrap_or(0);
format!("{}...", &node.content[..end])
} else {
node.content.clone()
};
println!("\n{}\n", preview);
// Walk neighbors, grouped by node type
let neighbors = g.neighbors(&resolved);
let mut episodic_session = Vec::new();
let mut episodic_daily = Vec::new();
let mut episodic_weekly = Vec::new();
let mut semantic = Vec::new();
for (n, strength) in &neighbors {
if let Some(nnode) = store.nodes.get(n.as_str()) {
match nnode.node_type {
capnp_store::NodeType::EpisodicSession =>
episodic_session.push((n.clone(), *strength, nnode)),
capnp_store::NodeType::EpisodicDaily =>
episodic_daily.push((n.clone(), *strength, nnode)),
capnp_store::NodeType::EpisodicWeekly =>
episodic_weekly.push((n.clone(), *strength, nnode)),
capnp_store::NodeType::Semantic =>
semantic.push((n.clone(), *strength, nnode)),
}
}
}
if !episodic_weekly.is_empty() {
println!("Weekly digests:");
for (k, s, n) in &episodic_weekly {
let preview = n.content.lines().next().unwrap_or("").chars().take(80).collect::<String>();
println!(" [{:.2}] {}{}", s, k, preview);
}
}
if !episodic_daily.is_empty() {
println!("Daily digests:");
for (k, s, n) in &episodic_daily {
let preview = n.content.lines().next().unwrap_or("").chars().take(80).collect::<String>();
println!(" [{:.2}] {}{}", s, k, preview);
}
}
if !episodic_session.is_empty() {
println!("Session entries:");
for (k, s, n) in &episodic_session {
let preview = n.content.lines()
.find(|l| !l.is_empty() && !l.starts_with("<!--"))
.unwrap_or("").chars().take(80).collect::<String>();
println!(" [{:.2}] {}", s, k);
if !n.source_ref.is_empty() {
println!(" ↳ source: {}", n.source_ref);
}
println!(" {}", preview);
}
}
if !semantic.is_empty() {
println!("Semantic links:");
for (k, s, _) in &semantic {
println!(" [{:.2}] {}", s, k);
}
}
// Summary
println!("\nLinks: {} session, {} daily, {} weekly, {} semantic",
episodic_session.len(), episodic_daily.len(),
episodic_weekly.len(), semantic.len());
Ok(())
}
fn cmd_interference(args: &[String]) -> Result<(), String> {
let mut threshold = 0.4f32;
let mut i = 0;
while i < args.len() {
match args[i].as_str() {
"--threshold" if i + 1 < args.len() => {
threshold = args[i + 1].parse().map_err(|_| "invalid threshold")?;
i += 2;
}
_ => { i += 1; }
}
}
let store = capnp_store::Store::load()?;
let g = store.build_graph();
let pairs = neuro::detect_interference(&store, &g, threshold);
if pairs.is_empty() {
println!("No interfering pairs above threshold {:.2}", threshold);
} else {
println!("Interfering pairs (similarity > {:.2}, different communities):", threshold);
for (a, b, sim) in &pairs {
println!(" [{:.3}] {}{}", sim, a, b);
}
}
Ok(())
}

src/migrate.rs (new file, 386 lines)

@@ -0,0 +1,386 @@
// Migration from old weights.json + markdown marker system
//
// Reads:
// ~/.claude/memory/weights.json (1,874 entries with metrics)
// ~/.claude/memory/*.md (content + mem markers + edges)
//
// Emits:
// ~/.claude/memory/nodes.capnp (all nodes with preserved metadata)
// ~/.claude/memory/relations.capnp (all edges from markers + md links)
// ~/.claude/memory/state.json (derived cache)
//
// Old files are preserved as backup. Run once.
use crate::capnp_store::{
self, Store, Node, Category, NodeType, Provenance, RelationType,
parse_units,
};
use serde::Deserialize;
use uuid::Uuid;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
fn home() -> PathBuf {
PathBuf::from(env::var("HOME").expect("HOME not set"))
}
fn now_epoch() -> f64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs_f64()
}
// Old system data structures (just enough for deserialization)
#[derive(Deserialize)]
struct OldStore {
#[serde(default)]
entries: HashMap<String, OldEntry>,
#[serde(default)]
retrieval_log: Vec<OldRetrievalEvent>,
#[serde(default)]
params: OldParams,
}
#[derive(Deserialize)]
struct OldEntry {
weight: f64,
created: String,
#[serde(default)]
last_retrieved: Option<String>,
#[serde(default)]
last_used: Option<String>,
#[serde(default)]
retrievals: u32,
#[serde(default)]
uses: u32,
#[serde(default)]
wrongs: u32,
#[serde(default = "default_category")]
category: String,
}
fn default_category() -> String { "General".to_string() }
#[derive(Deserialize)]
struct OldRetrievalEvent {
query: String,
timestamp: String,
results: Vec<String>,
#[serde(default)]
used: Option<Vec<String>>,
}
#[derive(Deserialize)]
struct OldParams {
#[serde(default = "default_0_7")]
default_weight: f64,
#[serde(default = "default_0_95")]
decay_factor: f64,
#[serde(default = "default_0_15")]
use_boost: f64,
#[serde(default = "default_0_1")]
prune_threshold: f64,
#[serde(default = "default_0_3")]
edge_decay: f64,
#[serde(default = "default_3")]
max_hops: u32,
#[serde(default = "default_0_05")]
min_activation: f64,
}
impl Default for OldParams {
fn default() -> Self {
OldParams {
default_weight: 0.7,
decay_factor: 0.95,
use_boost: 0.15,
prune_threshold: 0.1,
edge_decay: 0.3,
max_hops: 3,
min_activation: 0.05,
}
}
}
fn default_0_7() -> f64 { 0.7 }
fn default_0_95() -> f64 { 0.95 }
fn default_0_15() -> f64 { 0.15 }
fn default_0_1() -> f64 { 0.1 }
fn default_0_3() -> f64 { 0.3 }
fn default_3() -> u32 { 3 }
fn default_0_05() -> f64 { 0.05 }
fn parse_old_category(s: &str) -> Category {
match s {
"Core" | "core" => Category::Core,
"Technical" | "technical" | "tech" => Category::Technical,
"Observation" | "observation" | "obs" => Category::Observation,
"Task" | "task" => Category::Task,
_ => Category::General,
}
}
pub fn migrate() -> Result<(), String> {
let weights_path = home().join(".claude/memory/weights.json");
let memory_dir = home().join(".claude/memory");
let nodes_path = memory_dir.join("nodes.capnp");
let rels_path = memory_dir.join("relations.capnp");
// Safety check
if nodes_path.exists() || rels_path.exists() {
return Err("nodes.capnp or relations.capnp already exist. \
Remove them first if you want to re-migrate.".into());
}
// Load old store
let old_store: OldStore = if weights_path.exists() {
let data = fs::read_to_string(&weights_path)
.map_err(|e| format!("read weights.json: {}", e))?;
serde_json::from_str(&data)
.map_err(|e| format!("parse weights.json: {}", e))?
} else {
eprintln!("Warning: no weights.json found, migrating markdown only");
OldStore {
entries: HashMap::new(),
retrieval_log: Vec::new(),
params: OldParams::default(),
}
};
eprintln!("Old store: {} entries, {} retrieval events",
old_store.entries.len(), old_store.retrieval_log.len());
// Scan markdown files to get content + edges
let mut units_by_key: HashMap<String, capnp_store::MemoryUnit> = HashMap::new();
scan_markdown_dir(&memory_dir, &mut units_by_key)?;
eprintln!("Scanned {} markdown units", units_by_key.len());
// Create new store
let mut store = Store::default();
// Migrate params
store.params.default_weight = old_store.params.default_weight;
store.params.decay_factor = old_store.params.decay_factor;
store.params.use_boost = old_store.params.use_boost;
store.params.prune_threshold = old_store.params.prune_threshold;
store.params.edge_decay = old_store.params.edge_decay;
store.params.max_hops = old_store.params.max_hops;
store.params.min_activation = old_store.params.min_activation;
// Migrate retrieval log
store.retrieval_log = old_store.retrieval_log.iter().map(|e| {
capnp_store::RetrievalEvent {
query: e.query.clone(),
timestamp: e.timestamp.clone(),
results: e.results.clone(),
used: e.used.clone(),
}
}).collect();
// Phase 1: Create nodes
// Merge old entries (weight metadata) with markdown units (content)
let mut all_nodes: Vec<Node> = Vec::new();
let mut key_to_uuid: HashMap<String, [u8; 16]> = HashMap::new();
// First, all entries from the old store
for (key, old_entry) in &old_store.entries {
let uuid = *Uuid::new_v4().as_bytes();
key_to_uuid.insert(key.clone(), uuid);
let content = units_by_key.get(key)
.map(|u| u.content.clone())
.unwrap_or_default();
let state_tag = units_by_key.get(key)
.and_then(|u| u.state.clone())
.unwrap_or_default();
let node = Node {
uuid,
version: 1,
timestamp: now_epoch(),
node_type: if key.contains("journal") {
NodeType::EpisodicSession
} else {
NodeType::Semantic
},
provenance: Provenance::Manual,
key: key.clone(),
content,
weight: old_entry.weight as f32,
category: parse_old_category(&old_entry.category),
emotion: 0.0,
deleted: false,
source_ref: String::new(),
created: old_entry.created.clone(),
retrievals: old_entry.retrievals,
uses: old_entry.uses,
wrongs: old_entry.wrongs,
state_tag,
last_replayed: 0.0,
spaced_repetition_interval: 1,
community_id: None,
clustering_coefficient: None,
schema_fit: None,
degree: None,
};
all_nodes.push(node);
}
// Then, any markdown units not in the old store
for (key, unit) in &units_by_key {
if key_to_uuid.contains_key(key) { continue; }
let uuid = *Uuid::new_v4().as_bytes();
key_to_uuid.insert(key.clone(), uuid);
let node = Node {
uuid,
version: 1,
timestamp: now_epoch(),
node_type: if key.contains("journal") {
NodeType::EpisodicSession
} else {
NodeType::Semantic
},
provenance: Provenance::Manual,
key: key.clone(),
content: unit.content.clone(),
weight: 0.7,
category: Category::General,
emotion: 0.0,
deleted: false,
source_ref: String::new(),
created: String::new(),
retrievals: 0,
uses: 0,
wrongs: 0,
state_tag: unit.state.clone().unwrap_or_default(),
last_replayed: 0.0,
spaced_repetition_interval: 1,
community_id: None,
clustering_coefficient: None,
schema_fit: None,
degree: None,
};
all_nodes.push(node);
}
// Write nodes to capnp log
store.append_nodes(&all_nodes)?;
for node in &all_nodes {
store.uuid_to_key.insert(node.uuid, node.key.clone());
store.nodes.insert(node.key.clone(), node.clone());
}
eprintln!("Migrated {} nodes", all_nodes.len());
// Phase 2: Create relations from markdown links + causal edges
let mut all_relations = Vec::new();
for (key, unit) in &units_by_key {
let source_uuid = match key_to_uuid.get(key) {
Some(u) => *u,
None => continue,
};
// Association links (bidirectional)
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
let target_uuid = match key_to_uuid.get(link) {
Some(u) => *u,
None => continue,
};
// Avoid duplicate relations
let exists = all_relations.iter().any(|r: &capnp_store::Relation|
(r.source == source_uuid && r.target == target_uuid) ||
(r.source == target_uuid && r.target == source_uuid));
if exists { continue; }
all_relations.push(Store::new_relation(
source_uuid, target_uuid,
RelationType::Link, 1.0,
key, link,
));
}
// Causal edges (directed)
for cause in &unit.causes {
let cause_uuid = match key_to_uuid.get(cause) {
Some(u) => *u,
None => continue,
};
all_relations.push(Store::new_relation(
cause_uuid, source_uuid,
RelationType::Causal, 1.0,
cause, key,
));
}
}
// Write relations to capnp log
store.append_relations(&all_relations)?;
store.relations = all_relations;
eprintln!("Migrated {} relations", store.relations.len());
// Phase 3: Compute graph metrics
store.update_graph_metrics();
// Save derived cache
store.save()?;
eprintln!("Migration complete. Files:");
eprintln!(" {}", nodes_path.display());
eprintln!(" {}", rels_path.display());
eprintln!(" {}", memory_dir.join("state.json").display());
// Verify
let g = store.build_graph();
eprintln!("\nVerification:");
eprintln!(" Nodes: {}", store.nodes.len());
eprintln!(" Relations: {}", store.relations.len());
eprintln!(" Graph edges: {}", g.edge_count());
eprintln!(" Communities: {}", g.community_count());
eprintln!(" Avg CC: {:.4}", g.avg_clustering_coefficient());
Ok(())
}
fn scan_markdown_dir(
dir: &Path,
units: &mut HashMap<String, capnp_store::MemoryUnit>,
) -> Result<(), String> {
let entries = fs::read_dir(dir)
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
scan_markdown_dir(&path, units)?;
continue;
}
let Some(ext) = path.extension() else { continue };
if ext != "md" { continue }
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = match fs::read_to_string(&path) {
Ok(c) => c,
Err(_) => continue,
};
for unit in parse_units(&filename, &content) {
units.insert(unit.key.clone(), unit);
}
}
Ok(())
}

src/neuro.rs (new file, 707 lines)

@@ -0,0 +1,707 @@
// Neuroscience-inspired memory algorithms
//
// Systematic replay (hippocampal replay), schema assimilation,
// interference detection, emotional gating, consolidation priority
// scoring, and the agent consolidation harness.
use crate::capnp_store::Store;
use crate::graph::{self, Graph};
use crate::similarity;
use std::time::{SystemTime, UNIX_EPOCH};
fn now_epoch() -> f64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs_f64()
}
const SECS_PER_DAY: f64 = 86400.0;
/// Consolidation priority: how urgently a node needs attention
///
/// priority = (1 - schema_fit) × overdue_ratio × (1 + emotion / 10)
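///
/// Example: fit 0.2, three intervals overdue, emotion 5 →
/// (1 - 0.2) × 3.0 × (1 + 5/10) = 3.6.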
pub fn consolidation_priority(store: &Store, key: &str, graph: &Graph) -> f64 {
let node = match store.nodes.get(key) {
Some(n) => n,
None => return 0.0,
};
// Schema fit: 0 = poorly integrated, 1 = well integrated
let fit = graph::schema_fit(graph, key) as f64;
let fit_factor = 1.0 - fit;
// Spaced repetition: how overdue is this node for replay?
    // Clamp to at least one day so the overdue ratio stays finite
    let interval_secs = node.spaced_repetition_interval.max(1) as f64 * SECS_PER_DAY;
let time_since_replay = if node.last_replayed > 0.0 {
(now_epoch() - node.last_replayed).max(0.0)
} else {
// Never replayed — treat as very overdue
interval_secs * 3.0
};
let overdue_ratio = (time_since_replay / interval_secs).min(5.0);
// Emotional intensity: higher emotion = higher priority
let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);
fit_factor * overdue_ratio * emotion_factor
}
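// A minimal sketch of the formula above with assumed values (not tied
// to a real Store): fit 0.1, 21 days since replay on a 7-day interval,
// emotion 5.
#[cfg(test)]
mod priority_formula_tests {
    #[test]
    fn worked_example() {
        let fit_factor = 1.0 - 0.1_f64;                // (1 - schema_fit)
        let overdue_ratio = (21.0_f64 / 7.0).min(5.0); // capped at 5×
        let emotion_factor = 1.0 + 5.0_f64 / 10.0;     // (1 + emotion/10)
        let priority = fit_factor * overdue_ratio * emotion_factor;
        assert!((priority - 4.05).abs() < 1e-9);
    }
}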
/// Item in the replay queue
pub struct ReplayItem {
pub key: String,
pub priority: f64,
pub interval_days: u32,
pub emotion: f32,
pub schema_fit: f32,
}
/// Generate the replay queue: nodes ordered by consolidation priority
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
let graph = store.build_graph();
let fits = graph::schema_fit_all(&graph);
let mut items: Vec<ReplayItem> = store.nodes.iter()
.map(|(key, node)| {
let priority = consolidation_priority(store, key, &graph);
let fit = fits.get(key).copied().unwrap_or(0.0);
ReplayItem {
key: key.clone(),
priority,
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
schema_fit: fit,
}
})
.collect();
    items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap_or(std::cmp::Ordering::Equal));
items.truncate(count);
items
}
/// Detect interfering memory pairs: high text similarity but different communities
pub fn detect_interference(
store: &Store,
graph: &Graph,
threshold: f32,
) -> Vec<(String, String, f32)> {
let communities = graph.communities();
    // Only compare nodes within a reasonable set — take the longest ones
let mut docs: Vec<(String, String)> = store.nodes.iter()
.filter(|(_, n)| n.content.len() > 50) // skip tiny nodes
.map(|(k, n)| (k.clone(), n.content.clone()))
.collect();
// For large stores, sample to keep pairwise comparison feasible
if docs.len() > 200 {
docs.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
docs.truncate(200);
}
let similar = similarity::pairwise_similar(&docs, threshold);
// Filter to pairs in different communities
similar.into_iter()
.filter(|(a, b, _)| {
let ca = communities.get(a);
let cb = communities.get(b);
match (ca, cb) {
(Some(a), Some(b)) => a != b,
_ => true, // if community unknown, flag it
}
})
.collect()
}
/// Schema assimilation scoring for a new node.
/// Returns how easily the node integrates into existing structure.
///
/// High fit (>0.5): auto-link, done
/// Medium fit (0.2-0.5): agent reviews, proposes links
/// Low fit (<0.2): deep examination needed — new schema seed, bridge, or noise?
pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
let graph = store.build_graph();
let fit = graph::schema_fit(&graph, key);
let recommendation = if fit > 0.5 {
"auto-integrate"
} else if fit > 0.2 {
"agent-review"
} else if graph.degree(key) > 0 {
"deep-examine-bridge"
} else {
"deep-examine-orphan"
};
(fit, recommendation)
}
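// Worked examples of the tiers above (illustrative fit values):
//   fit = 0.62               → "auto-integrate"
//   fit = 0.31               → "agent-review"
//   fit = 0.05, degree > 0   → "deep-examine-bridge"
//   fit = 0.05, no edges     → "deep-examine-orphan"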
/// Prompt template directory
fn prompts_dir() -> std::path::PathBuf {
    // Templates live under ~/poc/memory/prompts/ (no binary-relative lookup yet)
let home = std::env::var("HOME").unwrap_or_default();
std::path::PathBuf::from(home).join("poc/memory/prompts")
}
/// Load a prompt template, replacing {{PLACEHOLDER}} with data
fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
let path = prompts_dir().join(format!("{}.md", name));
let mut content = std::fs::read_to_string(&path)
.map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
for (placeholder, data) in replacements {
content = content.replace(placeholder, data);
}
Ok(content)
}
/// Format topology header for agent prompts — current graph health metrics
fn format_topology_header(graph: &Graph) -> String {
let sigma = graph.small_world_sigma();
let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini();
let avg_cc = graph.avg_clustering_coefficient();
let n = graph.nodes().len();
let e = graph.edge_count();
format!(
"## Current graph topology\n\
Nodes: {} Edges: {} Communities: {}\n\
Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
Avg clustering coefficient: {:.4}\n\n\
Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
n, e, graph.community_count(), sigma, alpha, gini, avg_cc)
}
/// Compute the hub degree threshold (top 5% by degree)
fn hub_threshold(graph: &Graph) -> usize {
let mut degrees: Vec<usize> = graph.nodes().iter()
.map(|k| graph.degree(k))
.collect();
degrees.sort_unstable();
if degrees.len() >= 20 {
degrees[degrees.len() * 95 / 100]
} else {
usize::MAX
}
}
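// A quick check of the percentile arithmetic above, using synthetic
// degrees rather than a real Graph:
#[cfg(test)]
mod hub_threshold_tests {
    #[test]
    fn percentile_index() {
        let degrees: Vec<usize> = (0..100).collect(); // already sorted
        assert_eq!(degrees[degrees.len() * 95 / 100], 95);
    }
}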
/// Format node data section for prompt templates
fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
let hub_thresh = hub_threshold(graph);
let mut out = String::new();
for item in items {
let node = match store.nodes.get(&item.key) {
Some(n) => n,
None => continue,
};
out.push_str(&format!("## {} \n", item.key));
out.push_str(&format!("Priority: {:.3} Schema fit: {:.3} Emotion: {:.1} ",
item.priority, item.schema_fit, item.emotion));
out.push_str(&format!("Category: {} Interval: {}d\n",
node.category.label(), node.spaced_repetition_interval));
if let Some(community) = node.community_id {
out.push_str(&format!("Community: {} ", community));
}
let deg = graph.degree(&item.key);
let cc = graph.clustering_coefficient(&item.key);
// Hub-link ratio: what fraction of this node's edges go to hubs?
let neighbors = graph.neighbors(&item.key);
let hub_links = neighbors.iter()
.filter(|(n, _)| graph.degree(n) >= hub_thresh)
.count();
let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
let is_hub = deg >= hub_thresh;
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
deg, cc, hub_ratio * 100.0, hub_links, deg));
if is_hub {
out.push_str(" ← THIS IS A HUB");
} else if hub_ratio > 0.6 {
out.push_str(" ← mostly hub-connected, needs lateral links");
}
out.push('\n');
// Content (truncated for large nodes)
let content = &node.content;
if content.len() > 1500 {
let end = content.floor_char_boundary(1500);
out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n[...]\n\n",
content.len(), &content[..end]));
} else {
out.push_str(&format!("\nContent:\n{}\n\n", content));
}
        // Neighbors (reusing the list fetched above for the hub-link ratio)
if !neighbors.is_empty() {
out.push_str("Neighbors:\n");
for (n, strength) in neighbors.iter().take(15) {
let n_cc = graph.clustering_coefficient(n);
let n_community = store.nodes.get(n.as_str())
.and_then(|n| n.community_id);
out.push_str(&format!(" - {} (str={:.2}, cc={:.3}",
n, strength, n_cc));
if let Some(c) = n_community {
out.push_str(&format!(", c{}", c));
}
out.push_str(")\n");
}
}
out.push_str("\n---\n\n");
}
out
}
/// Format health data for the health agent prompt
fn format_health_section(store: &Store, graph: &Graph) -> String {
let health = graph::health_report(graph, store);
let mut out = health;
out.push_str("\n\n## Weight distribution\n");
// Weight histogram
let mut buckets = [0u32; 10]; // 0.0-0.1, 0.1-0.2, ..., 0.9-1.0
for node in store.nodes.values() {
let bucket = ((node.weight * 10.0) as usize).min(9);
buckets[bucket] += 1;
}
for (i, &count) in buckets.iter().enumerate() {
let lo = i as f32 / 10.0;
let hi = (i + 1) as f32 / 10.0;
let bar: String = std::iter::repeat('█').take((count as usize) / 10).collect();
out.push_str(&format!(" {:.1}-{:.1}: {:4} {}\n", lo, hi, count, bar));
}
// Near-prune nodes
let near_prune: Vec<_> = store.nodes.iter()
.filter(|(_, n)| n.weight < 0.15)
.map(|(k, n)| (k.clone(), n.weight))
.collect();
if !near_prune.is_empty() {
out.push_str(&format!("\n## Near-prune nodes ({} total)\n", near_prune.len()));
for (k, w) in near_prune.iter().take(20) {
out.push_str(&format!(" [{:.3}] {}\n", w, k));
}
}
// Community sizes
let communities = graph.communities();
let mut comm_sizes: std::collections::HashMap<u32, Vec<String>> = std::collections::HashMap::new();
for (key, &label) in communities {
comm_sizes.entry(label).or_default().push(key.clone());
}
let mut sizes: Vec<_> = comm_sizes.iter()
.map(|(id, members)| (*id, members.len(), members.clone()))
.collect();
sizes.sort_by(|a, b| b.1.cmp(&a.1));
out.push_str("\n## Largest communities\n");
for (id, size, members) in sizes.iter().take(10) {
out.push_str(&format!(" Community {} ({} nodes): ", id, size));
let sample: Vec<_> = members.iter().take(5).map(|s| s.as_str()).collect();
out.push_str(&sample.join(", "));
if *size > 5 { out.push_str(", ..."); }
out.push('\n');
}
out
}
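// Sketch of the bucketing rule used for the histogram above: weights
// land in ten 0.1-wide buckets, with weight 1.0 clamped into the top one.
#[cfg(test)]
mod histogram_tests {
    #[test]
    fn bucket_index_clamps() {
        let bucket = |w: f32| ((w * 10.0) as usize).min(9);
        assert_eq!(bucket(0.0), 0);
        assert_eq!(bucket(0.55), 5);
        assert_eq!(bucket(1.0), 9);
    }
}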
/// Format interference pairs for the separator agent prompt
fn format_pairs_section(
pairs: &[(String, String, f32)],
store: &Store,
graph: &Graph,
) -> String {
let mut out = String::new();
let communities = graph.communities();
for (a, b, sim) in pairs {
out.push_str(&format!("## Pair: similarity={:.3}\n", sim));
let ca = communities.get(a).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
let cb = communities.get(b).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
// Node A
out.push_str(&format!("\n### {} ({})\n", a, ca));
if let Some(node) = store.nodes.get(a) {
let content = if node.content.len() > 500 {
let end = node.content.floor_char_boundary(500);
format!("{}...", &node.content[..end])
} else {
node.content.clone()
};
out.push_str(&format!("Category: {} Weight: {:.2}\n{}\n",
node.category.label(), node.weight, content));
}
// Node B
out.push_str(&format!("\n### {} ({})\n", b, cb));
if let Some(node) = store.nodes.get(b) {
let content = if node.content.len() > 500 {
let end = node.content.floor_char_boundary(500);
format!("{}...", &node.content[..end])
} else {
node.content.clone()
};
out.push_str(&format!("Category: {} Weight: {:.2}\n{}\n",
node.category.label(), node.weight, content));
}
out.push_str("\n---\n\n");
}
out
}
/// Run agent consolidation on top-priority nodes
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
let graph = store.build_graph();
let items = replay_queue(store, count);
if items.is_empty() {
println!("No nodes to consolidate.");
return Ok(());
}
let nodes_section = format_nodes_section(store, &items, &graph);
    if auto {
        // Generate the replay agent prompt with data filled in
        // (fill the topology header too, matching agent_prompt)
        let topology = format_topology_header(&graph);
        let prompt = load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])?;
        println!("{}", prompt);
} else {
// Interactive: show what needs attention and available agent types
println!("Consolidation batch ({} nodes):\n", items.len());
for item in &items {
let node_type = store.nodes.get(&item.key)
.map(|n| if n.key.contains("journal") { "episodic" } else { "semantic" })
.unwrap_or("?");
println!(" [{:.3}] {} (fit={:.3}, interval={}d, type={})",
item.priority, item.key, item.schema_fit, item.interval_days, node_type);
}
// Also show interference pairs
let pairs = detect_interference(store, &graph, 0.6);
if !pairs.is_empty() {
println!("\nInterfering pairs ({}):", pairs.len());
for (a, b, sim) in pairs.iter().take(5) {
println!(" [{:.3}] {}{}", sim, a, b);
}
}
println!("\nAgent prompts:");
println!(" --auto Generate replay agent prompt");
println!(" --agent replay Replay agent (schema assimilation)");
println!(" --agent linker Linker agent (relational binding)");
println!(" --agent separator Separator agent (pattern separation)");
println!(" --agent transfer Transfer agent (CLS episodic→semantic)");
println!(" --agent health Health agent (synaptic homeostasis)");
}
Ok(())
}
/// Generate a specific agent prompt with filled-in data
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String, String> {
let graph = store.build_graph();
let topology = format_topology_header(&graph);
match agent {
"replay" => {
let items = replay_queue(store, count);
let nodes_section = format_nodes_section(store, &items, &graph);
load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
}
"linker" => {
// Filter to episodic entries
let mut items = replay_queue(store, count * 2);
items.retain(|item| {
store.nodes.get(&item.key)
.map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession))
.unwrap_or(false)
|| item.key.contains("journal")
|| item.key.contains("session")
});
items.truncate(count);
let nodes_section = format_nodes_section(store, &items, &graph);
load_prompt("linker", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
}
"separator" => {
let pairs = detect_interference(store, &graph, 0.5);
let pairs_section = format_pairs_section(&pairs, store, &graph);
load_prompt("separator", &[("{{TOPOLOGY}}", &topology), ("{{PAIRS}}", &pairs_section)])
}
"transfer" => {
// Recent episodic entries
let mut episodes: Vec<_> = store.nodes.iter()
.filter(|(k, _)| k.contains("journal") || k.contains("session"))
.map(|(k, n)| (k.clone(), n.timestamp))
.collect();
            episodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
episodes.truncate(count);
let episode_keys: Vec<_> = episodes.iter().map(|(k, _)| k.clone()).collect();
let items: Vec<ReplayItem> = episode_keys.iter()
.filter_map(|k| {
let node = store.nodes.get(k)?;
let fit = graph::schema_fit(&graph, k);
Some(ReplayItem {
key: k.clone(),
priority: consolidation_priority(store, k, &graph),
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
schema_fit: fit,
})
})
.collect();
let episodes_section = format_nodes_section(store, &items, &graph);
load_prompt("transfer", &[("{{TOPOLOGY}}", &topology), ("{{EPISODES}}", &episodes_section)])
}
"health" => {
let health_section = format_health_section(store, &graph);
load_prompt("health", &[("{{TOPOLOGY}}", &topology), ("{{HEALTH}}", &health_section)])
}
_ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health", agent)),
}
}
/// Agent allocation from the control loop
pub struct ConsolidationPlan {
pub replay_count: usize,
pub linker_count: usize,
pub separator_count: usize,
pub transfer_count: usize,
pub run_health: bool,
pub rationale: Vec<String>,
}
/// Analyze metrics and decide how much each agent needs to run.
///
/// This is the control loop: metrics → error signal → agent allocation.
/// Target values are based on healthy small-world networks.
pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
let graph = store.build_graph();
let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini();
let avg_fit = {
let fits = graph::schema_fit_all(&graph);
if fits.is_empty() { 0.0 } else {
fits.values().sum::<f32>() / fits.len() as f32
}
};
let interference_pairs = detect_interference(store, &graph, 0.5);
let interference_count = interference_pairs.len();
// Count episodic vs semantic nodes
let episodic_count = store.nodes.iter()
.filter(|(k, _)| k.contains("journal") || k.contains("session"))
.count();
let semantic_count = store.nodes.len() - episodic_count;
let episodic_ratio = if store.nodes.is_empty() { 0.0 }
else { episodic_count as f32 / store.nodes.len() as f32 };
let mut plan = ConsolidationPlan {
replay_count: 0,
linker_count: 0,
separator_count: 0,
transfer_count: 0,
run_health: true, // always run health first
rationale: Vec::new(),
};
// Target: α ≥ 2.5 (healthy scale-free)
// Current distance determines replay + linker allocation
if alpha < 2.0 {
plan.replay_count += 10;
plan.linker_count += 5;
plan.rationale.push(format!(
"α={:.2} (target ≥2.5): extreme hub dominance → 10 replay + 5 linker for lateral links",
alpha));
} else if alpha < 2.5 {
plan.replay_count += 5;
plan.linker_count += 3;
plan.rationale.push(format!(
"α={:.2} (target ≥2.5): moderate hub dominance → 5 replay + 3 linker",
alpha));
} else {
plan.replay_count += 3;
plan.rationale.push(format!(
"α={:.2}: healthy — 3 replay for maintenance", alpha));
}
// Target: Gini ≤ 0.4
if gini > 0.5 {
plan.replay_count += 3;
plan.rationale.push(format!(
"Gini={:.3} (target ≤0.4): high inequality → +3 replay (lateral focus)",
gini));
}
// Target: avg schema fit ≥ 0.2
if avg_fit < 0.1 {
plan.replay_count += 5;
plan.rationale.push(format!(
"Schema fit={:.3} (target ≥0.2): very poor integration → +5 replay",
avg_fit));
} else if avg_fit < 0.2 {
plan.replay_count += 2;
plan.rationale.push(format!(
"Schema fit={:.3} (target ≥0.2): low integration → +2 replay",
avg_fit));
}
    // Interference: >100 pairs is severe, >20 moderate; under ~20 is near-clean
if interference_count > 100 {
plan.separator_count += 10;
plan.rationale.push(format!(
"Interference: {} pairs (target <50) → 10 separator",
interference_count));
} else if interference_count > 20 {
plan.separator_count += 5;
plan.rationale.push(format!(
"Interference: {} pairs (target <50) → 5 separator",
interference_count));
} else if interference_count > 0 {
plan.separator_count += interference_count.min(3);
plan.rationale.push(format!(
"Interference: {} pairs → {} separator",
interference_count, plan.separator_count));
}
// Episodic → semantic transfer
// If >60% of nodes are episodic, knowledge isn't being extracted
if episodic_ratio > 0.6 {
plan.transfer_count += 10;
plan.rationale.push(format!(
"Episodic ratio: {:.0}% ({}/{}) → 10 transfer (knowledge extraction needed)",
episodic_ratio * 100.0, episodic_count, store.nodes.len()));
} else if episodic_ratio > 0.4 {
plan.transfer_count += 5;
plan.rationale.push(format!(
"Episodic ratio: {:.0}% → 5 transfer",
episodic_ratio * 100.0));
}
plan
}
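// Worked example of the control loop (illustrative metrics): α=1.8,
// Gini=0.55, avg fit=0.08, 120 interference pairs, 70% episodic nodes
// → 18 replay (10+3+5), 5 linker, 10 separator, 10 transfer, plus the
// always-on health audit.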
/// Format the consolidation plan for display
pub fn format_plan(plan: &ConsolidationPlan) -> String {
let mut out = String::from("Consolidation Plan\n==================\n\n");
out.push_str("Analysis:\n");
for r in &plan.rationale {
out.push_str(&format!("{}\n", r));
}
out.push_str("\nAgent allocation:\n");
if plan.run_health {
out.push_str(" 1. health — system audit\n");
}
    let mut step = if plan.run_health { 2 } else { 1 };
if plan.replay_count > 0 {
out.push_str(&format!(" {}. replay ×{:2} — schema assimilation + lateral linking\n",
step, plan.replay_count));
step += 1;
}
if plan.linker_count > 0 {
out.push_str(&format!(" {}. linker ×{:2} — relational binding from episodes\n",
step, plan.linker_count));
step += 1;
}
if plan.separator_count > 0 {
out.push_str(&format!(" {}. separator ×{} — pattern separation\n",
step, plan.separator_count));
step += 1;
}
if plan.transfer_count > 0 {
out.push_str(&format!(" {}. transfer ×{:2} — episodic→semantic extraction\n",
step, plan.transfer_count));
}
let total = plan.replay_count + plan.linker_count
+ plan.separator_count + plan.transfer_count
+ if plan.run_health { 1 } else { 0 };
out.push_str(&format!("\nTotal agent runs: {}\n", total));
out
}
/// Brief daily check: compare current metrics to last snapshot
pub fn daily_check(store: &Store) -> String {
let graph = store.build_graph();
let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini();
let sigma = graph.small_world_sigma();
let avg_cc = graph.avg_clustering_coefficient();
let avg_fit = {
let fits = graph::schema_fit_all(&graph);
if fits.is_empty() { 0.0 } else {
fits.values().sum::<f32>() / fits.len() as f32
}
};
let history = graph::load_metrics_history();
let prev = history.last();
let mut out = String::from("Memory daily check\n");
// Current state
out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
sigma, alpha, gini, avg_cc, avg_fit));
// Trend
if let Some(p) = prev {
let d_sigma = sigma - p.sigma;
let d_alpha = alpha - p.alpha;
let d_gini = gini - p.gini;
out.push_str(&format!(" Δσ={:+.1} Δα={:+.2} Δgini={:+.3}\n",
d_sigma, d_alpha, d_gini));
// Assessment
let mut issues = Vec::new();
if alpha < 2.0 { issues.push("hub dominance critical"); }
if gini > 0.5 { issues.push("high inequality"); }
if avg_fit < 0.1 { issues.push("poor integration"); }
if d_sigma < -5.0 { issues.push("σ declining"); }
if d_alpha < -0.1 { issues.push("α declining"); }
if d_gini > 0.02 { issues.push("inequality increasing"); }
if issues.is_empty() {
out.push_str(" Status: healthy\n");
} else {
out.push_str(&format!(" Status: needs attention — {}\n", issues.join(", ")));
out.push_str(" Run: poc-memory consolidate-session\n");
}
} else {
out.push_str(" (first snapshot, no trend data yet)\n");
}
// Log this snapshot too
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64();
    let date = std::process::Command::new("date").arg("+%Y-%m-%d %H:%M")
        .output()
        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
        .unwrap_or_default();
graph::save_metrics_snapshot(&graph::MetricsSnapshot {
timestamp: now, date,
nodes: graph.nodes().len(),
edges: graph.edge_count(),
communities: graph.community_count(),
sigma, alpha, gini, avg_cc,
avg_path_length: graph.avg_path_length(),
avg_schema_fit: avg_fit,
});
out
}
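// Example reading (illustrative numbers): σ=38.0 with Δσ=-7.2 trips
// "σ declining" (Δσ < -5.0), and α=1.92 trips "hub dominance critical"
// (α < 2.0), so the check prints "needs attention" and recommends
// `poc-memory consolidate-session`.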

146
src/search.rs Normal file
View file

@ -0,0 +1,146 @@
// Spreading activation search across the memory graph
//
// Same model as the old system but richer: uses graph edge strengths,
// accepts a circumscription parameter for blending associative vs
// causal walks (not yet wired up), and benefits from community-aware
// result grouping.
use crate::capnp_store::Store;
use crate::graph::Graph;
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet, VecDeque};
pub struct SearchResult {
pub key: String,
pub activation: f64,
pub is_direct: bool,
pub snippet: Option<String>,
}
/// Spreading activation with circumscription parameter.
///
/// circ = 0.0: field mode — all edges (default, broad resonance)
/// circ = 1.0: causal mode — prefer causal edges
///
/// The blend is not implemented yet: the parameter is accepted (and
/// ignored) so callers can pass it ahead of the implementation.
fn spreading_activation(
seeds: &[(String, f64)],
graph: &Graph,
store: &Store,
_circumscription: f64,
) -> Vec<(String, f64)> {
let params = &store.params;
let mut activation: HashMap<String, f64> = HashMap::new();
let mut queue: VecDeque<(String, f64, u32)> = VecDeque::new();
for (key, act) in seeds {
let current = activation.entry(key.clone()).or_insert(0.0);
if *act > *current {
*current = *act;
queue.push_back((key.clone(), *act, 0));
}
}
while let Some((key, act, depth)) = queue.pop_front() {
if depth >= params.max_hops { continue; }
for (neighbor, strength) in graph.neighbors(&key) {
let neighbor_weight = store.nodes.get(neighbor.as_str())
.map(|n| n.weight as f64)
.unwrap_or(params.default_weight);
let propagated = act * params.edge_decay * neighbor_weight * strength as f64;
if propagated < params.min_activation { continue; }
let current = activation.entry(neighbor.clone()).or_insert(0.0);
if propagated > *current {
*current = propagated;
queue.push_back((neighbor.clone(), propagated, depth + 1));
}
}
}
let mut results: Vec<_> = activation.into_iter().collect();
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
results
}
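// One propagation step under assumed parameters (edge_decay=0.8,
// neighbor weight=0.9, strength=1.0): activation shrinks geometrically
// per hop until it drops below min_activation.
#[cfg(test)]
mod activation_tests {
    #[test]
    fn propagation_decays_per_hop() {
        let (edge_decay, weight, strength) = (0.8_f64, 0.9_f64, 1.0_f64);
        let hop1 = 1.0 * edge_decay * weight * strength;
        let hop2 = hop1 * edge_decay * weight * strength;
        assert!((hop1 - 0.72).abs() < 1e-9);
        assert!(hop2 < hop1 && (hop2 - 0.5184).abs() < 1e-9);
    }
}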
/// Full search: find direct hits, spread activation, return ranked results
pub fn search(query: &str, store: &Store) -> Vec<SearchResult> {
let graph = store.build_graph();
let query_lower = query.to_lowercase();
let query_tokens: Vec<&str> = query_lower.split_whitespace().collect();
let mut seeds: Vec<(String, f64)> = Vec::new();
let mut snippets: HashMap<String, String> = HashMap::new();
for (key, node) in &store.nodes {
let content_lower = node.content.to_lowercase();
let exact_match = content_lower.contains(&query_lower);
let token_match = query_tokens.len() > 1
&& query_tokens.iter().all(|t| content_lower.contains(t));
if exact_match || token_match {
let weight = node.weight as f64;
let activation = if exact_match { weight } else { weight * 0.85 };
seeds.push((key.clone(), activation));
let snippet: String = node.content.lines()
.filter(|l| {
let ll = l.to_lowercase();
if exact_match && ll.contains(&query_lower) { return true; }
query_tokens.iter().any(|t| ll.contains(t))
})
.take(3)
.map(|l| {
let t = l.trim();
if t.len() > 100 {
let end = t.floor_char_boundary(97);
format!("{}...", &t[..end])
} else {
t.to_string()
}
})
.collect::<Vec<_>>()
.join("\n ");
snippets.insert(key.clone(), snippet);
}
}
if seeds.is_empty() {
return Vec::new();
}
let direct_hits: HashSet<String> = seeds.iter().map(|(k, _)| k.clone()).collect();
let raw_results = spreading_activation(&seeds, &graph, store, 0.0);
raw_results.into_iter().map(|(key, activation)| {
let is_direct = direct_hits.contains(&key);
let snippet = snippets.get(&key).cloned();
SearchResult { key, activation, is_direct, snippet }
}).collect()
}
/// Extract meaningful search terms from natural language.
/// Strips common English stop words, returns up to max_terms words.
pub fn extract_query_terms(text: &str, max_terms: usize) -> String {
const STOP_WORDS: &[&str] = &[
"the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
"have", "has", "had", "will", "would", "could", "should", "can",
"may", "might", "shall", "been", "being", "to", "of", "in", "for",
"on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
"no", "if", "then", "than", "that", "this", "it", "its", "my",
"your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
"what", "how", "why", "when", "where", "about", "just", "let",
"want", "tell", "show", "think", "know", "see", "look", "make",
"get", "go", "some", "any", "all", "very", "really", "also", "too",
"so", "up", "out", "here", "there",
];
text.to_lowercase()
.split(|c: char| !c.is_alphanumeric())
.filter(|w| !w.is_empty() && w.len() > 2 && !STOP_WORDS.contains(w))
.take(max_terms)
.collect::<Vec<_>>()
.join(" ")
}
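#[cfg(test)]
mod tests {
    use super::*;
    // Sketch of term extraction: stop words drop out and at most
    // max_terms content words survive, in their original order.
    #[test]
    fn test_extract_query_terms() {
        let q = extract_query_terms("what do you think about the bcachefs snapshot design", 3);
        assert_eq!(q, "bcachefs snapshot design");
    }
}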

135
src/similarity.rs Normal file
View file

@ -0,0 +1,135 @@
// Text similarity: Porter-style stemming + term-frequency cosine
//
// Used for interference detection (similar content, different communities)
// and schema fit scoring. Intentionally simple — ~100 lines, no
// external dependencies.
use std::collections::HashMap;
/// Minimal Porter-style stemmer — handles the most common English suffixes.
/// Not linguistically complete but good enough for similarity matching.
pub fn stem(word: &str) -> String {
let w = word.to_lowercase();
if w.len() <= 3 { return w; }
let w = strip_suffix(&w, "ation", "ate");
let w = strip_suffix(&w, "ness", "");
let w = strip_suffix(&w, "ment", "");
let w = strip_suffix(&w, "ting", "t");
let w = strip_suffix(&w, "ling", "l");
let w = strip_suffix(&w, "ring", "r");
let w = strip_suffix(&w, "ning", "n");
let w = strip_suffix(&w, "ding", "d");
let w = strip_suffix(&w, "ping", "p");
let w = strip_suffix(&w, "ging", "g");
let w = strip_suffix(&w, "ying", "y");
let w = strip_suffix(&w, "ied", "y");
let w = strip_suffix(&w, "ies", "y");
let w = strip_suffix(&w, "ing", "");
let w = strip_suffix(&w, "ed", "");
let w = strip_suffix(&w, "ly", "");
let w = strip_suffix(&w, "er", "");
let w = strip_suffix(&w, "al", "");
strip_suffix(&w, "s", "")
}
fn strip_suffix(word: &str, suffix: &str, replacement: &str) -> String {
if word.len() > suffix.len() + 2 && word.ends_with(suffix) {
let base = &word[..word.len() - suffix.len()];
format!("{}{}", base, replacement)
} else {
word.to_string()
}
}
/// Tokenize and stem a text into a term frequency map
pub fn term_frequencies(text: &str) -> HashMap<String, u32> {
let mut tf = HashMap::new();
for word in text.split(|c: char| !c.is_alphanumeric()) {
if word.len() > 2 {
let stemmed = stem(word);
*tf.entry(stemmed).or_default() += 1;
}
}
tf
}
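// Example: term_frequencies("linked links linking") maps every form to
// the stem "link", so the result is {"link": 3}.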
/// Cosine similarity between two documents using stemmed term frequencies.
/// Returns 0.0 for disjoint vocabularies, 1.0 for identical content.
pub fn cosine_similarity(doc_a: &str, doc_b: &str) -> f32 {
let tf_a = term_frequencies(doc_a);
let tf_b = term_frequencies(doc_b);
if tf_a.is_empty() || tf_b.is_empty() {
return 0.0;
}
// Dot product
let mut dot = 0.0f64;
for (term, &freq_a) in &tf_a {
if let Some(&freq_b) = tf_b.get(term) {
dot += freq_a as f64 * freq_b as f64;
}
}
// Magnitudes
let mag_a: f64 = tf_a.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
let mag_b: f64 = tf_b.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
if mag_a < 1e-10 || mag_b < 1e-10 {
return 0.0;
}
(dot / (mag_a * mag_b)) as f32
}
/// Compute pairwise similarity for a set of documents.
/// Returns pairs with similarity above threshold.
pub fn pairwise_similar(
docs: &[(String, String)], // (key, content)
threshold: f32,
) -> Vec<(String, String, f32)> {
let mut results = Vec::new();
for i in 0..docs.len() {
for j in (i + 1)..docs.len() {
let sim = cosine_similarity(&docs[i].1, &docs[j].1);
if sim >= threshold {
results.push((docs[i].0.clone(), docs[j].0.clone(), sim));
}
}
}
results.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap());
results
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_stem() {
assert_eq!(stem("running"), "runn"); // -ning → n
assert_eq!(stem("talking"), "talk"); // not matched by specific consonant rules
assert_eq!(stem("slowly"), "slow"); // -ly
// The stemmer is minimal — it doesn't need to be perfect,
// just consistent enough that related words collide.
assert_eq!(stem("observations"), "observation"); // -s stripped, -ation stays (word too short after)
}
#[test]
fn test_cosine_identical() {
let text = "the quick brown fox jumps over the lazy dog";
let sim = cosine_similarity(text, text);
assert!((sim - 1.0).abs() < 0.01, "identical docs should have sim ~1.0, got {}", sim);
}
#[test]
fn test_cosine_different() {
let a = "kernel filesystem transaction restart handling";
let b = "cooking recipe chocolate cake baking temperature";
let sim = cosine_similarity(a, b);
assert!(sim < 0.1, "unrelated docs should have low sim, got {}", sim);
}
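    // A sketch of interference-style use: the near-duplicate pair
    // should come back first, sorted by descending similarity.
    #[test]
    fn test_pairwise_ordering() {
        let docs = vec![
            ("a".to_string(), "graph consolidation replay queue".to_string()),
            ("b".to_string(), "graph consolidation replay priority".to_string()),
            ("c".to_string(), "chocolate cake recipe".to_string()),
        ];
        let pairs = pairwise_similar(&docs, 0.3);
        assert_eq!(pairs[0].0, "a");
        assert_eq!(pairs[0].1, "b");
    }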
}