poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core: - Cap'n Proto append-only storage (nodes + relations) - Graph algorithms: clustering coefficient, community detection, schema fit, small-world metrics, interference detection - BM25 text similarity with Porter stemming - Spaced repetition replay queue - Commands: search, init, health, status, graph, categorize, link-add, link-impact, decay, consolidate-session, etc. Python scripts: - Episodic digest pipeline: daily/weekly/monthly-digest.py - retroactive-digest.py for backfilling - consolidation-agents.py: 3 parallel Sonnet agents - apply-consolidation.py: structured action extraction + apply - digest-link-parser.py: extract ~400 explicit links from digests - content-promotion-agent.py: promote episodic obs to semantic files - bulk-categorize.py: categorize all nodes via single Sonnet call - consolidation-loop.py: multi-round automated consolidation Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
commit
23fac4e5fe
35 changed files with 9388 additions and 0 deletions
186
src/bin/memory-search.rs
Normal file
186
src/bin/memory-search.rs
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
// memory-search: hook binary for ambient memory retrieval
|
||||
//
|
||||
// Reads JSON from stdin (Claude Code UserPromptSubmit hook format),
|
||||
// searches memory for relevant entries, outputs results tagged with
|
||||
// an anti-injection cookie.
|
||||
//
|
||||
// This is a thin wrapper that delegates to the poc-memory search
|
||||
// engine but formats output for the hook protocol.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::fs;
|
||||
use std::io::{self, Read, Write};
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
// Hook entry point: read the UserPromptSubmit JSON payload from stdin,
// search memory for relevant entries, and print results tagged with a
// per-session anti-injection cookie. Every failure path exits silently —
// a hook must never break the prompt flow.
fn main() {
    // Read the entire hook payload; on read failure we proceed with
    // whatever was read (unwrap_or_default discards the error).
    let mut input = String::new();
    io::stdin().read_to_string(&mut input).unwrap_or_default();

    // Payload must be JSON; anything else → silent exit.
    let json: serde_json::Value = match serde_json::from_str(&input) {
        Ok(v) => v,
        Err(_) => return,
    };

    // Both fields are required; missing keys yield "" and abort below.
    let prompt = json["prompt"].as_str().unwrap_or("");
    let session_id = json["session_id"].as_str().unwrap_or("");

    if prompt.is_empty() || session_id.is_empty() {
        return;
    }

    // Skip short prompts
    let word_count = prompt.split_whitespace().count();
    if word_count < 3 {
        return;
    }

    // Skip system/idle prompts
    for prefix in &["Kent is AFK", "You're on your own", "IRC mention"] {
        if prompt.starts_with(prefix) {
            return;
        }
    }

    // Extract search terms (strip stop words)
    let query = extract_query_terms(prompt, 3);
    if query.is_empty() {
        return;
    }

    // Run poc-memory search (binary resolved via PATH). Spawn failure or
    // a non-zero exit status aborts the hook silently.
    let output = Command::new("poc-memory")
        .args(["search", &query])
        .output();

    let search_output = match output {
        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
        _ => return,
    };

    if search_output.trim().is_empty() {
        return;
    }

    // Session state for dedup.
    // NOTE(review): a predictable world-writable path under /tmp is shared
    // across users — consider a per-user runtime directory.
    let state_dir = PathBuf::from("/tmp/claude-memory-search");
    fs::create_dir_all(&state_dir).ok();

    let cookie = load_or_create_cookie(&state_dir, session_id);
    let seen = load_seen(&state_dir, session_id);

    // Parse search output and filter
    let mut result_output = String::new();
    let mut count = 0;
    // Cap on memories injected per prompt.
    let max_entries = 5;

    for line in search_output.lines() {
        if count >= max_entries { break; }

        // Lines starting with → or space+number are results
        let trimmed = line.trim();
        if trimmed.is_empty() { continue; }

        // Extract key from result line like "→ 1. [0.83/0.83] identity.md (c4)"
        if let Some(key) = extract_key_from_line(trimmed) {
            // Already shown this session → skip; otherwise record it
            // immediately so concurrent prompts don't double-inject.
            if seen.contains(&key) { continue; }
            mark_seen(&state_dir, session_id, &key);
            result_output.push_str(line);
            result_output.push('\n');
            count += 1;
        } else if count > 0 {
            // Snippet line following a result
            // NOTE(review): snippet lines that follow a *skipped* (already
            // seen) result are still appended under the previously kept
            // result — confirm this attachment is intended.
            result_output.push_str(line);
            result_output.push('\n');
        }
    }

    if count == 0 { return; }

    // The cookie lets downstream consumers distinguish genuine recalled
    // memories from prompt-injected imitations of this header.
    println!("Recalled memories [{}]:", cookie);
    print!("{}", result_output);
}
|
||||
|
||||
/// Reduce a prompt to at most `max_terms` informative search terms.
///
/// Lowercases the text, splits on non-alphanumeric characters, drops
/// words of two characters or fewer and stop words, and joins the first
/// `max_terms` survivors with single spaces.
fn extract_query_terms(text: &str, max_terms: usize) -> String {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
        "have", "has", "had", "will", "would", "could", "should", "can",
        "may", "might", "shall", "been", "being", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
        "no", "if", "then", "than", "that", "this", "it", "its", "my",
        "your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
        "what", "how", "why", "when", "where", "about", "just", "let",
        "want", "tell", "show", "think", "know", "see", "look", "make",
        "get", "go", "some", "any", "all", "very", "really", "also", "too",
        "so", "up", "out", "here", "there",
    ];

    let lowered = text.to_lowercase();
    let mut terms: Vec<&str> = Vec::new();
    for word in lowered.split(|c: char| !c.is_alphanumeric()) {
        if terms.len() == max_terms {
            break;
        }
        // len() > 2 also excludes empty fragments from adjacent separators.
        if word.len() > 2 && !STOP_WORDS.contains(&word) {
            terms.push(word);
        }
    }
    terms.join(" ")
}
|
||||
|
||||
/// Pull the memory key out of a search-result line such as
/// "→ 1. [0.83/0.83] identity.md (c4)" or " 1. [0.83/0.83] identity.md (c4)".
///
/// Returns `None` for lines without the "] " marker or whose candidate
/// key is empty or lacks a '.' (result keys are file-like names).
fn extract_key_from_line(line: &str) -> Option<String> {
    // Everything after the first "] " is the key plus optional " (c…)" suffix.
    let (_, tail) = line.split_once("] ")?;
    let key = match tail.find(" (c") {
        Some(pos) => tail[..pos].trim(),
        None => tail.trim(),
    };
    if key.is_empty() || !key.contains('.') {
        return None;
    }
    Some(key.to_string())
}
|
||||
|
||||
fn load_or_create_cookie(dir: &PathBuf, session_id: &str) -> String {
|
||||
let path = dir.join(format!("cookie-{}", session_id));
|
||||
if path.exists() {
|
||||
fs::read_to_string(&path).unwrap_or_default().trim().to_string()
|
||||
} else {
|
||||
let cookie = generate_cookie();
|
||||
fs::write(&path, &cookie).ok();
|
||||
cookie
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a 16-character alphanumeric session cookie.
///
/// Fixes two defects in the previous implementation: it shelled out to
/// `head -c 12 /dev/urandom` (subprocess spawn, non-portable even among
/// unixes with different `head` flags), and it read only 12 bytes, so
/// the trailing `.take(16)` was a no-op and the cookie was actually 12
/// characters. We now read 16 bytes straight from /dev/urandom.
///
/// The modulo-62 mapping is slightly biased (256 is not a multiple of
/// 62); that is fine for an anti-injection tag, not for key material.
fn generate_cookie() -> String {
    use std::io::Read;

    let mut bytes = [0u8; 16];
    std::fs::File::open("/dev/urandom")
        .and_then(|mut f| f.read_exact(&mut bytes))
        .expect("failed to read urandom");

    bytes.iter()
        .map(|b| {
            // Map each byte onto [0-9a-zA-Z].
            let idx = (*b as usize) % 62;
            if idx < 10 { (b'0' + idx as u8) as char }
            else if idx < 36 { (b'a' + (idx - 10) as u8) as char }
            else { (b'A' + (idx - 36) as u8) as char }
        })
        .collect()
}
|
||||
|
||||
/// Load the set of memory keys already shown in this session (one key
/// per line in the session's seen-file).
///
/// Takes `&Path` instead of `&PathBuf` (idiomatic, backward compatible
/// via deref coercion). A missing or unreadable state file yields an
/// empty set; the `exists()`-then-read pattern was replaced by a single
/// read to remove the TOCTOU window.
fn load_seen(dir: &Path, session_id: &str) -> HashSet<String> {
    let path = dir.join(format!("seen-{}", session_id));
    fs::read_to_string(path)
        .unwrap_or_default()
        .lines()
        .map(|s| s.to_string())
        .collect()
}
|
||||
|
||||
/// Append `key` to the session's seen-file (one key per line).
///
/// Takes `&Path` instead of `&PathBuf` (idiomatic, backward compatible
/// via deref coercion). Errors are deliberately swallowed: dedup is
/// best-effort and must never break the hook.
fn mark_seen(dir: &Path, session_id: &str, key: &str) {
    let path = dir.join(format!("seen-{}", session_id));
    if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
        writeln!(f, "{}", key).ok();
    }
}
|
||||
1067
src/capnp_store.rs
Normal file
1067
src/capnp_store.rs
Normal file
File diff suppressed because it is too large
Load diff
685
src/graph.rs
Normal file
685
src/graph.rs
Normal file
|
|
@ -0,0 +1,685 @@
|
|||
// Graph algorithms: clustering coefficient, community detection (label
|
||||
// propagation), schema fit scoring, small-world metrics, consolidation
|
||||
// priority scoring.
|
||||
//
|
||||
// The Graph is built from the Store's nodes + relations. Edges are
|
||||
// undirected for clustering/community (even causal edges count as
|
||||
// connections), but relation type and direction are preserved for
|
||||
// specific queries.
|
||||
|
||||
use crate::capnp_store::{Store, RelationType};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
/// Weighted edge in the graph
#[derive(Clone, Debug)]
pub struct Edge {
    /// Key of the node this edge points at.
    pub target: String,
    /// Relation strength; used as the vote weight in label propagation.
    pub strength: f32,
    /// Original relation type — preserved as metadata even though the
    /// graph algorithms treat edges as undirected.
    pub rel_type: RelationType,
}
|
||||
|
||||
/// The in-memory graph built from store nodes + relations
pub struct Graph {
    /// Adjacency list: node key → list of edges.
    /// Each undirected edge is stored twice, once per endpoint.
    adj: HashMap<String, Vec<Edge>>,
    /// All node keys (including isolates with no entry in `adj`).
    keys: HashSet<String>,
    /// Community labels (from label propagation), compacted to 0..n.
    communities: HashMap<String, u32>,
}
|
||||
|
||||
impl Graph {
|
||||
pub fn nodes(&self) -> &HashSet<String> {
|
||||
&self.keys
|
||||
}
|
||||
|
||||
pub fn degree(&self, key: &str) -> usize {
|
||||
self.adj.get(key).map(|e| e.len()).unwrap_or(0)
|
||||
}
|
||||
|
||||
pub fn edge_count(&self) -> usize {
|
||||
self.adj.values().map(|e| e.len()).sum::<usize>() / 2
|
||||
}
|
||||
|
||||
/// All neighbor keys with strengths
|
||||
pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> {
|
||||
self.adj.get(key)
|
||||
.map(|edges| edges.iter().map(|e| (&e.target, e.strength)).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Just neighbor keys
|
||||
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
|
||||
self.adj.get(key)
|
||||
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn community_count(&self) -> usize {
|
||||
let labels: HashSet<_> = self.communities.values().collect();
|
||||
labels.len()
|
||||
}
|
||||
|
||||
pub fn communities(&self) -> &HashMap<String, u32> {
|
||||
&self.communities
|
||||
}
|
||||
|
||||
/// Local clustering coefficient: fraction of a node's neighbors
|
||||
/// that are also neighbors of each other.
|
||||
/// cc(v) = 2E / (deg * (deg - 1))
|
||||
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
||||
let neighbors = self.neighbor_keys(key);
|
||||
let deg = neighbors.len();
|
||||
if deg < 2 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
|
||||
let mut triangles = 0u32;
|
||||
for i in 0..neighbor_vec.len() {
|
||||
for j in (i + 1)..neighbor_vec.len() {
|
||||
let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
|
||||
if ni_neighbors.contains(neighbor_vec[j]) {
|
||||
triangles += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
|
||||
}
|
||||
|
||||
/// Average clustering coefficient across all nodes with deg >= 2
|
||||
pub fn avg_clustering_coefficient(&self) -> f32 {
|
||||
let mut sum = 0.0f32;
|
||||
let mut count = 0u32;
|
||||
for key in &self.keys {
|
||||
if self.degree(key) >= 2 {
|
||||
sum += self.clustering_coefficient(key);
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
if count == 0 { 0.0 } else { sum / count as f32 }
|
||||
}
|
||||
|
||||
/// Average shortest path length (sampled BFS from up to 100 nodes)
|
||||
pub fn avg_path_length(&self) -> f32 {
|
||||
let sample: Vec<&String> = self.keys.iter().take(100).collect();
|
||||
if sample.is_empty() { return 0.0; }
|
||||
|
||||
let mut total_dist = 0u64;
|
||||
let mut total_pairs = 0u64;
|
||||
|
||||
for &start in &sample {
|
||||
let dists = self.bfs_distances(start);
|
||||
for d in dists.values() {
|
||||
if *d > 0 {
|
||||
total_dist += *d as u64;
|
||||
total_pairs += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if total_pairs == 0 { 0.0 } else { total_dist as f32 / total_pairs as f32 }
|
||||
}
|
||||
|
||||
fn bfs_distances(&self, start: &str) -> HashMap<String, u32> {
|
||||
let mut dist = HashMap::new();
|
||||
let mut queue = VecDeque::new();
|
||||
dist.insert(start.to_string(), 0u32);
|
||||
queue.push_back(start.to_string());
|
||||
|
||||
while let Some(node) = queue.pop_front() {
|
||||
let d = dist[&node];
|
||||
for neighbor in self.neighbor_keys(&node) {
|
||||
if !dist.contains_key(neighbor) {
|
||||
dist.insert(neighbor.to_string(), d + 1);
|
||||
queue.push_back(neighbor.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
dist
|
||||
}
|
||||
|
||||
/// Power-law exponent α of the degree distribution.
|
||||
///
|
||||
/// Estimated via MLE: α = 1 + n / Σ ln(k_i / (k_min - 0.5))
|
||||
/// α ≈ 2: extreme hub dominance (fragile)
|
||||
/// α ≈ 3: healthy scale-free
|
||||
/// α > 3: approaching random graph (egalitarian)
|
||||
pub fn degree_power_law_exponent(&self) -> f32 {
|
||||
let mut degrees: Vec<usize> = self.keys.iter()
|
||||
.map(|k| self.degree(k))
|
||||
.filter(|&d| d > 0) // exclude isolates
|
||||
.collect();
|
||||
if degrees.len() < 10 { return 0.0; } // not enough data
|
||||
|
||||
degrees.sort_unstable();
|
||||
let k_min = degrees[0] as f64;
|
||||
if k_min < 1.0 { return 0.0; }
|
||||
|
||||
let n = degrees.len() as f64;
|
||||
let sum_ln: f64 = degrees.iter()
|
||||
.map(|&k| (k as f64 / (k_min - 0.5)).ln())
|
||||
.sum();
|
||||
|
||||
if sum_ln <= 0.0 { return 0.0; }
|
||||
(1.0 + n / sum_ln) as f32
|
||||
}
|
||||
|
||||
/// Gini coefficient of the degree distribution.
|
||||
///
|
||||
/// 0 = perfectly egalitarian (all nodes same degree)
|
||||
/// 1 = maximally unequal (one node has all edges)
|
||||
/// Measures hub concentration independent of distribution shape.
|
||||
pub fn degree_gini(&self) -> f32 {
|
||||
let mut degrees: Vec<f64> = self.keys.iter()
|
||||
.map(|k| self.degree(k) as f64)
|
||||
.collect();
|
||||
let n = degrees.len();
|
||||
if n < 2 { return 0.0; }
|
||||
|
||||
degrees.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
let mean = degrees.iter().sum::<f64>() / n as f64;
|
||||
if mean < 1e-10 { return 0.0; }
|
||||
|
||||
// Gini = (2 Σ i·x_i) / (n Σ x_i) - (n+1)/n
|
||||
let weighted_sum: f64 = degrees.iter().enumerate()
|
||||
.map(|(i, &d)| (i as f64 + 1.0) * d)
|
||||
.sum();
|
||||
let total = degrees.iter().sum::<f64>();
|
||||
|
||||
let gini = (2.0 * weighted_sum) / (n as f64 * total) - (n as f64 + 1.0) / n as f64;
|
||||
gini.max(0.0) as f32
|
||||
}
|
||||
|
||||
/// Small-world coefficient σ = (C/C_rand) / (L/L_rand)
|
||||
/// C_rand ≈ <k>/n, L_rand ≈ ln(n)/ln(<k>)
|
||||
pub fn small_world_sigma(&self) -> f32 {
|
||||
let n = self.keys.len() as f32;
|
||||
if n < 10.0 { return 0.0; }
|
||||
|
||||
let avg_degree = self.adj.values()
|
||||
.map(|e| e.len() as f32)
|
||||
.sum::<f32>() / n;
|
||||
if avg_degree < 1.0 { return 0.0; }
|
||||
|
||||
let c = self.avg_clustering_coefficient();
|
||||
let l = self.avg_path_length();
|
||||
|
||||
let c_rand = avg_degree / n;
|
||||
let l_rand = n.ln() / avg_degree.ln();
|
||||
|
||||
if c_rand < 1e-10 || l_rand < 1e-10 || l < 1e-10 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
(c / c_rand) / (l / l_rand)
|
||||
}
|
||||
}
|
||||
|
||||
/// Impact of adding a hypothetical edge.
///
/// All deltas are estimates computed without mutating the graph; see
/// `Graph::link_impact` for how they are derived.
#[derive(Debug)]
pub struct LinkImpact {
    /// Proposed edge source key.
    pub source: String,
    /// Proposed edge target key.
    pub target: String,
    /// Source degree before the edge is added.
    pub source_deg: usize,
    /// Target degree before the edge is added.
    pub target_deg: usize,
    /// Is this a hub link? (either endpoint in top 5% by degree)
    pub is_hub_link: bool,
    /// Are both endpoints in the same community?
    pub same_community: bool,
    /// Change in clustering coefficient for source
    pub delta_cc_source: f32,
    /// Change in clustering coefficient for target
    pub delta_cc_target: f32,
    /// Change in degree Gini (positive = more hub-dominated)
    pub delta_gini: f32,
    /// Qualitative assessment
    pub assessment: &'static str,
}
|
||||
|
||||
impl Graph {
    /// Simulate adding an edge and report impact on topology metrics.
    ///
    /// Doesn't modify the graph — computes what would change if the
    /// edge were added.
    pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact {
        let source_deg = self.degree(source);
        let target_deg = self.degree(target);

        // Hub threshold: top 5% by degree
        let mut all_degrees: Vec<usize> = self.keys.iter()
            .map(|k| self.degree(k))
            .collect();
        all_degrees.sort_unstable();
        let hub_threshold = if all_degrees.len() >= 20 {
            all_degrees[all_degrees.len() * 95 / 100]
        } else {
            usize::MAX // can't define hubs with <20 nodes
        };
        let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold;

        // Community check. Endpoints missing a community label never
        // count as "same community".
        let sc = self.communities.get(source);
        let tc = self.communities.get(target);
        let same_community = match (sc, tc) {
            (Some(a), Some(b)) => a == b,
            _ => false,
        };

        // CC change for source: adding target as neighbor changes the
        // triangle count. New triangles form for each node that's a
        // neighbor of BOTH source and target.
        let source_neighbors = self.neighbor_keys(source);
        let target_neighbors = self.neighbor_keys(target);
        let shared_neighbors = source_neighbors.intersection(&target_neighbors).count();

        let cc_before_source = self.clustering_coefficient(source);
        let cc_before_target = self.clustering_coefficient(target);

        // Estimate new CC for source after adding edge.
        // NOTE(review): the current triangle count is reconstructed from
        // the float CC and truncated with `as u32`, so rounding may be
        // off by one triangle — acceptable for a qualitative estimate.
        let new_source_deg = source_deg + 1;
        let new_source_triangles = if source_deg >= 2 {
            // Current triangles + new ones from shared neighbors
            let current_triangles = (cc_before_source
                * source_deg as f32 * (source_deg as f32 - 1.0) / 2.0) as u32;
            current_triangles + shared_neighbors as u32
        } else {
            // deg < 2 means CC was 0; only the new shared-neighbor
            // triangles exist after the edge.
            shared_neighbors as u32
        };
        let cc_after_source = if new_source_deg >= 2 {
            (2.0 * new_source_triangles as f32)
                / (new_source_deg as f32 * (new_source_deg as f32 - 1.0))
        } else {
            0.0
        };

        // Mirror of the estimate above, for the target endpoint.
        let new_target_deg = target_deg + 1;
        let new_target_triangles = if target_deg >= 2 {
            let current_triangles = (cc_before_target
                * target_deg as f32 * (target_deg as f32 - 1.0) / 2.0) as u32;
            current_triangles + shared_neighbors as u32
        } else {
            shared_neighbors as u32
        };
        let cc_after_target = if new_target_deg >= 2 {
            (2.0 * new_target_triangles as f32)
                / (new_target_deg as f32 * (new_target_deg as f32 - 1.0))
        } else {
            0.0
        };

        // Gini change via influence function:
        // IF(x; Gini, F) = (2F(x) - 1) * x/μ - Gini - 1
        // Adding an edge increments two degrees. The net ΔGini is the sum
        // of influence contributions from both endpoints shifting up by 1.
        let gini_before = self.degree_gini();
        let n = self.keys.len();
        let total_degree: f64 = self.keys.iter()
            .map(|k| self.degree(k) as f64)
            .sum();
        let mean_deg = if n > 0 { total_degree / n as f64 } else { 1.0 };

        // CDF at each endpoint's degree: fraction of nodes with degree ≤ d
        let delta_gini = if mean_deg > 1e-10 && n >= 2 {
            // Count nodes with degree ≤ source_deg and ≤ target_deg
            let f_source = self.keys.iter()
                .filter(|k| self.degree(k) <= source_deg)
                .count() as f64 / n as f64;
            let f_target = self.keys.iter()
                .filter(|k| self.degree(k) <= target_deg)
                .count() as f64 / n as f64;

            // Influence of incrementing source's degree by 1
            let new_source = (source_deg + 1) as f64;
            let if_source = (2.0 * f_source - 1.0) * new_source / mean_deg
                - gini_before as f64 - 1.0;
            // Influence of incrementing target's degree by 1
            let new_target = (target_deg + 1) as f64;
            let if_target = (2.0 * f_target - 1.0) * new_target / mean_deg
                - gini_before as f64 - 1.0;

            // Scale: each point contributes 1/n to the distribution
            ((if_source + if_target) / n as f64) as f32
        } else {
            0.0f32
        };

        // Qualitative assessment — branches checked in order, first
        // match wins; the final arm is unreachable in practice since the
        // hub/community combinations above are exhaustive.
        let assessment = if is_hub_link && same_community {
            "hub-reinforcing: strengthens existing star topology"
        } else if is_hub_link && !same_community {
            "hub-bridging: cross-community but through a hub"
        } else if !is_hub_link && same_community && shared_neighbors > 0 {
            "lateral-clustering: strengthens local mesh topology"
        } else if !is_hub_link && !same_community {
            "lateral-bridging: best kind — cross-community lateral link"
        } else if !is_hub_link && same_community {
            "lateral-local: connects peripheral nodes in same community"
        } else {
            "neutral"
        };

        LinkImpact {
            source: source.to_string(),
            target: target.to_string(),
            source_deg,
            target_deg,
            is_hub_link,
            same_community,
            delta_cc_source: cc_after_source - cc_before_source,
            delta_cc_target: cc_after_target - cc_before_target,
            delta_gini: delta_gini,
            assessment,
        }
    }
}
|
||||
|
||||
/// Build graph from store data
|
||||
pub fn build_graph(store: &Store) -> Graph {
|
||||
let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
|
||||
let keys: HashSet<String> = store.nodes.keys().cloned().collect();
|
||||
|
||||
// Build adjacency from relations
|
||||
for rel in &store.relations {
|
||||
let source_key = &rel.source_key;
|
||||
let target_key = &rel.target_key;
|
||||
|
||||
// Both keys must exist as nodes
|
||||
if !keys.contains(source_key) || !keys.contains(target_key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Add bidirectional edges (even for causal — direction is metadata)
|
||||
adj.entry(source_key.clone()).or_default().push(Edge {
|
||||
target: target_key.clone(),
|
||||
strength: rel.strength,
|
||||
rel_type: rel.rel_type,
|
||||
});
|
||||
adj.entry(target_key.clone()).or_default().push(Edge {
|
||||
target: source_key.clone(),
|
||||
strength: rel.strength,
|
||||
rel_type: rel.rel_type,
|
||||
});
|
||||
}
|
||||
|
||||
// Run community detection
|
||||
let communities = label_propagation(&keys, &adj, 20);
|
||||
|
||||
Graph { adj, keys, communities }
|
||||
}
|
||||
|
||||
/// Label propagation community detection.
|
||||
///
|
||||
/// Each node starts with its own label. Each iteration: adopt the most
|
||||
/// common label among neighbors (weighted by edge strength). Iterate
|
||||
/// until stable or max_iterations.
|
||||
fn label_propagation(
|
||||
keys: &HashSet<String>,
|
||||
adj: &HashMap<String, Vec<Edge>>,
|
||||
max_iterations: u32,
|
||||
) -> HashMap<String, u32> {
|
||||
// Initialize: each node gets its own label
|
||||
let key_vec: Vec<String> = keys.iter().cloned().collect();
|
||||
let mut labels: HashMap<String, u32> = key_vec.iter()
|
||||
.enumerate()
|
||||
.map(|(i, k)| (k.clone(), i as u32))
|
||||
.collect();
|
||||
|
||||
for _iter in 0..max_iterations {
|
||||
let mut changed = false;
|
||||
|
||||
for key in &key_vec {
|
||||
let edges = match adj.get(key) {
|
||||
Some(e) => e,
|
||||
None => continue,
|
||||
};
|
||||
if edges.is_empty() { continue; }
|
||||
|
||||
// Count weighted votes for each label
|
||||
let mut votes: HashMap<u32, f32> = HashMap::new();
|
||||
for edge in edges {
|
||||
if let Some(&label) = labels.get(&edge.target) {
|
||||
*votes.entry(label).or_default() += edge.strength;
|
||||
}
|
||||
}
|
||||
|
||||
// Adopt the label with most votes
|
||||
if let Some((&best_label, _)) = votes.iter()
|
||||
.max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
|
||||
{
|
||||
let current = labels[key];
|
||||
if best_label != current {
|
||||
labels.insert(key.clone(), best_label);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !changed { break; }
|
||||
}
|
||||
|
||||
// Compact labels to 0..n
|
||||
let mut label_map: HashMap<u32, u32> = HashMap::new();
|
||||
let mut next_id = 0;
|
||||
for label in labels.values_mut() {
|
||||
let new_label = *label_map.entry(*label).or_insert_with(|| {
|
||||
let id = next_id;
|
||||
next_id += 1;
|
||||
id
|
||||
});
|
||||
*label = new_label;
|
||||
}
|
||||
|
||||
labels
|
||||
}
|
||||
|
||||
/// Schema fit: for a node, measure how well-connected its neighbors are
|
||||
/// to each other. High density + high CC among neighbors = good schema fit.
|
||||
pub fn schema_fit(graph: &Graph, key: &str) -> f32 {
|
||||
let neighbors = graph.neighbor_keys(key);
|
||||
let n = neighbors.len();
|
||||
if n < 2 {
|
||||
return 0.0; // isolated or leaf — no schema context
|
||||
}
|
||||
|
||||
// Count edges among neighbors
|
||||
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
|
||||
let mut inter_edges = 0u32;
|
||||
for i in 0..neighbor_vec.len() {
|
||||
for j in (i + 1)..neighbor_vec.len() {
|
||||
let ni_neighbors = graph.neighbor_keys(neighbor_vec[i]);
|
||||
if ni_neighbors.contains(neighbor_vec[j]) {
|
||||
inter_edges += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let max_edges = (n * (n - 1)) / 2;
|
||||
let density = if max_edges == 0 { 0.0 } else {
|
||||
inter_edges as f32 / max_edges as f32
|
||||
};
|
||||
|
||||
// Combine neighborhood density with own CC
|
||||
let cc = graph.clustering_coefficient(key);
|
||||
(density + cc) / 2.0
|
||||
}
|
||||
|
||||
/// Compute schema fit for all nodes
|
||||
pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
|
||||
graph.nodes().iter()
|
||||
.map(|key| (key.clone(), schema_fit(graph, key)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// A snapshot of graph topology metrics, for tracking evolution over time.
/// Serialized as one JSON object per line in the metrics log.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetricsSnapshot {
    /// Seconds since the Unix epoch when the snapshot was taken.
    pub timestamp: f64,
    /// Human-readable timestamp ("%Y-%m-%d %H:%M", from the `date` command).
    pub date: String,
    /// Node count.
    pub nodes: usize,
    /// Undirected edge count.
    pub edges: usize,
    /// Number of detected communities (label propagation).
    pub communities: usize,
    /// Small-world coefficient σ (>1 suggests small-world structure).
    pub sigma: f32,
    /// Power-law exponent α of the degree distribution.
    pub alpha: f32,
    /// Gini coefficient of the degree distribution (hub concentration).
    pub gini: f32,
    /// Average local clustering coefficient.
    pub avg_cc: f32,
    /// Average shortest path length (sampled BFS estimate).
    pub avg_path_length: f32,
    /// Average schema-fit score across all nodes.
    pub avg_schema_fit: f32,
}
|
||||
|
||||
/// Path of the append-only metrics log: $HOME/.claude/memory/metrics.jsonl.
/// When HOME is unset this degrades to a relative path.
fn metrics_log_path() -> std::path::PathBuf {
    let mut path = std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default());
    path.push(".claude/memory/metrics.jsonl");
    path
}
|
||||
|
||||
/// Load previous metrics snapshots
|
||||
pub fn load_metrics_history() -> Vec<MetricsSnapshot> {
|
||||
let path = metrics_log_path();
|
||||
let content = match std::fs::read_to_string(&path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
content.lines()
|
||||
.filter_map(|line| serde_json::from_str(line).ok())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Append a metrics snapshot to the log
|
||||
pub fn save_metrics_snapshot(snap: &MetricsSnapshot) {
|
||||
let path = metrics_log_path();
|
||||
if let Ok(json) = serde_json::to_string(snap) {
|
||||
use std::io::Write;
|
||||
if let Ok(mut f) = std::fs::OpenOptions::new()
|
||||
.create(true).append(true).open(&path)
|
||||
{
|
||||
let _ = writeln!(f, "{}", json);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Health report: summary of graph metrics.
///
/// Builds a human-readable report of topology metrics and category
/// counts. Side effect: also appends a `MetricsSnapshot` to the metrics
/// log, then reloads the history to show per-metric deltas against the
/// previous snapshot.
pub fn health_report(graph: &Graph, store: &Store) -> String {
    let n = graph.nodes().len();
    let e = graph.edge_count();
    let avg_cc = graph.avg_clustering_coefficient();
    let avg_pl = graph.avg_path_length();
    let sigma = graph.small_world_sigma();
    let communities = graph.community_count();

    // Community sizes
    let mut comm_sizes: HashMap<u32, usize> = HashMap::new();
    for label in graph.communities().values() {
        *comm_sizes.entry(*label).or_default() += 1;
    }
    // Sorted descending so the largest communities come first.
    let mut sizes: Vec<usize> = comm_sizes.values().copied().collect();
    sizes.sort_unstable_by(|a, b| b.cmp(a));

    // Degree distribution
    let mut degrees: Vec<usize> = graph.nodes().iter()
        .map(|k| graph.degree(k))
        .collect();
    degrees.sort_unstable();
    let max_deg = degrees.last().copied().unwrap_or(0);
    let median_deg = if degrees.is_empty() { 0 } else { degrees[degrees.len() / 2] };
    let avg_deg = if n == 0 { 0.0 } else {
        degrees.iter().sum::<usize>() as f64 / n as f64
    };

    // Topology metrics
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();

    // Schema fit distribution
    let fits = schema_fit_all(graph);
    let avg_fit = if fits.is_empty() { 0.0 } else {
        fits.values().sum::<f32>() / fits.len() as f32
    };
    let low_fit = fits.values().filter(|&&f| f < 0.1).count();

    // Category breakdown
    let cats = store.category_counts();

    // Snapshot current metrics and log
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64();
    // NOTE(review): shells out to `date` for formatting; the fallback
    // spawns `echo` and `.unwrap()`s it — if both spawns fail this
    // panics. Consider returning a fixed placeholder instead.
    let date = {
        let out = std::process::Command::new("date").arg("+%Y-%m-%d %H:%M")
            .output().unwrap_or_else(|_| std::process::Command::new("echo").output().unwrap());
        String::from_utf8_lossy(&out.stdout).trim().to_string()
    };
    let snap = MetricsSnapshot {
        timestamp: now,
        date: date.clone(),
        nodes: n, edges: e, communities,
        sigma, alpha, gini, avg_cc,
        avg_path_length: avg_pl,
        avg_schema_fit: avg_fit,
    };
    save_metrics_snapshot(&snap);

    // Load history for deltas
    let history = load_metrics_history();
    let prev = if history.len() >= 2 {
        Some(&history[history.len() - 2]) // second-to-last (last is the one we just wrote)
    } else {
        None
    };

    // Formats " (Δ±x.xxx)" for a changed metric, or "" when there is no
    // previous snapshot / the change is below the display threshold.
    fn delta(current: f32, prev: Option<f32>) -> String {
        match prev {
            Some(p) => {
                let d = current - p;
                if d.abs() < 0.001 { String::new() }
                else { format!(" (Δ{:+.3})", d) }
            }
            None => String::new(),
        }
    }

    let sigma_d = delta(sigma, prev.map(|p| p.sigma));
    let alpha_d = delta(alpha, prev.map(|p| p.alpha));
    let gini_d = delta(gini, prev.map(|p| p.gini));
    let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
    let fit_d = delta(avg_fit, prev.map(|p| p.avg_schema_fit));

    let mut report = format!(
        "Memory Health Report
====================
Nodes: {n} Relations: {e} Communities: {communities}

Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
Clustering coefficient (avg): {avg_cc:.4}{cc_d}
Average path length: {avg_pl:.2}
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)

Community sizes (top 5): {top5}
Schema fit: avg={avg_fit:.3}{fit_d} low-fit (<0.1): {low_fit} nodes

Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
        top5 = sizes.iter().take(5)
            .map(|s| s.to_string())
            .collect::<Vec<_>>()
            .join(", "),
        core = cats.get("core").unwrap_or(&0),
        tech = cats.get("tech").unwrap_or(&0),
        gen = cats.get("gen").unwrap_or(&0),
        obs = cats.get("obs").unwrap_or(&0),
        task = cats.get("task").unwrap_or(&0),
    );

    // Show history trend if we have enough data points
    if history.len() >= 3 {
        report.push_str("\n\nMetrics history (last 5):\n");
        // rev().take(5) grabs the newest five, the second rev() restores
        // chronological order for display.
        for snap in history.iter().rev().take(5).collect::<Vec<_>>().into_iter().rev() {
            report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
                snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc, snap.avg_schema_fit));
        }
    }

    report
}
|
||||
766
src/main.rs
Normal file
766
src/main.rs
Normal file
|
|
@ -0,0 +1,766 @@
|
|||
#![allow(dead_code)]
|
||||
// poc-memory: graph-structured memory with append-only Cap'n Proto storage
|
||||
//
|
||||
// Architecture:
|
||||
// nodes.capnp - append-only content node log
|
||||
// relations.capnp - append-only relation log
|
||||
// state.bin - derived KV cache (rebuilt from logs when stale)
|
||||
//
|
||||
// Graph algorithms: clustering coefficient, community detection (label
|
||||
// propagation), schema fit scoring, small-world metrics, consolidation
|
||||
// priority. Text similarity via BM25 with Porter stemming.
|
||||
//
|
||||
// Neuroscience-inspired: spaced repetition replay, emotional gating,
|
||||
// interference detection, schema assimilation, reconsolidation.
|
||||
|
||||
mod capnp_store;
|
||||
mod graph;
|
||||
mod search;
|
||||
mod similarity;
|
||||
mod migrate;
|
||||
mod neuro;
|
||||
|
||||
pub mod memory_capnp {
|
||||
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
|
||||
}
|
||||
|
||||
use std::env;
|
||||
use std::process;
|
||||
|
||||
fn main() {
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() < 2 {
|
||||
usage();
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
let result = match args[1].as_str() {
|
||||
"search" => cmd_search(&args[2..]),
|
||||
"init" => cmd_init(),
|
||||
"migrate" => cmd_migrate(),
|
||||
"health" => cmd_health(),
|
||||
"status" => cmd_status(),
|
||||
"graph" => cmd_graph(),
|
||||
"used" => cmd_used(&args[2..]),
|
||||
"wrong" => cmd_wrong(&args[2..]),
|
||||
"gap" => cmd_gap(&args[2..]),
|
||||
"categorize" => cmd_categorize(&args[2..]),
|
||||
"decay" => cmd_decay(),
|
||||
"consolidate-batch" => cmd_consolidate_batch(&args[2..]),
|
||||
"log" => cmd_log(),
|
||||
"params" => cmd_params(),
|
||||
"link" => cmd_link(&args[2..]),
|
||||
"replay-queue" => cmd_replay_queue(&args[2..]),
|
||||
"interference" => cmd_interference(&args[2..]),
|
||||
"link-add" => cmd_link_add(&args[2..]),
|
||||
"link-impact" => cmd_link_impact(&args[2..]),
|
||||
"consolidate-session" => cmd_consolidate_session(),
|
||||
"daily-check" => cmd_daily_check(),
|
||||
"apply-agent" => cmd_apply_agent(&args[2..]),
|
||||
"digest" => cmd_digest(&args[2..]),
|
||||
"trace" => cmd_trace(&args[2..]),
|
||||
_ => {
|
||||
eprintln!("Unknown command: {}", args[1]);
|
||||
usage();
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = result {
|
||||
eprintln!("Error: {}", e);
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/// Print the full command reference to stderr.
/// Kept as one literal so the help text stays greppable in the source.
fn usage() {
    eprintln!("poc-memory v0.4.0 — graph-structured memory store

Commands:
 search QUERY [QUERY...] Search memory (AND logic across terms)
 init Scan markdown files, index all memory units
 migrate Migrate from old weights.json system
 health Report graph metrics (CC, communities, small-world)
 status Summary of memory state
 graph Show graph structure overview
 used KEY Mark a memory as useful (boosts weight)
 wrong KEY [CONTEXT] Mark a memory as wrong/irrelevant
 gap DESCRIPTION Record a gap in memory coverage
 categorize KEY CATEGORY Reassign category (core/tech/gen/obs/task)
 decay Apply daily weight decay
 consolidate-batch [--count N] [--auto]
 Run agent consolidation on priority nodes
 log Show recent retrieval log
 params Show current parameters
 link N Interactive graph walk from search result N
 replay-queue [--count N] Show spaced repetition replay queue
 interference [--threshold F]
 Detect potentially confusable memory pairs
 link-add SOURCE TARGET [REASON]
 Add a link between two nodes
 link-impact SOURCE TARGET Simulate adding an edge, report topology impact
 consolidate-session Analyze metrics, plan agent allocation
 daily-check Brief metrics check (for cron/notifications)
 apply-agent [--all] Import pending agent results into the graph
 digest daily [DATE] Generate daily episodic digest (default: today)
 digest weekly [DATE] Generate weekly digest (any date in target week)
 trace KEY Walk temporal links: semantic ↔ episodic ↔ conversation");
}
|
||||
|
||||
/// Search memory with AND logic across the given query terms, log the
/// retrieval event, and print up to 15 results.
///
/// Each printed line shows `[activation/weight] key`, with a "→" marker
/// on direct keyword hits (see `is_direct`), the community id when the
/// node belongs to one, and an optional snippet.
fn cmd_search(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory search QUERY [QUERY...]".into());
    }
    let query = args.join(" ");
    let mut store = capnp_store::Store::load()?;
    let results = search::search(&query, &store);

    if results.is_empty() {
        eprintln!("No results for '{}'", query);
        return Ok(());
    }

    // Log retrieval — persisted before printing so the event is saved
    // even if output is interrupted.
    store.log_retrieval(&query, &results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
    store.save()?;

    for (i, r) in results.iter().enumerate().take(15) {
        // "→" distinguishes direct matches from spread-activation hits.
        let marker = if r.is_direct { "→" } else { " " };
        let weight = store.node_weight(&r.key).unwrap_or(0.0);
        print!("{}{:2}. [{:.2}/{:.2}] {}", marker, i + 1, r.activation, weight, r.key);
        if let Some(community) = store.node_community(&r.key) {
            print!(" (c{})", community);
        }
        println!();
        if let Some(ref snippet) = r.snippet {
            println!(" {}", snippet);
        }
    }
    Ok(())
}
|
||||
|
||||
fn cmd_init() -> Result<(), String> {
|
||||
let mut store = capnp_store::Store::load()?;
|
||||
let count = store.init_from_markdown()?;
|
||||
store.save()?;
|
||||
println!("Indexed {} memory units", count);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// One-shot migration from the legacy weights.json system (see migrate.rs).
fn cmd_migrate() -> Result<(), String> {
    migrate::migrate()
}
|
||||
|
||||
fn cmd_health() -> Result<(), String> {
|
||||
let store = capnp_store::Store::load()?;
|
||||
let g = store.build_graph();
|
||||
let health = graph::health_report(&g, &store);
|
||||
println!("{}", health);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_status() -> Result<(), String> {
|
||||
let store = capnp_store::Store::load()?;
|
||||
let node_count = store.nodes.len();
|
||||
let rel_count = store.relations.len();
|
||||
let categories = store.category_counts();
|
||||
|
||||
println!("Nodes: {} Relations: {}", node_count, rel_count);
|
||||
println!("Categories: core={} tech={} gen={} obs={} task={}",
|
||||
categories.get("core").unwrap_or(&0),
|
||||
categories.get("tech").unwrap_or(&0),
|
||||
categories.get("gen").unwrap_or(&0),
|
||||
categories.get("obs").unwrap_or(&0),
|
||||
categories.get("task").unwrap_or(&0),
|
||||
);
|
||||
|
||||
let g = store.build_graph();
|
||||
println!("Graph edges: {} Communities: {}",
|
||||
g.edge_count(), g.community_count());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_graph() -> Result<(), String> {
|
||||
let store = capnp_store::Store::load()?;
|
||||
let g = store.build_graph();
|
||||
|
||||
// Show top-10 highest degree nodes
|
||||
let mut degrees: Vec<_> = g.nodes().iter()
|
||||
.map(|k| (k.clone(), g.degree(k)))
|
||||
.collect();
|
||||
degrees.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
|
||||
println!("Top nodes by degree:");
|
||||
for (key, deg) in degrees.iter().take(10) {
|
||||
let cc = g.clustering_coefficient(key);
|
||||
println!(" {:40} deg={:3} cc={:.3}", key, deg, cc);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_used(args: &[String]) -> Result<(), String> {
|
||||
if args.is_empty() {
|
||||
return Err("Usage: poc-memory used KEY".into());
|
||||
}
|
||||
let key = args.join(" ");
|
||||
let mut store = capnp_store::Store::load()?;
|
||||
let resolved = store.resolve_key(&key)?;
|
||||
store.mark_used(&resolved);
|
||||
store.save()?;
|
||||
println!("Marked '{}' as used", resolved);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_wrong(args: &[String]) -> Result<(), String> {
|
||||
if args.is_empty() {
|
||||
return Err("Usage: poc-memory wrong KEY [CONTEXT]".into());
|
||||
}
|
||||
let key = &args[0];
|
||||
let ctx = if args.len() > 1 { Some(args[1..].join(" ")) } else { None };
|
||||
let mut store = capnp_store::Store::load()?;
|
||||
let resolved = store.resolve_key(key)?;
|
||||
store.mark_wrong(&resolved, ctx.as_deref());
|
||||
store.save()?;
|
||||
println!("Marked '{}' as wrong", resolved);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_gap(args: &[String]) -> Result<(), String> {
|
||||
if args.is_empty() {
|
||||
return Err("Usage: poc-memory gap DESCRIPTION".into());
|
||||
}
|
||||
let desc = args.join(" ");
|
||||
let mut store = capnp_store::Store::load()?;
|
||||
store.record_gap(&desc);
|
||||
store.save()?;
|
||||
println!("Recorded gap: {}", desc);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_categorize(args: &[String]) -> Result<(), String> {
|
||||
if args.len() < 2 {
|
||||
return Err("Usage: poc-memory categorize KEY CATEGORY".into());
|
||||
}
|
||||
let key = &args[0];
|
||||
let cat = &args[1];
|
||||
let mut store = capnp_store::Store::load()?;
|
||||
let resolved = store.resolve_key(key)?;
|
||||
store.categorize(&resolved, cat)?;
|
||||
store.save()?;
|
||||
println!("Set '{}' category to {}", resolved, cat);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Apply the daily weight decay pass and prune nodes that fall below
/// the prune threshold; prints how many of each were affected.
fn cmd_decay() -> Result<(), String> {
    let mut store = capnp_store::Store::load()?;
    let (decayed, pruned) = store.decay();
    store.save()?;
    println!("Decayed {} nodes, pruned {} below threshold", decayed, pruned);
    Ok(())
}
|
||||
|
||||
/// Run agent consolidation on priority nodes.
///
/// Flags: `--count N` (batch size, default 5), `--auto` (run without
/// interaction), `--agent NAME` (print that agent's prompt instead of
/// running the batch). Unknown flags are silently skipped; a flag that
/// needs a value and appears last is also ignored.
fn cmd_consolidate_batch(args: &[String]) -> Result<(), String> {
    let mut count = 5usize;
    let mut auto = false;
    let mut agent: Option<String> = None;
    let mut i = 0;
    while i < args.len() {
        match args[i].as_str() {
            "--count" if i + 1 < args.len() => {
                count = args[i + 1].parse().map_err(|_| "invalid count")?;
                i += 2;
            }
            "--auto" => { auto = true; i += 1; }
            "--agent" if i + 1 < args.len() => {
                agent = Some(args[i + 1].clone());
                i += 2;
            }
            _ => { i += 1; }
        }
    }

    let store = capnp_store::Store::load()?;

    if let Some(agent_name) = agent {
        // Generate a specific agent prompt rather than running the batch.
        let prompt = neuro::agent_prompt(&store, &agent_name, count)?;
        println!("{}", prompt);
        Ok(())
    } else {
        neuro::consolidation_batch(&store, count, auto)
    }
}
|
||||
|
||||
fn cmd_log() -> Result<(), String> {
|
||||
let store = capnp_store::Store::load()?;
|
||||
for event in store.retrieval_log.iter().rev().take(20) {
|
||||
println!("[{}] q=\"{}\" → {} results",
|
||||
event.timestamp, event.query, event.results.len());
|
||||
for r in &event.results {
|
||||
println!(" {}", r);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Dump the current tuning parameters, one `name: value` per line.
fn cmd_params() -> Result<(), String> {
    let store = capnp_store::Store::load()?;
    println!("decay_factor: {}", store.params.decay_factor);
    println!("use_boost: {}", store.params.use_boost);
    println!("prune_threshold: {}", store.params.prune_threshold);
    println!("edge_decay: {}", store.params.edge_decay);
    println!("max_hops: {}", store.params.max_hops);
    println!("min_activation: {}", store.params.min_activation);
    Ok(())
}
|
||||
|
||||
fn cmd_link(args: &[String]) -> Result<(), String> {
|
||||
if args.is_empty() {
|
||||
return Err("Usage: poc-memory link KEY".into());
|
||||
}
|
||||
let key = args.join(" ");
|
||||
let store = capnp_store::Store::load()?;
|
||||
let resolved = store.resolve_key(&key)?;
|
||||
let g = store.build_graph();
|
||||
|
||||
println!("Neighbors of '{}':", resolved);
|
||||
let neighbors = g.neighbors(&resolved);
|
||||
for (i, (n, strength)) in neighbors.iter().enumerate() {
|
||||
let cc = g.clustering_coefficient(n);
|
||||
println!(" {:2}. [{:.2}] {} (cc={:.3})", i + 1, strength, n, cc);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_replay_queue(args: &[String]) -> Result<(), String> {
|
||||
let mut count = 10usize;
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
match args[i].as_str() {
|
||||
"--count" if i + 1 < args.len() => {
|
||||
count = args[i + 1].parse().map_err(|_| "invalid count")?;
|
||||
i += 2;
|
||||
}
|
||||
_ => { i += 1; }
|
||||
}
|
||||
}
|
||||
let store = capnp_store::Store::load()?;
|
||||
let queue = neuro::replay_queue(&store, count);
|
||||
println!("Replay queue ({} items):", queue.len());
|
||||
for (i, item) in queue.iter().enumerate() {
|
||||
println!(" {:2}. [{:.3}] {} (interval={}d, emotion={:.1})",
|
||||
i + 1, item.priority, item.key, item.interval_days, item.emotion);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_consolidate_session() -> Result<(), String> {
|
||||
let store = capnp_store::Store::load()?;
|
||||
let plan = neuro::consolidation_plan(&store);
|
||||
println!("{}", neuro::format_plan(&plan));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_daily_check() -> Result<(), String> {
|
||||
let store = capnp_store::Store::load()?;
|
||||
let report = neuro::daily_check(&store);
|
||||
print!("{}", report);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a manual link between two nodes; a no-op (with a message) when
/// the same-direction link already exists.
fn cmd_link_add(args: &[String]) -> Result<(), String> {
    if args.len() < 2 {
        return Err("Usage: poc-memory link-add SOURCE TARGET [REASON]".into());
    }
    let mut store = capnp_store::Store::load()?;
    let source = store.resolve_key(&args[0])?;
    let target = store.resolve_key(&args[1])?;
    // Everything after the two keys is a free-form reason. It is only
    // echoed below — NOTE(review): it is not passed to new_relation, so
    // the reason is not persisted; confirm that's intended.
    let reason = if args.len() > 2 { args[2..].join(" ") } else { String::new() };

    // Find UUIDs
    let source_uuid = store.nodes.get(&source)
        .map(|n| n.uuid)
        .ok_or_else(|| format!("source not found: {}", source))?;
    let target_uuid = store.nodes.get(&target)
        .map(|n| n.uuid)
        .ok_or_else(|| format!("target not found: {}", target))?;

    // Check if link already exists — only the source→target direction
    // is checked; the reverse edge would not be detected here.
    let exists = store.relations.iter().any(|r|
        r.source_key == source && r.target_key == target && !r.deleted
    );
    if exists {
        println!("Link already exists: {} → {}", source, target);
        return Ok(());
    }

    // New manual links start at strength 0.5.
    let rel = capnp_store::Store::new_relation(
        source_uuid, target_uuid,
        capnp_store::RelationType::Auto,
        0.5,
        &source, &target,
    );
    // NOTE(review): no store.save() after this — presumably
    // add_relation appends to the append-only log itself; confirm.
    store.add_relation(rel)?;
    if !reason.is_empty() {
        println!("+ {} → {} ({})", source, target, reason);
    } else {
        println!("+ {} → {}", source, target);
    }
    Ok(())
}
|
||||
|
||||
fn cmd_link_impact(args: &[String]) -> Result<(), String> {
|
||||
if args.len() < 2 {
|
||||
return Err("Usage: poc-memory link-impact SOURCE TARGET".into());
|
||||
}
|
||||
let store = capnp_store::Store::load()?;
|
||||
let source = store.resolve_key(&args[0])?;
|
||||
let target = store.resolve_key(&args[1])?;
|
||||
let g = store.build_graph();
|
||||
|
||||
let impact = g.link_impact(&source, &target);
|
||||
|
||||
println!("Link impact: {} → {}", source, target);
|
||||
println!(" Source degree: {} Target degree: {}", impact.source_deg, impact.target_deg);
|
||||
println!(" Hub link: {} Same community: {}", impact.is_hub_link, impact.same_community);
|
||||
println!(" ΔCC source: {:+.4} ΔCC target: {:+.4}", impact.delta_cc_source, impact.delta_cc_target);
|
||||
println!(" ΔGini: {:+.6}", impact.delta_gini);
|
||||
println!(" Assessment: {}", impact.assessment);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Import pending agent-result JSON files into the graph.
///
/// Scans ~/.claude/memory/agent-results/*.json. Each file may carry a
/// `links` array either under an `agent_result` object or at the top
/// level; every link names a `target` node (or a `NOTE:` new-topic
/// marker) plus a `reason`. Links are attached from the journal node
/// that best matches the file's `entry_text`. Processed files are moved
/// into a `done/` subdirectory unless `--all` is passed.
fn cmd_apply_agent(args: &[String]) -> Result<(), String> {
    let home = env::var("HOME").unwrap_or_default();
    let results_dir = std::path::PathBuf::from(&home)
        .join(".claude/memory/agent-results");

    if !results_dir.exists() {
        println!("No agent results directory");
        return Ok(());
    }

    let mut store = capnp_store::Store::load()?;
    let mut applied = 0;
    let mut errors = 0;

    // --all: process every file but leave them in place (re-runnable).
    let process_all = args.iter().any(|a| a == "--all");

    // Find .json result files, sorted by path for deterministic order.
    let mut files: Vec<_> = std::fs::read_dir(&results_dir)
        .map_err(|e| format!("read results dir: {}", e))?
        .filter_map(|e| e.ok())
        .filter(|e| e.path().extension().map(|x| x == "json").unwrap_or(false))
        .collect();
    files.sort_by_key(|e| e.path());

    for entry in &files {
        let path = entry.path();
        // Unreadable or unparsable files are counted and skipped, never fatal.
        let content = match std::fs::read_to_string(&path) {
            Ok(c) => c,
            Err(e) => {
                eprintln!(" Skip {}: {}", path.display(), e);
                errors += 1;
                continue;
            }
        };

        let data: serde_json::Value = match serde_json::from_str(&content) {
            Ok(d) => d,
            Err(e) => {
                eprintln!(" Skip {}: parse error: {}", path.display(), e);
                errors += 1;
                continue;
            }
        };

        // Check for agent_result with links; `.or(Some(&data))` falls
        // back to the whole document when there is no wrapper object.
        let agent_result = data.get("agent_result").or(Some(&data));
        // NOTE(review): a file with no `links` array `continue`s here
        // and therefore never reaches the move-to-done step below, so
        // it is re-scanned on every run — confirm that's intended.
        let links = match agent_result.and_then(|r| r.get("links")).and_then(|l| l.as_array()) {
            Some(l) => l,
            None => continue,
        };

        let entry_text = data.get("entry_text")
            .and_then(|v| v.as_str())
            .unwrap_or("");
        let source_start = agent_result
            .and_then(|r| r.get("source_start"))
            .and_then(|v| v.as_u64());
        let source_end = agent_result
            .and_then(|r| r.get("source_end"))
            .and_then(|v| v.as_u64());

        println!("Processing {}:", path.file_name().unwrap().to_string_lossy());
        if let (Some(start), Some(end)) = (source_start, source_end) {
            println!(" Source: L{}-L{}", start, end);
        }

        for link in links {
            let target = match link.get("target").and_then(|v| v.as_str()) {
                Some(t) => t,
                None => continue,
            };
            let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");

            // Skip NOTE: targets (new topics, not existing nodes)
            if target.starts_with("NOTE:") {
                println!(" NOTE: {} — {}", &target[5..], reason);
                continue;
            }

            // Try to resolve the target key and link from journal entry
            let resolved = match store.resolve_key(target) {
                Ok(r) => r,
                Err(_) => {
                    println!(" SKIP {} (not found in graph)", target);
                    continue;
                }
            };

            let source_key = match find_journal_node(&store, entry_text) {
                Some(k) => k,
                None => {
                    println!(" SKIP {} (no matching journal node)", target);
                    continue;
                }
            };

            // Get UUIDs for both nodes; silently skip if either vanished.
            let source_uuid = match store.nodes.get(&source_key) {
                Some(n) => n.uuid,
                None => continue,
            };
            let target_uuid = match store.nodes.get(&resolved) {
                Some(n) => n.uuid,
                None => continue,
            };

            let rel = capnp_store::Store::new_relation(
                source_uuid, target_uuid,
                capnp_store::RelationType::Link,
                0.5,
                &source_key, &resolved,
            );
            if let Err(e) = store.add_relation(rel) {
                eprintln!(" Error adding relation: {}", e);
                errors += 1;
            } else {
                println!(" LINK {} → {} ({})", source_key, resolved, reason);
                applied += 1;
            }
        }

        // Move processed file to avoid re-processing (skipped under
        // --all so a full re-import stays possible).
        if !process_all {
            let done_dir = results_dir.join("done");
            std::fs::create_dir_all(&done_dir).ok();
            let dest = done_dir.join(path.file_name().unwrap());
            std::fs::rename(&path, &dest).ok();
        }
    }

    if applied > 0 {
        store.save()?;
    }

    println!("\nApplied {} links ({} errors, {} files processed)",
        applied, errors, files.len());
    Ok(())
}
|
||||
|
||||
/// Find the journal node that best matches the given entry text
|
||||
fn find_journal_node(store: &capnp_store::Store, entry_text: &str) -> Option<String> {
|
||||
if entry_text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Extract keywords from entry text
|
||||
let words: Vec<&str> = entry_text.split_whitespace()
|
||||
.filter(|w| w.len() > 5)
|
||||
.take(5)
|
||||
.collect();
|
||||
|
||||
// Find journal nodes whose content matches the most keywords
|
||||
let mut best_key = None;
|
||||
let mut best_score = 0;
|
||||
|
||||
for (key, node) in &store.nodes {
|
||||
if !key.starts_with("journal.md#") {
|
||||
continue;
|
||||
}
|
||||
let content_lower = node.content.to_lowercase();
|
||||
let score: usize = words.iter()
|
||||
.filter(|w| content_lower.contains(&w.to_lowercase()))
|
||||
.count();
|
||||
if score > best_score {
|
||||
best_score = score;
|
||||
best_key = Some(key.clone());
|
||||
}
|
||||
}
|
||||
|
||||
best_key
|
||||
}
|
||||
|
||||
/// Generate an episodic digest by delegating to the Python pipeline.
///
/// `digest daily [DATE]` runs daily-digest.py; `digest weekly [DATE]`
/// runs weekly-digest.py. The optional DATE is forwarded verbatim.
fn cmd_digest(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory digest daily [DATE] | weekly [DATE]".into());
    }

    let home = env::var("HOME").unwrap_or_default();
    let scripts_dir = std::path::PathBuf::from(&home).join("poc/memory/scripts");

    // The two digest types only differ in which script they invoke;
    // everything else (date arg, env handling, error mapping) is shared.
    let script = match args[0].as_str() {
        "daily" => "daily-digest.py",
        "weekly" => "weekly-digest.py",
        other => return Err(format!("Unknown digest type: {}. Use: daily, weekly", other)),
    };
    run_digest_script(&scripts_dir.join(script), args.get(1), script)
}

/// Run `python3 SCRIPT [DATE]`, mapping a spawn failure or a non-zero
/// exit status to an error that names the script.
fn run_digest_script(
    script_path: &std::path::Path,
    date: Option<&String>,
    script_name: &str,
) -> Result<(), String> {
    let mut cmd = std::process::Command::new("python3");
    cmd.arg(script_path);
    if let Some(d) = date {
        cmd.arg(d);
    }
    // Unset CLAUDECODE for nested claude calls made by the script.
    cmd.env_remove("CLAUDECODE");
    let status = cmd.status()
        .map_err(|e| format!("run {}: {}", script_name, e))?;
    if status.success() {
        Ok(())
    } else {
        Err(format!("{} failed", script_name))
    }
}
|
||||
|
||||
fn cmd_trace(args: &[String]) -> Result<(), String> {
|
||||
if args.is_empty() {
|
||||
return Err("Usage: poc-memory trace KEY".into());
|
||||
}
|
||||
let key = args.join(" ");
|
||||
let store = capnp_store::Store::load()?;
|
||||
let resolved = store.resolve_key(&key)?;
|
||||
let g = store.build_graph();
|
||||
|
||||
let node = store.nodes.get(&resolved)
|
||||
.ok_or_else(|| format!("Node not found: {}", resolved))?;
|
||||
|
||||
// Display the node itself
|
||||
println!("=== {} ===", resolved);
|
||||
println!("Type: {:?} Category: {} Weight: {:.2}",
|
||||
node.node_type, node.category.label(), node.weight);
|
||||
if !node.source_ref.is_empty() {
|
||||
println!("Source: {}", node.source_ref);
|
||||
}
|
||||
|
||||
// Show content preview
|
||||
let preview = if node.content.len() > 200 {
|
||||
let end = node.content.floor_char_boundary(200);
|
||||
format!("{}...", &node.content[..end])
|
||||
} else {
|
||||
node.content.clone()
|
||||
};
|
||||
println!("\n{}\n", preview);
|
||||
|
||||
// Walk neighbors, grouped by node type
|
||||
let neighbors = g.neighbors(&resolved);
|
||||
let mut episodic_session = Vec::new();
|
||||
let mut episodic_daily = Vec::new();
|
||||
let mut episodic_weekly = Vec::new();
|
||||
let mut semantic = Vec::new();
|
||||
|
||||
for (n, strength) in &neighbors {
|
||||
if let Some(nnode) = store.nodes.get(n.as_str()) {
|
||||
match nnode.node_type {
|
||||
capnp_store::NodeType::EpisodicSession =>
|
||||
episodic_session.push((n.clone(), *strength, nnode)),
|
||||
capnp_store::NodeType::EpisodicDaily =>
|
||||
episodic_daily.push((n.clone(), *strength, nnode)),
|
||||
capnp_store::NodeType::EpisodicWeekly =>
|
||||
episodic_weekly.push((n.clone(), *strength, nnode)),
|
||||
capnp_store::NodeType::Semantic =>
|
||||
semantic.push((n.clone(), *strength, nnode)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !episodic_weekly.is_empty() {
|
||||
println!("Weekly digests:");
|
||||
for (k, s, n) in &episodic_weekly {
|
||||
let preview = n.content.lines().next().unwrap_or("").chars().take(80).collect::<String>();
|
||||
println!(" [{:.2}] {} — {}", s, k, preview);
|
||||
}
|
||||
}
|
||||
|
||||
if !episodic_daily.is_empty() {
|
||||
println!("Daily digests:");
|
||||
for (k, s, n) in &episodic_daily {
|
||||
let preview = n.content.lines().next().unwrap_or("").chars().take(80).collect::<String>();
|
||||
println!(" [{:.2}] {} — {}", s, k, preview);
|
||||
}
|
||||
}
|
||||
|
||||
if !episodic_session.is_empty() {
|
||||
println!("Session entries:");
|
||||
for (k, s, n) in &episodic_session {
|
||||
let preview = n.content.lines()
|
||||
.find(|l| !l.is_empty() && !l.starts_with("<!--"))
|
||||
.unwrap_or("").chars().take(80).collect::<String>();
|
||||
println!(" [{:.2}] {}", s, k);
|
||||
if !n.source_ref.is_empty() {
|
||||
println!(" ↳ source: {}", n.source_ref);
|
||||
}
|
||||
println!(" {}", preview);
|
||||
}
|
||||
}
|
||||
|
||||
if !semantic.is_empty() {
|
||||
println!("Semantic links:");
|
||||
for (k, s, _) in &semantic {
|
||||
println!(" [{:.2}] {}", s, k);
|
||||
}
|
||||
}
|
||||
|
||||
// Summary
|
||||
println!("\nLinks: {} session, {} daily, {} weekly, {} semantic",
|
||||
episodic_session.len(), episodic_daily.len(),
|
||||
episodic_weekly.len(), semantic.len());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_interference(args: &[String]) -> Result<(), String> {
|
||||
let mut threshold = 0.4f32;
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
match args[i].as_str() {
|
||||
"--threshold" if i + 1 < args.len() => {
|
||||
threshold = args[i + 1].parse().map_err(|_| "invalid threshold")?;
|
||||
i += 2;
|
||||
}
|
||||
_ => { i += 1; }
|
||||
}
|
||||
}
|
||||
let store = capnp_store::Store::load()?;
|
||||
let g = store.build_graph();
|
||||
let pairs = neuro::detect_interference(&store, &g, threshold);
|
||||
|
||||
if pairs.is_empty() {
|
||||
println!("No interfering pairs above threshold {:.2}", threshold);
|
||||
} else {
|
||||
println!("Interfering pairs (similarity > {:.2}, different communities):", threshold);
|
||||
for (a, b, sim) in &pairs {
|
||||
println!(" [{:.3}] {} ↔ {}", sim, a, b);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
386
src/migrate.rs
Normal file
386
src/migrate.rs
Normal file
|
|
@ -0,0 +1,386 @@
|
|||
// Migration from old weights.json + markdown marker system
|
||||
//
|
||||
// Reads:
|
||||
// ~/.claude/memory/weights.json (1,874 entries with metrics)
|
||||
// ~/.claude/memory/*.md (content + mem markers + edges)
|
||||
//
|
||||
// Emits:
|
||||
// ~/.claude/memory/nodes.capnp (all nodes with preserved metadata)
|
||||
// ~/.claude/memory/relations.capnp (all edges from markers + md links)
|
||||
// ~/.claude/memory/state.json (derived cache)
|
||||
//
|
||||
// Old files are preserved as backup. Run once.
|
||||
|
||||
use crate::capnp_store::{
|
||||
self, Store, Node, Category, NodeType, Provenance, RelationType,
|
||||
parse_units,
|
||||
};
|
||||
|
||||
use serde::Deserialize;
|
||||
use uuid::Uuid;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// The user's home directory, taken from $HOME.
///
/// # Panics
/// Panics if HOME is unset — acceptable for a one-shot migration tool.
fn home() -> PathBuf {
    let dir = env::var("HOME").expect("HOME not set");
    PathBuf::from(dir)
}
|
||||
|
||||
/// Seconds since the Unix epoch as a float (sub-second precision).
fn now_epoch() -> f64 {
    let elapsed = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system clock is before the Unix epoch");
    elapsed.as_secs_f64()
}
|
||||
|
||||
// Old system data structures (just enough for deserialization)
|
||||
|
||||
// Top-level shape of the legacy weights.json file. Only what the
// migration needs is modeled; every section is optional so partial
// files still deserialize.
#[derive(Deserialize)]
struct OldStore {
    // key → per-entry weight/usage metadata
    #[serde(default)]
    entries: HashMap<String, OldEntry>,
    // historical search events, carried into the new store
    #[serde(default)]
    retrieval_log: Vec<OldRetrievalEvent>,
    // tuning parameters, copied over verbatim
    #[serde(default)]
    params: OldParams,
}
|
||||
|
||||
// One entry from the legacy weights.json: retrieval weight plus
// usage counters and timestamps.
#[derive(Deserialize)]
struct OldEntry {
    // retrieval weight carried into the new node
    weight: f64,
    // creation timestamp string, preserved verbatim
    created: String,
    #[serde(default)]
    last_retrieved: Option<String>,
    #[serde(default)]
    last_used: Option<String>,
    // times the entry appeared in search results
    #[serde(default)]
    retrievals: u32,
    // times it was explicitly marked useful
    #[serde(default)]
    uses: u32,
    // times it was explicitly marked wrong
    #[serde(default)]
    wrongs: u32,
    // category label; legacy data may omit it (defaults to "General")
    #[serde(default = "default_category")]
    category: String,
}
|
||||
|
||||
/// Serde default for `OldEntry::category` — the legacy fallback label.
fn default_category() -> String {
    String::from("General")
}
|
||||
|
||||
// A logged search event from the legacy system.
#[derive(Deserialize)]
struct OldRetrievalEvent {
    query: String,
    timestamp: String,
    // keys the search returned
    results: Vec<String>,
    // subset of results later marked as used, when recorded
    #[serde(default)]
    used: Option<Vec<String>>,
}
|
||||
|
||||
// Legacy tuning parameters. Each field's serde default matches the
// corresponding value in `impl Default for OldParams`, so both a
// missing `params` object and individually missing fields resolve to
// the same values.
#[derive(Deserialize)]
struct OldParams {
    #[serde(default = "default_0_7")]
    default_weight: f64,
    #[serde(default = "default_0_95")]
    decay_factor: f64,
    #[serde(default = "default_0_15")]
    use_boost: f64,
    #[serde(default = "default_0_1")]
    prune_threshold: f64,
    #[serde(default = "default_0_3")]
    edge_decay: f64,
    #[serde(default = "default_3")]
    max_hops: u32,
    #[serde(default = "default_0_05")]
    min_activation: f64,
}
|
||||
|
||||
impl Default for OldParams {
    // Whole-struct default used when weights.json has no `params`
    // object at all; values mirror the per-field serde defaults.
    fn default() -> Self {
        OldParams {
            default_weight: 0.7,
            decay_factor: 0.95,
            use_boost: 0.15,
            prune_threshold: 0.1,
            edge_decay: 0.3,
            max_hops: 3,
            min_activation: 0.05,
        }
    }
}
|
||||
|
||||
// Named default helpers: serde's `#[serde(default = "...")]` attribute
// takes a function path, so each literal needs a tiny wrapper.
fn default_0_7() -> f64 { 0.7 }
fn default_0_95() -> f64 { 0.95 }
fn default_0_15() -> f64 { 0.15 }
fn default_0_1() -> f64 { 0.1 }
fn default_0_3() -> f64 { 0.3 }
fn default_3() -> u32 { 3 }
fn default_0_05() -> f64 { 0.05 }
|
||||
|
||||
fn parse_old_category(s: &str) -> Category {
|
||||
match s {
|
||||
"Core" | "core" => Category::Core,
|
||||
"Technical" | "technical" | "tech" => Category::Technical,
|
||||
"Observation" | "observation" | "obs" => Category::Observation,
|
||||
"Task" | "task" => Category::Task,
|
||||
_ => Category::General,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn migrate() -> Result<(), String> {
|
||||
let weights_path = home().join(".claude/memory/weights.json");
|
||||
let memory_dir = home().join(".claude/memory");
|
||||
let nodes_path = memory_dir.join("nodes.capnp");
|
||||
let rels_path = memory_dir.join("relations.capnp");
|
||||
|
||||
// Safety check
|
||||
if nodes_path.exists() || rels_path.exists() {
|
||||
return Err("nodes.capnp or relations.capnp already exist. \
|
||||
Remove them first if you want to re-migrate.".into());
|
||||
}
|
||||
|
||||
// Load old store
|
||||
let old_store: OldStore = if weights_path.exists() {
|
||||
let data = fs::read_to_string(&weights_path)
|
||||
.map_err(|e| format!("read weights.json: {}", e))?;
|
||||
serde_json::from_str(&data)
|
||||
.map_err(|e| format!("parse weights.json: {}", e))?
|
||||
} else {
|
||||
eprintln!("Warning: no weights.json found, migrating markdown only");
|
||||
OldStore {
|
||||
entries: HashMap::new(),
|
||||
retrieval_log: Vec::new(),
|
||||
params: OldParams::default(),
|
||||
}
|
||||
};
|
||||
|
||||
eprintln!("Old store: {} entries, {} retrieval events",
|
||||
old_store.entries.len(), old_store.retrieval_log.len());
|
||||
|
||||
// Scan markdown files to get content + edges
|
||||
let mut units_by_key: HashMap<String, capnp_store::MemoryUnit> = HashMap::new();
|
||||
scan_markdown_dir(&memory_dir, &mut units_by_key)?;
|
||||
|
||||
eprintln!("Scanned {} markdown units", units_by_key.len());
|
||||
|
||||
// Create new store
|
||||
let mut store = Store::default();
|
||||
|
||||
// Migrate params
|
||||
store.params.default_weight = old_store.params.default_weight;
|
||||
store.params.decay_factor = old_store.params.decay_factor;
|
||||
store.params.use_boost = old_store.params.use_boost;
|
||||
store.params.prune_threshold = old_store.params.prune_threshold;
|
||||
store.params.edge_decay = old_store.params.edge_decay;
|
||||
store.params.max_hops = old_store.params.max_hops;
|
||||
store.params.min_activation = old_store.params.min_activation;
|
||||
|
||||
// Migrate retrieval log
|
||||
store.retrieval_log = old_store.retrieval_log.iter().map(|e| {
|
||||
capnp_store::RetrievalEvent {
|
||||
query: e.query.clone(),
|
||||
timestamp: e.timestamp.clone(),
|
||||
results: e.results.clone(),
|
||||
used: e.used.clone(),
|
||||
}
|
||||
}).collect();
|
||||
|
||||
// Phase 1: Create nodes
|
||||
// Merge old entries (weight metadata) with markdown units (content)
|
||||
let mut all_nodes: Vec<Node> = Vec::new();
|
||||
let mut key_to_uuid: HashMap<String, [u8; 16]> = HashMap::new();
|
||||
|
||||
// First, all entries from the old store
|
||||
for (key, old_entry) in &old_store.entries {
|
||||
let uuid = *Uuid::new_v4().as_bytes();
|
||||
key_to_uuid.insert(key.clone(), uuid);
|
||||
|
||||
let content = units_by_key.get(key)
|
||||
.map(|u| u.content.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
let state_tag = units_by_key.get(key)
|
||||
.and_then(|u| u.state.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
let node = Node {
|
||||
uuid,
|
||||
version: 1,
|
||||
timestamp: now_epoch(),
|
||||
node_type: if key.contains("journal") {
|
||||
NodeType::EpisodicSession
|
||||
} else {
|
||||
NodeType::Semantic
|
||||
},
|
||||
provenance: Provenance::Manual,
|
||||
key: key.clone(),
|
||||
content,
|
||||
weight: old_entry.weight as f32,
|
||||
category: parse_old_category(&old_entry.category),
|
||||
emotion: 0.0,
|
||||
deleted: false,
|
||||
source_ref: String::new(),
|
||||
created: old_entry.created.clone(),
|
||||
retrievals: old_entry.retrievals,
|
||||
uses: old_entry.uses,
|
||||
wrongs: old_entry.wrongs,
|
||||
state_tag,
|
||||
last_replayed: 0.0,
|
||||
spaced_repetition_interval: 1,
|
||||
community_id: None,
|
||||
clustering_coefficient: None,
|
||||
schema_fit: None,
|
||||
degree: None,
|
||||
};
|
||||
all_nodes.push(node);
|
||||
}
|
||||
|
||||
// Then, any markdown units not in the old store
|
||||
for (key, unit) in &units_by_key {
|
||||
if key_to_uuid.contains_key(key) { continue; }
|
||||
|
||||
let uuid = *Uuid::new_v4().as_bytes();
|
||||
key_to_uuid.insert(key.clone(), uuid);
|
||||
|
||||
let node = Node {
|
||||
uuid,
|
||||
version: 1,
|
||||
timestamp: now_epoch(),
|
||||
node_type: if key.contains("journal") {
|
||||
NodeType::EpisodicSession
|
||||
} else {
|
||||
NodeType::Semantic
|
||||
},
|
||||
provenance: Provenance::Manual,
|
||||
key: key.clone(),
|
||||
content: unit.content.clone(),
|
||||
weight: 0.7,
|
||||
category: Category::General,
|
||||
emotion: 0.0,
|
||||
deleted: false,
|
||||
source_ref: String::new(),
|
||||
created: String::new(),
|
||||
retrievals: 0,
|
||||
uses: 0,
|
||||
wrongs: 0,
|
||||
state_tag: unit.state.clone().unwrap_or_default(),
|
||||
last_replayed: 0.0,
|
||||
spaced_repetition_interval: 1,
|
||||
community_id: None,
|
||||
clustering_coefficient: None,
|
||||
schema_fit: None,
|
||||
degree: None,
|
||||
};
|
||||
all_nodes.push(node);
|
||||
}
|
||||
|
||||
// Write nodes to capnp log
|
||||
store.append_nodes(&all_nodes)?;
|
||||
for node in &all_nodes {
|
||||
store.uuid_to_key.insert(node.uuid, node.key.clone());
|
||||
store.nodes.insert(node.key.clone(), node.clone());
|
||||
}
|
||||
|
||||
eprintln!("Migrated {} nodes", all_nodes.len());
|
||||
|
||||
// Phase 2: Create relations from markdown links + causal edges
|
||||
let mut all_relations = Vec::new();
|
||||
|
||||
for (key, unit) in &units_by_key {
|
||||
let source_uuid = match key_to_uuid.get(key) {
|
||||
Some(u) => *u,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// Association links (bidirectional)
|
||||
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
|
||||
let target_uuid = match key_to_uuid.get(link) {
|
||||
Some(u) => *u,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// Avoid duplicate relations
|
||||
let exists = all_relations.iter().any(|r: &capnp_store::Relation|
|
||||
(r.source == source_uuid && r.target == target_uuid) ||
|
||||
(r.source == target_uuid && r.target == source_uuid));
|
||||
if exists { continue; }
|
||||
|
||||
all_relations.push(Store::new_relation(
|
||||
source_uuid, target_uuid,
|
||||
RelationType::Link, 1.0,
|
||||
key, link,
|
||||
));
|
||||
}
|
||||
|
||||
// Causal edges (directed)
|
||||
for cause in &unit.causes {
|
||||
let cause_uuid = match key_to_uuid.get(cause) {
|
||||
Some(u) => *u,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
all_relations.push(Store::new_relation(
|
||||
cause_uuid, source_uuid,
|
||||
RelationType::Causal, 1.0,
|
||||
cause, key,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Write relations to capnp log
|
||||
store.append_relations(&all_relations)?;
|
||||
store.relations = all_relations;
|
||||
|
||||
eprintln!("Migrated {} relations", store.relations.len());
|
||||
|
||||
// Phase 3: Compute graph metrics
|
||||
store.update_graph_metrics();
|
||||
|
||||
// Save derived cache
|
||||
store.save()?;
|
||||
|
||||
eprintln!("Migration complete. Files:");
|
||||
eprintln!(" {}", nodes_path.display());
|
||||
eprintln!(" {}", rels_path.display());
|
||||
eprintln!(" {}", memory_dir.join("state.json").display());
|
||||
|
||||
// Verify
|
||||
let g = store.build_graph();
|
||||
eprintln!("\nVerification:");
|
||||
eprintln!(" Nodes: {}", store.nodes.len());
|
||||
eprintln!(" Relations: {}", store.relations.len());
|
||||
eprintln!(" Graph edges: {}", g.edge_count());
|
||||
eprintln!(" Communities: {}", g.community_count());
|
||||
eprintln!(" Avg CC: {:.4}", g.avg_clustering_coefficient());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn scan_markdown_dir(
|
||||
dir: &Path,
|
||||
units: &mut HashMap<String, capnp_store::MemoryUnit>,
|
||||
) -> Result<(), String> {
|
||||
let entries = fs::read_dir(dir)
|
||||
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
|
||||
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
scan_markdown_dir(&path, units)?;
|
||||
continue;
|
||||
}
|
||||
let Some(ext) = path.extension() else { continue };
|
||||
if ext != "md" { continue }
|
||||
|
||||
let filename = path.file_name().unwrap().to_string_lossy().to_string();
|
||||
let content = match fs::read_to_string(&path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
for unit in parse_units(&filename, &content) {
|
||||
units.insert(unit.key.clone(), unit);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
707
src/neuro.rs
Normal file
707
src/neuro.rs
Normal file
|
|
@ -0,0 +1,707 @@
|
|||
// Neuroscience-inspired memory algorithms
|
||||
//
|
||||
// Systematic replay (hippocampal replay), schema assimilation,
|
||||
// interference detection, emotional gating, consolidation priority
|
||||
// scoring, and the agent consolidation harness.
|
||||
|
||||
use crate::capnp_store::Store;
|
||||
use crate::graph::{self, Graph};
|
||||
use crate::similarity;
|
||||
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// Current wall-clock time as fractional seconds since the Unix epoch.
fn now_epoch() -> f64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    since_epoch.as_secs_f64()
}
|
||||
|
||||
/// Seconds in one day; used to convert spaced-repetition intervals
/// (stored in days) into epoch-seconds for overdue computations.
const SECS_PER_DAY: f64 = 86400.0;
|
||||
|
||||
/// Consolidation priority: how urgently a node needs attention
|
||||
///
|
||||
/// priority = (1 - schema_fit) × spaced_repetition_due × emotion × (1 + interference)
|
||||
pub fn consolidation_priority(store: &Store, key: &str, graph: &Graph) -> f64 {
|
||||
let node = match store.nodes.get(key) {
|
||||
Some(n) => n,
|
||||
None => return 0.0,
|
||||
};
|
||||
|
||||
// Schema fit: 0 = poorly integrated, 1 = well integrated
|
||||
let fit = graph::schema_fit(graph, key) as f64;
|
||||
let fit_factor = 1.0 - fit;
|
||||
|
||||
// Spaced repetition: how overdue is this node for replay?
|
||||
let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY;
|
||||
let time_since_replay = if node.last_replayed > 0.0 {
|
||||
(now_epoch() - node.last_replayed).max(0.0)
|
||||
} else {
|
||||
// Never replayed — treat as very overdue
|
||||
interval_secs * 3.0
|
||||
};
|
||||
let overdue_ratio = (time_since_replay / interval_secs).min(5.0);
|
||||
|
||||
// Emotional intensity: higher emotion = higher priority
|
||||
let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);
|
||||
|
||||
fit_factor * overdue_ratio * emotion_factor
|
||||
}
|
||||
|
||||
/// Item in the replay queue
pub struct ReplayItem {
    /// Key of the node this item refers to.
    pub key: String,
    /// Consolidation priority (see `consolidation_priority`); higher = more urgent.
    pub priority: f64,
    /// Current spaced-repetition interval, in days.
    pub interval_days: u32,
    /// Emotional intensity copied from the node.
    pub emotion: f32,
    /// Schema-fit score (0 = poorly integrated, 1 = well integrated).
    pub schema_fit: f32,
}
|
||||
|
||||
/// Generate the replay queue: nodes ordered by consolidation priority
|
||||
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
|
||||
let graph = store.build_graph();
|
||||
let fits = graph::schema_fit_all(&graph);
|
||||
|
||||
let mut items: Vec<ReplayItem> = store.nodes.iter()
|
||||
.map(|(key, node)| {
|
||||
let priority = consolidation_priority(store, key, &graph);
|
||||
let fit = fits.get(key).copied().unwrap_or(0.0);
|
||||
ReplayItem {
|
||||
key: key.clone(),
|
||||
priority,
|
||||
interval_days: node.spaced_repetition_interval,
|
||||
emotion: node.emotion,
|
||||
schema_fit: fit,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap());
|
||||
items.truncate(count);
|
||||
items
|
||||
}
|
||||
|
||||
/// Detect interfering memory pairs: high text similarity but different communities
|
||||
pub fn detect_interference(
|
||||
store: &Store,
|
||||
graph: &Graph,
|
||||
threshold: f32,
|
||||
) -> Vec<(String, String, f32)> {
|
||||
let communities = graph.communities();
|
||||
|
||||
// Only compare nodes within a reasonable set — take the most active ones
|
||||
let mut docs: Vec<(String, String)> = store.nodes.iter()
|
||||
.filter(|(_, n)| n.content.len() > 50) // skip tiny nodes
|
||||
.map(|(k, n)| (k.clone(), n.content.clone()))
|
||||
.collect();
|
||||
|
||||
// For large stores, sample to keep pairwise comparison feasible
|
||||
if docs.len() > 200 {
|
||||
docs.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
|
||||
docs.truncate(200);
|
||||
}
|
||||
|
||||
let similar = similarity::pairwise_similar(&docs, threshold);
|
||||
|
||||
// Filter to pairs in different communities
|
||||
similar.into_iter()
|
||||
.filter(|(a, b, _)| {
|
||||
let ca = communities.get(a);
|
||||
let cb = communities.get(b);
|
||||
match (ca, cb) {
|
||||
(Some(a), Some(b)) => a != b,
|
||||
_ => true, // if community unknown, flag it
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Schema assimilation scoring for a new node.
|
||||
/// Returns how easily the node integrates into existing structure.
|
||||
///
|
||||
/// High fit (>0.5): auto-link, done
|
||||
/// Medium fit (0.2-0.5): agent reviews, proposes links
|
||||
/// Low fit (<0.2): deep examination needed — new schema seed, bridge, or noise?
|
||||
pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
|
||||
let graph = store.build_graph();
|
||||
let fit = graph::schema_fit(&graph, key);
|
||||
|
||||
let recommendation = if fit > 0.5 {
|
||||
"auto-integrate"
|
||||
} else if fit > 0.2 {
|
||||
"agent-review"
|
||||
} else if graph.degree(key) > 0 {
|
||||
"deep-examine-bridge"
|
||||
} else {
|
||||
"deep-examine-orphan"
|
||||
};
|
||||
|
||||
(fit, recommendation)
|
||||
}
|
||||
|
||||
/// Prompt template directory: `$HOME/poc/memory/prompts`.
fn prompts_dir() -> std::path::PathBuf {
    // An unset HOME degrades to a relative "poc/memory/prompts" path.
    let home = std::env::var("HOME").unwrap_or_default();
    let mut dir = std::path::PathBuf::from(home);
    dir.push("poc/memory/prompts");
    dir
}
|
||||
|
||||
/// Load a prompt template, replacing {{PLACEHOLDER}} with data
|
||||
fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
|
||||
let path = prompts_dir().join(format!("{}.md", name));
|
||||
let mut content = std::fs::read_to_string(&path)
|
||||
.map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
|
||||
for (placeholder, data) in replacements {
|
||||
content = content.replace(placeholder, data);
|
||||
}
|
||||
Ok(content)
|
||||
}
|
||||
|
||||
/// Format topology header for agent prompts — current graph health metrics
|
||||
fn format_topology_header(graph: &Graph) -> String {
|
||||
let sigma = graph.small_world_sigma();
|
||||
let alpha = graph.degree_power_law_exponent();
|
||||
let gini = graph.degree_gini();
|
||||
let avg_cc = graph.avg_clustering_coefficient();
|
||||
let n = graph.nodes().len();
|
||||
let e = graph.edge_count();
|
||||
|
||||
format!(
|
||||
"## Current graph topology\n\
|
||||
Nodes: {} Edges: {} Communities: {}\n\
|
||||
Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
|
||||
Avg clustering coefficient: {:.4}\n\n\
|
||||
Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
|
||||
Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
|
||||
n, e, graph.community_count(), sigma, alpha, gini, avg_cc)
|
||||
}
|
||||
|
||||
/// Compute the hub degree threshold (top 5% by degree)
|
||||
fn hub_threshold(graph: &Graph) -> usize {
|
||||
let mut degrees: Vec<usize> = graph.nodes().iter()
|
||||
.map(|k| graph.degree(k))
|
||||
.collect();
|
||||
degrees.sort_unstable();
|
||||
if degrees.len() >= 20 {
|
||||
degrees[degrees.len() * 95 / 100]
|
||||
} else {
|
||||
usize::MAX
|
||||
}
|
||||
}
|
||||
|
||||
/// Format node data section for prompt templates
|
||||
fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
|
||||
let hub_thresh = hub_threshold(graph);
|
||||
let mut out = String::new();
|
||||
for item in items {
|
||||
let node = match store.nodes.get(&item.key) {
|
||||
Some(n) => n,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
out.push_str(&format!("## {} \n", item.key));
|
||||
out.push_str(&format!("Priority: {:.3} Schema fit: {:.3} Emotion: {:.1} ",
|
||||
item.priority, item.schema_fit, item.emotion));
|
||||
out.push_str(&format!("Category: {} Interval: {}d\n",
|
||||
node.category.label(), node.spaced_repetition_interval));
|
||||
|
||||
if let Some(community) = node.community_id {
|
||||
out.push_str(&format!("Community: {} ", community));
|
||||
}
|
||||
let deg = graph.degree(&item.key);
|
||||
let cc = graph.clustering_coefficient(&item.key);
|
||||
|
||||
// Hub-link ratio: what fraction of this node's edges go to hubs?
|
||||
let neighbors = graph.neighbors(&item.key);
|
||||
let hub_links = neighbors.iter()
|
||||
.filter(|(n, _)| graph.degree(n) >= hub_thresh)
|
||||
.count();
|
||||
let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
|
||||
let is_hub = deg >= hub_thresh;
|
||||
|
||||
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
|
||||
deg, cc, hub_ratio * 100.0, hub_links, deg));
|
||||
if is_hub {
|
||||
out.push_str(" ← THIS IS A HUB");
|
||||
} else if hub_ratio > 0.6 {
|
||||
out.push_str(" ← mostly hub-connected, needs lateral links");
|
||||
}
|
||||
out.push('\n');
|
||||
|
||||
// Content (truncated for large nodes)
|
||||
let content = &node.content;
|
||||
if content.len() > 1500 {
|
||||
let end = content.floor_char_boundary(1500);
|
||||
out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n[...]\n\n",
|
||||
content.len(), &content[..end]));
|
||||
} else {
|
||||
out.push_str(&format!("\nContent:\n{}\n\n", content));
|
||||
}
|
||||
|
||||
// Neighbors
|
||||
let neighbors = graph.neighbors(&item.key);
|
||||
if !neighbors.is_empty() {
|
||||
out.push_str("Neighbors:\n");
|
||||
for (n, strength) in neighbors.iter().take(15) {
|
||||
let n_cc = graph.clustering_coefficient(n);
|
||||
let n_community = store.nodes.get(n.as_str())
|
||||
.and_then(|n| n.community_id);
|
||||
out.push_str(&format!(" - {} (str={:.2}, cc={:.3}",
|
||||
n, strength, n_cc));
|
||||
if let Some(c) = n_community {
|
||||
out.push_str(&format!(", c{}", c));
|
||||
}
|
||||
out.push_str(")\n");
|
||||
}
|
||||
}
|
||||
out.push_str("\n---\n\n");
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Format health data for the health agent prompt
|
||||
fn format_health_section(store: &Store, graph: &Graph) -> String {
|
||||
let health = graph::health_report(graph, store);
|
||||
|
||||
let mut out = health;
|
||||
out.push_str("\n\n## Weight distribution\n");
|
||||
|
||||
// Weight histogram
|
||||
let mut buckets = [0u32; 10]; // 0.0-0.1, 0.1-0.2, ..., 0.9-1.0
|
||||
for node in store.nodes.values() {
|
||||
let bucket = ((node.weight * 10.0) as usize).min(9);
|
||||
buckets[bucket] += 1;
|
||||
}
|
||||
for (i, &count) in buckets.iter().enumerate() {
|
||||
let lo = i as f32 / 10.0;
|
||||
let hi = (i + 1) as f32 / 10.0;
|
||||
let bar: String = std::iter::repeat('█').take((count as usize) / 10).collect();
|
||||
out.push_str(&format!(" {:.1}-{:.1}: {:4} {}\n", lo, hi, count, bar));
|
||||
}
|
||||
|
||||
// Near-prune nodes
|
||||
let near_prune: Vec<_> = store.nodes.iter()
|
||||
.filter(|(_, n)| n.weight < 0.15)
|
||||
.map(|(k, n)| (k.clone(), n.weight))
|
||||
.collect();
|
||||
if !near_prune.is_empty() {
|
||||
out.push_str(&format!("\n## Near-prune nodes ({} total)\n", near_prune.len()));
|
||||
for (k, w) in near_prune.iter().take(20) {
|
||||
out.push_str(&format!(" [{:.3}] {}\n", w, k));
|
||||
}
|
||||
}
|
||||
|
||||
// Community sizes
|
||||
let communities = graph.communities();
|
||||
let mut comm_sizes: std::collections::HashMap<u32, Vec<String>> = std::collections::HashMap::new();
|
||||
for (key, &label) in communities {
|
||||
comm_sizes.entry(label).or_default().push(key.clone());
|
||||
}
|
||||
let mut sizes: Vec<_> = comm_sizes.iter()
|
||||
.map(|(id, members)| (*id, members.len(), members.clone()))
|
||||
.collect();
|
||||
sizes.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
|
||||
out.push_str("\n## Largest communities\n");
|
||||
for (id, size, members) in sizes.iter().take(10) {
|
||||
out.push_str(&format!(" Community {} ({} nodes): ", id, size));
|
||||
let sample: Vec<_> = members.iter().take(5).map(|s| s.as_str()).collect();
|
||||
out.push_str(&sample.join(", "));
|
||||
if *size > 5 { out.push_str(", ..."); }
|
||||
out.push('\n');
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Format interference pairs for the separator agent prompt
|
||||
fn format_pairs_section(
|
||||
pairs: &[(String, String, f32)],
|
||||
store: &Store,
|
||||
graph: &Graph,
|
||||
) -> String {
|
||||
let mut out = String::new();
|
||||
let communities = graph.communities();
|
||||
|
||||
for (a, b, sim) in pairs {
|
||||
out.push_str(&format!("## Pair: similarity={:.3}\n", sim));
|
||||
|
||||
let ca = communities.get(a).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
|
||||
let cb = communities.get(b).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
|
||||
|
||||
// Node A
|
||||
out.push_str(&format!("\n### {} ({})\n", a, ca));
|
||||
if let Some(node) = store.nodes.get(a) {
|
||||
let content = if node.content.len() > 500 {
|
||||
let end = node.content.floor_char_boundary(500);
|
||||
format!("{}...", &node.content[..end])
|
||||
} else {
|
||||
node.content.clone()
|
||||
};
|
||||
out.push_str(&format!("Category: {} Weight: {:.2}\n{}\n",
|
||||
node.category.label(), node.weight, content));
|
||||
}
|
||||
|
||||
// Node B
|
||||
out.push_str(&format!("\n### {} ({})\n", b, cb));
|
||||
if let Some(node) = store.nodes.get(b) {
|
||||
let content = if node.content.len() > 500 {
|
||||
let end = node.content.floor_char_boundary(500);
|
||||
format!("{}...", &node.content[..end])
|
||||
} else {
|
||||
node.content.clone()
|
||||
};
|
||||
out.push_str(&format!("Category: {} Weight: {:.2}\n{}\n",
|
||||
node.category.label(), node.weight, content));
|
||||
}
|
||||
|
||||
out.push_str("\n---\n\n");
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Run agent consolidation on top-priority nodes.
///
/// With `auto` set, prints a ready-to-run replay agent prompt to stdout.
/// Otherwise prints an interactive summary: the replay queue, up to five
/// detected interference pairs, and the available agent types.
///
/// Errors bubble up from prompt-template loading.
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
    let graph = store.build_graph();
    let items = replay_queue(store, count);

    if items.is_empty() {
        println!("No nodes to consolidate.");
        return Ok(());
    }

    let nodes_section = format_nodes_section(store, &items, &graph);

    if auto {
        // Generate the replay agent prompt with data filled in
        let prompt = load_prompt("replay", &[("{{NODES}}", &nodes_section)])?;
        println!("{}", prompt);
    } else {
        // Interactive: show what needs attention and available agent types
        println!("Consolidation batch ({} nodes):\n", items.len());
        for item in &items {
            // NOTE(review): node type is inferred from the key here rather
            // than from node.node_type — presumably for display parity with
            // keys migrated from the old store; confirm this is intentional.
            let node_type = store.nodes.get(&item.key)
                .map(|n| if n.key.contains("journal") { "episodic" } else { "semantic" })
                .unwrap_or("?");
            println!(" [{:.3}] {} (fit={:.3}, interval={}d, type={})",
                item.priority, item.key, item.schema_fit, item.interval_days, node_type);
        }

        // Also show interference pairs (similarity threshold 0.6, top 5)
        let pairs = detect_interference(store, &graph, 0.6);
        if !pairs.is_empty() {
            println!("\nInterfering pairs ({}):", pairs.len());
            for (a, b, sim) in pairs.iter().take(5) {
                println!(" [{:.3}] {} ↔ {}", sim, a, b);
            }
        }

        // Menu of the per-agent prompt generators (see agent_prompt).
        println!("\nAgent prompts:");
        println!(" --auto Generate replay agent prompt");
        println!(" --agent replay Replay agent (schema assimilation)");
        println!(" --agent linker Linker agent (relational binding)");
        println!(" --agent separator Separator agent (pattern separation)");
        println!(" --agent transfer Transfer agent (CLS episodic→semantic)");
        println!(" --agent health Health agent (synaptic homeostasis)");
    }

    Ok(())
}
|
||||
|
||||
/// Generate a specific agent prompt with filled-in data.
///
/// `agent` selects the template and its data set:
/// - "replay": top-`count` priority nodes
/// - "linker": episodic nodes only, up to `count`
/// - "separator": interference pairs (similarity ≥ 0.5)
/// - "transfer": the `count` most recent episodic entries
/// - "health": weight/community health report
///
/// Returns an error for unknown agent names or a missing prompt template.
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String, String> {
    let graph = store.build_graph();
    let topology = format_topology_header(&graph);

    match agent {
        "replay" => {
            let items = replay_queue(store, count);
            let nodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
        }
        "linker" => {
            // Filter to episodic entries: over-fetch (count * 2) so the
            // filter still leaves enough items, then truncate to count.
            let mut items = replay_queue(store, count * 2);
            items.retain(|item| {
                store.nodes.get(&item.key)
                    .map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession))
                    .unwrap_or(false)
                    || item.key.contains("journal")
                    || item.key.contains("session")
            });
            items.truncate(count);
            let nodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("linker", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
        }
        "separator" => {
            let pairs = detect_interference(store, &graph, 0.5);
            let pairs_section = format_pairs_section(&pairs, store, &graph);
            load_prompt("separator", &[("{{TOPOLOGY}}", &topology), ("{{PAIRS}}", &pairs_section)])
        }
        "transfer" => {
            // Recent episodic entries, newest first by timestamp.
            let mut episodes: Vec<_> = store.nodes.iter()
                .filter(|(k, _)| k.contains("journal") || k.contains("session"))
                .map(|(k, n)| (k.clone(), n.timestamp))
                .collect();
            episodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
            episodes.truncate(count);

            // Rebuild ReplayItems for the selected episodes so they can be
            // rendered with the shared nodes-section formatter.
            let episode_keys: Vec<_> = episodes.iter().map(|(k, _)| k.clone()).collect();
            let items: Vec<ReplayItem> = episode_keys.iter()
                .filter_map(|k| {
                    let node = store.nodes.get(k)?;
                    let fit = graph::schema_fit(&graph, k);
                    Some(ReplayItem {
                        key: k.clone(),
                        priority: consolidation_priority(store, k, &graph),
                        interval_days: node.spaced_repetition_interval,
                        emotion: node.emotion,
                        schema_fit: fit,
                    })
                })
                .collect();
            let episodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("transfer", &[("{{TOPOLOGY}}", &topology), ("{{EPISODES}}", &episodes_section)])
        }
        "health" => {
            let health_section = format_health_section(store, &graph);
            load_prompt("health", &[("{{TOPOLOGY}}", &topology), ("{{HEALTH}}", &health_section)])
        }
        _ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health", agent)),
    }
}
|
||||
|
||||
/// Agent allocation from the control loop
pub struct ConsolidationPlan {
    /// Replay-agent runs (schema assimilation + lateral linking).
    pub replay_count: usize,
    /// Linker-agent runs (relational binding from episodes).
    pub linker_count: usize,
    /// Separator-agent runs (pattern separation of interfering pairs).
    pub separator_count: usize,
    /// Transfer-agent runs (episodic → semantic extraction).
    pub transfer_count: usize,
    /// Whether the health agent (system audit) runs first.
    pub run_health: bool,
    /// Human-readable explanation for each allocation decision.
    pub rationale: Vec<String>,
}
|
||||
|
||||
/// Analyze metrics and decide how much each agent needs to run.
|
||||
///
|
||||
/// This is the control loop: metrics → error signal → agent allocation.
|
||||
/// Target values are based on healthy small-world networks.
|
||||
pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
|
||||
let graph = store.build_graph();
|
||||
let alpha = graph.degree_power_law_exponent();
|
||||
let gini = graph.degree_gini();
|
||||
let avg_fit = {
|
||||
let fits = graph::schema_fit_all(&graph);
|
||||
if fits.is_empty() { 0.0 } else {
|
||||
fits.values().sum::<f32>() / fits.len() as f32
|
||||
}
|
||||
};
|
||||
let interference_pairs = detect_interference(store, &graph, 0.5);
|
||||
let interference_count = interference_pairs.len();
|
||||
|
||||
// Count episodic vs semantic nodes
|
||||
let episodic_count = store.nodes.iter()
|
||||
.filter(|(k, _)| k.contains("journal") || k.contains("session"))
|
||||
.count();
|
||||
let semantic_count = store.nodes.len() - episodic_count;
|
||||
let episodic_ratio = if store.nodes.is_empty() { 0.0 }
|
||||
else { episodic_count as f32 / store.nodes.len() as f32 };
|
||||
|
||||
let mut plan = ConsolidationPlan {
|
||||
replay_count: 0,
|
||||
linker_count: 0,
|
||||
separator_count: 0,
|
||||
transfer_count: 0,
|
||||
run_health: true, // always run health first
|
||||
rationale: Vec::new(),
|
||||
};
|
||||
|
||||
// Target: α ≥ 2.5 (healthy scale-free)
|
||||
// Current distance determines replay + linker allocation
|
||||
if alpha < 2.0 {
|
||||
plan.replay_count += 10;
|
||||
plan.linker_count += 5;
|
||||
plan.rationale.push(format!(
|
||||
"α={:.2} (target ≥2.5): extreme hub dominance → 10 replay + 5 linker for lateral links",
|
||||
alpha));
|
||||
} else if alpha < 2.5 {
|
||||
plan.replay_count += 5;
|
||||
plan.linker_count += 3;
|
||||
plan.rationale.push(format!(
|
||||
"α={:.2} (target ≥2.5): moderate hub dominance → 5 replay + 3 linker",
|
||||
alpha));
|
||||
} else {
|
||||
plan.replay_count += 3;
|
||||
plan.rationale.push(format!(
|
||||
"α={:.2}: healthy — 3 replay for maintenance", alpha));
|
||||
}
|
||||
|
||||
// Target: Gini ≤ 0.4
|
||||
if gini > 0.5 {
|
||||
plan.replay_count += 3;
|
||||
plan.rationale.push(format!(
|
||||
"Gini={:.3} (target ≤0.4): high inequality → +3 replay (lateral focus)",
|
||||
gini));
|
||||
}
|
||||
|
||||
// Target: avg schema fit ≥ 0.2
|
||||
if avg_fit < 0.1 {
|
||||
plan.replay_count += 5;
|
||||
plan.rationale.push(format!(
|
||||
"Schema fit={:.3} (target ≥0.2): very poor integration → +5 replay",
|
||||
avg_fit));
|
||||
} else if avg_fit < 0.2 {
|
||||
plan.replay_count += 2;
|
||||
plan.rationale.push(format!(
|
||||
"Schema fit={:.3} (target ≥0.2): low integration → +2 replay",
|
||||
avg_fit));
|
||||
}
|
||||
|
||||
// Interference: >100 pairs is a lot, <10 is clean
|
||||
if interference_count > 100 {
|
||||
plan.separator_count += 10;
|
||||
plan.rationale.push(format!(
|
||||
"Interference: {} pairs (target <50) → 10 separator",
|
||||
interference_count));
|
||||
} else if interference_count > 20 {
|
||||
plan.separator_count += 5;
|
||||
plan.rationale.push(format!(
|
||||
"Interference: {} pairs (target <50) → 5 separator",
|
||||
interference_count));
|
||||
} else if interference_count > 0 {
|
||||
plan.separator_count += interference_count.min(3);
|
||||
plan.rationale.push(format!(
|
||||
"Interference: {} pairs → {} separator",
|
||||
interference_count, plan.separator_count));
|
||||
}
|
||||
|
||||
// Episodic → semantic transfer
|
||||
// If >60% of nodes are episodic, knowledge isn't being extracted
|
||||
if episodic_ratio > 0.6 {
|
||||
plan.transfer_count += 10;
|
||||
plan.rationale.push(format!(
|
||||
"Episodic ratio: {:.0}% ({}/{}) → 10 transfer (knowledge extraction needed)",
|
||||
episodic_ratio * 100.0, episodic_count, store.nodes.len()));
|
||||
} else if episodic_ratio > 0.4 {
|
||||
plan.transfer_count += 5;
|
||||
plan.rationale.push(format!(
|
||||
"Episodic ratio: {:.0}% → 5 transfer",
|
||||
episodic_ratio * 100.0));
|
||||
}
|
||||
|
||||
plan
|
||||
}
|
||||
|
||||
/// Format the consolidation plan for display
|
||||
pub fn format_plan(plan: &ConsolidationPlan) -> String {
|
||||
let mut out = String::from("Consolidation Plan\n==================\n\n");
|
||||
|
||||
out.push_str("Analysis:\n");
|
||||
for r in &plan.rationale {
|
||||
out.push_str(&format!(" • {}\n", r));
|
||||
}
|
||||
|
||||
out.push_str("\nAgent allocation:\n");
|
||||
if plan.run_health {
|
||||
out.push_str(" 1. health — system audit\n");
|
||||
}
|
||||
let mut step = 2;
|
||||
if plan.replay_count > 0 {
|
||||
out.push_str(&format!(" {}. replay ×{:2} — schema assimilation + lateral linking\n",
|
||||
step, plan.replay_count));
|
||||
step += 1;
|
||||
}
|
||||
if plan.linker_count > 0 {
|
||||
out.push_str(&format!(" {}. linker ×{:2} — relational binding from episodes\n",
|
||||
step, plan.linker_count));
|
||||
step += 1;
|
||||
}
|
||||
if plan.separator_count > 0 {
|
||||
out.push_str(&format!(" {}. separator ×{} — pattern separation\n",
|
||||
step, plan.separator_count));
|
||||
step += 1;
|
||||
}
|
||||
if plan.transfer_count > 0 {
|
||||
out.push_str(&format!(" {}. transfer ×{:2} — episodic→semantic extraction\n",
|
||||
step, plan.transfer_count));
|
||||
}
|
||||
|
||||
let total = plan.replay_count + plan.linker_count
|
||||
+ plan.separator_count + plan.transfer_count
|
||||
+ if plan.run_health { 1 } else { 0 };
|
||||
out.push_str(&format!("\nTotal agent runs: {}\n", total));
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Brief daily check: compare current metrics to last snapshot
|
||||
pub fn daily_check(store: &Store) -> String {
|
||||
let graph = store.build_graph();
|
||||
let alpha = graph.degree_power_law_exponent();
|
||||
let gini = graph.degree_gini();
|
||||
let sigma = graph.small_world_sigma();
|
||||
let avg_cc = graph.avg_clustering_coefficient();
|
||||
let avg_fit = {
|
||||
let fits = graph::schema_fit_all(&graph);
|
||||
if fits.is_empty() { 0.0 } else {
|
||||
fits.values().sum::<f32>() / fits.len() as f32
|
||||
}
|
||||
};
|
||||
|
||||
let history = graph::load_metrics_history();
|
||||
let prev = history.last();
|
||||
|
||||
let mut out = String::from("Memory daily check\n");
|
||||
|
||||
// Current state
|
||||
out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
|
||||
sigma, alpha, gini, avg_cc, avg_fit));
|
||||
|
||||
// Trend
|
||||
if let Some(p) = prev {
|
||||
let d_sigma = sigma - p.sigma;
|
||||
let d_alpha = alpha - p.alpha;
|
||||
let d_gini = gini - p.gini;
|
||||
|
||||
out.push_str(&format!(" Δσ={:+.1} Δα={:+.2} Δgini={:+.3}\n",
|
||||
d_sigma, d_alpha, d_gini));
|
||||
|
||||
// Assessment
|
||||
let mut issues = Vec::new();
|
||||
if alpha < 2.0 { issues.push("hub dominance critical"); }
|
||||
if gini > 0.5 { issues.push("high inequality"); }
|
||||
if avg_fit < 0.1 { issues.push("poor integration"); }
|
||||
if d_sigma < -5.0 { issues.push("σ declining"); }
|
||||
if d_alpha < -0.1 { issues.push("α declining"); }
|
||||
if d_gini > 0.02 { issues.push("inequality increasing"); }
|
||||
|
||||
if issues.is_empty() {
|
||||
out.push_str(" Status: healthy\n");
|
||||
} else {
|
||||
out.push_str(&format!(" Status: needs attention — {}\n", issues.join(", ")));
|
||||
out.push_str(" Run: poc-memory consolidate-session\n");
|
||||
}
|
||||
} else {
|
||||
out.push_str(" (first snapshot, no trend data yet)\n");
|
||||
}
|
||||
|
||||
// Log this snapshot too
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64();
|
||||
let date = {
|
||||
let o = std::process::Command::new("date").arg("+%Y-%m-%d %H:%M")
|
||||
.output().unwrap_or_else(|_| std::process::Command::new("echo").output().unwrap());
|
||||
String::from_utf8_lossy(&o.stdout).trim().to_string()
|
||||
};
|
||||
graph::save_metrics_snapshot(&graph::MetricsSnapshot {
|
||||
timestamp: now, date,
|
||||
nodes: graph.nodes().len(),
|
||||
edges: graph.edge_count(),
|
||||
communities: graph.community_count(),
|
||||
sigma, alpha, gini, avg_cc,
|
||||
avg_path_length: graph.avg_path_length(),
|
||||
avg_schema_fit: avg_fit,
|
||||
});
|
||||
|
||||
out
|
||||
}
|
||||
146
src/search.rs
Normal file
146
src/search.rs
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
// Spreading activation search across the memory graph
|
||||
//
|
||||
// Same model as the old system but richer: uses graph edge strengths,
|
||||
// supports circumscription parameter for blending associative vs
|
||||
// causal walks, and benefits from community-aware result grouping.
|
||||
|
||||
use crate::capnp_store::Store;
|
||||
use crate::graph::Graph;
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
/// A single ranked hit returned by [`search`].
pub struct SearchResult {
    /// Key of the matched memory node.
    pub key: String,
    /// Final activation after spreading; higher means more relevant.
    pub activation: f64,
    /// True when the node's own content matched the query text,
    /// false when it was reached only via spreading activation.
    pub is_direct: bool,
    /// Matching content lines for direct hits; `None` for nodes reached
    /// only through activation spreading.
    pub snippet: Option<String>,
}
|
||||
|
||||
/// Spreading activation with circumscription parameter.
|
||||
///
|
||||
/// circ = 0.0: field mode — all edges (default, broad resonance)
|
||||
/// circ = 1.0: causal mode — prefer causal edges
|
||||
fn spreading_activation(
|
||||
seeds: &[(String, f64)],
|
||||
graph: &Graph,
|
||||
store: &Store,
|
||||
_circumscription: f64,
|
||||
) -> Vec<(String, f64)> {
|
||||
let params = &store.params;
|
||||
|
||||
let mut activation: HashMap<String, f64> = HashMap::new();
|
||||
let mut queue: VecDeque<(String, f64, u32)> = VecDeque::new();
|
||||
|
||||
for (key, act) in seeds {
|
||||
let current = activation.entry(key.clone()).or_insert(0.0);
|
||||
if *act > *current {
|
||||
*current = *act;
|
||||
queue.push_back((key.clone(), *act, 0));
|
||||
}
|
||||
}
|
||||
|
||||
while let Some((key, act, depth)) = queue.pop_front() {
|
||||
if depth >= params.max_hops { continue; }
|
||||
|
||||
for (neighbor, strength) in graph.neighbors(&key) {
|
||||
let neighbor_weight = store.nodes.get(neighbor.as_str())
|
||||
.map(|n| n.weight as f64)
|
||||
.unwrap_or(params.default_weight);
|
||||
|
||||
let propagated = act * params.edge_decay * neighbor_weight * strength as f64;
|
||||
if propagated < params.min_activation { continue; }
|
||||
|
||||
let current = activation.entry(neighbor.clone()).or_insert(0.0);
|
||||
if propagated > *current {
|
||||
*current = propagated;
|
||||
queue.push_back((neighbor.clone(), propagated, depth + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut results: Vec<_> = activation.into_iter().collect();
|
||||
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
|
||||
results
|
||||
}
|
||||
|
||||
/// Full search: find direct hits, spread activation, return ranked results
|
||||
pub fn search(query: &str, store: &Store) -> Vec<SearchResult> {
|
||||
let graph = store.build_graph();
|
||||
let query_lower = query.to_lowercase();
|
||||
let query_tokens: Vec<&str> = query_lower.split_whitespace().collect();
|
||||
|
||||
let mut seeds: Vec<(String, f64)> = Vec::new();
|
||||
let mut snippets: HashMap<String, String> = HashMap::new();
|
||||
|
||||
for (key, node) in &store.nodes {
|
||||
let content_lower = node.content.to_lowercase();
|
||||
|
||||
let exact_match = content_lower.contains(&query_lower);
|
||||
let token_match = query_tokens.len() > 1
|
||||
&& query_tokens.iter().all(|t| content_lower.contains(t));
|
||||
|
||||
if exact_match || token_match {
|
||||
let weight = node.weight as f64;
|
||||
let activation = if exact_match { weight } else { weight * 0.85 };
|
||||
seeds.push((key.clone(), activation));
|
||||
|
||||
let snippet: String = node.content.lines()
|
||||
.filter(|l| {
|
||||
let ll = l.to_lowercase();
|
||||
if exact_match && ll.contains(&query_lower) { return true; }
|
||||
query_tokens.iter().any(|t| ll.contains(t))
|
||||
})
|
||||
.take(3)
|
||||
.map(|l| {
|
||||
let t = l.trim();
|
||||
if t.len() > 100 {
|
||||
let end = t.floor_char_boundary(97);
|
||||
format!("{}...", &t[..end])
|
||||
} else {
|
||||
t.to_string()
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n ");
|
||||
snippets.insert(key.clone(), snippet);
|
||||
}
|
||||
}
|
||||
|
||||
if seeds.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let direct_hits: HashSet<String> = seeds.iter().map(|(k, _)| k.clone()).collect();
|
||||
let raw_results = spreading_activation(&seeds, &graph, store, 0.0);
|
||||
|
||||
raw_results.into_iter().map(|(key, activation)| {
|
||||
let is_direct = direct_hits.contains(&key);
|
||||
let snippet = snippets.get(&key).cloned();
|
||||
SearchResult { key, activation, is_direct, snippet }
|
||||
}).collect()
|
||||
}
|
||||
|
||||
/// Extract meaningful search terms from natural language.
/// Strips common English stop words and short words (≤ 2 chars),
/// returning up to `max_terms` lowercase words joined by spaces.
/// Returns an empty string when nothing survives the filter.
pub fn extract_query_terms(text: &str, max_terms: usize) -> String {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
        "have", "has", "had", "will", "would", "could", "should", "can",
        "may", "might", "shall", "been", "being", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
        "no", "if", "then", "than", "that", "this", "it", "its", "my",
        "your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
        "what", "how", "why", "when", "where", "about", "just", "let",
        "want", "tell", "show", "think", "know", "see", "look", "make",
        "get", "go", "some", "any", "all", "very", "really", "also", "too",
        "so", "up", "out", "here", "there",
    ];

    text.to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        // `len() > 2` already rejects empty strings, so no separate
        // is_empty() check is needed.
        .filter(|w| w.len() > 2 && !STOP_WORDS.contains(w))
        .take(max_terms)
        .collect::<Vec<_>>()
        .join(" ")
}
|
||||
135
src/similarity.rs
Normal file
135
src/similarity.rs
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
// Text similarity: Porter stemming + BM25
|
||||
//
|
||||
// Used for interference detection (similar content, different communities)
|
||||
// and schema fit scoring. Intentionally simple — ~100 lines, no
|
||||
// external dependencies.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Minimal Porter stemmer — handles the most common English suffixes.
/// Not linguistically complete but good enough for similarity matching.
pub fn stem(word: &str) -> String {
    // Suffix rewrite rules, applied in order to the evolving word.
    // Each rule fires only when the remaining stem would be longer than
    // 2 characters (i.e. word length > suffix length + 2).
    const RULES: &[(&str, &str)] = &[
        ("ation", "ate"),
        ("ness", ""),
        ("ment", ""),
        ("ting", "t"),
        ("ling", "l"),
        ("ring", "r"),
        ("ning", "n"),
        ("ding", "d"),
        ("ping", "p"),
        ("ging", "g"),
        ("ying", "y"),
        ("ied", "y"),
        ("ies", "y"),
        ("ing", ""),
        ("ed", ""),
        ("ly", ""),
        ("er", ""),
        ("al", ""),
        ("s", ""),
    ];

    let mut w = word.to_lowercase();
    // Very short words are returned untouched.
    if w.len() <= 3 {
        return w;
    }

    for &(suffix, replacement) in RULES {
        if w.len() > suffix.len() + 2 && w.ends_with(suffix) {
            w.truncate(w.len() - suffix.len());
            w.push_str(replacement);
        }
    }
    w
}
|
||||
|
||||
/// Replace `suffix` with `replacement` at the end of `word`, but only
/// when the remaining stem stays longer than 2 characters; otherwise
/// the word is returned unchanged.
fn strip_suffix(word: &str, suffix: &str, replacement: &str) -> String {
    // Guard: stripping must leave a stem of more than 2 chars.
    if word.len() <= suffix.len() + 2 {
        return word.to_string();
    }
    match word.strip_suffix(suffix) {
        Some(base) => format!("{}{}", base, replacement),
        None => word.to_string(),
    }
}
|
||||
|
||||
/// Tokenize and stem a text into a term frequency map
|
||||
pub fn term_frequencies(text: &str) -> HashMap<String, u32> {
|
||||
let mut tf = HashMap::new();
|
||||
for word in text.split(|c: char| !c.is_alphanumeric()) {
|
||||
if word.len() > 2 {
|
||||
let stemmed = stem(word);
|
||||
*tf.entry(stemmed).or_default() += 1;
|
||||
}
|
||||
}
|
||||
tf
|
||||
}
|
||||
|
||||
/// Cosine similarity between two documents using stemmed term frequencies.
|
||||
/// Returns 0.0 for disjoint vocabularies, 1.0 for identical content.
|
||||
pub fn cosine_similarity(doc_a: &str, doc_b: &str) -> f32 {
|
||||
let tf_a = term_frequencies(doc_a);
|
||||
let tf_b = term_frequencies(doc_b);
|
||||
|
||||
if tf_a.is_empty() || tf_b.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Dot product
|
||||
let mut dot = 0.0f64;
|
||||
for (term, &freq_a) in &tf_a {
|
||||
if let Some(&freq_b) = tf_b.get(term) {
|
||||
dot += freq_a as f64 * freq_b as f64;
|
||||
}
|
||||
}
|
||||
|
||||
// Magnitudes
|
||||
let mag_a: f64 = tf_a.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
|
||||
let mag_b: f64 = tf_b.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
|
||||
|
||||
if mag_a < 1e-10 || mag_b < 1e-10 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
(dot / (mag_a * mag_b)) as f32
|
||||
}
|
||||
|
||||
/// Compute pairwise similarity for a set of documents.
|
||||
/// Returns pairs with similarity above threshold.
|
||||
pub fn pairwise_similar(
|
||||
docs: &[(String, String)], // (key, content)
|
||||
threshold: f32,
|
||||
) -> Vec<(String, String, f32)> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for i in 0..docs.len() {
|
||||
for j in (i + 1)..docs.len() {
|
||||
let sim = cosine_similarity(&docs[i].1, &docs[j].1);
|
||||
if sim >= threshold {
|
||||
results.push((docs[i].0.clone(), docs[j].0.clone(), sim));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap());
|
||||
results
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_stem() {
        assert_eq!(stem("running"), "runn"); // -ning → n
        assert_eq!(stem("talking"), "talk"); // generic -ing rule; no consonant-specific rule matches "king"
        assert_eq!(stem("slowly"), "slow"); // -ly
        // The stemmer is minimal — it doesn't need to be perfect,
        // just consistent enough that related words collide.
        // "-ation" is checked before the trailing "-s" is removed, so it
        // never fires on "observations"; only the final "-s" rule applies.
        assert_eq!(stem("observations"), "observation");
    }

    #[test]
    fn test_cosine_identical() {
        // Identical input must score ~1.0 (allowing f32 rounding).
        let text = "the quick brown fox jumps over the lazy dog";
        let sim = cosine_similarity(text, text);
        assert!((sim - 1.0).abs() < 0.01, "identical docs should have sim ~1.0, got {}", sim);
    }

    #[test]
    fn test_cosine_different() {
        // Disjoint vocabularies must score near zero.
        let a = "kernel filesystem transaction restart handling";
        let b = "cooking recipe chocolate cake baking temperature";
        let sim = cosine_similarity(a, b);
        assert!(sim < 0.1, "unrelated docs should have low sim, got {}", sim);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue