split into workspace: poc-memory and poc-daemon subcrates
poc-daemon (notification routing, idle timer, IRC, Telegram) was already fully
self-contained, with no imports from the poc-memory library. Now it is a proper
separate crate with its own Cargo.toml and capnp schema. poc-memory retains the
store, graph, search, neuro, and knowledge modules, plus the jobkit-based memory
maintenance daemon (daemon.rs).

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
parent 488fd5a0aa
commit fc48ac7c7f
53 changed files with 108 additions and 76 deletions
poc-memory/src/audit.rs (new file, 333 lines)
@@ -0,0 +1,333 @@
// Link audit: walk every link in the graph, batch to Sonnet for quality review.
//
// Each batch of links gets reviewed by Sonnet, which returns per-link actions:
// KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon.

use crate::llm::call_sonnet;
use crate::store::{self, Store, new_relation};

use std::collections::HashSet;

struct LinkInfo {
    rel_idx: usize,
    source_key: String,
    target_key: String,
    source_content: String,
    target_content: String,
    strength: f32,
    target_sections: Vec<String>,
}

pub struct AuditStats {
    pub kept: usize,
    pub deleted: usize,
    pub retargeted: usize,
    pub weakened: usize,
    pub strengthened: usize,
    pub errors: usize,
}

fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String {
    let mut prompt = format!(
        "You are auditing memory graph links for quality (batch {}/{}).\n\n\
        For each numbered link, decide what to do:\n\n\
        KEEP N — link is meaningful, leave it\n\
        DELETE N — link is noise, accidental, or too generic to be useful\n\
        RETARGET N new_key — link points to the right topic area but wrong node;\n\
        \x20 retarget to a more specific section (listed under each link)\n\
        WEAKEN N strength — link is marginal; reduce strength (0.1-0.3)\n\
        STRENGTHEN N strength — link is important but underweighted; increase (0.8-1.0)\n\n\
        Output exactly one action per link number, nothing else.\n\n\
        Links to review:\n\n",
        batch_num, total_batches);

    for (i, link) in batch.iter().enumerate() {
        let n = i + 1;
        prompt.push_str(&format!(
            "--- Link {} ---\n\
            {} → {} (strength={:.2})\n\n\
            Source content:\n{}\n\n\
            Target content:\n{}\n",
            n, link.source_key, link.target_key, link.strength,
            &link.source_content, &link.target_content));

        if !link.target_sections.is_empty() {
            prompt.push_str(
                "\nTarget has sections (consider RETARGET to a more specific one):\n");
            for s in &link.target_sections {
                prompt.push_str(&format!(" - {}\n", s));
            }
        }
        prompt.push('\n');
    }

    prompt
}

fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> {
    let mut actions = Vec::new();

    for line in response.lines() {
        let line = line.trim();
        if line.is_empty() { continue; }

        let parts: Vec<&str> = line.splitn(3, ' ').collect();
        if parts.len() < 2 { continue; }

        let action = parts[0].to_uppercase();
        let idx: usize = match parts[1].parse::<usize>() {
            Ok(n) if n >= 1 && n <= batch_size => n - 1,
            _ => continue,
        };

        let audit_action = match action.as_str() {
            "KEEP" => AuditAction::Keep,
            "DELETE" => AuditAction::Delete,
            "RETARGET" => {
                if parts.len() < 3 { continue; }
                AuditAction::Retarget(parts[2].trim().to_string())
            }
            "WEAKEN" => {
                if parts.len() < 3 { continue; }
                match parts[2].trim().parse::<f32>() {
                    Ok(s) => AuditAction::Weaken(s),
                    Err(_) => continue,
                }
            }
            "STRENGTHEN" => {
                if parts.len() < 3 { continue; }
                match parts[2].trim().parse::<f32>() {
                    Ok(s) => AuditAction::Strengthen(s),
                    Err(_) => continue,
                }
            }
            _ => continue,
        };

        actions.push((idx, audit_action));
    }

    actions
}

enum AuditAction {
    Keep,
    Delete,
    Retarget(String),
    Weaken(f32),
    Strengthen(f32),
}

/// Run a full link audit: walk every link, batch to Sonnet, apply results.
pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String> {
    // Collect all non-deleted relations with their info
    let mut links: Vec<LinkInfo> = Vec::new();

    for (idx, rel) in store.relations.iter().enumerate() {
        if rel.deleted { continue; }

        let source_content = store.nodes.get(&rel.source_key)
            .map(|n| n.content.clone()).unwrap_or_default();
        let target_content = store.nodes.get(&rel.target_key)
            .map(|n| n.content.clone()).unwrap_or_default();

        // Find section children of target if it's file-level
        let target_sections = if !rel.target_key.contains('#') {
            let prefix = format!("{}#", rel.target_key);
            store.nodes.keys()
                .filter(|k| k.starts_with(&prefix))
                .cloned()
                .collect()
        } else {
            Vec::new()
        };

        links.push(LinkInfo {
            rel_idx: idx,
            source_key: rel.source_key.clone(),
            target_key: rel.target_key.clone(),
            source_content,
            target_content,
            strength: rel.strength,
            target_sections,
        });
    }

    let total = links.len();
    println!("Link audit: {} links to review", total);
    if !apply {
        println!("DRY RUN — use --apply to make changes");
    }

    // Batch by char budget (~100K chars per prompt)
    let char_budget = 100_000usize;
    let mut batches: Vec<Vec<usize>> = Vec::new();
    let mut current_batch: Vec<usize> = Vec::new();
    let mut current_chars = 0usize;

    for (i, link) in links.iter().enumerate() {
        let link_chars = link.source_content.len() + link.target_content.len() + 200;
        if !current_batch.is_empty() && current_chars + link_chars > char_budget {
            batches.push(std::mem::take(&mut current_batch));
            current_chars = 0;
        }
        current_batch.push(i);
        current_chars += link_chars;
    }
    if !current_batch.is_empty() {
        batches.push(current_batch);
    }

    let total_batches = batches.len();
    println!("{} batches (avg {} links/batch)\n", total_batches,
        if total_batches > 0 { total / total_batches } else { 0 });

    use rayon::prelude::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Build all batch prompts up front
    let batch_data: Vec<(usize, Vec<LinkInfo>, String)> = batches.iter().enumerate()
        .map(|(batch_idx, batch_indices)| {
            let batch_infos: Vec<LinkInfo> = batch_indices.iter().map(|&i| {
                let l = &links[i];
                LinkInfo {
                    rel_idx: l.rel_idx,
                    source_key: l.source_key.clone(),
                    target_key: l.target_key.clone(),
                    source_content: l.source_content.clone(),
                    target_content: l.target_content.clone(),
                    strength: l.strength,
                    target_sections: l.target_sections.clone(),
                }
            }).collect();
            let prompt = build_audit_prompt(&batch_infos, batch_idx + 1, total_batches);
            (batch_idx, batch_infos, prompt)
        })
        .collect();

    // Progress counter
    let done = AtomicUsize::new(0);

    // Run batches in parallel via rayon
    let batch_results: Vec<_> = batch_data.par_iter()
        .map(|(batch_idx, batch_infos, prompt)| {
            let response = call_sonnet("audit", prompt);
            let completed = done.fetch_add(1, Ordering::Relaxed) + 1;
            eprint!("\r Batches: {}/{} done", completed, total_batches);
            (*batch_idx, batch_infos, response)
        })
        .collect();
    eprintln!(); // newline after progress

    // Process results sequentially
    let mut stats = AuditStats {
        kept: 0, deleted: 0, retargeted: 0, weakened: 0, strengthened: 0, errors: 0,
    };
    let mut deletions: Vec<usize> = Vec::new();
    let mut retargets: Vec<(usize, String)> = Vec::new();
    let mut strength_changes: Vec<(usize, f32)> = Vec::new();

    for (batch_idx, batch_infos, response) in &batch_results {
        let response = match response {
            Ok(r) => r,
            Err(e) => {
                eprintln!(" Batch {}: error: {}", batch_idx + 1, e);
                stats.errors += batch_infos.len();
                continue;
            }
        };

        let actions = parse_audit_response(response, batch_infos.len());

        let mut responded: HashSet<usize> = HashSet::new();

        for (idx, action) in &actions {
            responded.insert(*idx);
            let link = &batch_infos[*idx];

            match action {
                AuditAction::Keep => {
                    stats.kept += 1;
                }
                AuditAction::Delete => {
                    println!(" DELETE {} → {}", link.source_key, link.target_key);
                    deletions.push(link.rel_idx);
                    stats.deleted += 1;
                }
                AuditAction::Retarget(new_target) => {
                    println!(" RETARGET {} → {} (was {})",
                        link.source_key, new_target, link.target_key);
                    retargets.push((link.rel_idx, new_target.clone()));
                    stats.retargeted += 1;
                }
                AuditAction::Weaken(s) => {
                    println!(" WEAKEN {} → {} (str {:.2} → {:.2})",
                        link.source_key, link.target_key, link.strength, s);
                    strength_changes.push((link.rel_idx, *s));
                    stats.weakened += 1;
                }
                AuditAction::Strengthen(s) => {
                    println!(" STRENGTHEN {} → {} (str {:.2} → {:.2})",
                        link.source_key, link.target_key, link.strength, s);
                    strength_changes.push((link.rel_idx, *s));
                    stats.strengthened += 1;
                }
            }
        }

        for i in 0..batch_infos.len() {
            if !responded.contains(&i) {
                stats.kept += 1;
            }
        }

        println!(" Batch {}/{}: +{}kept +{}del +{}retarget +{}weak +{}strong",
            batch_idx + 1, total_batches,
            stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened);
    }

    // Apply changes
    if apply && (stats.deleted > 0 || stats.retargeted > 0
        || stats.weakened > 0 || stats.strengthened > 0) {
        println!("\nApplying changes...");

        // Deletions: soft-delete
        for rel_idx in &deletions {
            store.relations[*rel_idx].deleted = true;
        }

        // Strength changes
        for (rel_idx, new_strength) in &strength_changes {
            store.relations[*rel_idx].strength = *new_strength;
        }

        // Retargets: soft-delete old, create new
        for (rel_idx, new_target) in &retargets {
            let source_key = store.relations[*rel_idx].source_key.clone();
            let old_strength = store.relations[*rel_idx].strength;
            let source_uuid = store.nodes.get(&source_key)
                .map(|n| n.uuid).unwrap_or([0u8; 16]);
            let target_uuid = store.nodes.get(new_target)
                .map(|n| n.uuid).unwrap_or([0u8; 16]);

            // Soft-delete old
            store.relations[*rel_idx].deleted = true;

            // Create new
            if target_uuid != [0u8; 16] {
                let new_rel = new_relation(
                    source_uuid, target_uuid,
                    store::RelationType::Auto,
                    old_strength,
                    &source_key, new_target,
                );
                store.add_relation(new_rel).ok();
            }
        }

        store.save()?;
        println!("Saved.");
    }

    Ok(stats)
}
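For reference, a minimal sketch of the reply shape parse_audit_response above expects: one "ACTION N [arg]" per line, 1-based link numbers, with anything unparseable skipped. This is not part of the diff; the link key is made up, and the test assumes it sits in the same module since the parser and AuditAction are private.

    #[test]
    fn parse_audit_response_accepts_one_action_per_line() {
        // Hypothetical model reply for a 3-link batch; "project-notes#setup" is an invented key.
        let reply = "KEEP 1\nWEAKEN 2 0.2\nRETARGET 3 project-notes#setup\nsome stray prose";
        let actions = parse_audit_response(reply, 3);
        assert_eq!(actions.len(), 3);
        // Indices come back zero-based; unparseable lines are dropped silently.
        assert!(matches!(actions[0], (0, AuditAction::Keep)));
        assert!(matches!(actions[2], (2, AuditAction::Retarget(_))));
    }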
poc-memory/src/bin/memory-search.rs (new file, 185 lines)
@@ -0,0 +1,185 @@
// memory-search: combined hook for session context loading + ambient memory retrieval
//
// On first prompt per session: loads full memory context (identity, journal, etc.)
// On subsequent prompts: searches memory for relevant entries
// On post-compaction: reloads full context
//
// Reads JSON from stdin (Claude Code UserPromptSubmit hook format),
// outputs results for injection into the conversation.

use poc_memory::search;
use poc_memory::store;
use std::collections::HashSet;
use std::fs;
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, SystemTime};

fn main() {
    let mut input = String::new();
    io::stdin().read_to_string(&mut input).unwrap_or_default();

    let json: serde_json::Value = match serde_json::from_str(&input) {
        Ok(v) => v,
        Err(_) => return,
    };

    let prompt = json["prompt"].as_str().unwrap_or("");
    let session_id = json["session_id"].as_str().unwrap_or("");

    if prompt.is_empty() || session_id.is_empty() {
        return;
    }

    let state_dir = PathBuf::from("/tmp/claude-memory-search");
    fs::create_dir_all(&state_dir).ok();

    // Detect post-compaction reload
    let is_compaction = prompt.contains("continued from a previous conversation");

    // First prompt or post-compaction: load full context
    let cookie_path = state_dir.join(format!("cookie-{}", session_id));
    let is_first = !cookie_path.exists();

    if is_first || is_compaction {
        // Create/touch the cookie
        let cookie = if is_first {
            let c = generate_cookie();
            fs::write(&cookie_path, &c).ok();
            c
        } else {
            fs::read_to_string(&cookie_path).unwrap_or_default().trim().to_string()
        };

        // Load full memory context
        if let Ok(output) = Command::new("poc-memory").args(["load-context"]).output() {
            if output.status.success() {
                let ctx = String::from_utf8_lossy(&output.stdout);
                if !ctx.trim().is_empty() {
                    print!("{}", ctx);
                }
            }
        }

        // On first prompt, also bump lookup counter for the cookie
        let _ = cookie; // used for tagging below
    }

    // Always do ambient search (skip on very short or system prompts)
    let word_count = prompt.split_whitespace().count();
    if word_count < 3 {
        return;
    }

    for prefix in &["is AFK", "You're on your own", "IRC mention"] {
        if prompt.starts_with(prefix) {
            return;
        }
    }

    let query = search::extract_query_terms(prompt, 3);
    if query.is_empty() {
        return;
    }

    let store = match store::Store::load() {
        Ok(s) => s,
        Err(_) => return,
    };

    let results = search::search(&query, &store);
    if results.is_empty() {
        return;
    }

    // Format results like poc-memory search output
    let search_output = search::format_results(&results);

    let cookie = fs::read_to_string(&cookie_path).unwrap_or_default().trim().to_string();
    let seen = load_seen(&state_dir, session_id);

    let mut result_output = String::new();
    let mut count = 0;
    let max_entries = 5;

    for line in search_output.lines() {
        if count >= max_entries { break; }

        let trimmed = line.trim();
        if trimmed.is_empty() { continue; }

        if let Some(key) = extract_key_from_line(trimmed) {
            if seen.contains(&key) { continue; }
            mark_seen(&state_dir, session_id, &key);
            result_output.push_str(line);
            result_output.push('\n');
            count += 1;
        } else if count > 0 {
            result_output.push_str(line);
            result_output.push('\n');
        }
    }

    if count == 0 { return; }

    println!("Recalled memories [{}]:", cookie);
    print!("{}", result_output);

    // Clean up stale state files (opportunistic)
    cleanup_stale_files(&state_dir, Duration::from_secs(86400));
}


fn extract_key_from_line(line: &str) -> Option<String> {
    let after_bracket = line.find("] ")?;
    let rest = &line[after_bracket + 2..];
    let key_end = rest.find(" (c").unwrap_or(rest.len());
    let key = rest[..key_end].trim();
    if key.is_empty() || !key.contains('.') {
        None
    } else {
        Some(key.to_string())
    }
}

fn generate_cookie() -> String {
    uuid::Uuid::new_v4().as_simple().to_string()[..12].to_string()
}

fn load_seen(dir: &Path, session_id: &str) -> HashSet<String> {
    let path = dir.join(format!("seen-{}", session_id));
    if path.exists() {
        fs::read_to_string(path)
            .unwrap_or_default()
            .lines()
            .map(|s| s.to_string())
            .collect()
    } else {
        HashSet::new()
    }
}

fn mark_seen(dir: &Path, session_id: &str, key: &str) {
    let path = dir.join(format!("seen-{}", session_id));
    if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
        writeln!(f, "{}", key).ok();
    }
}

fn cleanup_stale_files(dir: &Path, max_age: Duration) {
    let entries = match fs::read_dir(dir) {
        Ok(e) => e,
        Err(_) => return,
    };
    let cutoff = SystemTime::now() - max_age;
    for entry in entries.flatten() {
        if let Ok(meta) = entry.metadata() {
            if let Ok(modified) = meta.modified() {
                if modified < cutoff {
                    fs::remove_file(entry.path()).ok();
                }
            }
        }
    }
}
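For reference, a minimal sketch of what extract_key_from_line pulls out of a search-result line. The exact format_results layout is not shown in this diff; the sample line below is only an assumption consistent with the parser (a "] " delimiter, then the key, optionally followed by " (c...").

    #[test]
    fn extracts_dotted_key_from_result_line() {
        // Hypothetical result line; key and score are made up.
        let line = "[0.82] projects.bcachefs-notes (c4, 2026-02-11)";
        assert_eq!(
            extract_key_from_line(line),
            Some("projects.bcachefs-notes".to_string())
        );
        // Lines without a dotted key are treated as continuation text, not entries.
        assert_eq!(extract_key_from_line("[0.40] something without a dot"), None);
    }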
poc-memory/src/bin/poc-hook.rs (new file, 171 lines)
@@ -0,0 +1,171 @@
// Unified Claude Code hook.
//
// Single binary handling all hook events:
// UserPromptSubmit — signal daemon, check notifications, check context
// PostToolUse — check context (rate-limited)
// Stop — signal daemon response
//
// Replaces: record-user-message-time.sh, check-notifications.sh,
// check-context-usage.sh, notify-done.sh, context-check

use serde_json::Value;
use std::fs;
use std::io::{self, Read};
use std::path::PathBuf;
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};

const CONTEXT_THRESHOLD: u64 = 130_000;
const RATE_LIMIT_SECS: u64 = 60;
const SOCK_PATH: &str = ".claude/hooks/idle-timer.sock";

fn now_secs() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs()
}

fn home() -> PathBuf {
    PathBuf::from(std::env::var("HOME").unwrap_or_else(|_| "/root".into()))
}

fn daemon_cmd(args: &[&str]) {
    Command::new("poc-daemon")
        .args(args)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status()
        .ok();
}

fn daemon_available() -> bool {
    home().join(SOCK_PATH).exists()
}

fn signal_user() {
    let pane = std::env::var("TMUX_PANE").unwrap_or_default();
    if pane.is_empty() {
        daemon_cmd(&["user"]);
    } else {
        daemon_cmd(&["user", &pane]);
    }
}

fn signal_response() {
    daemon_cmd(&["response"]);
}

fn check_notifications() {
    if !daemon_available() {
        return;
    }
    let output = Command::new("poc-daemon")
        .arg("notifications")
        .output()
        .ok();
    if let Some(out) = output {
        let text = String::from_utf8_lossy(&out.stdout);
        if !text.trim().is_empty() {
            println!("You have pending notifications:");
            print!("{text}");
        }
    }
}

fn check_context(transcript: &PathBuf, rate_limit: bool) {
    if rate_limit {
        let rate_file = PathBuf::from("/tmp/claude-context-check-last");
        if let Ok(s) = fs::read_to_string(&rate_file) {
            if let Ok(last) = s.trim().parse::<u64>() {
                if now_secs() - last < RATE_LIMIT_SECS {
                    return;
                }
            }
        }
        let _ = fs::write(&rate_file, now_secs().to_string());
    }

    if !transcript.exists() {
        return;
    }

    let content = match fs::read_to_string(transcript) {
        Ok(c) => c,
        Err(_) => return,
    };

    let mut usage: u64 = 0;
    for line in content.lines().rev().take(500) {
        if !line.contains("cache_read_input_tokens") {
            continue;
        }
        if let Ok(v) = serde_json::from_str::<Value>(line) {
            let u = &v["message"]["usage"];
            let input_tokens = u["input_tokens"].as_u64().unwrap_or(0);
            let cache_creation = u["cache_creation_input_tokens"].as_u64().unwrap_or(0);
            let cache_read = u["cache_read_input_tokens"].as_u64().unwrap_or(0);
            usage = input_tokens + cache_creation + cache_read;
            break;
        }
    }

    if usage > CONTEXT_THRESHOLD {
        print!(
            "\
CONTEXT WARNING: Compaction approaching ({usage} tokens). Write a journal entry NOW.

Use `poc-memory journal-write \"entry text\"` to save a dated entry covering:
- What you're working on and current state (done / in progress / blocked)
- Key things learned this session (patterns, debugging insights)
- Anything half-finished that needs pickup

Keep it narrative, not a task log."
        );
    }
}

fn main() {
    let mut input = String::new();
    io::stdin().read_to_string(&mut input).ok();

    let hook: Value = match serde_json::from_str(&input) {
        Ok(v) => v,
        Err(_) => return,
    };

    let hook_type = hook["hook_event_name"].as_str().unwrap_or("unknown");
    let transcript = hook["transcript_path"]
        .as_str()
        .filter(|p| !p.is_empty())
        .map(PathBuf::from);

    // Daemon agent calls set POC_AGENT=1 — skip all signaling.
    // Without this, the daemon's claude -p calls trigger hooks that
    // signal "user active", keeping the idle timer permanently reset.
    if std::env::var("POC_AGENT").is_ok() {
        return;
    }

    match hook_type {
        "UserPromptSubmit" => {
            signal_user();
            check_notifications();
            if let Some(ref t) = transcript {
                check_context(t, false);
            }
        }
        "PostToolUse" => {
            if let Some(ref t) = transcript {
                check_context(t, true);
            }
        }
        "Stop" => {
            let stop_hook_active = hook["stop_hook_active"].as_bool().unwrap_or(false);
            if !stop_hook_active {
                signal_response();
            }
        }
        _ => {}
    }
}
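For reference, a minimal sketch of the usage math in check_context: the three usage fields are summed from the most recent transcript line that mentions cache_read_input_tokens. The field names are taken from the code above; the rest of the transcript line shape is an assumption.

    #[test]
    fn sums_usage_fields_like_check_context() {
        // Hypothetical transcript line; only the message.usage fields matter here.
        let line = r#"{"message":{"usage":{"input_tokens":1200,"cache_creation_input_tokens":800,"cache_read_input_tokens":130500}}}"#;
        let v: serde_json::Value = serde_json::from_str(line).unwrap();
        let u = &v["message"]["usage"];
        let usage = u["input_tokens"].as_u64().unwrap_or(0)
            + u["cache_creation_input_tokens"].as_u64().unwrap_or(0)
            + u["cache_read_input_tokens"].as_u64().unwrap_or(0);
        // 1200 + 800 + 130500 = 132500, over CONTEXT_THRESHOLD, so the warning would fire.
        assert_eq!(usage, 132_500);
    }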
poc-memory/src/config.rs (new file, 185 lines)
@@ -0,0 +1,185 @@
// Configuration for poc-memory
//
// Loaded from ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG env).
// Falls back to sensible defaults if no config file exists.
//
// Format: JSONL — one JSON object per line.
// First line with "config" key: global settings.
// Lines with "group" key: context loading groups (order preserved).
//
// Example:
// {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}}
// {"group": "identity", "keys": ["identity"]}
// {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"}

use std::path::PathBuf;
use std::sync::OnceLock;

static CONFIG: OnceLock<Config> = OnceLock::new();

#[derive(Debug, Clone, PartialEq)]
pub enum ContextSource {
    Store,
    File,
    Journal,
}

#[derive(Debug, Clone)]
pub struct ContextGroup {
    pub label: String,
    pub keys: Vec<String>,
    pub source: ContextSource,
}

#[derive(Debug, Clone)]
pub struct Config {
    /// Display name for the human user in transcripts/prompts.
    pub user_name: String,
    /// Display name for the AI assistant.
    pub assistant_name: String,
    /// Base directory for memory data (store, logs, status).
    pub data_dir: PathBuf,
    /// Directory containing Claude session transcripts.
    pub projects_dir: PathBuf,
    /// Core node keys that should never be decayed/deleted.
    pub core_nodes: Vec<String>,
    /// How many days of journal to include in load-context.
    pub journal_days: u32,
    /// Max journal entries to include in load-context.
    pub journal_max: usize,
    /// Ordered context groups for session-start loading.
    pub context_groups: Vec<ContextGroup>,
    /// Max concurrent LLM calls in the daemon.
    pub llm_concurrency: usize,
    /// Separate Claude config dir for background agent work (daemon jobs).
    /// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
    /// with different OAuth credentials than the interactive session.
    pub agent_config_dir: Option<PathBuf>,
}

impl Default for Config {
    fn default() -> Self {
        let home = PathBuf::from(std::env::var("HOME").expect("HOME not set"));
        Self {
            user_name: "User".to_string(),
            assistant_name: "Assistant".to_string(),
            data_dir: home.join(".claude/memory"),
            projects_dir: home.join(".claude/projects"),
            core_nodes: vec!["identity".to_string(), "core-practices".to_string()],
            journal_days: 7,
            journal_max: 20,
            context_groups: vec![
                ContextGroup {
                    label: "identity".into(),
                    keys: vec!["identity".into()],
                    source: ContextSource::Store,
                },
                ContextGroup {
                    label: "core-practices".into(),
                    keys: vec!["core-practices".into()],
                    source: ContextSource::Store,
                },
            ],
            llm_concurrency: 1,
            agent_config_dir: None,
        }
    }
}

impl Config {
    fn load_from_file() -> Self {
        let path = std::env::var("POC_MEMORY_CONFIG")
            .map(PathBuf::from)
            .unwrap_or_else(|_| {
                PathBuf::from(std::env::var("HOME").expect("HOME not set"))
                    .join(".config/poc-memory/config.jsonl")
            });

        let mut config = Config::default();

        let Ok(content) = std::fs::read_to_string(&path) else {
            return config;
        };

        let mut context_groups: Vec<ContextGroup> = Vec::new();

        // Parse as a stream of JSON values (handles multi-line objects)
        let stream = serde_json::Deserializer::from_str(&content)
            .into_iter::<serde_json::Value>();

        for result in stream {
            let Ok(obj) = result else { continue };

            // Global config line
            if let Some(cfg) = obj.get("config") {
                if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) {
                    config.user_name = s.to_string();
                }
                if let Some(s) = cfg.get("assistant_name").and_then(|v| v.as_str()) {
                    config.assistant_name = s.to_string();
                }
                if let Some(s) = cfg.get("data_dir").and_then(|v| v.as_str()) {
                    config.data_dir = expand_home(s);
                }
                if let Some(s) = cfg.get("projects_dir").and_then(|v| v.as_str()) {
                    config.projects_dir = expand_home(s);
                }
                if let Some(arr) = cfg.get("core_nodes").and_then(|v| v.as_array()) {
                    config.core_nodes = arr.iter()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect();
                }
                if let Some(d) = cfg.get("journal_days").and_then(|v| v.as_u64()) {
                    config.journal_days = d as u32;
                }
                if let Some(m) = cfg.get("journal_max").and_then(|v| v.as_u64()) {
                    config.journal_max = m as usize;
                }
                if let Some(n) = cfg.get("llm_concurrency").and_then(|v| v.as_u64()) {
                    config.llm_concurrency = n.max(1) as usize;
                }
                if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) {
                    config.agent_config_dir = Some(expand_home(s));
                }
                continue;
            }

            // Context group line
            if let Some(label) = obj.get("group").and_then(|v| v.as_str()) {
                let keys = obj.get("keys")
                    .and_then(|v| v.as_array())
                    .map(|arr| arr.iter()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect())
                    .unwrap_or_default();

                let source = match obj.get("source").and_then(|v| v.as_str()) {
                    Some("file") => ContextSource::File,
                    Some("journal") => ContextSource::Journal,
                    _ => ContextSource::Store,
                };

                context_groups.push(ContextGroup { label: label.to_string(), keys, source });
            }
        }

        if !context_groups.is_empty() {
            config.context_groups = context_groups;
        }

        config
    }
}

fn expand_home(path: &str) -> PathBuf {
    if let Some(rest) = path.strip_prefix("~/") {
        PathBuf::from(std::env::var("HOME").expect("HOME not set")).join(rest)
    } else {
        PathBuf::from(path)
    }
}

/// Get the global config (loaded once on first access).
pub fn get() -> &'static Config {
    CONFIG.get_or_init(Config::load_from_file)
}
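For reference, a minimal sketch of the intended call pattern: config::get() loads ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG) once and hands out the same &'static Config everywhere after that. The function below is illustrative only and assumes it is called from inside the poc-memory crate.

    fn example_usage() {
        let cfg = crate::config::get();
        println!("memory data lives in {}", cfg.data_dir.display());
        for group in &cfg.context_groups {
            println!("context group {:?}: {} keys", group.label, group.keys.len());
        }
    }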
poc-memory/src/consolidate.rs (new file, 434 lines)
@@ -0,0 +1,434 @@
// Consolidation pipeline: plan → agents → apply → digests → links
//
// consolidate_full() runs the full autonomous consolidation:
// 1. Plan: analyze metrics, allocate agents
// 2. Execute: run each agent (Sonnet calls), save reports
// 3. Apply: extract and apply actions from reports
// 4. Digest: generate missing daily/weekly/monthly digests
// 5. Links: apply links extracted from digests
// 6. Summary: final metrics comparison
//
// apply_consolidation() processes consolidation reports independently.

use crate::digest;
use crate::llm::{call_sonnet, parse_json_response};
use crate::neuro;
use crate::store::{self, Store, new_relation};

/// Append a line to the log buffer.
fn log_line(buf: &mut String, line: &str) {
    buf.push_str(line);
    buf.push('\n');
}

/// Run the full autonomous consolidation pipeline with logging.
/// If `on_progress` is provided, it's called at each significant step.
pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
    consolidate_full_with_progress(store, &|_| {})
}

pub fn consolidate_full_with_progress(
    store: &mut Store,
    on_progress: &dyn Fn(&str),
) -> Result<(), String> {
    let start = std::time::Instant::now();
    let log_key = format!("_consolidate-log-{}",
        store::format_datetime(store::now_epoch()).replace([':', '-', 'T'], ""));
    let mut log_buf = String::new();

    log_line(&mut log_buf, "=== CONSOLIDATE FULL ===");
    log_line(&mut log_buf, &format!("Started: {}", store::format_datetime(store::now_epoch())));
    log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
    log_line(&mut log_buf, "");

    // --- Step 1: Plan ---
    log_line(&mut log_buf, "--- Step 1: Plan ---");
    on_progress("planning");
    let plan = neuro::consolidation_plan(store);
    let plan_text = neuro::format_plan(&plan);
    log_line(&mut log_buf, &plan_text);
    println!("{}", plan_text);

    let total_agents = plan.replay_count + plan.linker_count
        + plan.separator_count + plan.transfer_count
        + if plan.run_health { 1 } else { 0 };
    log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents));

    // --- Step 2: Execute agents ---
    log_line(&mut log_buf, "\n--- Step 2: Execute agents ---");
    let mut reports: Vec<String> = Vec::new();
    let mut agent_num = 0usize;
    let mut agent_errors = 0usize;

    // Build the list of (agent_type, batch_size) runs
    let mut runs: Vec<(&str, usize)> = Vec::new();
    if plan.run_health {
        runs.push(("health", 0));
    }
    if plan.replay_count > 0 {
        let batch = 5;
        let mut remaining = plan.replay_count;
        while remaining > 0 {
            let this_batch = remaining.min(batch);
            runs.push(("replay", this_batch));
            remaining -= this_batch;
        }
    }
    if plan.linker_count > 0 {
        let batch = 5;
        let mut remaining = plan.linker_count;
        while remaining > 0 {
            let this_batch = remaining.min(batch);
            runs.push(("linker", this_batch));
            remaining -= this_batch;
        }
    }
    if plan.separator_count > 0 {
        let batch = 5;
        let mut remaining = plan.separator_count;
        while remaining > 0 {
            let this_batch = remaining.min(batch);
            runs.push(("separator", this_batch));
            remaining -= this_batch;
        }
    }
    if plan.transfer_count > 0 {
        let batch = 5;
        let mut remaining = plan.transfer_count;
        while remaining > 0 {
            let this_batch = remaining.min(batch);
            runs.push(("transfer", this_batch));
            remaining -= this_batch;
        }
    }

    for (agent_type, count) in &runs {
        agent_num += 1;
        let label = if *count > 0 {
            format!("[{}/{}] {} (batch={})", agent_num, runs.len(), agent_type, count)
        } else {
            format!("[{}/{}] {}", agent_num, runs.len(), agent_type)
        };

        log_line(&mut log_buf, &format!("\n{}", label));
        on_progress(&label);
        println!("{}", label);

        // Reload store to pick up changes from previous agents
        if agent_num > 1 {
            *store = Store::load()?;
        }

        let prompt = match neuro::agent_prompt(store, agent_type, *count) {
            Ok(p) => p,
            Err(e) => {
                let msg = format!(" ERROR building prompt: {}", e);
                log_line(&mut log_buf, &msg);
                eprintln!("{}", msg);
                agent_errors += 1;
                continue;
            }
        };

        log_line(&mut log_buf, &format!(" Prompt: {} chars (~{} tokens)",
            prompt.len(), prompt.len() / 4));

        let response = match call_sonnet("consolidate", &prompt) {
            Ok(r) => r,
            Err(e) => {
                let msg = format!(" ERROR from Sonnet: {}", e);
                log_line(&mut log_buf, &msg);
                eprintln!("{}", msg);
                agent_errors += 1;
                continue;
            }
        };

        // Store report as a node
        let ts = store::format_datetime(store::now_epoch())
            .replace([':', '-', 'T'], "");
        let report_key = format!("_consolidation-{}-{}", agent_type, ts);
        store.upsert_provenance(&report_key, &response,
            store::Provenance::AgentConsolidate).ok();
        reports.push(report_key.clone());

        let msg = format!(" Done: {} lines → {}", response.lines().count(), report_key);
        log_line(&mut log_buf, &msg);
        on_progress(&msg);
        println!("{}", msg);
    }

    log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors",
        agent_num - agent_errors, agent_errors));

    // --- Step 3: Apply consolidation actions ---
    log_line(&mut log_buf, "\n--- Step 3: Apply consolidation actions ---");
    on_progress("applying actions");
    println!("\n--- Applying consolidation actions ---");
    *store = Store::load()?;

    if reports.is_empty() {
        log_line(&mut log_buf, " No reports to apply.");
    } else {
        match apply_consolidation(store, true, None) {
            Ok(()) => log_line(&mut log_buf, " Applied."),
            Err(e) => {
                let msg = format!(" ERROR applying consolidation: {}", e);
                log_line(&mut log_buf, &msg);
                eprintln!("{}", msg);
            }
        }
    }

    // --- Step 3b: Link orphans ---
    log_line(&mut log_buf, "\n--- Step 3b: Link orphans ---");
    on_progress("linking orphans");
    println!("\n--- Linking orphan nodes ---");
    *store = Store::load()?;

    let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15);
    log_line(&mut log_buf, &format!(" {} orphans, {} links added", lo_orphans, lo_added));

    // --- Step 3c: Cap degree ---
    log_line(&mut log_buf, "\n--- Step 3c: Cap degree ---");
    on_progress("capping degree");
    println!("\n--- Capping node degree ---");
    *store = Store::load()?;

    match store.cap_degree(50) {
        Ok((hubs, pruned)) => {
            store.save()?;
            log_line(&mut log_buf, &format!(" {} hubs capped, {} edges pruned", hubs, pruned));
        }
        Err(e) => log_line(&mut log_buf, &format!(" ERROR: {}", e)),
    }

    // --- Step 4: Digest auto ---
    log_line(&mut log_buf, "\n--- Step 4: Digest auto ---");
    on_progress("generating digests");
    println!("\n--- Generating missing digests ---");
    *store = Store::load()?;

    match digest::digest_auto(store) {
        Ok(()) => log_line(&mut log_buf, " Digests done."),
        Err(e) => {
            let msg = format!(" ERROR in digest auto: {}", e);
            log_line(&mut log_buf, &msg);
            eprintln!("{}", msg);
        }
    }

    // --- Step 5: Apply digest links ---
    log_line(&mut log_buf, "\n--- Step 5: Apply digest links ---");
    on_progress("applying digest links");
    println!("\n--- Applying digest links ---");
    *store = Store::load()?;

    let links = digest::parse_all_digest_links(store);
    let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links);
    store.save()?;
    log_line(&mut log_buf, &format!(" {} links applied, {} skipped, {} fallbacks",
        applied, skipped, fallbacks));

    // --- Step 6: Summary ---
    let elapsed = start.elapsed();
    log_line(&mut log_buf, "\n--- Summary ---");
    log_line(&mut log_buf, &format!("Finished: {}", store::format_datetime(store::now_epoch())));
    log_line(&mut log_buf, &format!("Duration: {:.0}s", elapsed.as_secs_f64()));
    *store = Store::load()?;
    log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));

    let summary = format!(
        "\n=== CONSOLIDATE FULL COMPLETE ===\n\
        Duration: {:.0}s\n\
        Agents: {} run, {} errors\n\
        Nodes: {} Relations: {}\n",
        elapsed.as_secs_f64(),
        agent_num - agent_errors, agent_errors,
        store.nodes.len(), store.relations.len(),
    );
    log_line(&mut log_buf, &summary);
    println!("{}", summary);

    // Store the log as a node
    store.upsert_provenance(&log_key, &log_buf,
        store::Provenance::AgentConsolidate).ok();
    store.save()?;

    Ok(())
}

/// Find the most recent set of consolidation report keys from the store.
fn find_consolidation_reports(store: &Store) -> Vec<String> {
    let mut keys: Vec<&String> = store.nodes.keys()
        .filter(|k| k.starts_with("_consolidation-"))
        .collect();
    keys.sort();
    keys.reverse();

    if keys.is_empty() { return Vec::new(); }

    // Group by timestamp (last segment after last '-')
    let latest_ts = keys[0].rsplit('-').next().unwrap_or("").to_string();

    keys.into_iter()
        .filter(|k| k.ends_with(&latest_ts))
        .cloned()
        .collect()
}

fn build_consolidation_prompt(store: &Store, report_keys: &[String]) -> Result<String, String> {
    let mut report_text = String::new();
    for key in report_keys {
        let content = store.nodes.get(key)
            .map(|n| n.content.as_str())
            .unwrap_or("");
        report_text.push_str(&format!("\n{}\n## Report: {}\n\n{}\n",
            "=".repeat(60), key, content));
    }

    neuro::load_prompt("consolidation", &[("{{REPORTS}}", &report_text)])
}

/// Run the full apply-consolidation pipeline.
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_key: Option<&str>) -> Result<(), String> {
    let reports = if let Some(key) = report_key {
        vec![key.to_string()]
    } else {
        find_consolidation_reports(store)
    };

    if reports.is_empty() {
        println!("No consolidation reports found.");
        println!("Run consolidation-agents first.");
        return Ok(());
    }

    println!("Found {} reports:", reports.len());
    for r in &reports {
        println!(" {}", r);
    }

    println!("\nExtracting actions from reports...");
    let prompt = build_consolidation_prompt(store, &reports)?;
    println!(" Prompt: {} chars", prompt.len());

    let response = call_sonnet("consolidate", &prompt)?;

    let actions_value = parse_json_response(&response)?;
    let actions = actions_value.as_array()
        .ok_or("expected JSON array of actions")?;

    println!(" {} actions extracted", actions.len());

    // Store actions in the store
    let timestamp = store::format_datetime(store::now_epoch())
        .replace([':', '-'], "");
    let actions_key = format!("_consolidation-actions-{}", timestamp);
    let actions_json = serde_json::to_string_pretty(&actions_value).unwrap();
    store.upsert_provenance(&actions_key, &actions_json,
        store::Provenance::AgentConsolidate).ok();
    println!(" Stored: {}", actions_key);

    let link_actions: Vec<_> = actions.iter()
        .filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("link"))
        .collect();
    let manual_actions: Vec<_> = actions.iter()
        .filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("manual"))
        .collect();

    if !do_apply {
        // Dry run
        println!("\n{}", "=".repeat(60));
        println!("DRY RUN — {} actions proposed", actions.len());
        println!("{}\n", "=".repeat(60));

        if !link_actions.is_empty() {
            println!("## Links to add ({})\n", link_actions.len());
            for (i, a) in link_actions.iter().enumerate() {
                let src = a.get("source").and_then(|v| v.as_str()).unwrap_or("?");
                let tgt = a.get("target").and_then(|v| v.as_str()).unwrap_or("?");
                let reason = a.get("reason").and_then(|v| v.as_str()).unwrap_or("");
                println!(" {:2}. {} → {} ({})", i + 1, src, tgt, reason);
            }
        }
        if !manual_actions.is_empty() {
            println!("\n## Manual actions needed ({})\n", manual_actions.len());
            for a in &manual_actions {
                let prio = a.get("priority").and_then(|v| v.as_str()).unwrap_or("?");
                let desc = a.get("description").and_then(|v| v.as_str()).unwrap_or("?");
                println!(" [{}] {}", prio, desc);
            }
        }
        println!("\n{}", "=".repeat(60));
        println!("To apply: poc-memory apply-consolidation --apply");
        println!("{}", "=".repeat(60));
        return Ok(());
    }

    // Apply
    let mut applied = 0usize;
    let mut skipped = 0usize;

    if !link_actions.is_empty() {
        println!("\nApplying {} links...", link_actions.len());
        for a in &link_actions {
            let src = a.get("source").and_then(|v| v.as_str()).unwrap_or("");
            let tgt = a.get("target").and_then(|v| v.as_str()).unwrap_or("");
            if src.is_empty() || tgt.is_empty() { skipped += 1; continue; }

            let source = match store.resolve_key(src) {
                Ok(s) => s,
                Err(e) => { println!(" ? {} → {}: {}", src, tgt, e); skipped += 1; continue; }
            };
            let target = match store.resolve_key(tgt) {
                Ok(t) => t,
                Err(e) => { println!(" ? {} → {}: {}", src, tgt, e); skipped += 1; continue; }
            };

            // Refine target to best-matching section
            let source_content = store.nodes.get(&source)
                .map(|n| n.content.as_str()).unwrap_or("");
            let target = neuro::refine_target(store, source_content, &target);

            let exists = store.relations.iter().any(|r|
                r.source_key == source && r.target_key == target && !r.deleted
            );
            if exists { skipped += 1; continue; }

            let source_uuid = match store.nodes.get(&source) { Some(n) => n.uuid, None => { skipped += 1; continue; } };
            let target_uuid = match store.nodes.get(&target) { Some(n) => n.uuid, None => { skipped += 1; continue; } };

            let rel = new_relation(
                source_uuid, target_uuid,
                store::RelationType::Auto,
                0.5,
                &source, &target,
            );
            if store.add_relation(rel).is_ok() {
                println!(" + {} → {}", source, target);
                applied += 1;
            }
        }
    }

    if !manual_actions.is_empty() {
        println!("\n## Manual actions (not auto-applied):\n");
        for a in &manual_actions {
            let prio = a.get("priority").and_then(|v| v.as_str()).unwrap_or("?");
            let desc = a.get("description").and_then(|v| v.as_str()).unwrap_or("?");
            println!(" [{}] {}", prio, desc);
        }
    }

    if applied > 0 {
        store.save()?;
    }

    println!("\n{}", "=".repeat(60));
    println!("Applied: {} Skipped: {} Manual: {}", applied, skipped, manual_actions.len());
    println!("{}", "=".repeat(60));

    Ok(())
}
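For reference, a minimal sketch of the action array apply_consolidation expects back from the extraction prompt: "link" entries carry source/target/reason, "manual" entries carry priority/description. The field names are the ones the code above reads; the keys and values below are made up.

    // Illustrative only: a hand-written example of the JSON the model is expected to return.
    fn example_actions() -> serde_json::Value {
        serde_json::json!([
            { "action": "link",
              "source": "projects.bcachefs-notes",
              "target": "daily-2026-02-11",
              "reason": "session where the erasure-coding notes were written" },
            { "action": "manual",
              "priority": "high",
              "description": "identity node has grown past 10k chars; split it" }
        ])
    }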
poc-memory/src/daemon.rs (new file, 1229 lines)
File diff suppressed because it is too large
poc-memory/src/digest.rs (new file, 498 lines)
@@ -0,0 +1,498 @@
|
|||
// Episodic digest generation: daily, weekly, monthly, auto
|
||||
//
|
||||
// Three digest levels form a temporal hierarchy: daily digests summarize
|
||||
// journal entries, weekly digests summarize dailies, monthly digests
|
||||
// summarize weeklies. All three share the same generate/auto-detect
|
||||
// pipeline, parameterized by DigestLevel.
|
||||
|
||||
use crate::llm::{call_sonnet, semantic_keys};
|
||||
use crate::store::{self, Store, new_relation};
|
||||
use crate::neuro;
|
||||
|
||||
use chrono::{Datelike, Duration, Local, NaiveDate};
|
||||
use regex::Regex;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
// --- Digest level descriptors ---
|
||||
|
||||
struct DigestLevel {
|
||||
name: &'static str,
|
||||
title: &'static str,
|
||||
period: &'static str,
|
||||
input_title: &'static str,
|
||||
timeout: u64,
|
||||
child_name: Option<&'static str>, // None = journal (leaf), Some = child digest files
|
||||
/// Expand an arg into (canonical_label, dates covered).
|
||||
label_dates: fn(&str) -> Result<(String, Vec<String>), String>,
|
||||
/// Map a YYYY-MM-DD date to this level's label.
|
||||
date_to_label: fn(&str) -> Option<String>,
|
||||
}
|
||||
|
||||
const DAILY: DigestLevel = DigestLevel {
|
||||
name: "daily",
|
||||
title: "Daily",
|
||||
period: "Date",
|
||||
input_title: "Journal entries",
|
||||
timeout: 300,
|
||||
child_name: None,
|
||||
label_dates: |date| Ok((date.to_string(), vec![date.to_string()])),
|
||||
date_to_label: |date| Some(date.to_string()),
|
||||
};
|
||||
|
||||
/// Week label and 7 dates (Mon-Sun) for the week containing `date`.
|
||||
fn week_dates(date: &str) -> Result<(String, Vec<String>), String> {
|
||||
let nd = NaiveDate::parse_from_str(date, "%Y-%m-%d")
|
||||
.map_err(|e| format!("bad date '{}': {}", date, e))?;
|
||||
let iso = nd.iso_week();
|
||||
let week_label = format!("{}-W{:02}", iso.year(), iso.week());
|
||||
let monday = nd - Duration::days(nd.weekday().num_days_from_monday() as i64);
|
||||
let dates = (0..7)
|
||||
.map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
|
||||
.collect();
|
||||
Ok((week_label, dates))
|
||||
}
|
||||
|
||||
const WEEKLY: DigestLevel = DigestLevel {
|
||||
name: "weekly",
|
||||
title: "Weekly",
|
||||
period: "Week",
|
||||
input_title: "Daily digests",
|
||||
timeout: 300,
|
||||
child_name: Some("daily"),
|
||||
label_dates: |arg| {
|
||||
if !arg.contains('W') {
|
||||
return week_dates(arg);
|
||||
}
|
||||
let (y, w) = arg.split_once("-W")
|
||||
.ok_or_else(|| format!("bad week label: {}", arg))?;
|
||||
let year: i32 = y.parse().map_err(|_| format!("bad week year: {}", arg))?;
|
||||
let week: u32 = w.parse().map_err(|_| format!("bad week number: {}", arg))?;
|
||||
let monday = NaiveDate::from_isoywd_opt(year, week, chrono::Weekday::Mon)
|
||||
.ok_or_else(|| format!("invalid week: {}", arg))?;
|
||||
let dates = (0..7)
|
||||
.map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
|
||||
.collect();
|
||||
Ok((arg.to_string(), dates))
|
||||
},
|
||||
date_to_label: |date| week_dates(date).ok().map(|(l, _)| l),
|
||||
};
|
||||
|
||||
const MONTHLY: DigestLevel = DigestLevel {
|
||||
name: "monthly",
|
||||
title: "Monthly",
|
||||
period: "Month",
|
||||
input_title: "Weekly digests",
|
||||
timeout: 600,
|
||||
child_name: Some("weekly"),
|
||||
label_dates: |arg| {
|
||||
let (year, month) = if arg.len() <= 7 {
|
||||
let d = NaiveDate::parse_from_str(&format!("{}-01", arg), "%Y-%m-%d")
|
||||
.map_err(|e| format!("bad month '{}': {}", arg, e))?;
|
||||
(d.year(), d.month())
|
||||
} else {
|
||||
let d = NaiveDate::parse_from_str(arg, "%Y-%m-%d")
|
||||
.map_err(|e| format!("bad date '{}': {}", arg, e))?;
|
||||
(d.year(), d.month())
|
||||
};
|
||||
let label = format!("{}-{:02}", year, month);
|
||||
let mut dates = Vec::new();
|
||||
let mut day = 1u32;
|
||||
while let Some(date) = NaiveDate::from_ymd_opt(year, month, day) {
|
||||
if date.month() != month { break; }
|
||||
dates.push(date.format("%Y-%m-%d").to_string());
|
||||
day += 1;
|
||||
}
|
||||
Ok((label, dates))
|
||||
},
|
||||
date_to_label: |date| NaiveDate::parse_from_str(date, "%Y-%m-%d")
|
||||
.ok().map(|d| format!("{}-{:02}", d.year(), d.month())),
|
||||
};
|
||||
|
||||
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
|
||||
|
||||
/// Store key for a digest node: "daily-2026-03-04", "weekly-2026-W09", etc.
|
||||
fn digest_node_key(level_name: &str, label: &str) -> String {
|
||||
format!("{}-{}", level_name, label)
|
||||
}
|
||||
|
||||
// --- Input gathering ---
|
||||
|
||||
/// Load child digest content from the store.
|
||||
fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> Vec<(String, String)> {
|
||||
let mut digests = Vec::new();
|
||||
for label in labels {
|
||||
let key = digest_node_key(prefix, label);
|
||||
if let Some(node) = store.nodes.get(&key) {
|
||||
digests.push((label.clone(), node.content.clone()));
|
||||
}
|
||||
}
|
||||
digests
|
||||
}
|
||||
|
||||
/// Unified: gather inputs for any digest level.
|
||||
fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<(String, Vec<(String, String)>), String> {
|
||||
let (label, dates) = (level.label_dates)(arg)?;
|
||||
|
||||
let inputs = if let Some(child_name) = level.child_name {
|
||||
// Map parent's dates through child's date_to_label → child labels
|
||||
let child = LEVELS.iter()
|
||||
.find(|l| l.name == child_name)
|
||||
.expect("invalid child_name");
|
||||
let child_labels: Vec<String> = dates.iter()
|
||||
.filter_map(|d| (child.date_to_label)(d))
|
||||
.collect::<BTreeSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
load_child_digests(store, child_name, &child_labels)
|
||||
} else {
|
||||
// Leaf level: scan store for episodic entries matching date
|
||||
let mut entries: Vec<_> = store.nodes.values()
|
||||
.filter(|n| n.node_type == store::NodeType::EpisodicSession
|
||||
&& n.timestamp > 0
|
||||
&& store::format_date(n.timestamp) == label)
|
||||
.map(|n| {
|
||||
(store::format_datetime(n.timestamp), n.content.clone())
|
||||
})
|
||||
.collect();
|
||||
entries.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
entries
|
||||
};
|
||||
|
||||
Ok((label, inputs))
|
||||
}
|
||||
|
||||
/// Unified: find candidate labels for auto-generation (past, not yet generated).
|
||||
fn find_candidates(level: &DigestLevel, dates: &[String], today: &str) -> Vec<String> {
|
||||
let today_label = (level.date_to_label)(today);
|
||||
dates.iter()
|
||||
.filter_map(|d| (level.date_to_label)(d))
|
||||
.collect::<BTreeSet<_>>()
|
||||
.into_iter()
|
||||
.filter(|l| Some(l) != today_label.as_ref())
|
||||
.collect()
|
||||
}
|
||||
|
||||
// --- Unified generator ---
|
||||
|
||||
fn format_inputs(inputs: &[(String, String)], daily: bool) -> String {
|
||||
let mut text = String::new();
|
||||
for (label, content) in inputs {
|
||||
if daily {
|
||||
text.push_str(&format!("\n### {}\n\n{}\n", label, content));
|
||||
} else {
|
||||
text.push_str(&format!("\n---\n## {}\n{}\n", label, content));
|
||||
}
|
||||
}
|
||||
text
|
||||
}
|
||||
|
||||
fn generate_digest(
|
||||
store: &mut Store,
|
||||
level: &DigestLevel,
|
||||
label: &str,
|
||||
inputs: &[(String, String)],
|
||||
) -> Result<(), String> {
|
||||
println!("Generating {} digest for {}...", level.name, label);
|
||||
|
||||
if inputs.is_empty() {
|
||||
println!(" No inputs found for {}", label);
|
||||
return Ok(());
|
||||
}
|
||||
println!(" {} inputs", inputs.len());
|
||||
|
||||
let keys = semantic_keys(store);
|
||||
let keys_text = keys.iter()
|
||||
.map(|k| format!(" - {}", k))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let content = format_inputs(inputs, level.child_name.is_none());
|
||||
let covered = inputs.iter()
|
||||
.map(|(l, _)| l.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
let prompt = neuro::load_prompt("digest", &[
|
||||
("{{LEVEL}}", level.title),
|
||||
("{{PERIOD}}", level.period),
|
||||
("{{INPUT_TITLE}}", level.input_title),
|
||||
("{{LABEL}}", label),
|
||||
("{{CONTENT}}", &content),
|
||||
("{{COVERED}}", &covered),
|
||||
("{{KEYS}}", &keys_text),
|
||||
])?;
|
||||
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
|
||||
|
||||
println!(" Calling Sonnet...");
|
||||
let digest = call_sonnet("digest", &prompt)?;
|
||||
|
||||
let key = digest_node_key(level.name, label);
|
||||
store.upsert_provenance(&key, &digest, store::Provenance::AgentDigest)?;
|
||||
store.save()?;
|
||||
println!(" Stored: {}", key);
|
||||
|
||||
println!(" Done: {} lines", digest.lines().count());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Public API ---
|
||||
|
||||
pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), String> {
|
||||
let level = LEVELS.iter()
|
||||
.find(|l| l.name == level_name)
|
||||
.ok_or_else(|| format!("unknown digest level: {}", level_name))?;
|
||||
let (label, inputs) = gather(level, store, arg)?;
|
||||
generate_digest(store, level, &label, &inputs)
|
||||
}
|
||||
|
||||
// --- Auto-detect and generate missing digests ---
|
||||
|
||||
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
|
||||
let today = Local::now().format("%Y-%m-%d").to_string();
|
||||
|
||||
// Collect all dates with episodic entries
|
||||
let dates: Vec<String> = store.nodes.values()
|
||||
.filter(|n| n.node_type == store::NodeType::EpisodicSession && n.timestamp > 0)
|
||||
.map(|n| store::format_date(n.timestamp))
|
||||
.collect::<BTreeSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let mut total = 0u32;
|
||||
|
||||
for level in LEVELS {
|
||||
let candidates = find_candidates(level, &dates, &today);
|
||||
let mut generated = 0u32;
|
||||
let mut skipped = 0u32;
|
||||
|
||||
for arg in &candidates {
|
||||
let (label, inputs) = gather(level, store, arg)?;
|
||||
let key = digest_node_key(level.name, &label);
|
||||
if store.nodes.contains_key(&key) {
|
||||
skipped += 1;
|
||||
continue;
|
||||
}
|
||||
if inputs.is_empty() { continue; }
|
||||
println!("[auto] Missing {} digest for {}", level.name, label);
|
||||
generate_digest(store, level, &label, &inputs)?;
|
||||
generated += 1;
|
||||
}
|
||||
|
||||
println!("[auto] {}: {} generated, {} existed", level.name, generated, skipped);
|
||||
total += generated;
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
println!("[auto] All digests up to date.");
|
||||
} else {
|
||||
println!("[auto] Generated {} total digests.", total);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Digest link parsing ---
|
||||
// Replaces digest-link-parser.py: parses ## Links sections from digest
|
||||
// files and applies them to the memory graph.
|
||||
|
||||
/// A parsed link from a digest's Links section.
|
||||
pub struct DigestLink {
|
||||
pub source: String,
|
||||
pub target: String,
|
||||
pub reason: String,
|
||||
pub file: String,
|
||||
}
|
||||
|
||||
/// Normalize a raw link target to a poc-memory key.
|
||||
fn normalize_link_key(raw: &str) -> String {
|
||||
let key = raw.trim().trim_matches('`').trim();
|
||||
if key.is_empty() { return String::new(); }
|
||||
|
||||
// Self-references
|
||||
let lower = key.to_lowercase();
|
||||
if lower.starts_with("this ") { return String::new(); }
|
||||
|
||||
let mut key = key.to_string();
|
||||
|
||||
// Strip .md suffix if present
|
||||
if let Some(stripped) = key.strip_suffix(".md") {
|
||||
key = stripped.to_string();
|
||||
} else if key.contains('#') {
|
||||
let (file, section) = key.split_once('#').unwrap();
|
||||
if let Some(bare) = file.strip_suffix(".md") {
|
||||
key = format!("{}#{}", bare, section);
|
||||
}
|
||||
}
|
||||
|
||||
// weekly/2026-W06 → weekly-2026-W06, etc.
|
||||
if let Some(pos) = key.find('/') {
|
||||
let prefix = &key[..pos];
|
||||
if prefix == "daily" || prefix == "weekly" || prefix == "monthly" {
|
||||
let rest = &key[pos + 1..];
|
||||
key = format!("{}-{}", prefix, rest);
|
||||
}
|
||||
}
|
||||
|
||||
// Bare date → daily digest
|
||||
let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
|
||||
if date_re.is_match(&key) {
|
||||
key = format!("daily-{}", key);
|
||||
}
|
||||
|
||||
key
|
||||
}
|
||||
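// Illustrative sketch, not part of the original commit: expected mappings for
// normalize_link_key based on the rules above. The example keys are hypothetical.
#[cfg(test)]
mod normalize_link_key_examples {
    use super::normalize_link_key;

    #[test]
    fn strips_md_rewrites_digest_paths_and_drops_self_refs() {
        assert_eq!(normalize_link_key("`notes.md`"), "notes");
        assert_eq!(normalize_link_key("weekly/2026-W06"), "weekly-2026-W06");
        assert_eq!(normalize_link_key("2026-02-03"), "daily-2026-02-03");
        // Self-references come back empty; the caller substitutes the digest key.
        assert_eq!(normalize_link_key("this daily digest"), "");
    }
}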
|
||||
/// Parse the Links section from a digest node's content.
|
||||
fn parse_digest_node_links(key: &str, content: &str) -> Vec<DigestLink> {
|
||||
|
||||
let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
|
||||
let header_re = Regex::new(r"^##\s+Links").unwrap();
|
||||
let mut links = Vec::new();
|
||||
let mut in_links = false;
|
||||
|
||||
for line in content.lines() {
|
||||
if header_re.is_match(line) {
|
||||
in_links = true;
|
||||
continue;
|
||||
}
|
||||
if in_links && line.starts_with("## ") {
|
||||
in_links = false;
|
||||
continue;
|
||||
}
|
||||
if !in_links { continue; }
|
||||
if line.starts_with("###") || line.starts_with("**") { continue; }
|
||||
|
||||
if let Some(cap) = link_re.captures(line) {
|
||||
let raw_source = cap[1].trim();
|
||||
let raw_target = cap[2].trim();
|
||||
let reason = cap.get(3).map(|m| m.as_str().to_string()).unwrap_or_default();
|
||||
|
||||
let mut source = normalize_link_key(raw_source);
|
||||
let mut target = normalize_link_key(raw_target);
|
||||
|
||||
// Replace self-references with digest key
|
||||
if source.is_empty() { source = key.to_string(); }
|
||||
if target.is_empty() { target = key.to_string(); }
|
||||
|
||||
// Handle "this daily/weekly/monthly" in raw text
|
||||
let raw_s_lower = raw_source.to_lowercase();
|
||||
let raw_t_lower = raw_target.to_lowercase();
|
||||
if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
|
||||
|| raw_s_lower.contains("this monthly")
|
||||
{
|
||||
source = key.to_string();
|
||||
}
|
||||
if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
|
||||
|| raw_t_lower.contains("this monthly")
|
||||
{
|
||||
target = key.to_string();
|
||||
}
|
||||
|
||||
// Skip NEW: and self-links
|
||||
if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
|
||||
if source == target { continue; }
|
||||
|
||||
links.push(DigestLink { source, target, reason, file: key.to_string() });
|
||||
}
|
||||
}
|
||||
|
||||
links
|
||||
}
|
||||
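// Illustrative sketch, not part of the original commit: the parser only reads
// "- source → target (reason)" bullets inside the "## Links" section and swaps
// self-references for the digest's own key. The node keys used here are made up.
#[cfg(test)]
mod parse_digest_node_links_examples {
    use super::parse_digest_node_links;

    #[test]
    fn reads_bullets_under_the_links_header() {
        let content = concat!(
            "## Summary\n",
            "- not a link line\n",
            "\n",
            "## Links\n",
            "- this daily → bcachefs-btree.md (debugging session)\n",
            "- weekly/2026-W06 → memory-design (digest rollup)\n",
            "\n",
            "## Other\n",
            "- ignored → ignored\n",
        );
        let links = parse_digest_node_links("daily-2026-02-03", content);
        assert_eq!(links.len(), 2);
        assert_eq!(links[0].source, "daily-2026-02-03");
        assert_eq!(links[0].target, "bcachefs-btree");
        assert_eq!(links[1].source, "weekly-2026-W06");
    }
}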
|
||||
/// Parse links from all digest nodes in the store.
|
||||
pub fn parse_all_digest_links(store: &Store) -> Vec<DigestLink> {
|
||||
let mut all_links = Vec::new();
|
||||
|
||||
let mut digest_keys: Vec<&String> = store.nodes.iter()
|
||||
.filter(|(_, n)| matches!(n.node_type,
|
||||
store::NodeType::EpisodicDaily
|
||||
| store::NodeType::EpisodicWeekly
|
||||
| store::NodeType::EpisodicMonthly))
|
||||
.map(|(k, _)| k)
|
||||
.collect();
|
||||
digest_keys.sort();
|
||||
|
||||
for key in digest_keys {
|
||||
if let Some(node) = store.nodes.get(key) {
|
||||
all_links.extend(parse_digest_node_links(key, &node.content));
|
||||
}
|
||||
}
|
||||
|
||||
// Deduplicate by (source, target) pair
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
|
||||
|
||||
all_links
|
||||
}
|
||||
|
||||
/// Apply parsed digest links to the store.
|
||||
pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, usize, usize) {
|
||||
let mut applied = 0usize;
|
||||
let mut skipped = 0usize;
|
||||
let mut fallbacks = 0usize;
|
||||
|
||||
for link in links {
|
||||
// Try resolving both keys
|
||||
let source = match store.resolve_key(&link.source) {
|
||||
Ok(s) => s,
|
||||
Err(_) => {
|
||||
// Try stripping section anchor as fallback
|
||||
if let Some(base) = link.source.split('#').next() {
|
||||
match store.resolve_key(base) {
|
||||
Ok(s) => { fallbacks += 1; s }
|
||||
Err(_) => { skipped += 1; continue; }
|
||||
}
|
||||
} else {
|
||||
skipped += 1; continue;
|
||||
}
|
||||
}
|
||||
};
|
||||
let target = match store.resolve_key(&link.target) {
|
||||
Ok(t) => t,
|
||||
Err(_) => {
|
||||
if let Some(base) = link.target.split('#').next() {
|
||||
match store.resolve_key(base) {
|
||||
Ok(t) => { fallbacks += 1; t }
|
||||
Err(_) => { skipped += 1; continue; }
|
||||
}
|
||||
} else {
|
||||
skipped += 1; continue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Refine target to best-matching section if available
|
||||
let source_content = store.nodes.get(&source)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let target = neuro::refine_target(store, source_content, &target);
|
||||
|
||||
if source == target { skipped += 1; continue; }
|
||||
|
||||
// Check if link already exists
|
||||
let exists = store.relations.iter().any(|r|
|
||||
r.source_key == source && r.target_key == target && !r.deleted
|
||||
);
|
||||
if exists { skipped += 1; continue; }
|
||||
|
||||
let source_uuid = match store.nodes.get(&source) {
|
||||
Some(n) => n.uuid,
|
||||
None => { skipped += 1; continue; }
|
||||
};
|
||||
let target_uuid = match store.nodes.get(&target) {
|
||||
Some(n) => n.uuid,
|
||||
None => { skipped += 1; continue; }
|
||||
};
|
||||
|
||||
let rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Link,
|
||||
0.5,
|
||||
&source, &target,
|
||||
);
|
||||
if store.add_relation(rel).is_ok() {
|
||||
println!(" + {} → {}", source, target);
|
||||
applied += 1;
|
||||
}
|
||||
}
|
||||
|
||||
(applied, skipped, fallbacks)
|
||||
}
|
||||
460
poc-memory/src/enrich.rs
Normal file
@@ -0,0 +1,460 @@
// Journal enrichment and experience mining
//
// Two modes of processing conversation transcripts:
//   journal_enrich — enrich a specific journal entry with source location and links
//   experience_mine — retroactively find experiential moments not yet journaled
//
// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
// and apply results to the store.

use crate::llm::{call_sonnet, parse_json_response, semantic_keys};
|
||||
use crate::neuro;
|
||||
use crate::store::{self, Store, new_node, new_relation};
|
||||
|
||||
use regex::Regex;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::HashSet;
|
||||
use std::fs;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
use crate::store::StoreView;
|
||||
|
||||
/// Parse a timestamp string like "2026-03-05T19:56" to unix epoch seconds.
|
||||
fn parse_timestamp_to_epoch(ts: &str) -> Option<i64> {
|
||||
use chrono::{Local, NaiveDateTime, TimeZone};
|
||||
// Try common formats
|
||||
let formats = ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M"];
|
||||
for fmt in &formats {
|
||||
if let Ok(ndt) = NaiveDateTime::parse_from_str(ts, fmt) {
|
||||
if let Some(dt) = Local.from_local_datetime(&ndt).earliest() {
|
||||
return Some(dt.timestamp());
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Compute the store dedup key for a transcript file.
|
||||
/// This is the same key experience_mine uses to mark a transcript as mined.
|
||||
pub fn transcript_dedup_key(path: &str) -> Result<String, String> {
|
||||
let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?;
|
||||
let mut hasher = DefaultHasher::new();
|
||||
bytes.hash(&mut hasher);
|
||||
Ok(format!("_mined-transcripts#h-{:016x}", hasher.finish()))
|
||||
}
|
||||
|
||||
/// Check if a transcript has already been mined (dedup key exists in store).
|
||||
pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
|
||||
match transcript_dedup_key(path) {
|
||||
Ok(key) => store.node_content(&key).is_some(),
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Dedup key for a transcript based on its filename (UUID).
|
||||
/// Used by the daemon reconcile loop — no file reads needed.
|
||||
pub fn transcript_filename_key(path: &str) -> String {
|
||||
let filename = std::path::Path::new(path)
|
||||
.file_stem()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| path.to_string());
|
||||
format!("_mined-transcripts#f-{}", filename)
|
||||
}
|
||||
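// Illustrative example (not part of the original commit), with a hypothetical path:
//   transcript_filename_key("/sessions/3f2a-uuid.jsonl") == "_mined-transcripts#f-3f2a-uuid"
// file_stem() drops both the directory and the ".jsonl" extension, so the daemon can
// build this key without reading the file.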
|
||||
/// Get the set of all mined transcript keys (both content-hash and filename)
|
||||
/// from the store. Load once per daemon tick, check many.
|
||||
pub fn mined_transcript_keys() -> HashSet<String> {
|
||||
use crate::store::AnyView;
|
||||
let Ok(view) = AnyView::load() else { return HashSet::new() };
|
||||
let mut keys = HashSet::new();
|
||||
view.for_each_node(|key, _, _| {
|
||||
if key.starts_with("_mined-transcripts#") {
|
||||
keys.insert(key.to_string());
|
||||
}
|
||||
});
|
||||
keys
|
||||
}
|
||||
|
||||
/// Check if a transcript has been mined, given a pre-loaded set of mined keys.
|
||||
/// Checks filename-based key only (no file read). Sessions mined before the
|
||||
/// filename key was added will pass through and short-circuit in experience_mine
|
||||
/// via the content hash check — a one-time cost on first restart after this change.
|
||||
pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> bool {
|
||||
mined.contains(&transcript_filename_key(path))
|
||||
}
|
||||
|
||||
/// Extract user/assistant messages with line numbers from a JSONL transcript.
|
||||
/// (line_number, role, text, timestamp)
|
||||
pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
|
||||
let content = fs::read_to_string(jsonl_path)
|
||||
.map_err(|e| format!("read {}: {}", jsonl_path, e))?;
|
||||
|
||||
let mut messages = Vec::new();
|
||||
for (i, line) in content.lines().enumerate() {
|
||||
let obj: serde_json::Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if msg_type != "user" && msg_type != "assistant" { continue; }
|
||||
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let msg = obj.get("message").unwrap_or(&obj);
|
||||
let content = msg.get("content");
|
||||
|
||||
let text = match content {
|
||||
Some(serde_json::Value::String(s)) => s.clone(),
|
||||
Some(serde_json::Value::Array(arr)) => {
|
||||
arr.iter()
|
||||
.filter_map(|c| {
|
||||
// Only extract text blocks; skip tool_use, tool_result, thinking, etc.
|
||||
let is_text = c.get("type").and_then(|v| v.as_str()) == Some("text");
|
||||
if is_text {
|
||||
c.get("text").and_then(|v| v.as_str()).map(|s| s.to_string())
|
||||
} else {
|
||||
c.as_str().map(|s| s.to_string())
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let text = text.trim().to_string();
|
||||
if text.is_empty() { continue; }
|
||||
|
||||
messages.push((i + 1, msg_type.to_string(), text, timestamp));
|
||||
}
|
||||
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context";
|
||||
|
||||
/// Split extracted messages into segments at compaction boundaries.
|
||||
/// Each segment represents one continuous conversation before context was compacted.
|
||||
pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Vec<Vec<(usize, String, String, String)>> {
|
||||
let mut segments: Vec<Vec<(usize, String, String, String)>> = Vec::new();
|
||||
let mut current = Vec::new();
|
||||
|
||||
for msg in messages {
|
||||
if msg.1 == "user" && msg.2.starts_with(COMPACTION_MARKER) {
|
||||
if !current.is_empty() {
|
||||
segments.push(current);
|
||||
current = Vec::new();
|
||||
}
|
||||
// The continuation message itself is part of the new segment
|
||||
current.push(msg);
|
||||
} else {
|
||||
current.push(msg);
|
||||
}
|
||||
}
|
||||
if !current.is_empty() {
|
||||
segments.push(current);
|
||||
}
|
||||
|
||||
segments
|
||||
}
|
||||
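// Illustrative sketch, not part of the original commit: a transcript with one
// compaction boundary splits into two segments, and the continuation message
// opens the second segment.
#[cfg(test)]
mod split_on_compaction_examples {
    use super::{split_on_compaction, COMPACTION_MARKER};

    #[test]
    fn splits_at_the_continuation_message() {
        let msg = |line: usize, role: &str, text: &str| {
            (line, role.to_string(), text.to_string(), String::new())
        };
        let messages = vec![
            msg(1, "user", "let's look at that btree assertion"),
            msg(2, "assistant", "sure, pulling up the trace"),
            msg(3, "user", &format!("{} Summary follows.", COMPACTION_MARKER)),
            msg(4, "assistant", "continuing from the summary"),
        ];
        let segments = split_on_compaction(messages);
        assert_eq!(segments.len(), 2);
        assert_eq!(segments[0].len(), 2);
        assert!(segments[1][0].2.starts_with(COMPACTION_MARKER));
    }
}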
|
||||
/// Format conversation messages for the prompt (truncating long messages).
|
||||
fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
|
||||
messages.iter()
|
||||
.map(|(line, role, text, ts)| {
|
||||
let text = if text.len() > 2000 {
|
||||
format!("{}...[truncated]", &text[..text.floor_char_boundary(1800)])
|
||||
} else {
|
||||
text.clone()
|
||||
};
|
||||
if ts.is_empty() {
|
||||
format!("L{} [{}]: {}", line, role, text)
|
||||
} else {
|
||||
format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n")
|
||||
}
|
||||
|
||||
fn build_journal_prompt(
|
||||
entry_text: &str,
|
||||
conversation: &str,
|
||||
keys: &[String],
|
||||
grep_line: usize,
|
||||
) -> Result<String, String> {
|
||||
let keys_text: String = keys.iter()
|
||||
.map(|k| format!(" - {}", k))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
neuro::load_prompt("journal-enrich", &[
|
||||
("{{GREP_LINE}}", &grep_line.to_string()),
|
||||
("{{ENTRY_TEXT}}", entry_text),
|
||||
("{{KEYS}}", &keys_text),
|
||||
("{{CONVERSATION}}", conversation),
|
||||
])
|
||||
}
|
||||
|
||||
/// Enrich a journal entry with conversation context and link proposals.
|
||||
pub fn journal_enrich(
|
||||
store: &mut Store,
|
||||
jsonl_path: &str,
|
||||
entry_text: &str,
|
||||
grep_line: usize,
|
||||
) -> Result<(), String> {
|
||||
println!("Extracting conversation from {}...", jsonl_path);
|
||||
let messages = extract_conversation(jsonl_path)?;
|
||||
let conversation = format_conversation(&messages);
|
||||
println!(" {} messages, {} chars", messages.len(), conversation.len());
|
||||
|
||||
let keys = semantic_keys(store);
|
||||
println!(" {} semantic keys", keys.len());
|
||||
|
||||
let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
|
||||
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
|
||||
|
||||
println!(" Calling Sonnet...");
|
||||
let response = call_sonnet("enrich", &prompt)?;
|
||||
|
||||
let result = parse_json_response(&response)?;
|
||||
|
||||
// Report results
|
||||
let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
let links = result.get("links").and_then(|v| v.as_array());
|
||||
let insights = result.get("missed_insights").and_then(|v| v.as_array());
|
||||
|
||||
println!(" Source: L{}-L{}", source_start, source_end);
|
||||
println!(" Links: {}", links.map_or(0, |l| l.len()));
|
||||
println!(" Missed insights: {}", insights.map_or(0, |l| l.len()));
|
||||
|
||||
// Apply links
|
||||
if let Some(links) = links {
|
||||
for link in links {
|
||||
let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if target.is_empty() || target.starts_with("NOTE:") {
|
||||
if let Some(note) = target.strip_prefix("NOTE:") {
|
||||
println!(" NOTE: {} — {}", note, reason);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Resolve target and find journal node
|
||||
let resolved = match store.resolve_key(target) {
|
||||
Ok(r) => r,
|
||||
Err(_) => { println!(" SKIP {} (not in graph)", target); continue; }
|
||||
};
|
||||
let source_key = match store.find_journal_node(entry_text) {
|
||||
Some(k) => k,
|
||||
None => { println!(" SKIP {} (no matching journal node)", target); continue; }
|
||||
};
|
||||
|
||||
// Refine target to best-matching section
|
||||
let source_content = store.nodes.get(&source_key)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let resolved = neuro::refine_target(store, source_content, &resolved);
|
||||
|
||||
let source_uuid = match store.nodes.get(&source_key) {
|
||||
Some(n) => n.uuid,
|
||||
None => continue,
|
||||
};
|
||||
let target_uuid = match store.nodes.get(&resolved) {
|
||||
Some(n) => n.uuid,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Link,
|
||||
0.5,
|
||||
&source_key, &resolved,
|
||||
);
|
||||
if store.add_relation(rel).is_ok() {
|
||||
println!(" LINK {} → {} ({})", source_key, resolved, reason);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
store.save()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Mine a conversation transcript for experiential moments not yet journaled.
|
||||
/// If `segment` is Some, only process that compaction segment of the file.
|
||||
pub fn experience_mine(
|
||||
store: &mut Store,
|
||||
jsonl_path: &str,
|
||||
segment: Option<usize>,
|
||||
) -> Result<usize, String> {
|
||||
println!("Experience mining: {}", jsonl_path);
|
||||
|
||||
// Transcript-level dedup: hash the file content and check if already mined
|
||||
let transcript_bytes = fs::read(jsonl_path)
|
||||
.map_err(|e| format!("reading transcript: {}", e))?;
|
||||
let mut hasher = DefaultHasher::new();
|
||||
transcript_bytes.hash(&mut hasher);
|
||||
let hash = hasher.finish();
|
||||
let dedup_key = format!("_mined-transcripts#h-{:016x}", hash);
|
||||
|
||||
if store.nodes.contains_key(&dedup_key) {
|
||||
// Backfill filename key if missing (transcripts mined before this key existed)
|
||||
let fname_key = transcript_filename_key(jsonl_path);
|
||||
if !store.nodes.contains_key(&fname_key) {
|
||||
let mut node = new_node(&fname_key, &format!("Backfilled from {}", dedup_key));
|
||||
node.provenance = store::Provenance::AgentExperienceMine;
|
||||
let _ = store.upsert_node(node);
|
||||
store.save()?;
|
||||
}
|
||||
println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]);
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let all_messages = extract_conversation(jsonl_path)?;
|
||||
|
||||
// If segment is specified, extract just that segment; otherwise process all messages
|
||||
let messages = match segment {
|
||||
Some(idx) => {
|
||||
let segments = split_on_compaction(all_messages);
|
||||
segments.into_iter().nth(idx)
|
||||
.ok_or_else(|| format!("segment {} out of range", idx))?
|
||||
}
|
||||
None => all_messages,
|
||||
};
|
||||
|
||||
let conversation = format_conversation(&messages);
|
||||
println!(" {} messages, {} chars", messages.len(), conversation.len());
|
||||
|
||||
// Load core identity nodes for context
|
||||
let cfg = crate::config::get();
|
||||
let identity: String = cfg.core_nodes.iter()
|
||||
.filter_map(|k| store.nodes.get(k).map(|n| n.content.as_str()))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n");
|
||||
|
||||
// Get recent episodic entries to avoid duplication
|
||||
let mut journal: Vec<_> = store.nodes.values()
|
||||
.filter(|node| matches!(node.node_type, store::NodeType::EpisodicSession))
|
||||
.collect();
|
||||
journal.sort_by_key(|n| n.timestamp);
|
||||
let recent: String = journal.iter().rev().take(10)
|
||||
.map(|n| format!("---\n{}\n", n.content))
|
||||
.collect();
|
||||
|
||||
let keys = semantic_keys(store);
|
||||
let keys_text: String = keys.iter()
|
||||
.map(|k| format!(" - {}", k))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let prompt = neuro::load_prompt("experience", &[
|
||||
("{{IDENTITY}}", &identity),
|
||||
("{{RECENT_JOURNAL}}", &recent),
|
||||
("{{KEYS}}", &keys_text),
|
||||
("{{CONVERSATION}}", &conversation),
|
||||
])?;
|
||||
let est_tokens = prompt.len() / 4;
|
||||
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), est_tokens);
|
||||
|
||||
if est_tokens > 150_000 {
|
||||
println!(" Skipping: prompt too large ({} tokens > 150k limit)", est_tokens);
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
println!(" Calling Sonnet...");
|
||||
let response = call_sonnet("experience-mine", &prompt)?;
|
||||
|
||||
let entries = parse_json_response(&response)?;
|
||||
let entries = match entries.as_array() {
|
||||
Some(arr) => arr.clone(),
|
||||
None => return Err("expected JSON array".to_string()),
|
||||
};
|
||||
|
||||
if entries.is_empty() {
|
||||
println!(" No missed experiences found.");
|
||||
} else {
|
||||
println!(" Found {} experiential moments:", entries.len());
|
||||
}
|
||||
let mut count = 0;
|
||||
for entry in &entries {
|
||||
let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if content.is_empty() { continue; }
|
||||
|
||||
// Format with timestamp header
|
||||
let full_content = if ts.is_empty() {
|
||||
content.to_string()
|
||||
} else {
|
||||
format!("## {}\n\n{}", ts, content)
|
||||
};
|
||||
|
||||
// Generate key from timestamp
|
||||
let key_slug: String = content.chars()
|
||||
.filter(|c| c.is_alphanumeric() || *c == ' ')
|
||||
.take(50)
|
||||
.collect::<String>()
|
||||
.trim()
|
||||
.to_lowercase()
|
||||
.replace(' ', "-");
|
||||
let key = if ts.is_empty() {
|
||||
format!("journal#j-mined-{}", key_slug)
|
||||
} else {
|
||||
format!("journal#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
|
||||
};
|
||||
|
||||
// Check for duplicate
|
||||
if store.nodes.contains_key(&key) {
|
||||
println!(" SKIP {} (duplicate)", key);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Write to store — use event timestamp, not mining time
|
||||
let mut node = new_node(&key, &full_content);
|
||||
node.node_type = store::NodeType::EpisodicSession;
|
||||
node.provenance = store::Provenance::AgentExperienceMine;
|
||||
if !ts.is_empty() {
|
||||
if let Some(epoch) = parse_timestamp_to_epoch(ts) {
|
||||
node.created_at = epoch;
|
||||
}
|
||||
}
|
||||
let _ = store.upsert_node(node);
|
||||
count += 1;
|
||||
|
||||
let preview = if content.len() > 80 {
|
||||
let end = content.floor_char_boundary(77);
|
||||
&content[..end]
|
||||
} else {
|
||||
content
|
||||
};
|
||||
println!(" + [{}] {}...", ts, preview);
|
||||
}
|
||||
|
||||
// Record this transcript/segment as mined (even if count == 0, to prevent re-runs)
|
||||
let fname_key = match segment {
|
||||
Some(idx) => format!("{}.{}", transcript_filename_key(jsonl_path), idx),
|
||||
None => transcript_filename_key(jsonl_path),
|
||||
};
|
||||
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
|
||||
let mut fname_node = new_node(&fname_key, &dedup_content);
|
||||
fname_node.provenance = store::Provenance::AgentExperienceMine;
|
||||
let _ = store.upsert_node(fname_node);
|
||||
|
||||
// For unsegmented calls, also write the content-hash key for backwards compat
|
||||
if segment.is_none() {
|
||||
let mut dedup_node = new_node(&dedup_key, &dedup_content);
|
||||
dedup_node.provenance = store::Provenance::AgentExperienceMine;
|
||||
let _ = store.upsert_node(dedup_node);
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
println!(" Saved {} new journal entries.", count);
|
||||
}
|
||||
store.save()?;
|
||||
println!("Done: {} new entries mined.", count);
|
||||
Ok(count)
|
||||
}
|
||||
345
poc-memory/src/fact_mine.rs
Normal file
@@ -0,0 +1,345 @@
// fact_mine.rs — extract atomic factual claims from conversation transcripts
//
// Chunks conversation text into overlapping windows, sends each to Haiku
// for extraction, deduplicates by claim text. Output: JSON array of facts.
//
// Uses Haiku (not Sonnet) for cost efficiency on high-volume extraction.

use crate::config;
|
||||
use crate::llm;
|
||||
use crate::store::{self, Provenance};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
const CHARS_PER_TOKEN: usize = 4;
|
||||
const WINDOW_TOKENS: usize = 2000;
|
||||
const OVERLAP_TOKENS: usize = 200;
|
||||
const WINDOW_CHARS: usize = WINDOW_TOKENS * CHARS_PER_TOKEN;
|
||||
const OVERLAP_CHARS: usize = OVERLAP_TOKENS * CHARS_PER_TOKEN;
|
||||
|
||||
fn extraction_prompt() -> String {
|
||||
let cfg = config::get();
|
||||
format!(
|
||||
r#"Extract atomic factual claims from this conversation excerpt.
|
||||
|
||||
Speakers are labeled [{user}] and [{assistant}] in the transcript.
|
||||
Use their proper names in claims — not "the user" or "the assistant."
|
||||
|
||||
Each claim should be:
|
||||
- A single verifiable statement
|
||||
- Specific enough to be useful in isolation
|
||||
- Tagged with domain (e.g., bcachefs/btree, bcachefs/alloc, bcachefs/journal,
|
||||
bcachefs/ec, bcachefs/reconcile, rust/idioms, workflow/preferences,
|
||||
linux/kernel, memory/design, identity/personal)
|
||||
- Tagged with confidence: "stated" (explicitly said), "implied" (logically follows),
|
||||
or "speculative" (hypothesis, not confirmed)
|
||||
- Include which speaker said it ("{user}", "{assistant}", or "Unknown")
|
||||
|
||||
Do NOT extract:
|
||||
- Opinions or subjective assessments
|
||||
- Conversational filler or greetings
|
||||
- Things that are obviously common knowledge
|
||||
- Restatements of the same fact (pick the clearest version)
|
||||
- System messages, tool outputs, or error logs (extract what was LEARNED from them)
|
||||
- Anything about the conversation itself ("{user} and {assistant} discussed...")
|
||||
- Facts only relevant to this specific conversation (e.g. transient file paths, mid-debug state)
|
||||
|
||||
Output as a JSON array. Each element:
|
||||
{{
|
||||
"claim": "the exact factual statement",
|
||||
"domain": "category/subcategory",
|
||||
"confidence": "stated|implied|speculative",
|
||||
"speaker": "{user}|{assistant}|Unknown"
|
||||
}}
|
||||
|
||||
If the excerpt contains no extractable facts, output an empty array: []
|
||||
|
||||
--- CONVERSATION EXCERPT ---
|
||||
"#, user = cfg.user_name, assistant = cfg.assistant_name)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Fact {
|
||||
pub claim: String,
|
||||
pub domain: String,
|
||||
pub confidence: String,
|
||||
pub speaker: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_file: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_chunk: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_offset: Option<usize>,
|
||||
}
|
||||
|
||||
struct Message {
|
||||
role: String,
|
||||
text: String,
|
||||
timestamp: String,
|
||||
}
|
||||
|
||||
/// Extract user/assistant text messages from a JSONL transcript.
|
||||
fn extract_conversation(path: &Path) -> Vec<Message> {
|
||||
let cfg = config::get();
|
||||
let Ok(content) = fs::read_to_string(path) else { return Vec::new() };
|
||||
let mut messages = Vec::new();
|
||||
|
||||
for line in content.lines() {
|
||||
let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else { continue };
|
||||
|
||||
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if msg_type != "user" && msg_type != "assistant" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let msg = obj.get("message").unwrap_or(&obj);
|
||||
let content = msg.get("content");
|
||||
|
||||
let text = match content {
|
||||
Some(serde_json::Value::String(s)) => s.clone(),
|
||||
Some(serde_json::Value::Array(arr)) => {
|
||||
let texts: Vec<&str> = arr.iter()
|
||||
.filter_map(|block| {
|
||||
let obj = block.as_object()?;
|
||||
if obj.get("type")?.as_str()? != "text" {
|
||||
return None;
|
||||
}
|
||||
let t = obj.get("text")?.as_str()?;
|
||||
if t.contains("<system-reminder>") {
|
||||
return None;
|
||||
}
|
||||
Some(t)
|
||||
})
|
||||
.collect();
|
||||
texts.join("\n")
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let text = text.trim().to_string();
|
||||
if text.len() < 20 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let role = if msg_type == "user" {
|
||||
cfg.user_name.clone()
|
||||
} else {
|
||||
cfg.assistant_name.clone()
|
||||
};
|
||||
messages.push(Message { role, text, timestamp });
|
||||
}
|
||||
|
||||
messages
|
||||
}
|
||||
|
||||
/// Format messages into a single text for chunking.
|
||||
fn format_for_extraction(messages: &[Message]) -> String {
|
||||
messages.iter()
|
||||
.map(|msg| {
|
||||
let text = if msg.text.len() > 3000 {
|
||||
// Find a char boundary near 2800
|
||||
let trunc = msg.text.floor_char_boundary(2800);
|
||||
format!("{}\n[...truncated...]", &msg.text[..trunc])
|
||||
} else {
|
||||
msg.text.clone()
|
||||
};
|
||||
let ts = if msg.timestamp.len() >= 19 { &msg.timestamp[..19] } else { "" };
|
||||
if ts.is_empty() {
|
||||
format!("[{}] {}", msg.role, text)
|
||||
} else {
|
||||
format!("[{} {}] {}", msg.role, ts, text)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n")
|
||||
}
|
||||
|
||||
/// Split text into overlapping windows, breaking at paragraph boundaries.
|
||||
fn chunk_text(text: &str) -> Vec<(usize, &str)> {
|
||||
let mut chunks = Vec::new();
|
||||
let mut start = 0;
|
||||
|
||||
while start < text.len() {
|
||||
let mut end = text.floor_char_boundary((start + WINDOW_CHARS).min(text.len()));
|
||||
|
||||
// Try to break at a paragraph boundary
|
||||
if end < text.len() {
|
||||
if let Some(para) = text[start..end].rfind("\n\n") {
|
||||
if para > WINDOW_CHARS / 2 {
|
||||
end = start + para;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
chunks.push((start, &text[start..end]));
|
||||
|
||||
let next = text.floor_char_boundary(end.saturating_sub(OVERLAP_CHARS));
|
||||
if next <= start {
|
||||
start = end;
|
||||
} else {
|
||||
start = next;
|
||||
}
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
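// Illustrative sketch, not part of the original commit: with WINDOW_TOKENS=2000 and
// OVERLAP_TOKENS=200 the windows are 8000 chars with 800 chars of overlap, so a
// transcript of ~24k chars yields a handful of overlapping chunks that cover it all.
#[cfg(test)]
mod chunk_text_examples {
    use super::{chunk_text, WINDOW_CHARS};

    #[test]
    fn windows_overlap_and_stay_within_the_window_size() {
        let text = "paragraph of transcript text\n\n".repeat(800); // ~24k chars
        let chunks = chunk_text(&text);
        assert!(chunks.len() >= 3);
        for pair in chunks.windows(2) {
            let (start_a, chunk_a) = pair[0];
            let (start_b, _) = pair[1];
            assert!(chunk_a.len() <= WINDOW_CHARS);
            // The next chunk starts before the previous one ends (overlap).
            assert!(start_b < start_a + chunk_a.len());
        }
    }
}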
|
||||
/// Parse JSON facts from model response.
|
||||
fn parse_facts(response: &str) -> Vec<Fact> {
|
||||
let cleaned = response.trim();
|
||||
// Strip markdown code block
|
||||
let cleaned = if cleaned.starts_with("```") {
|
||||
cleaned.lines()
|
||||
.filter(|l| !l.starts_with("```"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
} else {
|
||||
cleaned.to_string()
|
||||
};
|
||||
|
||||
// Find JSON array
|
||||
let start = cleaned.find('[');
|
||||
let end = cleaned.rfind(']');
|
||||
let (Some(start), Some(end)) = (start, end) else { return Vec::new() };
|
||||
|
||||
serde_json::from_str(&cleaned[start..=end]).unwrap_or_default()
|
||||
}
|
||||
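// Illustrative sketch, not part of the original commit: parse_facts tolerates a
// fenced model response and pulls the JSON array out of it. The claim text here
// is made up.
#[cfg(test)]
mod parse_facts_examples {
    use super::parse_facts;

    #[test]
    fn strips_the_code_fence_and_reads_the_array() {
        let response = r#"```json
[{"claim": "bcachefs keeps extents in a btree", "domain": "bcachefs/btree", "confidence": "stated", "speaker": "Unknown"}]
```"#;
        let facts = parse_facts(response);
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].domain, "bcachefs/btree");
        assert_eq!(facts[0].confidence, "stated");
    }
}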
|
||||
/// Mine a single transcript for atomic facts.
|
||||
/// The optional `progress` callback receives status strings (e.g. "chunk 3/47").
|
||||
pub fn mine_transcript(
|
||||
path: &Path,
|
||||
dry_run: bool,
|
||||
progress: Option<&dyn Fn(&str)>,
|
||||
) -> Result<Vec<Fact>, String> {
|
||||
let filename = path.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| "unknown".into());
|
||||
let log = |msg: &str| {
|
||||
eprintln!("{}", msg);
|
||||
if let Some(cb) = progress { cb(msg); }
|
||||
};
|
||||
|
||||
log(&format!("Mining: {}", filename));
|
||||
|
||||
let messages = extract_conversation(path);
|
||||
if messages.is_empty() {
|
||||
log("No messages found");
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
log(&format!("{} messages extracted", messages.len()));
|
||||
|
||||
let text = format_for_extraction(&messages);
|
||||
let chunks = chunk_text(&text);
|
||||
log(&format!("{} chunks ({} chars)", chunks.len(), text.len()));
|
||||
|
||||
if dry_run {
|
||||
for (i, (offset, chunk)) in chunks.iter().enumerate() {
|
||||
eprintln!("\n--- Chunk {} (offset {}, {} chars) ---", i + 1, offset, chunk.len());
|
||||
let preview = if chunk.len() > 500 { &chunk[..chunk.floor_char_boundary(500)] } else { chunk };
|
||||
eprintln!("{}", preview);
|
||||
if chunk.len() > 500 {
|
||||
eprintln!(" ... ({} more chars)", chunk.len() - 500);
|
||||
}
|
||||
}
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let prompt_prefix = extraction_prompt();
|
||||
let mut all_facts = Vec::new();
|
||||
for (i, (_offset, chunk)) in chunks.iter().enumerate() {
|
||||
let status = format!("chunk {}/{} ({} chars)", i + 1, chunks.len(), chunk.len());
|
||||
eprint!(" {}...", status);
|
||||
if let Some(cb) = progress { cb(&status); }
|
||||
|
||||
let prompt = format!("{}{}\n\n--- END OF EXCERPT ---\n\nReturn ONLY a JSON array of factual claims, or [] if none.", prompt_prefix, chunk);
|
||||
let response = match llm::call_haiku("fact-mine", &prompt) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
eprintln!(" error: {}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let mut facts = parse_facts(&response);
|
||||
for fact in &mut facts {
|
||||
fact.source_file = Some(filename.clone());
|
||||
fact.source_chunk = Some(i + 1);
|
||||
fact.source_offset = Some(*_offset);
|
||||
}
|
||||
|
||||
eprintln!(" {} facts", facts.len());
|
||||
all_facts.extend(facts);
|
||||
}
|
||||
|
||||
// Deduplicate by claim text
|
||||
let mut seen = HashSet::new();
|
||||
let before = all_facts.len();
|
||||
all_facts.retain(|f| seen.insert(f.claim.to_lowercase()));
|
||||
let dupes = before - all_facts.len();
|
||||
if dupes > 0 {
|
||||
log(&format!("{} duplicates removed", dupes));
|
||||
}
|
||||
|
||||
log(&format!("Total: {} unique facts", all_facts.len()));
|
||||
Ok(all_facts)
|
||||
}
|
||||
|
||||
/// Mine a transcript and store facts in the capnp store.
|
||||
/// Returns the number of facts stored.
|
||||
/// The optional `progress` callback receives status strings for daemon display.
|
||||
pub fn mine_and_store(
|
||||
path: &Path,
|
||||
progress: Option<&dyn Fn(&str)>,
|
||||
) -> Result<usize, String> {
|
||||
let facts = mine_transcript(path, false, progress)?;
|
||||
|
||||
let filename = path.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| "unknown".into());
|
||||
|
||||
let key = format!("_facts-{}", filename.trim_end_matches(".jsonl"));
|
||||
|
||||
// Always write a marker so we don't re-queue empty transcripts
|
||||
let json = if facts.is_empty() {
|
||||
"[]".to_string()
|
||||
} else {
|
||||
serde_json::to_string_pretty(&facts)
|
||||
.map_err(|e| format!("serialize facts: {}", e))?
|
||||
};
|
||||
|
||||
let mut store = store::Store::load()?;
|
||||
store.upsert_provenance(&key, &json, Provenance::AgentFactMine)?;
|
||||
store.save()?;
|
||||
|
||||
eprintln!(" Stored {} facts as {}", facts.len(), key);
|
||||
Ok(facts.len())
|
||||
}
|
||||
|
||||
/// Mine transcripts, returning all facts. Skips files with fewer than min_messages.
|
||||
pub fn mine_batch(paths: &[&Path], min_messages: usize, dry_run: bool) -> Result<Vec<Fact>, String> {
|
||||
let mut all_facts = Vec::new();
|
||||
|
||||
for path in paths {
|
||||
let messages = extract_conversation(path);
|
||||
if messages.len() < min_messages {
|
||||
eprintln!("Skipping {} ({} messages < {})",
|
||||
path.file_name().map(|n| n.to_string_lossy()).unwrap_or_default(),
|
||||
messages.len(), min_messages);
|
||||
continue;
|
||||
}
|
||||
|
||||
let facts = mine_transcript(path, dry_run, None)?;
|
||||
all_facts.extend(facts);
|
||||
}
|
||||
|
||||
Ok(all_facts)
|
||||
}
|
||||
719
poc-memory/src/graph.rs
Normal file
@@ -0,0 +1,719 @@
// Graph algorithms: clustering coefficient, community detection (label
// propagation), schema fit scoring, small-world metrics, consolidation
// priority scoring.
//
// The Graph is built from the Store's nodes + relations. Edges are
// undirected for clustering/community (even causal edges count as
// connections), but relation type and direction are preserved for
// specific queries.

use crate::store::{Store, RelationType, StoreView};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
/// Weighted edge in the graph
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Edge {
|
||||
pub target: String,
|
||||
pub strength: f32,
|
||||
pub rel_type: RelationType,
|
||||
}
|
||||
|
||||
/// The in-memory graph built from store nodes + relations
|
||||
pub struct Graph {
|
||||
/// Adjacency list: node key → list of edges
|
||||
adj: HashMap<String, Vec<Edge>>,
|
||||
/// All node keys
|
||||
keys: HashSet<String>,
|
||||
/// Community labels (from label propagation)
|
||||
communities: HashMap<String, u32>,
|
||||
}
|
||||
|
||||
impl Graph {
|
||||
pub fn nodes(&self) -> &HashSet<String> {
|
||||
&self.keys
|
||||
}
|
||||
|
||||
pub fn degree(&self, key: &str) -> usize {
|
||||
self.adj.get(key).map(|e| e.len()).unwrap_or(0)
|
||||
}
|
||||
|
||||
pub fn edge_count(&self) -> usize {
|
||||
self.adj.values().map(|e| e.len()).sum::<usize>() / 2
|
||||
}
|
||||
|
||||
/// All edges for a node (full Edge data including rel_type)
|
||||
pub fn edges_of(&self, key: &str) -> &[Edge] {
|
||||
self.adj.get(key)
|
||||
.map(|v| v.as_slice())
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
|
||||
/// All neighbor keys with strengths
|
||||
pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> {
|
||||
self.adj.get(key)
|
||||
.map(|edges| edges.iter().map(|e| (&e.target, e.strength)).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Just neighbor keys
|
||||
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
|
||||
self.adj.get(key)
|
||||
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn community_count(&self) -> usize {
|
||||
let labels: HashSet<_> = self.communities.values().collect();
|
||||
labels.len()
|
||||
}
|
||||
|
||||
pub fn communities(&self) -> &HashMap<String, u32> {
|
||||
&self.communities
|
||||
}
|
||||
|
||||
/// Hub degree threshold: top 5% by degree
|
||||
pub fn hub_threshold(&self) -> usize {
|
||||
let mut degrees: Vec<usize> = self.keys.iter()
|
||||
.map(|k| self.degree(k))
|
||||
.collect();
|
||||
degrees.sort_unstable();
|
||||
if degrees.len() >= 20 {
|
||||
degrees[degrees.len() * 95 / 100]
|
||||
} else {
|
||||
usize::MAX
|
||||
}
|
||||
}
|
||||
|
||||
/// Local clustering coefficient: fraction of a node's neighbors
|
||||
/// that are also neighbors of each other.
|
||||
/// cc(v) = 2E / (deg * (deg - 1))
|
||||
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
||||
let neighbors = self.neighbor_keys(key);
|
||||
let deg = neighbors.len();
|
||||
if deg < 2 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
|
||||
let mut triangles = 0u32;
|
||||
for i in 0..neighbor_vec.len() {
|
||||
for j in (i + 1)..neighbor_vec.len() {
|
||||
let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
|
||||
if ni_neighbors.contains(neighbor_vec[j]) {
|
||||
triangles += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
|
||||
}
|
||||
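    // Worked example (illustrative, not part of the original commit): a node with
    // 4 neighbors has 4*3/2 = 6 possible neighbor pairs; if 2 of those pairs are
    // themselves linked, cc = 2*2 / (4*3) = 0.33. A value near 1 means the
    // neighborhood is a near-clique; near 0 means a pure star around this node.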
|
||||
/// Average clustering coefficient across all nodes with deg >= 2
|
||||
pub fn avg_clustering_coefficient(&self) -> f32 {
|
||||
let mut sum = 0.0f32;
|
||||
let mut count = 0u32;
|
||||
for key in &self.keys {
|
||||
if self.degree(key) >= 2 {
|
||||
sum += self.clustering_coefficient(key);
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
if count == 0 { 0.0 } else { sum / count as f32 }
|
||||
}
|
||||
|
||||
/// Average shortest path length (sampled BFS from up to 100 nodes)
|
||||
pub fn avg_path_length(&self) -> f32 {
|
||||
let sample: Vec<&String> = self.keys.iter().take(100).collect();
|
||||
if sample.is_empty() { return 0.0; }
|
||||
|
||||
let mut total_dist = 0u64;
|
||||
let mut total_pairs = 0u64;
|
||||
|
||||
for &start in &sample {
|
||||
let dists = self.bfs_distances(start);
|
||||
for d in dists.values() {
|
||||
if *d > 0 {
|
||||
total_dist += *d as u64;
|
||||
total_pairs += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if total_pairs == 0 { 0.0 } else { total_dist as f32 / total_pairs as f32 }
|
||||
}
|
||||
|
||||
fn bfs_distances(&self, start: &str) -> HashMap<String, u32> {
|
||||
let mut dist = HashMap::new();
|
||||
let mut queue = VecDeque::new();
|
||||
dist.insert(start.to_string(), 0u32);
|
||||
queue.push_back(start.to_string());
|
||||
|
||||
while let Some(node) = queue.pop_front() {
|
||||
let d = dist[&node];
|
||||
for neighbor in self.neighbor_keys(&node) {
|
||||
if !dist.contains_key(neighbor) {
|
||||
dist.insert(neighbor.to_string(), d + 1);
|
||||
queue.push_back(neighbor.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
dist
|
||||
}
|
||||
|
||||
/// Power-law exponent α of the degree distribution.
|
||||
///
|
||||
/// Estimated via MLE: α = 1 + n / Σ ln(k_i / (k_min - 0.5))
|
||||
/// α ≈ 2: extreme hub dominance (fragile)
|
||||
/// α ≈ 3: healthy scale-free
|
||||
/// α > 3: approaching random graph (egalitarian)
|
||||
pub fn degree_power_law_exponent(&self) -> f32 {
|
||||
let mut degrees: Vec<usize> = self.keys.iter()
|
||||
.map(|k| self.degree(k))
|
||||
.filter(|&d| d > 0) // exclude isolates
|
||||
.collect();
|
||||
if degrees.len() < 10 { return 0.0; } // not enough data
|
||||
|
||||
degrees.sort_unstable();
|
||||
let k_min = degrees[0] as f64;
|
||||
if k_min < 1.0 { return 0.0; }
|
||||
|
||||
let n = degrees.len() as f64;
|
||||
let sum_ln: f64 = degrees.iter()
|
||||
.map(|&k| (k as f64 / (k_min - 0.5)).ln())
|
||||
.sum();
|
||||
|
||||
if sum_ln <= 0.0 { return 0.0; }
|
||||
(1.0 + n / sum_ln) as f32
|
||||
}
|
||||
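    // Worked example (illustrative, not part of the original commit): a star graph
    // with one hub of degree 20 and 20 leaves of degree 1 has k_min = 1, so
    //   Σ ln(k_i / 0.5) = 20·ln(2) + ln(40) ≈ 17.6
    // and α ≈ 1 + 21/17.6 ≈ 2.2, i.e. firmly in the hub-dominated regime, while a
    // more even degree spread pushes α toward 3.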
|
||||
/// Gini coefficient of the degree distribution.
|
||||
///
|
||||
/// 0 = perfectly egalitarian (all nodes same degree)
|
||||
/// 1 = maximally unequal (one node has all edges)
|
||||
/// Measures hub concentration independent of distribution shape.
|
||||
pub fn degree_gini(&self) -> f32 {
|
||||
let mut degrees: Vec<f64> = self.keys.iter()
|
||||
.map(|k| self.degree(k) as f64)
|
||||
.collect();
|
||||
let n = degrees.len();
|
||||
if n < 2 { return 0.0; }
|
||||
|
||||
degrees.sort_by(|a, b| a.total_cmp(b));
|
||||
let mean = degrees.iter().sum::<f64>() / n as f64;
|
||||
if mean < 1e-10 { return 0.0; }
|
||||
|
||||
// Gini = (2 Σ i·x_i) / (n Σ x_i) - (n+1)/n
|
||||
let weighted_sum: f64 = degrees.iter().enumerate()
|
||||
.map(|(i, &d)| (i as f64 + 1.0) * d)
|
||||
.sum();
|
||||
let total = degrees.iter().sum::<f64>();
|
||||
|
||||
let gini = (2.0 * weighted_sum) / (n as f64 * total) - (n as f64 + 1.0) / n as f64;
|
||||
gini.max(0.0) as f32
|
||||
}
|
||||
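    // Worked example (illustrative, not part of the original commit): degrees
    // [1, 1, 1, 5] sorted give Σ i·x_i = 1 + 2 + 3 + 20 = 26 and Σ x_i = 8, so
    // Gini = 2·26 / (4·8) - 5/4 = 1.625 - 1.25 = 0.375: one mild hub among
    // otherwise equal nodes.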
|
||||
/// Small-world coefficient σ = (C/C_rand) / (L/L_rand)
|
||||
/// C_rand ≈ <k>/n, L_rand ≈ ln(n)/ln(<k>)
|
||||
pub fn small_world_sigma(&self) -> f32 {
|
||||
let n = self.keys.len() as f32;
|
||||
if n < 10.0 { return 0.0; }
|
||||
|
||||
let avg_degree = self.adj.values()
|
||||
.map(|e| e.len() as f32)
|
||||
.sum::<f32>() / n;
|
||||
if avg_degree < 1.0 { return 0.0; }
|
||||
|
||||
let c = self.avg_clustering_coefficient();
|
||||
let l = self.avg_path_length();
|
||||
|
||||
let c_rand = avg_degree / n;
|
||||
let l_rand = n.ln() / avg_degree.ln();
|
||||
|
||||
if c_rand < 1e-10 || l_rand < 1e-10 || l < 1e-10 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
(c / c_rand) / (l / l_rand)
|
||||
}
|
||||
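    // Worked example (illustrative, not part of the original commit): n = 100,
    // <k> = 6, C = 0.30, L = 3.0 gives C_rand = 6/100 = 0.06 and
    // L_rand = ln(100)/ln(6) ≈ 2.57, so σ = (0.30/0.06) / (3.0/2.57) ≈ 4.3:
    // well above 1, i.e. much more clustered than a random graph with a similar
    // path length.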
}
|
||||
|
||||
/// Impact of adding a hypothetical edge
|
||||
#[derive(Debug)]
|
||||
pub struct LinkImpact {
|
||||
pub source: String,
|
||||
pub target: String,
|
||||
pub source_deg: usize,
|
||||
pub target_deg: usize,
|
||||
/// Is this a hub link? (either endpoint in top 5% by degree)
|
||||
pub is_hub_link: bool,
|
||||
/// Are both endpoints in the same community?
|
||||
pub same_community: bool,
|
||||
/// Change in clustering coefficient for source
|
||||
pub delta_cc_source: f32,
|
||||
/// Change in clustering coefficient for target
|
||||
pub delta_cc_target: f32,
|
||||
/// Change in degree Gini (positive = more hub-dominated)
|
||||
pub delta_gini: f32,
|
||||
/// Qualitative assessment
|
||||
pub assessment: &'static str,
|
||||
}
|
||||
|
||||
impl Graph {
|
||||
/// Simulate adding an edge and report impact on topology metrics.
|
||||
///
|
||||
/// Doesn't modify the graph — computes what would change if the
|
||||
/// edge were added.
|
||||
pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact {
|
||||
let source_deg = self.degree(source);
|
||||
let target_deg = self.degree(target);
|
||||
let hub_threshold = self.hub_threshold();
|
||||
let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold;
|
||||
|
||||
// Community check
|
||||
let sc = self.communities.get(source);
|
||||
let tc = self.communities.get(target);
|
||||
let same_community = match (sc, tc) {
|
||||
(Some(a), Some(b)) => a == b,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
// CC change for source: adding target as neighbor changes the
|
||||
// triangle count. New triangles form for each node that's a
|
||||
// neighbor of BOTH source and target.
|
||||
let source_neighbors = self.neighbor_keys(source);
|
||||
let target_neighbors = self.neighbor_keys(target);
|
||||
let shared_neighbors = source_neighbors.intersection(&target_neighbors).count();
|
||||
|
||||
let cc_before_source = self.clustering_coefficient(source);
|
||||
let cc_before_target = self.clustering_coefficient(target);
|
||||
|
||||
// Estimate new CC for source after adding edge
|
||||
let new_source_deg = source_deg + 1;
|
||||
let new_source_triangles = if source_deg >= 2 {
|
||||
// Current triangles + new ones from shared neighbors
|
||||
let current_triangles = (cc_before_source
|
||||
* source_deg as f32 * (source_deg as f32 - 1.0) / 2.0) as u32;
|
||||
current_triangles + shared_neighbors as u32
|
||||
} else {
|
||||
shared_neighbors as u32
|
||||
};
|
||||
let cc_after_source = if new_source_deg >= 2 {
|
||||
(2.0 * new_source_triangles as f32)
|
||||
/ (new_source_deg as f32 * (new_source_deg as f32 - 1.0))
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let new_target_deg = target_deg + 1;
|
||||
let new_target_triangles = if target_deg >= 2 {
|
||||
let current_triangles = (cc_before_target
|
||||
* target_deg as f32 * (target_deg as f32 - 1.0) / 2.0) as u32;
|
||||
current_triangles + shared_neighbors as u32
|
||||
} else {
|
||||
shared_neighbors as u32
|
||||
};
|
||||
let cc_after_target = if new_target_deg >= 2 {
|
||||
(2.0 * new_target_triangles as f32)
|
||||
/ (new_target_deg as f32 * (new_target_deg as f32 - 1.0))
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
// Gini change via influence function:
|
||||
// IF(x; Gini, F) = (2F(x) - 1) * x/μ - Gini - 1
|
||||
// Adding an edge increments two degrees. The net ΔGini is the sum
|
||||
// of influence contributions from both endpoints shifting up by 1.
|
||||
let gini_before = self.degree_gini();
|
||||
let n = self.keys.len();
|
||||
let total_degree: f64 = self.keys.iter()
|
||||
.map(|k| self.degree(k) as f64)
|
||||
.sum();
|
||||
let mean_deg = if n > 0 { total_degree / n as f64 } else { 1.0 };
|
||||
|
||||
// CDF at each endpoint's degree: fraction of nodes with degree ≤ d
|
||||
let delta_gini = if mean_deg > 1e-10 && n >= 2 {
|
||||
// Count nodes with degree ≤ source_deg and ≤ target_deg
|
||||
let f_source = self.keys.iter()
|
||||
.filter(|k| self.degree(k) <= source_deg)
|
||||
.count() as f64 / n as f64;
|
||||
let f_target = self.keys.iter()
|
||||
.filter(|k| self.degree(k) <= target_deg)
|
||||
.count() as f64 / n as f64;
|
||||
|
||||
// Influence of incrementing source's degree by 1
|
||||
let new_source = (source_deg + 1) as f64;
|
||||
let if_source = (2.0 * f_source - 1.0) * new_source / mean_deg
|
||||
- gini_before as f64 - 1.0;
|
||||
// Influence of incrementing target's degree by 1
|
||||
let new_target = (target_deg + 1) as f64;
|
||||
let if_target = (2.0 * f_target - 1.0) * new_target / mean_deg
|
||||
- gini_before as f64 - 1.0;
|
||||
|
||||
// Scale: each point contributes 1/n to the distribution
|
||||
((if_source + if_target) / n as f64) as f32
|
||||
} else {
|
||||
0.0f32
|
||||
};
|
||||
|
||||
// Qualitative assessment
|
||||
let assessment = if is_hub_link && same_community {
|
||||
"hub-reinforcing: strengthens existing star topology"
|
||||
} else if is_hub_link && !same_community {
|
||||
"hub-bridging: cross-community but through a hub"
|
||||
} else if !is_hub_link && same_community && shared_neighbors > 0 {
|
||||
"lateral-clustering: strengthens local mesh topology"
|
||||
} else if !is_hub_link && !same_community {
|
||||
"lateral-bridging: best kind — cross-community lateral link"
|
||||
} else if !is_hub_link && same_community {
|
||||
"lateral-local: connects peripheral nodes in same community"
|
||||
} else {
|
||||
"neutral"
|
||||
};
|
||||
|
||||
LinkImpact {
|
||||
source: source.to_string(),
|
||||
target: target.to_string(),
|
||||
source_deg,
|
||||
target_deg,
|
||||
is_hub_link,
|
||||
same_community,
|
||||
delta_cc_source: cc_after_source - cc_before_source,
|
||||
delta_cc_target: cc_after_target - cc_before_target,
|
||||
delta_gini,
|
||||
assessment,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build graph from store data (with community detection)
|
||||
pub fn build_graph(store: &impl StoreView) -> Graph {
|
||||
let (adj, keys) = build_adjacency(store);
|
||||
let communities = label_propagation(&keys, &adj, 20);
|
||||
Graph { adj, keys, communities }
|
||||
}
|
||||
|
||||
/// Build graph without community detection — for spreading activation
|
||||
/// searches where we only need the adjacency list.
|
||||
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
|
||||
let (adj, keys) = build_adjacency(store);
|
||||
Graph { adj, keys, communities: HashMap::new() }
|
||||
}
|
||||
|
||||
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
|
||||
let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
|
||||
let mut keys: HashSet<String> = HashSet::new();
|
||||
|
||||
store.for_each_node(|key, _, _| {
|
||||
keys.insert(key.to_owned());
|
||||
});
|
||||
|
||||
store.for_each_relation(|source_key, target_key, strength, rel_type| {
|
||||
if !keys.contains(source_key) || !keys.contains(target_key) {
|
||||
return;
|
||||
}
|
||||
|
||||
adj.entry(source_key.to_owned()).or_default().push(Edge {
|
||||
target: target_key.to_owned(),
|
||||
strength,
|
||||
rel_type,
|
||||
});
|
||||
adj.entry(target_key.to_owned()).or_default().push(Edge {
|
||||
target: source_key.to_owned(),
|
||||
strength,
|
||||
rel_type,
|
||||
});
|
||||
});
|
||||
|
||||
(adj, keys)
|
||||
}
|
||||
|
||||
/// Label propagation community detection.
|
||||
///
|
||||
/// Each node starts with its own label. Each iteration: adopt the most
|
||||
/// common label among neighbors (weighted by edge strength). Iterate
|
||||
/// until stable or max_iterations.
|
||||
fn label_propagation(
|
||||
keys: &HashSet<String>,
|
||||
adj: &HashMap<String, Vec<Edge>>,
|
||||
max_iterations: u32,
|
||||
) -> HashMap<String, u32> {
|
||||
// Only consider edges above this strength for community votes.
|
||||
// Weak auto-links from triangle closure (0.15-0.35) bridge
|
||||
// unrelated clusters — filtering them lets natural communities emerge.
|
||||
let min_strength: f32 = 0.3;
|
||||
|
||||
// Initialize: each node gets its own label
|
||||
let key_vec: Vec<String> = keys.iter().cloned().collect();
|
||||
let mut labels: HashMap<String, u32> = key_vec.iter()
|
||||
.enumerate()
|
||||
.map(|(i, k)| (k.clone(), i as u32))
|
||||
.collect();
|
||||
|
||||
for _iter in 0..max_iterations {
|
||||
let mut changed = false;
|
||||
|
||||
for key in &key_vec {
|
||||
let edges = match adj.get(key) {
|
||||
Some(e) => e,
|
||||
None => continue,
|
||||
};
|
||||
if edges.is_empty() { continue; }
|
||||
|
||||
// Count weighted votes for each label (skip weak edges)
|
||||
let mut votes: HashMap<u32, f32> = HashMap::new();
|
||||
for edge in edges {
|
||||
if edge.strength < min_strength { continue; }
|
||||
if let Some(&label) = labels.get(&edge.target) {
|
||||
*votes.entry(label).or_default() += edge.strength;
|
||||
}
|
||||
}
|
||||
|
||||
// Adopt the label with most votes
|
||||
if let Some((&best_label, _)) = votes.iter()
|
||||
.max_by(|a, b| a.1.total_cmp(b.1))
|
||||
{
|
||||
let current = labels[key];
|
||||
if best_label != current {
|
||||
labels.insert(key.clone(), best_label);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !changed { break; }
|
||||
}
|
||||
|
||||
// Compact labels to 0..n
|
||||
let mut label_map: HashMap<u32, u32> = HashMap::new();
|
||||
let mut next_id = 0;
|
||||
for label in labels.values_mut() {
|
||||
let new_label = *label_map.entry(*label).or_insert_with(|| {
|
||||
let id = next_id;
|
||||
next_id += 1;
|
||||
id
|
||||
});
|
||||
*label = new_label;
|
||||
}
|
||||
|
||||
labels
|
||||
}
|
||||
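// Illustrative sketch, not part of the original commit: two strongly linked
// triangles joined only by a 0.2 edge stay in separate communities, because
// votes below min_strength (0.3) are ignored during propagation.
#[cfg(test)]
mod label_propagation_examples {
    use super::*;

    fn edge(target: &str, strength: f32) -> Edge {
        Edge { target: target.to_string(), strength, rel_type: RelationType::Link }
    }

    #[test]
    fn weak_bridge_does_not_merge_communities() {
        let names = ["a", "b", "c", "x", "y", "z"];
        let keys: HashSet<String> = names.iter().map(|s| s.to_string()).collect();
        let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
        for (u, v) in [("a", "b"), ("b", "c"), ("a", "c"), ("x", "y"), ("y", "z"), ("x", "z")] {
            adj.entry(u.to_string()).or_default().push(edge(v, 0.9));
            adj.entry(v.to_string()).or_default().push(edge(u, 0.9));
        }
        // The weak bridge between the two triangles is below min_strength.
        adj.entry("c".to_string()).or_default().push(edge("x", 0.2));
        adj.entry("x".to_string()).or_default().push(edge("c", 0.2));

        let labels = label_propagation(&keys, &adj, 20);
        assert_ne!(labels["a"], labels["x"]);
        assert_ne!(labels["b"], labels["y"]);
        assert_ne!(labels["c"], labels["z"]);
    }
}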
|
||||
|
||||
/// A snapshot of graph topology metrics, for tracking evolution over time
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct MetricsSnapshot {
|
||||
pub timestamp: i64,
|
||||
pub date: String,
|
||||
pub nodes: usize,
|
||||
pub edges: usize,
|
||||
pub communities: usize,
|
||||
pub sigma: f32,
|
||||
pub alpha: f32,
|
||||
pub gini: f32,
|
||||
pub avg_cc: f32,
|
||||
pub avg_path_length: f32,
|
||||
// Removed: avg_schema_fit was identical to avg_cc.
|
||||
// Old snapshots with the field still deserialize (serde ignores unknown fields by default).
|
||||
}
|
||||
|
||||
fn metrics_log_path() -> std::path::PathBuf {
|
||||
let home = std::env::var("HOME").unwrap_or_default();
|
||||
std::path::PathBuf::from(home).join(".claude/memory/metrics.jsonl")
|
||||
}
|
||||
|
||||
/// Load previous metrics snapshots
|
||||
pub fn load_metrics_history() -> Vec<MetricsSnapshot> {
|
||||
let path = metrics_log_path();
|
||||
let content = match std::fs::read_to_string(&path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
content.lines()
|
||||
.filter_map(|line| serde_json::from_str(line).ok())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Append a metrics snapshot to the log
|
||||
pub fn save_metrics_snapshot(snap: &MetricsSnapshot) {
|
||||
let path = metrics_log_path();
|
||||
if let Ok(json) = serde_json::to_string(snap) {
|
||||
use std::io::Write;
|
||||
if let Ok(mut f) = std::fs::OpenOptions::new()
|
||||
.create(true).append(true).open(&path)
|
||||
{
|
||||
let _ = writeln!(f, "{}", json);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute current graph metrics as a snapshot (no side effects).
|
||||
pub fn current_metrics(graph: &Graph) -> MetricsSnapshot {
|
||||
let now = crate::store::now_epoch();
|
||||
let date = crate::store::format_datetime_space(now);
|
||||
MetricsSnapshot {
|
||||
timestamp: now,
|
||||
date,
|
||||
nodes: graph.nodes().len(),
|
||||
edges: graph.edge_count(),
|
||||
communities: graph.community_count(),
|
||||
sigma: graph.small_world_sigma(),
|
||||
alpha: graph.degree_power_law_exponent(),
|
||||
gini: graph.degree_gini(),
|
||||
avg_cc: graph.avg_clustering_coefficient(),
|
||||
avg_path_length: graph.avg_path_length(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Health report: summary of graph metrics.
|
||||
/// Saves a metrics snapshot as a side effect (callers who want pure
|
||||
/// computation should use `current_metrics` + `save_metrics_snapshot`).
|
||||
pub fn health_report(graph: &Graph, store: &Store) -> String {
|
||||
let snap = current_metrics(graph);
|
||||
save_metrics_snapshot(&snap);
|
||||
|
||||
let n = snap.nodes;
|
||||
let e = snap.edges;
|
||||
let avg_cc = snap.avg_cc;
|
||||
let avg_pl = snap.avg_path_length;
|
||||
let sigma = snap.sigma;
|
||||
let alpha = snap.alpha;
|
||||
let gini = snap.gini;
|
||||
let communities = snap.communities;
|
||||
|
||||
// Community sizes
|
||||
let mut comm_sizes: HashMap<u32, usize> = HashMap::new();
|
||||
for label in graph.communities().values() {
|
||||
*comm_sizes.entry(*label).or_default() += 1;
|
||||
}
|
||||
let mut sizes: Vec<usize> = comm_sizes.values().copied().collect();
|
||||
sizes.sort_unstable_by(|a, b| b.cmp(a));
|
||||
|
||||
// Degree distribution
|
||||
let mut degrees: Vec<usize> = graph.nodes().iter()
|
||||
.map(|k| graph.degree(k))
|
||||
.collect();
|
||||
degrees.sort_unstable();
|
||||
let max_deg = degrees.last().copied().unwrap_or(0);
|
||||
let median_deg = if degrees.is_empty() { 0 } else { degrees[degrees.len() / 2] };
|
||||
let avg_deg = if n == 0 { 0.0 } else {
|
||||
degrees.iter().sum::<usize>() as f64 / n as f64
|
||||
};
|
||||
|
||||
// Low-CC nodes: poorly integrated
|
||||
let low_cc = graph.nodes().iter()
|
||||
.filter(|k| graph.clustering_coefficient(k) < 0.1)
|
||||
.count();
|
||||
|
||||
// Orphan edges: relations referencing non-existent nodes
|
||||
let mut orphan_edges = 0usize;
|
||||
let mut orphan_sources: Vec<String> = Vec::new();
|
||||
let mut orphan_targets: Vec<String> = Vec::new();
|
||||
for rel in &store.relations {
|
||||
if rel.deleted { continue; }
|
||||
let s_missing = !store.nodes.contains_key(&rel.source_key);
|
||||
let t_missing = !store.nodes.contains_key(&rel.target_key);
|
||||
if s_missing || t_missing {
|
||||
orphan_edges += 1;
|
||||
if s_missing && !orphan_sources.contains(&rel.source_key) {
|
||||
orphan_sources.push(rel.source_key.clone());
|
||||
}
|
||||
if t_missing && !orphan_targets.contains(&rel.target_key) {
|
||||
orphan_targets.push(rel.target_key.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NodeType breakdown
|
||||
let mut type_counts: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
|
||||
for node in store.nodes.values() {
|
||||
let label = match node.node_type {
|
||||
crate::store::NodeType::EpisodicSession => "episodic",
|
||||
crate::store::NodeType::EpisodicDaily => "daily",
|
||||
crate::store::NodeType::EpisodicWeekly => "weekly",
|
||||
crate::store::NodeType::EpisodicMonthly => "monthly",
|
||||
crate::store::NodeType::Semantic => "semantic",
|
||||
};
|
||||
*type_counts.entry(label).or_default() += 1;
|
||||
}
|
||||
|
||||
// Load history for deltas
|
||||
let history = load_metrics_history();
|
||||
let prev = if history.len() >= 2 {
|
||||
Some(&history[history.len() - 2]) // second-to-last (last is the one we just wrote)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
fn delta(current: f32, prev: Option<f32>) -> String {
|
||||
match prev {
|
||||
Some(p) => {
|
||||
let d = current - p;
|
||||
if d.abs() < 0.001 { String::new() }
|
||||
else { format!(" (Δ{:+.3})", d) }
|
||||
}
|
||||
None => String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
let sigma_d = delta(sigma, prev.map(|p| p.sigma));
|
||||
let alpha_d = delta(alpha, prev.map(|p| p.alpha));
|
||||
let gini_d = delta(gini, prev.map(|p| p.gini));
|
||||
let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
|
||||
|
||||
let mut report = format!(
|
||||
"Memory Health Report
|
||||
====================
|
||||
Nodes: {n} Relations: {e} Communities: {communities}
|
||||
|
||||
Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
|
||||
Clustering coefficient (avg): {avg_cc:.4}{cc_d} low-CC (<0.1): {low_cc} nodes
|
||||
Average path length: {avg_pl:.2}
|
||||
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
|
||||
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
|
||||
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)
|
||||
|
||||
Community sizes (top 5): {top5}
|
||||
Types: semantic={semantic} episodic={episodic} daily={daily} weekly={weekly} monthly={monthly}",
|
||||
top5 = sizes.iter().take(5)
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", "),
|
||||
semantic = type_counts.get("semantic").unwrap_or(&0),
|
||||
episodic = type_counts.get("episodic").unwrap_or(&0),
|
||||
daily = type_counts.get("daily").unwrap_or(&0),
|
||||
weekly = type_counts.get("weekly").unwrap_or(&0),
|
||||
monthly = type_counts.get("monthly").unwrap_or(&0),
|
||||
);
|
||||
|
||||
// Orphan edges
|
||||
if orphan_edges == 0 {
|
||||
report.push_str("\n\nBroken links: 0");
|
||||
} else {
|
||||
let mut all_missing: Vec<String> = orphan_sources;
|
||||
all_missing.extend(orphan_targets);
|
||||
all_missing.sort();
|
||||
all_missing.dedup();
|
||||
report.push_str(&format!(
|
||||
"\n\nBroken links: {} edges reference {} missing nodes",
|
||||
orphan_edges, all_missing.len()));
|
||||
for key in all_missing.iter().take(10) {
|
||||
report.push_str(&format!("\n - {}", key));
|
||||
}
|
||||
if all_missing.len() > 10 {
|
||||
report.push_str(&format!("\n ... and {} more", all_missing.len() - 10));
|
||||
}
|
||||
}
|
||||
|
||||
// Show history trend if we have enough data points
|
||||
if history.len() >= 3 {
|
||||
report.push_str("\n\nMetrics history (last 5):\n");
|
||||
for snap in &history[history.len().saturating_sub(5)..] {
|
||||
report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
|
||||
snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
|
||||
}
|
||||
}
|
||||
|
||||
report
|
||||
}
|
||||
959
poc-memory/src/knowledge.rs
Normal file
@@ -0,0 +1,959 @@
// knowledge.rs — knowledge production agents and convergence loop
|
||||
//
|
||||
// Rust port of knowledge_agents.py + knowledge_loop.py.
|
||||
// Four agents mine the memory graph for new knowledge:
|
||||
// 1. Observation — extract facts from raw conversations
|
||||
// 2. Extractor — find patterns in node clusters
|
||||
// 3. Connector — find cross-domain structural connections
|
||||
// 4. Challenger — stress-test existing knowledge nodes
|
||||
//
|
||||
// The loop runs agents in sequence, applies results, measures
|
||||
// convergence via graph-structural metrics (sigma, CC, communities).
|
||||
|
||||
use crate::graph::Graph;
|
||||
use crate::llm;
|
||||
use crate::spectral;
|
||||
use crate::store::{self, Store, new_relation, RelationType};
|
||||
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
fn memory_dir() -> PathBuf {
|
||||
store::memory_dir()
|
||||
}
|
||||
|
||||
fn prompts_dir() -> PathBuf {
|
||||
let manifest = env!("CARGO_MANIFEST_DIR");
|
||||
PathBuf::from(manifest).join("prompts")
|
||||
}
|
||||
|
||||
fn projects_dir() -> PathBuf {
|
||||
let home = std::env::var("HOME").unwrap_or_else(|_| ".".into());
|
||||
PathBuf::from(home).join(".claude/projects")
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Action types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Action {
|
||||
pub kind: ActionKind,
|
||||
pub confidence: Confidence,
|
||||
pub weight: f64,
|
||||
pub depth: i32,
|
||||
pub applied: Option<bool>,
|
||||
pub rejected_reason: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum ActionKind {
|
||||
WriteNode {
|
||||
key: String,
|
||||
content: String,
|
||||
covers: Vec<String>,
|
||||
},
|
||||
Link {
|
||||
source: String,
|
||||
target: String,
|
||||
},
|
||||
Refine {
|
||||
key: String,
|
||||
content: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Confidence {
|
||||
High,
|
||||
Medium,
|
||||
Low,
|
||||
}
|
||||
|
||||
impl Confidence {
|
||||
fn weight(self) -> f64 {
|
||||
match self {
|
||||
Self::High => 1.0,
|
||||
Self::Medium => 0.6,
|
||||
Self::Low => 0.3,
|
||||
}
|
||||
}
|
||||
|
||||
fn value(self) -> f64 {
|
||||
match self {
|
||||
Self::High => 0.9,
|
||||
Self::Medium => 0.6,
|
||||
Self::Low => 0.3,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse(s: &str) -> Self {
|
||||
match s.to_lowercase().as_str() {
|
||||
"high" => Self::High,
|
||||
"low" => Self::Low,
|
||||
_ => Self::Medium,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Action parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub fn parse_write_nodes(text: &str) -> Vec<Action> {
|
||||
let re = Regex::new(r"(?s)WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE").unwrap();
|
||||
let conf_re = Regex::new(r"(?i)CONFIDENCE:\s*(high|medium|low)").unwrap();
|
||||
let covers_re = Regex::new(r"COVERS:\s*(.+)").unwrap();
|
||||
|
||||
re.captures_iter(text)
|
||||
.map(|cap| {
|
||||
let key = cap[1].to_string();
|
||||
let mut content = cap[2].trim().to_string();
|
||||
|
||||
let confidence = conf_re
|
||||
.captures(&content)
|
||||
.map(|c| Confidence::parse(&c[1]))
|
||||
.unwrap_or(Confidence::Medium);
|
||||
content = conf_re.replace(&content, "").trim().to_string();
|
||||
|
||||
let covers: Vec<String> = covers_re
|
||||
.captures(&content)
|
||||
.map(|c| c[1].split(',').map(|s| s.trim().to_string()).collect())
|
||||
.unwrap_or_default();
|
||||
content = covers_re.replace(&content, "").trim().to_string();
|
||||
|
||||
Action {
|
||||
weight: confidence.weight(),
|
||||
kind: ActionKind::WriteNode { key, content, covers },
|
||||
confidence,
|
||||
depth: 0,
|
||||
applied: None,
|
||||
rejected_reason: None,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
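// Illustrative example of the agent output these regexes accept (the real
// prompt templates live under prompts/; the key names here are made up):
//
//     WRITE_NODE rebalance-throttling-pattern
//     The distilled fact or pattern goes here.
//     COVERS: source-key-a, source-key-b
//     CONFIDENCE: high
//     END_NODE
//
// The CONFIDENCE and COVERS lines are parsed out and stripped, so only the
// remaining body is stored as node content.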
|
||||
|
||||
pub fn parse_links(text: &str) -> Vec<Action> {
|
||||
let re = Regex::new(r"(?m)^LINK\s+(\S+)\s+(\S+)").unwrap();
|
||||
re.captures_iter(text)
|
||||
.map(|cap| Action {
|
||||
kind: ActionKind::Link {
|
||||
source: cap[1].to_string(),
|
||||
target: cap[2].to_string(),
|
||||
},
|
||||
confidence: Confidence::Low,
|
||||
weight: 0.3,
|
||||
depth: -1,
|
||||
applied: None,
|
||||
rejected_reason: None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn parse_refines(text: &str) -> Vec<Action> {
|
||||
let re = Regex::new(r"(?s)REFINE\s+(\S+)\s*\n(.*?)END_REFINE").unwrap();
|
||||
re.captures_iter(text)
|
||||
.map(|cap| {
|
||||
let key = cap[1].trim_matches('*').trim().to_string();
|
||||
Action {
|
||||
kind: ActionKind::Refine {
|
||||
key,
|
||||
content: cap[2].trim().to_string(),
|
||||
},
|
||||
confidence: Confidence::Medium,
|
||||
weight: 0.7,
|
||||
depth: 0,
|
||||
applied: None,
|
||||
rejected_reason: None,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn parse_all_actions(text: &str) -> Vec<Action> {
|
||||
let mut actions = parse_write_nodes(text);
|
||||
actions.extend(parse_links(text));
|
||||
actions.extend(parse_refines(text));
|
||||
actions
|
||||
}
|
||||
|
||||
pub fn count_no_ops(text: &str) -> usize {
|
||||
let no_conn = Regex::new(r"\bNO_CONNECTION\b").unwrap().find_iter(text).count();
|
||||
let affirm = Regex::new(r"\bAFFIRM\b").unwrap().find_iter(text).count();
|
||||
let no_extract = Regex::new(r"\bNO_EXTRACTION\b").unwrap().find_iter(text).count();
|
||||
no_conn + affirm + no_extract
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Inference depth tracking
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const DEPTH_DB_KEY: &str = "_knowledge-depths";
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct DepthDb {
|
||||
depths: HashMap<String, i32>,
|
||||
}
|
||||
|
||||
impl DepthDb {
|
||||
pub fn load(store: &Store) -> Self {
|
||||
let depths = store.nodes.get(DEPTH_DB_KEY)
|
||||
.and_then(|n| serde_json::from_str(&n.content).ok())
|
||||
.unwrap_or_default();
|
||||
Self { depths }
|
||||
}
|
||||
|
||||
pub fn save(&self, store: &mut Store) {
|
||||
if let Ok(json) = serde_json::to_string(&self.depths) {
|
||||
store.upsert_provenance(DEPTH_DB_KEY, &json,
|
||||
store::Provenance::AgentKnowledgeObservation).ok();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get(&self, key: &str) -> i32 {
|
||||
self.depths.get(key).copied().unwrap_or(0)
|
||||
}
|
||||
|
||||
pub fn set(&mut self, key: String, depth: i32) {
|
||||
self.depths.insert(key, depth);
|
||||
}
|
||||
}
|
||||
|
||||
/// Agent base depths: observation=1, extractor=2, connector=3
|
||||
fn agent_base_depth(agent: &str) -> Option<i32> {
|
||||
match agent {
|
||||
"observation" => Some(1),
|
||||
"extractor" => Some(2),
|
||||
"connector" => Some(3),
|
||||
"challenger" => None,
|
||||
_ => Some(2),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_action_depth(db: &DepthDb, action: &Action, agent: &str) -> i32 {
|
||||
match &action.kind {
|
||||
ActionKind::Link { .. } => -1,
|
||||
ActionKind::Refine { key, .. } => db.get(key),
|
||||
ActionKind::WriteNode { covers, .. } => {
|
||||
if !covers.is_empty() {
|
||||
covers.iter().map(|k| db.get(k)).max().unwrap_or(0) + 1
|
||||
} else {
|
||||
agent_base_depth(agent).unwrap_or(2)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Confidence threshold that scales with inference depth.
|
||||
pub fn required_confidence(depth: i32, base: f64) -> f64 {
|
||||
if depth <= 0 {
|
||||
return 0.0;
|
||||
}
|
||||
1.0 - (1.0 - base).powi(depth)
|
||||
}
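// Worked example (illustrative): with base = 0.3 the threshold is
// 1 - 0.7^depth — depth 1 → 0.30, depth 2 → 0.51, depth 3 → 0.657,
// depth 4 → 0.76 — so inferences stacked on other inferences need
// progressively higher confidence to be applied.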
|
||||
|
||||
/// Confidence bonus from real-world use.
|
||||
pub fn use_bonus(use_count: u32) -> f64 {
|
||||
if use_count == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
1.0 - 1.0 / (1.0 + 0.15 * use_count as f64)
|
||||
}
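// Worked example (illustrative): 1 use → ~0.13, 5 uses → ~0.43, 20 uses → 0.75,
// approaching 1.0 asymptotically — knowledge that keeps getting retrieved and
// used earns a substantial confidence bonus.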
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Action application
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn stamp_content(content: &str, agent: &str, timestamp: &str, depth: i32) -> String {
|
||||
format!("<!-- author: {} | created: {} | depth: {} -->\n{}", agent, timestamp, depth, content)
|
||||
}
|
||||
|
||||
/// Check if a link already exists between two keys.
|
||||
fn has_edge(store: &Store, source: &str, target: &str) -> bool {
|
||||
store.relations.iter().any(|r| {
|
||||
!r.deleted
|
||||
&& ((r.source_key == source && r.target_key == target)
|
||||
|| (r.source_key == target && r.target_key == source))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn apply_action(
|
||||
store: &mut Store,
|
||||
action: &Action,
|
||||
agent: &str,
|
||||
timestamp: &str,
|
||||
depth: i32,
|
||||
) -> bool {
|
||||
let provenance = agent_provenance(agent);
|
||||
|
||||
match &action.kind {
|
||||
ActionKind::WriteNode { key, content, .. } => {
|
||||
let stamped = stamp_content(content, agent, timestamp, depth);
|
||||
store.upsert_provenance(key, &stamped, provenance).is_ok()
|
||||
}
|
||||
ActionKind::Link { source, target } => {
|
||||
if has_edge(store, source, target) {
|
||||
return false;
|
||||
}
|
||||
let source_uuid = match store.nodes.get(source.as_str()) {
|
||||
Some(n) => n.uuid,
|
||||
None => return false,
|
||||
};
|
||||
let target_uuid = match store.nodes.get(target.as_str()) {
|
||||
Some(n) => n.uuid,
|
||||
None => return false,
|
||||
};
|
||||
let mut rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
RelationType::Link,
|
||||
0.3,
|
||||
source, target,
|
||||
);
|
||||
rel.provenance = provenance;
|
||||
store.add_relation(rel).is_ok()
|
||||
}
|
||||
ActionKind::Refine { key, content } => {
|
||||
let stamped = stamp_content(content, agent, timestamp, depth);
|
||||
store.upsert_provenance(key, &stamped, provenance).is_ok()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn agent_provenance(agent: &str) -> store::Provenance {
|
||||
match agent {
|
||||
"observation" => store::Provenance::AgentKnowledgeObservation,
|
||||
"extractor" | "pattern" => store::Provenance::AgentKnowledgePattern,
|
||||
"connector" => store::Provenance::AgentKnowledgeConnector,
|
||||
"challenger" => store::Provenance::AgentKnowledgeChallenger,
|
||||
_ => store::Provenance::Agent,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent runners
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn load_prompt(name: &str) -> Result<String, String> {
|
||||
let path = prompts_dir().join(format!("{}.md", name));
|
||||
fs::read_to_string(&path).map_err(|e| format!("load prompt {}: {}", name, e))
|
||||
}
|
||||
|
||||
fn get_graph_topology(store: &Store, graph: &Graph) -> String {
|
||||
format!("Nodes: {} Relations: {}\n", store.nodes.len(), graph.edge_count())
|
||||
}
|
||||
|
||||
/// Strip <system-reminder> blocks from text
|
||||
fn strip_system_tags(text: &str) -> String {
|
||||
let re = Regex::new(r"(?s)<system-reminder>.*?</system-reminder>").unwrap();
|
||||
re.replace_all(text, "").trim().to_string()
|
||||
}
|
||||
|
||||
/// Extract human-readable dialogue from a conversation JSONL
|
||||
fn extract_conversation_text(path: &Path, max_chars: usize) -> String {
|
||||
let Ok(content) = fs::read_to_string(path) else { return String::new() };
|
||||
let mut fragments = Vec::new();
|
||||
let mut total = 0;
|
||||
|
||||
for line in content.lines() {
|
||||
let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else { continue };
|
||||
|
||||
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
|
||||
if msg_type == "user" && obj.get("userType").and_then(|v| v.as_str()) == Some("external") {
|
||||
if let Some(text) = extract_text_content(&obj) {
|
||||
let text = strip_system_tags(&text);
|
||||
if text.starts_with("[Request interrupted") { continue; }
|
||||
if text.len() > 5 {
|
||||
fragments.push(format!("**{}:** {}", crate::config::get().user_name, text));
|
||||
total += text.len();
|
||||
}
|
||||
}
|
||||
} else if msg_type == "assistant" {
|
||||
if let Some(text) = extract_text_content(&obj) {
|
||||
let text = strip_system_tags(&text);
|
||||
if text.len() > 10 {
|
||||
fragments.push(format!("**{}:** {}", crate::config::get().assistant_name, text));
|
||||
total += text.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if total > max_chars { break; }
|
||||
}
|
||||
fragments.join("\n\n")
|
||||
}
|
||||
|
||||
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
|
||||
let msg = obj.get("message")?;
|
||||
let content = msg.get("content")?;
|
||||
if let Some(s) = content.as_str() {
|
||||
return Some(s.to_string());
|
||||
}
|
||||
if let Some(arr) = content.as_array() {
|
||||
let texts: Vec<&str> = arr.iter()
|
||||
.filter_map(|b| {
|
||||
if b.get("type")?.as_str()? == "text" {
|
||||
b.get("text")?.as_str()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
if !texts.is_empty() {
|
||||
return Some(texts.join("\n"));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Count short user messages (dialogue turns) in a JSONL
|
||||
fn count_dialogue_turns(path: &Path) -> usize {
|
||||
let Ok(content) = fs::read_to_string(path) else { return 0 };
|
||||
content.lines()
|
||||
.filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
|
||||
.filter(|obj| {
|
||||
obj.get("type").and_then(|v| v.as_str()) == Some("user")
|
||||
&& obj.get("userType").and_then(|v| v.as_str()) == Some("external")
|
||||
})
|
||||
.filter(|obj| {
|
||||
let text = extract_text_content(obj).unwrap_or_default();
|
||||
text.len() > 5 && text.len() < 500
|
||||
&& !text.starts_with("[Request interrupted")
|
||||
&& !text.starts_with("Implement the following")
|
||||
})
|
||||
.count()
|
||||
}
|
||||
|
||||
/// Select conversation fragments for the observation extractor
|
||||
fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
|
||||
let projects = projects_dir();
|
||||
if !projects.exists() { return Vec::new(); }
|
||||
|
||||
let mut jsonl_files: Vec<PathBuf> = Vec::new();
|
||||
if let Ok(dirs) = fs::read_dir(&projects) {
|
||||
for dir in dirs.filter_map(|e| e.ok()) {
|
||||
if !dir.path().is_dir() { continue; }
|
||||
if let Ok(files) = fs::read_dir(dir.path()) {
|
||||
for f in files.filter_map(|e| e.ok()) {
|
||||
let p = f.path();
|
||||
if p.extension().map(|x| x == "jsonl").unwrap_or(false) {
|
||||
if let Ok(meta) = p.metadata() {
|
||||
if meta.len() > 50_000 {
|
||||
jsonl_files.push(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut scored: Vec<(usize, PathBuf)> = jsonl_files.into_iter()
|
||||
.map(|f| (count_dialogue_turns(&f), f))
|
||||
.filter(|(turns, _)| *turns >= 10)
|
||||
.collect();
|
||||
scored.sort_by(|a, b| b.0.cmp(&a.0));
|
||||
|
||||
let mut fragments = Vec::new();
|
||||
for (_, f) in scored.iter().take(n * 2) {
|
||||
let session_id = f.file_stem()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| "unknown".into());
|
||||
let text = extract_conversation_text(f, 8000);
|
||||
if text.len() > 500 {
|
||||
fragments.push((session_id, text));
|
||||
}
|
||||
if fragments.len() >= n { break; }
|
||||
}
|
||||
fragments
|
||||
}
|
||||
|
||||
pub fn run_observation_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
|
||||
let template = load_prompt("observation-extractor")?;
|
||||
let topology = get_graph_topology(store, graph);
|
||||
let fragments = select_conversation_fragments(batch_size);
|
||||
|
||||
let mut results = Vec::new();
|
||||
for (i, (session_id, text)) in fragments.iter().enumerate() {
|
||||
eprintln!(" Observation extractor {}/{}: session {}... ({} chars)",
|
||||
i + 1, fragments.len(), &session_id[..session_id.len().min(12)], text.len());
|
||||
|
||||
let prompt = template
|
||||
.replace("{{TOPOLOGY}}", &topology)
|
||||
.replace("{{CONVERSATIONS}}", &format!("### Session {}\n\n{}", session_id, text));
|
||||
|
||||
let response = llm::call_sonnet("knowledge", &prompt)?;
|
||||
results.push(format!("## Session: {}\n\n{}", session_id, response));
|
||||
}
|
||||
Ok(results.join("\n\n---\n\n"))
|
||||
}
|
||||
|
||||
/// Load spectral embedding from disk
|
||||
fn load_spectral_embedding() -> HashMap<String, Vec<f64>> {
|
||||
spectral::load_embedding()
|
||||
.map(|emb| emb.coords)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn spectral_distance(embedding: &HashMap<String, Vec<f64>>, a: &str, b: &str) -> f64 {
|
||||
let (Some(va), Some(vb)) = (embedding.get(a), embedding.get(b)) else {
|
||||
return f64::INFINITY;
|
||||
};
|
||||
let dot: f64 = va.iter().zip(vb.iter()).map(|(a, b)| a * b).sum();
|
||||
let norm_a: f64 = va.iter().map(|x| x * x).sum::<f64>().sqrt();
|
||||
let norm_b: f64 = vb.iter().map(|x| x * x).sum::<f64>().sqrt();
|
||||
if norm_a == 0.0 || norm_b == 0.0 {
|
||||
return f64::INFINITY;
|
||||
}
|
||||
1.0 - dot / (norm_a * norm_b)
|
||||
}
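// Note (descriptive, added for clarity): this is cosine distance over the
// spectral coordinates, 1 - cos(va, vb) — 0.0 for identical directions,
// 1.0 for orthogonal, up to 2.0 for opposite. Missing or zero-norm vectors
// are treated as infinitely far apart so they never get clustered together.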
|
||||
|
||||
fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
|
||||
let embedding = load_spectral_embedding();
|
||||
let semantic_keys: Vec<&String> = embedding.keys().collect();
|
||||
|
||||
let cluster_size = 5;
|
||||
let mut used = HashSet::new();
|
||||
let mut clusters = Vec::new();
|
||||
|
||||
for _ in 0..n {
|
||||
let available: Vec<&&String> = semantic_keys.iter()
|
||||
.filter(|k| !used.contains(**k))
|
||||
.collect();
|
||||
if available.len() < cluster_size { break; }
|
||||
|
||||
let seed = available[0];
|
||||
let mut distances: Vec<(f64, &String)> = available.iter()
|
||||
.filter(|k| ***k != *seed)
|
||||
.map(|k| (spectral_distance(&embedding, seed, k), **k))
|
||||
.filter(|(d, _)| d.is_finite())
|
||||
.collect();
|
||||
distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
|
||||
|
||||
let cluster: Vec<String> = std::iter::once((*seed).clone())
|
||||
.chain(distances.iter().take(cluster_size - 1).map(|(_, k)| (*k).clone()))
|
||||
.collect();
|
||||
for k in &cluster { used.insert(k.clone()); }
|
||||
clusters.push(cluster);
|
||||
}
|
||||
clusters
|
||||
}
|
||||
|
||||
pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
|
||||
let template = load_prompt("extractor")?;
|
||||
let topology = get_graph_topology(store, graph);
|
||||
let clusters = select_extractor_clusters(store, batch_size);
|
||||
|
||||
let mut results = Vec::new();
|
||||
for (i, cluster) in clusters.iter().enumerate() {
|
||||
eprintln!(" Extractor cluster {}/{}: {} nodes", i + 1, clusters.len(), cluster.len());
|
||||
|
||||
let node_texts: Vec<String> = cluster.iter()
|
||||
.filter_map(|key| {
|
||||
let content = store.nodes.get(key)?.content.as_str();
|
||||
Some(format!("### {}\n{}", key, content))
|
||||
})
|
||||
.collect();
|
||||
if node_texts.is_empty() { continue; }
|
||||
|
||||
let prompt = template
|
||||
.replace("{{TOPOLOGY}}", &topology)
|
||||
.replace("{{NODES}}", &node_texts.join("\n\n"));
|
||||
|
||||
let response = llm::call_sonnet("knowledge", &prompt)?;
|
||||
results.push(format!("## Cluster {}: {}...\n\n{}", i + 1,
|
||||
cluster.iter().take(3).cloned().collect::<Vec<_>>().join(", "), response));
|
||||
}
|
||||
Ok(results.join("\n\n---\n\n"))
|
||||
}
|
||||
|
||||
fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<String>, Vec<String>)> {
|
||||
let embedding = load_spectral_embedding();
|
||||
let semantic_keys: Vec<&String> = embedding.keys().collect();
|
||||
|
||||
let mut pairs = Vec::new();
|
||||
let mut used = HashSet::new();
|
||||
|
||||
for seed in semantic_keys.iter().take(n * 10) {
|
||||
if used.contains(*seed) { continue; }
|
||||
|
||||
let mut near: Vec<(f64, &String)> = semantic_keys.iter()
|
||||
.filter(|k| ***k != **seed && !used.contains(**k))
|
||||
.map(|k| (spectral_distance(&embedding, seed, k), *k))
|
||||
.filter(|(d, _)| *d < 0.5 && d.is_finite())
|
||||
.collect();
|
||||
near.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
|
||||
|
||||
for (_, target) in near.iter().take(5) {
|
||||
if !has_edge(store, seed, target) {
|
||||
let _ = graph; // graph available for future use
|
||||
used.insert((*seed).clone());
|
||||
used.insert((*target).clone());
|
||||
pairs.push((vec![(*seed).clone()], vec![(*target).clone()]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if pairs.len() >= n { break; }
|
||||
}
|
||||
pairs
|
||||
}
|
||||
|
||||
pub fn run_connector(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
|
||||
let template = load_prompt("connector")?;
|
||||
let topology = get_graph_topology(store, graph);
|
||||
let pairs = select_connector_pairs(store, graph, batch_size);
|
||||
|
||||
let mut results = Vec::new();
|
||||
for (i, (group_a, group_b)) in pairs.iter().enumerate() {
|
||||
eprintln!(" Connector pair {}/{}", i + 1, pairs.len());
|
||||
|
||||
let nodes_a: Vec<String> = group_a.iter()
|
||||
.filter_map(|k| {
|
||||
let c = store.nodes.get(k)?.content.as_str();
|
||||
Some(format!("### {}\n{}", k, c))
|
||||
})
|
||||
.collect();
|
||||
let nodes_b: Vec<String> = group_b.iter()
|
||||
.filter_map(|k| {
|
||||
let c = store.nodes.get(k)?.content.as_str();
|
||||
Some(format!("### {}\n{}", k, c))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let prompt = template
|
||||
.replace("{{TOPOLOGY}}", &topology)
|
||||
.replace("{{NODES_A}}", &nodes_a.join("\n\n"))
|
||||
.replace("{{NODES_B}}", &nodes_b.join("\n\n"));
|
||||
|
||||
let response = llm::call_sonnet("knowledge", &prompt)?;
|
||||
results.push(format!("## Pair {}: {} ↔ {}\n\n{}",
|
||||
i + 1, group_a.join(", "), group_b.join(", "), response));
|
||||
}
|
||||
Ok(results.join("\n\n---\n\n"))
|
||||
}
|
||||
|
||||
pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
|
||||
let template = load_prompt("challenger")?;
|
||||
let topology = get_graph_topology(store, graph);
|
||||
|
||||
let mut candidates: Vec<(&String, usize)> = store.nodes.iter()
|
||||
.map(|(k, _)| (k, graph.degree(k)))
|
||||
.collect();
|
||||
candidates.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
|
||||
let mut results = Vec::new();
|
||||
for (i, (key, _)) in candidates.iter().take(batch_size).enumerate() {
|
||||
eprintln!(" Challenger {}/{}: {}", i + 1, batch_size.min(candidates.len()), key);
|
||||
|
||||
let content = match store.nodes.get(key.as_str()) {
|
||||
Some(n) => &n.content,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let prompt = template
|
||||
.replace("{{TOPOLOGY}}", &topology)
|
||||
.replace("{{NODE_KEY}}", key)
|
||||
.replace("{{NODE_CONTENT}}", content);
|
||||
|
||||
let response = llm::call_sonnet("knowledge", &prompt)?;
|
||||
results.push(format!("## Challenge: {}\n\n{}", key, response));
|
||||
}
|
||||
Ok(results.join("\n\n---\n\n"))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Convergence metrics
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CycleResult {
|
||||
pub cycle: usize,
|
||||
pub timestamp: String,
|
||||
pub total_actions: usize,
|
||||
pub total_applied: usize,
|
||||
pub total_no_ops: usize,
|
||||
pub depth_rejected: usize,
|
||||
pub weighted_delta: f64,
|
||||
pub graph_metrics_before: GraphMetrics,
|
||||
pub graph_metrics_after: GraphMetrics,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct GraphMetrics {
|
||||
pub nodes: usize,
|
||||
pub edges: usize,
|
||||
pub cc: f64,
|
||||
pub sigma: f64,
|
||||
pub communities: usize,
|
||||
}
|
||||
|
||||
impl GraphMetrics {
|
||||
pub fn from_graph(store: &Store, graph: &Graph) -> Self {
|
||||
Self {
|
||||
nodes: store.nodes.len(),
|
||||
edges: graph.edge_count(),
|
||||
cc: graph.avg_clustering_coefficient() as f64,
|
||||
sigma: graph.small_world_sigma() as f64,
|
||||
communities: graph.community_count(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn metric_stability(history: &[CycleResult], key: &str, window: usize) -> f64 {
|
||||
if history.len() < window { return f64::INFINITY; }
|
||||
|
||||
let values: Vec<f64> = history[history.len() - window..].iter()
|
||||
.map(|h| match key {
|
||||
"sigma" => h.graph_metrics_after.sigma,
|
||||
"cc" => h.graph_metrics_after.cc,
|
||||
"communities" => h.graph_metrics_after.communities as f64,
|
||||
_ => 0.0,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if values.len() < 2 { return f64::INFINITY; }
|
||||
let mean = values.iter().sum::<f64>() / values.len() as f64;
|
||||
if mean == 0.0 { return 0.0; }
|
||||
let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / values.len() as f64;
|
||||
variance.sqrt() / mean.abs()
|
||||
}
|
||||
|
||||
pub fn check_convergence(history: &[CycleResult], window: usize) -> bool {
|
||||
if history.len() < window { return false; }
|
||||
|
||||
let sigma_cv = metric_stability(history, "sigma", window);
|
||||
let cc_cv = metric_stability(history, "cc", window);
|
||||
let comm_cv = metric_stability(history, "communities", window);
|
||||
|
||||
let recent = &history[history.len() - window..];
|
||||
let avg_delta = recent.iter().map(|r| r.weighted_delta).sum::<f64>() / recent.len() as f64;
|
||||
|
||||
eprintln!("\n Convergence check (last {} cycles):", window);
|
||||
eprintln!(" sigma CV: {:.4} (< 0.05?)", sigma_cv);
|
||||
eprintln!(" CC CV: {:.4} (< 0.05?)", cc_cv);
|
||||
eprintln!(" community CV: {:.4} (< 0.10?)", comm_cv);
|
||||
eprintln!(" avg delta: {:.2} (< 1.00?)", avg_delta);
|
||||
|
||||
let structural = sigma_cv < 0.05 && cc_cv < 0.05 && comm_cv < 0.10;
|
||||
let behavioral = avg_delta < 1.0;
|
||||
|
||||
if structural && behavioral {
|
||||
eprintln!(" → CONVERGED");
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
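// Worked example (illustrative numbers): with window = 5, sigma values of
// [2.10, 2.08, 2.11, 2.09, 2.10] give mean ≈ 2.096 and stddev ≈ 0.010, so
// CV ≈ 0.005 < 0.05 — structurally stable. If cc and community CVs are also
// under their thresholds and the applied-action weight averages below 1.0,
// the loop reports convergence.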
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// The knowledge loop
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub struct KnowledgeLoopConfig {
|
||||
pub max_cycles: usize,
|
||||
pub batch_size: usize,
|
||||
pub window: usize,
|
||||
pub max_depth: i32,
|
||||
pub confidence_base: f64,
|
||||
}
|
||||
|
||||
impl Default for KnowledgeLoopConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_cycles: 20,
|
||||
batch_size: 5,
|
||||
window: 5,
|
||||
max_depth: 4,
|
||||
confidence_base: 0.3,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run_knowledge_loop(config: &KnowledgeLoopConfig) -> Result<Vec<CycleResult>, String> {
|
||||
let mut store = Store::load()?;
|
||||
let mut depth_db = DepthDb::load(&store);
|
||||
let mut history = Vec::new();
|
||||
|
||||
eprintln!("Knowledge Loop — fixed-point iteration");
|
||||
eprintln!(" max_cycles={} batch_size={}", config.max_cycles, config.batch_size);
|
||||
eprintln!(" window={} max_depth={}", config.window, config.max_depth);
|
||||
|
||||
for cycle in 1..=config.max_cycles {
|
||||
let result = run_cycle(cycle, config, &mut depth_db)?;
|
||||
history.push(result);
|
||||
|
||||
if check_convergence(&history, config.window) {
|
||||
eprintln!("\n CONVERGED after {} cycles", cycle);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Save loop summary as a store node
|
||||
if let Some(first) = history.first() {
|
||||
let key = format!("_knowledge-loop-{}", first.timestamp);
|
||||
if let Ok(json) = serde_json::to_string_pretty(&history) {
|
||||
store = Store::load()?;
|
||||
store.upsert_provenance(&key, &json,
|
||||
store::Provenance::AgentKnowledgeObservation).ok();
|
||||
depth_db.save(&mut store);
|
||||
store.save()?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(history)
|
||||
}
|
||||
|
||||
fn run_cycle(
|
||||
cycle_num: usize,
|
||||
config: &KnowledgeLoopConfig,
|
||||
depth_db: &mut DepthDb,
|
||||
) -> Result<CycleResult, String> {
|
||||
let timestamp = chrono::Local::now().format("%Y%m%dT%H%M%S").to_string();
|
||||
eprintln!("\n{}", "=".repeat(60));
|
||||
eprintln!("CYCLE {} — {}", cycle_num, timestamp);
|
||||
eprintln!("{}", "=".repeat(60));
|
||||
|
||||
let mut store = Store::load()?;
|
||||
let graph = store.build_graph();
|
||||
let metrics_before = GraphMetrics::from_graph(&store, &graph);
|
||||
eprintln!(" Before: nodes={} edges={} cc={:.3} sigma={:.3}",
|
||||
metrics_before.nodes, metrics_before.edges, metrics_before.cc, metrics_before.sigma);
|
||||
|
||||
let mut all_actions = Vec::new();
|
||||
let mut all_no_ops = 0;
|
||||
let mut depth_rejected = 0;
|
||||
let mut total_applied = 0;
|
||||
|
||||
// Run each agent, rebuilding graph after mutations
|
||||
let agent_names = ["observation", "extractor", "connector", "challenger"];
|
||||
|
||||
for agent_name in &agent_names {
|
||||
eprintln!("\n --- {} (n={}) ---", agent_name, config.batch_size);
|
||||
|
||||
// Rebuild graph to reflect any mutations from previous agents
|
||||
let graph = store.build_graph();
|
||||
|
||||
let output = match *agent_name {
|
||||
"observation" => run_observation_extractor(&store, &graph, config.batch_size),
|
||||
"extractor" => run_extractor(&store, &graph, config.batch_size),
|
||||
"connector" => run_connector(&store, &graph, config.batch_size),
|
||||
"challenger" => run_challenger(&store, &graph, config.batch_size),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let output = match output {
|
||||
Ok(o) => o,
|
||||
Err(e) => {
|
||||
eprintln!(" ERROR: {}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Store raw output as a node (for debugging/audit)
|
||||
let raw_key = format!("_knowledge-{}-{}", agent_name, timestamp);
|
||||
let raw_content = format!("# {} Agent Results — {}\n\n{}", agent_name, timestamp, output);
|
||||
store.upsert_provenance(&raw_key, &raw_content,
|
||||
agent_provenance(agent_name)).ok();
|
||||
|
||||
let mut actions = parse_all_actions(&output);
|
||||
let no_ops = count_no_ops(&output);
|
||||
all_no_ops += no_ops;
|
||||
|
||||
eprintln!(" Actions: {} No-ops: {}", actions.len(), no_ops);
|
||||
|
||||
let mut applied = 0;
|
||||
for action in &mut actions {
|
||||
let depth = compute_action_depth(depth_db, action, agent_name);
|
||||
action.depth = depth;
|
||||
|
||||
match &action.kind {
|
||||
ActionKind::WriteNode { key, covers, .. } => {
|
||||
let conf_val = action.confidence.value();
|
||||
let req = required_confidence(depth, config.confidence_base);
|
||||
|
||||
let source_uses: Vec<u32> = covers.iter()
|
||||
.filter_map(|k| store.nodes.get(k).map(|n| n.uses))
|
||||
.collect();
|
||||
let avg_uses = if source_uses.is_empty() { 0 }
|
||||
else { source_uses.iter().sum::<u32>() / source_uses.len() as u32 };
|
||||
let eff_conf = (conf_val + use_bonus(avg_uses)).min(1.0);
|
||||
|
||||
if eff_conf < req {
|
||||
action.applied = Some(false);
|
||||
action.rejected_reason = Some("depth_threshold".into());
|
||||
depth_rejected += 1;
|
||||
continue;
|
||||
}
|
||||
if depth > config.max_depth {
|
||||
action.applied = Some(false);
|
||||
action.rejected_reason = Some("max_depth".into());
|
||||
depth_rejected += 1;
|
||||
continue;
|
||||
}
|
||||
eprintln!(" WRITE {} depth={} conf={:.2} eff={:.2} req={:.2}",
|
||||
key, depth, conf_val, eff_conf, req);
|
||||
}
|
||||
ActionKind::Link { source, target } => {
|
||||
eprintln!(" LINK {} → {}", source, target);
|
||||
}
|
||||
ActionKind::Refine { key, .. } => {
|
||||
eprintln!(" REFINE {} depth={}", key, depth);
|
||||
}
|
||||
}
|
||||
|
||||
if apply_action(&mut store, action, agent_name, &timestamp, depth) {
|
||||
applied += 1;
|
||||
action.applied = Some(true);
|
||||
if let ActionKind::WriteNode { key, .. } | ActionKind::Refine { key, .. } = &action.kind {
|
||||
depth_db.set(key.clone(), depth);
|
||||
}
|
||||
} else {
|
||||
action.applied = Some(false);
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!(" Applied: {}/{}", applied, actions.len());
|
||||
total_applied += applied;
|
||||
all_actions.extend(actions);
|
||||
}
|
||||
|
||||
depth_db.save(&mut store);
|
||||
|
||||
// Recompute spectral if anything changed
|
||||
if total_applied > 0 {
|
||||
eprintln!("\n Recomputing spectral embedding...");
|
||||
let graph = store.build_graph();
|
||||
let result = spectral::decompose(&graph, 8);
|
||||
let emb = spectral::to_embedding(&result);
|
||||
spectral::save_embedding(&emb).ok();
|
||||
}
|
||||
|
||||
let graph = store.build_graph();
|
||||
let metrics_after = GraphMetrics::from_graph(&store, &graph);
|
||||
let weighted_delta: f64 = all_actions.iter()
|
||||
.filter(|a| a.applied == Some(true))
|
||||
.map(|a| a.weight)
|
||||
.sum();
|
||||
|
||||
eprintln!("\n CYCLE {} SUMMARY", cycle_num);
|
||||
eprintln!(" Applied: {}/{} depth-rejected: {} no-ops: {}",
|
||||
total_applied, all_actions.len(), depth_rejected, all_no_ops);
|
||||
eprintln!(" Weighted delta: {:.2}", weighted_delta);
|
||||
|
||||
Ok(CycleResult {
|
||||
cycle: cycle_num,
|
||||
timestamp,
|
||||
total_actions: all_actions.len(),
|
||||
total_applied,
|
||||
total_no_ops: all_no_ops,
|
||||
depth_rejected,
|
||||
weighted_delta,
|
||||
graph_metrics_before: metrics_before,
|
||||
graph_metrics_after: metrics_after,
|
||||
})
|
||||
}
|
||||
28
poc-memory/src/lib.rs
Normal file
@@ -0,0 +1,28 @@
// poc-memory library — shared modules for all binaries
|
||||
//
|
||||
// Re-exports modules so that memory-search and other binaries
|
||||
// can call library functions directly instead of shelling out.
|
||||
|
||||
pub mod config;
|
||||
pub mod store;
|
||||
pub mod util;
|
||||
pub mod llm;
|
||||
pub mod digest;
|
||||
pub mod audit;
|
||||
pub mod enrich;
|
||||
pub mod consolidate;
|
||||
pub mod graph;
|
||||
pub mod search;
|
||||
pub mod similarity;
|
||||
pub mod migrate;
|
||||
pub mod neuro;
|
||||
pub mod query;
|
||||
pub mod spectral;
|
||||
pub mod lookups;
|
||||
pub mod daemon;
|
||||
pub mod fact_mine;
|
||||
pub mod knowledge;
|
||||
|
||||
pub mod memory_capnp {
|
||||
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
|
||||
}
|
||||
144
poc-memory/src/llm.rs
Normal file
@@ -0,0 +1,144 @@
// LLM utilities: model invocation and response parsing
|
||||
//
|
||||
// Calls claude CLI as a subprocess. Uses prctl(PR_SET_PDEATHSIG)
|
||||
// so child processes die when the daemon exits, preventing orphans.
|
||||
|
||||
use crate::store::Store;
|
||||
|
||||
use regex::Regex;
|
||||
use std::fs;
|
||||
use std::os::unix::process::CommandExt;
|
||||
use std::process::Command;
|
||||
|
||||
fn log_usage(agent: &str, model: &str, prompt: &str, response: &str,
|
||||
duration_ms: u128, ok: bool) {
|
||||
let dir = crate::config::get().data_dir.join("llm-logs").join(agent);
|
||||
let _ = fs::create_dir_all(&dir);
|
||||
|
||||
let date = chrono::Local::now().format("%Y-%m-%d");
|
||||
let path = dir.join(format!("{}.md", date));
|
||||
|
||||
let ts = chrono::Local::now().format("%H:%M:%S");
|
||||
let status = if ok { "ok" } else { "ERROR" };
|
||||
|
||||
let entry = format!(
|
||||
"\n## {} — {} ({}, {:.1}s, {})\n\n\
|
||||
### Prompt ({} chars)\n\n\
|
||||
```\n{}\n```\n\n\
|
||||
### Response ({} chars)\n\n\
|
||||
```\n{}\n```\n\n---\n",
|
||||
ts, agent, model, duration_ms as f64 / 1000.0, status,
|
||||
prompt.len(), prompt,
|
||||
response.len(), response,
|
||||
);
|
||||
|
||||
use std::io::Write;
|
||||
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(&path) {
|
||||
let _ = f.write_all(entry.as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
/// Call a model via claude CLI. Returns the response text.
|
||||
///
|
||||
/// Sets PR_SET_PDEATHSIG on the child so it gets SIGTERM if the
|
||||
/// parent daemon exits — no more orphaned claude processes.
|
||||
fn call_model(agent: &str, model: &str, prompt: &str) -> Result<String, String> {
|
||||
// Write prompt to temp file (claude CLI needs file input for large prompts)
|
||||
let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
|
||||
std::process::id(), std::thread::current().id()));
|
||||
fs::write(&tmp, prompt)
|
||||
.map_err(|e| format!("write temp prompt: {}", e))?;
|
||||
|
||||
let mut cmd = Command::new("claude");
|
||||
cmd.args(["-p", "--model", model, "--tools", "", "--no-session-persistence"])
|
||||
.stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
|
||||
.env_remove("CLAUDECODE");
|
||||
|
||||
// Use separate OAuth credentials for agent work if configured
|
||||
if let Some(ref dir) = crate::config::get().agent_config_dir {
|
||||
cmd.env("CLAUDE_CONFIG_DIR", dir);
|
||||
}
|
||||
|
||||
// Tell hooks this is a daemon agent call, not interactive
|
||||
cmd.env("POC_AGENT", "1");
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let result = unsafe {
|
||||
cmd.pre_exec(|| {
|
||||
libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
|
||||
Ok(())
|
||||
})
|
||||
.output()
|
||||
};
|
||||
|
||||
fs::remove_file(&tmp).ok();
|
||||
|
||||
match result {
|
||||
Ok(output) => {
|
||||
let elapsed = start.elapsed().as_millis();
|
||||
if output.status.success() {
|
||||
let response = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
||||
log_usage(agent, model, prompt, &response, elapsed, true);
|
||||
Ok(response)
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
let preview: String = stderr.chars().take(500).collect();
|
||||
log_usage(agent, model, prompt, &preview, elapsed, false);
|
||||
Err(format!("claude exited {}: {}", output.status, preview.trim()))
|
||||
}
|
||||
}
|
||||
Err(e) => Err(format!("spawn claude: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Call Sonnet via claude CLI.
|
||||
pub(crate) fn call_sonnet(agent: &str, prompt: &str) -> Result<String, String> {
|
||||
call_model(agent, "sonnet", prompt)
|
||||
}
|
||||
|
||||
/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction).
|
||||
pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result<String, String> {
|
||||
call_model(agent, "haiku", prompt)
|
||||
}
|
||||
|
||||
/// Parse a JSON response, handling markdown fences.
|
||||
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
|
||||
let cleaned = response.trim();
|
||||
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
|
||||
let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
|
||||
let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
|
||||
let cleaned = cleaned.trim();
|
||||
|
||||
if let Ok(v) = serde_json::from_str(cleaned) {
|
||||
return Ok(v);
|
||||
}
|
||||
|
||||
// Try to find JSON object or array
|
||||
let re_obj = Regex::new(r"\{[\s\S]*\}").unwrap();
|
||||
let re_arr = Regex::new(r"\[[\s\S]*\]").unwrap();
|
||||
|
||||
if let Some(m) = re_obj.find(cleaned) {
|
||||
if let Ok(v) = serde_json::from_str(m.as_str()) {
|
||||
return Ok(v);
|
||||
}
|
||||
}
|
||||
if let Some(m) = re_arr.find(cleaned) {
|
||||
if let Ok(v) = serde_json::from_str(m.as_str()) {
|
||||
return Ok(v);
|
||||
}
|
||||
}
|
||||
|
||||
let preview: String = cleaned.chars().take(200).collect();
|
||||
Err(format!("no valid JSON in response: {preview}..."))
|
||||
}
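// Illustrative usage (hypothetical input; models frequently wrap JSON in a
// markdown fence, which the stripping above handles):
//
//     let raw = "```json\n{\"facts\": [\"a\", \"b\"]}\n```";
//     let v = parse_json_response(raw).unwrap();
//     assert_eq!(v["facts"][0], "a");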
|
||||
|
||||
/// Get all keys for prompt context.
|
||||
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
|
||||
let mut keys: Vec<String> = store.nodes.keys()
|
||||
.cloned()
|
||||
.collect();
|
||||
keys.sort();
|
||||
keys.truncate(200);
|
||||
keys
|
||||
}
|
||||
204
poc-memory/src/lookups.rs
Normal file
@@ -0,0 +1,204 @@
// Daily lookup counters — small fixed-size on-disk open-addressing hash table.
|
||||
//
|
||||
// Records which memory keys are retrieved each day. The knowledge loop
|
||||
// uses this to focus extraction on actively-used graph neighborhoods,
|
||||
// like hippocampal replay preferentially consolidating recent experience.
|
||||
//
|
||||
// Format: 16-byte header + 4096 entries of (u64 hash, u32 count).
|
||||
// Total: ~49KB per day. Fast path: hash key → probe → bump counter.
|
||||
// No store loading required.
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::util::memory_subdir;
|
||||
|
||||
const MAGIC: [u8; 4] = *b"LKUP";
|
||||
const VERSION: u32 = 1;
|
||||
const CAPACITY: u32 = 4096;
|
||||
const HEADER_SIZE: usize = 16;
|
||||
const ENTRY_SIZE: usize = 12; // u64 hash + u32 count
|
||||
const FILE_SIZE: usize = HEADER_SIZE + CAPACITY as usize * ENTRY_SIZE;
|
||||
|
||||
// FNV-1a hash — simple, fast, no dependencies
|
||||
fn fnv1a(key: &str) -> u64 {
|
||||
let mut h: u64 = 0xcbf29ce484222325;
|
||||
for b in key.as_bytes() {
|
||||
h ^= *b as u64;
|
||||
h = h.wrapping_mul(0x100000001b3);
|
||||
}
|
||||
h
|
||||
}
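// Illustrative check (standard FNV-1a property): fnv1a("") returns the offset
// basis 0xcbf29ce484222325, and equal keys always hash to the same value,
// which is all the table relies on — this is not a cryptographic hash, and
// collisions are simply absorbed by the linear probing below.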
|
||||
|
||||
fn daily_path(date: &str) -> Result<PathBuf, String> {
|
||||
let dir = memory_subdir("lookups")?;
|
||||
Ok(dir.join(format!("{}.bin", date)))
|
||||
}
|
||||
|
||||
fn today() -> String {
|
||||
chrono::Local::now().format("%Y-%m-%d").to_string()
|
||||
}
|
||||
|
||||
/// Read or create the daily file, returning its contents as a mutable Vec.
|
||||
fn load_file(date: &str) -> Result<Vec<u8>, String> {
|
||||
let path = daily_path(date)?;
|
||||
|
||||
if path.exists() {
|
||||
let data = fs::read(&path)
|
||||
.map_err(|e| format!("read {}: {}", path.display(), e))?;
|
||||
if data.len() == FILE_SIZE && data[0..4] == MAGIC {
|
||||
return Ok(data);
|
||||
}
|
||||
// Corrupt or wrong size — reinitialize
|
||||
}
|
||||
|
||||
// Create fresh file
|
||||
let mut buf = vec![0u8; FILE_SIZE];
|
||||
buf[0..4].copy_from_slice(&MAGIC);
|
||||
buf[4..8].copy_from_slice(&VERSION.to_le_bytes());
|
||||
buf[8..12].copy_from_slice(&CAPACITY.to_le_bytes());
|
||||
// count = 0 (already zero)
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
fn write_file(date: &str, data: &[u8]) -> Result<(), String> {
|
||||
let path = daily_path(date)?;
|
||||
fs::write(&path, data)
|
||||
.map_err(|e| format!("write {}: {}", path.display(), e))
|
||||
}
|
||||
|
||||
fn entry_offset(slot: usize) -> usize {
|
||||
HEADER_SIZE + slot * ENTRY_SIZE
|
||||
}
|
||||
|
||||
fn read_entry(data: &[u8], slot: usize) -> (u64, u32) {
|
||||
let off = entry_offset(slot);
|
||||
let hash = u64::from_le_bytes(data[off..off + 8].try_into().unwrap());
|
||||
let count = u32::from_le_bytes(data[off + 8..off + 12].try_into().unwrap());
|
||||
(hash, count)
|
||||
}
|
||||
|
||||
fn write_entry(data: &mut [u8], slot: usize, hash: u64, count: u32) {
|
||||
let off = entry_offset(slot);
|
||||
data[off..off + 8].copy_from_slice(&hash.to_le_bytes());
|
||||
data[off + 8..off + 12].copy_from_slice(&count.to_le_bytes());
|
||||
}
|
||||
|
||||
fn read_count(data: &[u8]) -> u32 {
|
||||
u32::from_le_bytes(data[12..16].try_into().unwrap())
|
||||
}
|
||||
|
||||
fn write_count(data: &mut [u8], count: u32) {
|
||||
data[12..16].copy_from_slice(&count.to_le_bytes());
|
||||
}
|
||||
|
||||
/// Bump the counter for a key. Fast path — no store needed.
|
||||
pub fn bump(key: &str) -> Result<(), String> {
|
||||
let date = today();
|
||||
let mut data = load_file(&date)?;
|
||||
let hash = fnv1a(key);
|
||||
let cap = CAPACITY as usize;
|
||||
|
||||
let mut slot = (hash as usize) % cap;
|
||||
for _ in 0..cap {
|
||||
let (h, c) = read_entry(&data, slot);
|
||||
if h == hash {
|
||||
// Existing entry — bump
|
||||
write_entry(&mut data, slot, hash, c + 1);
|
||||
write_file(&date, &data)?;
|
||||
return Ok(());
|
||||
}
|
||||
if h == 0 && c == 0 {
|
||||
// Empty slot — new entry
|
||||
write_entry(&mut data, slot, hash, 1);
|
||||
let c = read_count(&data);
|
||||
write_count(&mut data, c + 1);
|
||||
write_file(&date, &data)?;
|
||||
return Ok(());
|
||||
}
|
||||
slot = (slot + 1) % cap;
|
||||
}
|
||||
|
||||
// Table full (shouldn't happen with 4096 slots)
|
||||
Err("lookup table full".into())
|
||||
}
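// Illustrative usage sketch (hypothetical hook-side call; the key name is
// made up): record that a key was retrieved today without loading the store.
//
//     poc_memory::lookups::bump("bcachefs-rebalance-design").ok();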
|
||||
|
||||
/// Bump counters for multiple keys at once (single file read/write).
|
||||
pub fn bump_many(keys: &[&str]) -> Result<(), String> {
|
||||
if keys.is_empty() { return Ok(()); }
|
||||
|
||||
let date = today();
|
||||
let mut data = load_file(&date)?;
|
||||
let cap = CAPACITY as usize;
|
||||
let mut used = read_count(&data);
|
||||
|
||||
for key in keys {
|
||||
let hash = fnv1a(key);
|
||||
let mut slot = (hash as usize) % cap;
|
||||
let mut found = false;
|
||||
|
||||
for _ in 0..cap {
|
||||
let (h, c) = read_entry(&data, slot);
|
||||
if h == hash {
|
||||
write_entry(&mut data, slot, hash, c + 1);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
if h == 0 && c == 0 {
|
||||
write_entry(&mut data, slot, hash, 1);
|
||||
used += 1;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
slot = (slot + 1) % cap;
|
||||
}
|
||||
if !found {
|
||||
// Table full — stop, don't lose what we have
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
write_count(&mut data, used);
|
||||
write_file(&date, &data)
|
||||
}
|
||||
|
||||
/// Dump all lookups for a date as (hash, count) pairs, sorted by count descending.
|
||||
pub fn dump_raw(date: &str) -> Result<Vec<(u64, u32)>, String> {
|
||||
let data = load_file(date)?;
|
||||
let mut entries = Vec::new();
|
||||
|
||||
for slot in 0..CAPACITY as usize {
|
||||
let (hash, count) = read_entry(&data, slot);
|
||||
if hash != 0 || count != 0 {
|
||||
entries.push((hash, count));
|
||||
}
|
||||
}
|
||||
|
||||
entries.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
/// Resolve hashes to keys by scanning the store. Returns (key, count) pairs.
|
||||
pub fn dump_resolved(date: &str, keys: &[String]) -> Result<Vec<(String, u32)>, String> {
|
||||
let raw = dump_raw(date)?;
|
||||
|
||||
// Build hash → key map from known keys
|
||||
let hash_map: std::collections::HashMap<u64, &str> = keys.iter()
|
||||
.map(|k| (fnv1a(k), k.as_str()))
|
||||
.collect();
|
||||
|
||||
let mut resolved = Vec::new();
|
||||
for (hash, count) in raw {
|
||||
let key = hash_map.get(&hash)
|
||||
.map(|k| k.to_string())
|
||||
.unwrap_or_else(|| format!("#{:016x}", hash));
|
||||
resolved.push((key, count));
|
||||
}
|
||||
|
||||
Ok(resolved)
|
||||
}
|
||||
|
||||
/// Hash a key (exposed for testing/external use).
|
||||
pub fn hash_key(key: &str) -> u64 {
|
||||
fnv1a(key)
|
||||
}
|
||||
2140
poc-memory/src/main.rs
Normal file
File diff suppressed because it is too large
367
poc-memory/src/migrate.rs
Normal file
@@ -0,0 +1,367 @@
// Migration from old weights.json + markdown marker system
|
||||
//
|
||||
// Reads:
|
||||
// ~/.claude/memory/weights.json (1,874 entries with metrics)
|
||||
// ~/.claude/memory/*.md (content + mem markers + edges)
|
||||
//
|
||||
// Emits:
|
||||
// ~/.claude/memory/nodes.capnp (all nodes with preserved metadata)
|
||||
// ~/.claude/memory/relations.capnp (all edges from markers + md links)
|
||||
// ~/.claude/memory/state.json (derived cache)
|
||||
//
|
||||
// Old files are preserved as backup. Run once.
|
||||
|
||||
use crate::store::{
|
||||
self, Store, Node, NodeType, Provenance, RelationType,
|
||||
parse_units, new_relation,
|
||||
};
|
||||
|
||||
use serde::Deserialize;
|
||||
use uuid::Uuid;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
fn home() -> PathBuf {
|
||||
PathBuf::from(env::var("HOME").expect("HOME not set"))
|
||||
}
|
||||
|
||||
// Old system data structures (just enough for deserialization)
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OldStore {
|
||||
#[serde(default)]
|
||||
entries: HashMap<String, OldEntry>,
|
||||
#[serde(default)]
|
||||
retrieval_log: Vec<OldRetrievalEvent>,
|
||||
#[serde(default)]
|
||||
params: OldParams,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OldEntry {
|
||||
weight: f64,
|
||||
created: String,
|
||||
#[serde(default)]
|
||||
last_retrieved: Option<String>,
|
||||
#[serde(default)]
|
||||
last_used: Option<String>,
|
||||
#[serde(default)]
|
||||
retrievals: u32,
|
||||
#[serde(default)]
|
||||
uses: u32,
|
||||
#[serde(default)]
|
||||
wrongs: u32,
|
||||
#[serde(default = "default_category")]
|
||||
category: String,
|
||||
}
|
||||
|
||||
fn default_category() -> String { "General".to_string() }
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OldRetrievalEvent {
|
||||
query: String,
|
||||
timestamp: String,
|
||||
results: Vec<String>,
|
||||
#[serde(default)]
|
||||
used: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OldParams {
|
||||
#[serde(default = "default_0_7")]
|
||||
default_weight: f64,
|
||||
#[serde(default = "default_0_95")]
|
||||
decay_factor: f64,
|
||||
#[serde(default = "default_0_15")]
|
||||
use_boost: f64,
|
||||
#[serde(default = "default_0_1")]
|
||||
prune_threshold: f64,
|
||||
#[serde(default = "default_0_3")]
|
||||
edge_decay: f64,
|
||||
#[serde(default = "default_3")]
|
||||
max_hops: u32,
|
||||
#[serde(default = "default_0_05")]
|
||||
min_activation: f64,
|
||||
}
|
||||
|
||||
impl Default for OldParams {
|
||||
fn default() -> Self {
|
||||
OldParams {
|
||||
default_weight: 0.7,
|
||||
decay_factor: 0.95,
|
||||
use_boost: 0.15,
|
||||
prune_threshold: 0.1,
|
||||
edge_decay: 0.3,
|
||||
max_hops: 3,
|
||||
min_activation: 0.05,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn default_0_7() -> f64 { 0.7 }
|
||||
fn default_0_95() -> f64 { 0.95 }
|
||||
fn default_0_15() -> f64 { 0.15 }
|
||||
fn default_0_1() -> f64 { 0.1 }
|
||||
fn default_0_3() -> f64 { 0.3 }
|
||||
fn default_3() -> u32 { 3 }
|
||||
fn default_0_05() -> f64 { 0.05 }
|
||||
|
||||
pub fn migrate() -> Result<(), String> {
    let weights_path = home().join(".claude/memory/weights.json");
    let memory_dir = home().join(".claude/memory");
    let nodes_path = memory_dir.join("nodes.capnp");
    let rels_path = memory_dir.join("relations.capnp");

    // Safety check
    if nodes_path.exists() || rels_path.exists() {
        return Err("nodes.capnp or relations.capnp already exist. \
                    Remove them first if you want to re-migrate.".into());
    }

    // Load old store
    let old_store: OldStore = if weights_path.exists() {
        let data = fs::read_to_string(&weights_path)
            .map_err(|e| format!("read weights.json: {}", e))?;
        serde_json::from_str(&data)
            .map_err(|e| format!("parse weights.json: {}", e))?
    } else {
        eprintln!("Warning: no weights.json found, migrating markdown only");
        OldStore {
            entries: HashMap::new(),
            retrieval_log: Vec::new(),
            params: OldParams::default(),
        }
    };

    eprintln!("Old store: {} entries, {} retrieval events",
        old_store.entries.len(), old_store.retrieval_log.len());

    // Scan markdown files to get content + edges
    let mut units_by_key: HashMap<String, store::MemoryUnit> = HashMap::new();
    scan_markdown_dir(&memory_dir, &mut units_by_key)?;

    eprintln!("Scanned {} markdown units", units_by_key.len());

    // Create new store
    let mut store = Store::default();

    // Migrate params
    store.params.default_weight = old_store.params.default_weight;
    store.params.decay_factor = old_store.params.decay_factor;
    store.params.use_boost = old_store.params.use_boost;
    store.params.prune_threshold = old_store.params.prune_threshold;
    store.params.edge_decay = old_store.params.edge_decay;
    store.params.max_hops = old_store.params.max_hops;
    store.params.min_activation = old_store.params.min_activation;

    // Migrate retrieval log
    store.retrieval_log = old_store.retrieval_log.iter().map(|e| {
        store::RetrievalEvent {
            query: e.query.clone(),
            timestamp: e.timestamp.clone(),
            results: e.results.clone(),
            used: e.used.clone(),
        }
    }).collect();

    // Phase 1: Create nodes
    // Merge old entries (weight metadata) with markdown units (content)
    let mut all_nodes: Vec<Node> = Vec::new();
    let mut key_to_uuid: HashMap<String, [u8; 16]> = HashMap::new();

    // First, all entries from the old store
    for (key, old_entry) in &old_store.entries {
        let uuid = *Uuid::new_v4().as_bytes();
        key_to_uuid.insert(key.clone(), uuid);

        let content = units_by_key.get(key)
            .map(|u| u.content.clone())
            .unwrap_or_default();

        let state_tag = units_by_key.get(key)
            .and_then(|u| u.state.clone())
            .unwrap_or_default();

        let node = Node {
            uuid,
            version: 1,
            timestamp: store::now_epoch(),
            node_type: if key.contains("journal") {
                NodeType::EpisodicSession
            } else {
                NodeType::Semantic
            },
            provenance: Provenance::Manual,
            key: key.clone(),
            content,
            weight: old_entry.weight as f32,
            emotion: 0.0,
            deleted: false,
            source_ref: String::new(),
            created: old_entry.created.clone(),
            retrievals: old_entry.retrievals,
            uses: old_entry.uses,
            wrongs: old_entry.wrongs,
            state_tag,
            last_replayed: 0,
            spaced_repetition_interval: 1,
            position: 0,
            created_at: 0,
            community_id: None,
            clustering_coefficient: None,
            degree: None,
        };
        all_nodes.push(node);
    }

    // Then, any markdown units not in the old store
    for (key, unit) in &units_by_key {
        if key_to_uuid.contains_key(key) { continue; }

        let uuid = *Uuid::new_v4().as_bytes();
        key_to_uuid.insert(key.clone(), uuid);

        let node = Node {
            uuid,
            version: 1,
            timestamp: store::now_epoch(),
            node_type: if key.contains("journal") {
                NodeType::EpisodicSession
            } else {
                NodeType::Semantic
            },
            provenance: Provenance::Manual,
            key: key.clone(),
            content: unit.content.clone(),
            weight: 0.7,
            emotion: 0.0,
            deleted: false,
            source_ref: String::new(),
            created: String::new(),
            retrievals: 0,
            uses: 0,
            wrongs: 0,
            state_tag: unit.state.clone().unwrap_or_default(),
            last_replayed: 0,
            spaced_repetition_interval: 1,
            position: 0,
            created_at: 0,
            community_id: None,
            clustering_coefficient: None,
            degree: None,
        };
        all_nodes.push(node);
    }

    // Write nodes to capnp log
    store.append_nodes(&all_nodes)?;
    for node in &all_nodes {
        store.uuid_to_key.insert(node.uuid, node.key.clone());
        store.nodes.insert(node.key.clone(), node.clone());
    }

    eprintln!("Migrated {} nodes", all_nodes.len());

    // Phase 2: Create relations from markdown links + causal edges
    let mut all_relations = Vec::new();

    for (key, unit) in &units_by_key {
        let source_uuid = match key_to_uuid.get(key) {
            Some(u) => *u,
            None => continue,
        };

        // Association links (bidirectional)
        for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
            let target_uuid = match key_to_uuid.get(link) {
                Some(u) => *u,
                None => continue,
            };

            // Avoid duplicate relations
            let exists = all_relations.iter().any(|r: &store::Relation|
                (r.source == source_uuid && r.target == target_uuid) ||
                (r.source == target_uuid && r.target == source_uuid));
            if exists { continue; }

            all_relations.push(new_relation(
                source_uuid, target_uuid,
                RelationType::Link, 1.0,
                key, link,
            ));
        }

        // Causal edges (directed)
        for cause in &unit.causes {
            let cause_uuid = match key_to_uuid.get(cause) {
                Some(u) => *u,
                None => continue,
            };

            all_relations.push(new_relation(
                cause_uuid, source_uuid,
                RelationType::Causal, 1.0,
                cause, key,
            ));
        }
    }

    // Write relations to capnp log
    store.append_relations(&all_relations)?;
    store.relations = all_relations;

    eprintln!("Migrated {} relations", store.relations.len());

    // Phase 3: Compute graph metrics
    store.update_graph_metrics();

    // Save derived cache
    store.save()?;

    eprintln!("Migration complete. Files:");
    eprintln!(" {}", nodes_path.display());
    eprintln!(" {}", rels_path.display());
    eprintln!(" {}", memory_dir.join("state.json").display());

    // Verify
    let g = store.build_graph();
    eprintln!("\nVerification:");
    eprintln!(" Nodes: {}", store.nodes.len());
    eprintln!(" Relations: {}", store.relations.len());
    eprintln!(" Graph edges: {}", g.edge_count());
    eprintln!(" Communities: {}", g.community_count());
    eprintln!(" Avg CC: {:.4}", g.avg_clustering_coefficient());

    Ok(())
}
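// Aside (sketch, not wired in above): the duplicate check in the link loop
// rescans all_relations for every link, which is quadratic in the number of
// relations. If migration ever gets slow, an unordered-pair set gives the
// same dedup in O(1) per link. Hypothetical helper, shown for illustration:
#[allow(dead_code)]
fn already_linked(
    seen: &mut std::collections::HashSet<([u8; 16], [u8; 16])>,
    a: [u8; 16],
    b: [u8; 16],
) -> bool {
    // Normalize direction so (a, b) and (b, a) count as the same edge.
    let key = if a <= b { (a, b) } else { (b, a) };
    !seen.insert(key) // insert() returns false when the pair was already present
}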
fn scan_markdown_dir(
    dir: &Path,
    units: &mut HashMap<String, store::MemoryUnit>,
) -> Result<(), String> {
    let entries = fs::read_dir(dir)
        .map_err(|e| format!("read dir {}: {}", dir.display(), e))?;

    for entry in entries.flatten() {
        let path = entry.path();
        if path.is_dir() {
            scan_markdown_dir(&path, units)?;
            continue;
        }
        let Some(ext) = path.extension() else { continue };
        if ext != "md" { continue }

        let filename = path.file_name().unwrap().to_string_lossy().to_string();
        let content = match fs::read_to_string(&path) {
            Ok(c) => c,
            Err(_) => continue,
        };

        for unit in parse_units(&filename, &content) {
            units.insert(unit.key.clone(), unit);
        }
    }
    Ok(())
}
29
poc-memory/src/neuro/mod.rs
Normal file
@@ -0,0 +1,29 @@
// Neuroscience-inspired memory algorithms, split by concern:
//
// scoring — pure analysis: priority, replay queues, interference, plans
// prompts — agent prompt generation and formatting
// rewrite — graph topology mutations: differentiation, closure, linking

mod scoring;
mod prompts;
mod rewrite;

// Re-export public API so `neuro::` paths continue to work.

pub use scoring::{
    replay_queue, detect_interference,
    consolidation_plan, format_plan,
    daily_check,
};

pub use prompts::{
    load_prompt,
    consolidation_batch, agent_prompt,
};

pub use rewrite::{
    refine_target, LinkMove,
    differentiate_hub,
    apply_differentiation, find_differentiable_hubs,
    triangle_close, link_orphans,
};
390
poc-memory/src/neuro/prompts.rs
Normal file
@@ -0,0 +1,390 @@
// Agent prompt generation and formatting. Presentation logic —
// builds text prompts from store data for consolidation agents.

use crate::store::Store;
use crate::graph::Graph;
use crate::similarity;
use crate::spectral;

use super::scoring::{
    ReplayItem, consolidation_priority,
    replay_queue, replay_queue_with_graph, detect_interference,
};

/// Prompt template directory
pub fn prompts_dir() -> std::path::PathBuf {
    let home = std::env::var("HOME").unwrap_or_default();
    std::path::PathBuf::from(home).join("poc/memory/prompts")
}

/// Load a prompt template, replacing {{PLACEHOLDER}} with data
pub fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
    let path = prompts_dir().join(format!("{}.md", name));
    let mut content = std::fs::read_to_string(&path)
        .map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
    for (placeholder, data) in replacements {
        content = content.replace(placeholder, data);
    }
    Ok(content)
}
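// Usage sketch (template content assumed; the call shape matches load_prompt
// above): given a file ~/poc/memory/prompts/replay.md containing the literal
// markers {{TOPOLOGY}} and {{NODES}}, a caller fills both slots like this:
//
//     let prompt = load_prompt("replay", &[
//         ("{{TOPOLOGY}}", topology.as_str()),
//         ("{{NODES}}", nodes_section.as_str()),
//     ])?;
//
// Replacement is plain string substitution: a marker missing from the
// template is ignored, and one left out of `replacements` passes through
// verbatim into the generated prompt.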
/// Format topology header for agent prompts — current graph health metrics
fn format_topology_header(graph: &Graph) -> String {
    let sigma = graph.small_world_sigma();
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let avg_cc = graph.avg_clustering_coefficient();
    let n = graph.nodes().len();
    let e = graph.edge_count();

    // Identify saturated hubs — nodes with degree well above threshold
    let threshold = graph.hub_threshold();
    let mut hubs: Vec<_> = graph.nodes().iter()
        .map(|k| (k.clone(), graph.degree(k)))
        .filter(|(_, d)| *d >= threshold)
        .collect();
    hubs.sort_by(|a, b| b.1.cmp(&a.1));
    hubs.truncate(15);

    let hub_list = if hubs.is_empty() {
        String::new()
    } else {
        let lines: Vec<String> = hubs.iter()
            .map(|(k, d)| format!(" - {} (degree {})", k, d))
            .collect();
        format!(
            "### SATURATED HUBS — DO NOT LINK TO THESE\n\
             The following nodes are already over-connected. Adding more links\n\
             to them makes the graph worse (star topology). Find lateral\n\
             connections between peripheral nodes instead.\n\n{}\n\n\
             Only link to a hub if it is genuinely the ONLY reasonable target.\n\n",
            lines.join("\n"))
    };

    format!(
        "## Current graph topology\n\
         Nodes: {} Edges: {} Communities: {}\n\
         Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
         Avg clustering coefficient: {:.4}\n\n\
         {}\
         Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
         Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
        n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
}

/// Format node data section for prompt templates
fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
    let hub_thresh = graph.hub_threshold();
    let mut out = String::new();
    for item in items {
        let node = match store.nodes.get(&item.key) {
            Some(n) => n,
            None => continue,
        };

        out.push_str(&format!("## {} \n", item.key));
        out.push_str(&format!("Priority: {:.3} CC: {:.3} Emotion: {:.1} ",
            item.priority, item.cc, item.emotion));
        out.push_str(&format!("Interval: {}d\n",
            node.spaced_repetition_interval));
        if item.outlier_score > 0.0 {
            out.push_str(&format!("Spectral: {} (outlier={:.1})\n",
                item.classification, item.outlier_score));
        }

        if let Some(community) = node.community_id {
            out.push_str(&format!("Community: {} ", community));
        }
        let deg = graph.degree(&item.key);
        let cc = graph.clustering_coefficient(&item.key);

        // Hub-link ratio: what fraction of this node's edges go to hubs?
        let neighbors = graph.neighbors(&item.key);
        let hub_links = neighbors.iter()
            .filter(|(n, _)| graph.degree(n) >= hub_thresh)
            .count();
        let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
        let is_hub = deg >= hub_thresh;

        out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
            deg, cc, hub_ratio * 100.0, hub_links, deg));
        if is_hub {
            out.push_str(" ← THIS IS A HUB");
        } else if hub_ratio > 0.6 {
            out.push_str(" ← mostly hub-connected, needs lateral links");
        }
        out.push('\n');

        // Content (truncated for large nodes)
        let content = &node.content;
        if content.len() > 1500 {
            let end = content.floor_char_boundary(1500);
            out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n[...]\n\n",
                content.len(), &content[..end]));
        } else {
            out.push_str(&format!("\nContent:\n{}\n\n", content));
        }

        // Neighbors
        let neighbors = graph.neighbors(&item.key);
        if !neighbors.is_empty() {
            out.push_str("Neighbors:\n");
            for (n, strength) in neighbors.iter().take(15) {
                let n_cc = graph.clustering_coefficient(n);
                let n_community = store.nodes.get(n.as_str())
                    .and_then(|n| n.community_id);
                out.push_str(&format!(" - {} (str={:.2}, cc={:.3}",
                    n, strength, n_cc));
                if let Some(c) = n_community {
                    out.push_str(&format!(", c{}", c));
                }
                out.push_str(")\n");
            }
        }

        // Suggested link targets: text-similar semantic nodes not already neighbors
        let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
            .map(|(k, _)| k.as_str()).collect();
        let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
            .filter(|(k, _)| {
                *k != &item.key
                    && !neighbor_keys.contains(k.as_str())
            })
            .map(|(k, n)| {
                let sim = similarity::cosine_similarity(content, &n.content);
                (k.as_str(), sim)
            })
            .filter(|(_, sim)| *sim > 0.1)
            .collect();
        candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
        candidates.truncate(8);

        if !candidates.is_empty() {
            out.push_str("\nSuggested link targets (by text similarity, not yet linked):\n");
            for (k, sim) in &candidates {
                let is_hub = graph.degree(k) >= hub_thresh;
                out.push_str(&format!(" - {} (sim={:.3}{})\n",
                    k, sim, if is_hub { ", HUB" } else { "" }));
            }
        }

        out.push_str("\n---\n\n");
    }
    out
}

/// Format health data for the health agent prompt
fn format_health_section(store: &Store, graph: &Graph) -> String {
    use crate::graph;

    let health = graph::health_report(graph, store);

    let mut out = health;
    out.push_str("\n\n## Weight distribution\n");

    // Weight histogram
    let mut buckets = [0u32; 10]; // 0.0-0.1, 0.1-0.2, ..., 0.9-1.0
    for node in store.nodes.values() {
        let bucket = ((node.weight * 10.0) as usize).min(9);
        buckets[bucket] += 1;
    }
    for (i, &count) in buckets.iter().enumerate() {
        let lo = i as f32 / 10.0;
        let hi = (i + 1) as f32 / 10.0;
        let bar = "█".repeat((count as usize) / 10);
        out.push_str(&format!(" {:.1}-{:.1}: {:4} {}\n", lo, hi, count, bar));
    }

    // Near-prune nodes
    let near_prune: Vec<_> = store.nodes.iter()
        .filter(|(_, n)| n.weight < 0.15)
        .map(|(k, n)| (k.clone(), n.weight))
        .collect();
    if !near_prune.is_empty() {
        out.push_str(&format!("\n## Near-prune nodes ({} total)\n", near_prune.len()));
        for (k, w) in near_prune.iter().take(20) {
            out.push_str(&format!(" [{:.3}] {}\n", w, k));
        }
    }

    // Community sizes
    let communities = graph.communities();
    let mut comm_sizes: std::collections::HashMap<u32, Vec<String>> = std::collections::HashMap::new();
    for (key, &label) in communities {
        comm_sizes.entry(label).or_default().push(key.clone());
    }
    let mut sizes: Vec<_> = comm_sizes.iter()
        .map(|(id, members)| (*id, members.len(), members.clone()))
        .collect();
    sizes.sort_by(|a, b| b.1.cmp(&a.1));

    out.push_str("\n## Largest communities\n");
    for (id, size, members) in sizes.iter().take(10) {
        out.push_str(&format!(" Community {} ({} nodes): ", id, size));
        let sample: Vec<_> = members.iter().take(5).map(|s| s.as_str()).collect();
        out.push_str(&sample.join(", "));
        if *size > 5 { out.push_str(", ..."); }
        out.push('\n');
    }

    out
}
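// Sanity check for the histogram bucketing above, restated as a closure so
// the edge cases are explicit: weight 1.0 must clamp into bucket 9 rather
// than index out of bounds.
#[cfg(test)]
mod weight_bucket_tests {
    #[test]
    fn weights_map_into_ten_buckets() {
        let bucket = |w: f32| ((w * 10.0) as usize).min(9);
        assert_eq!(bucket(0.0), 0);
        assert_eq!(bucket(0.09), 0);
        assert_eq!(bucket(0.15), 1);
        assert_eq!(bucket(0.95), 9);
        assert_eq!(bucket(1.0), 9); // clamp: would otherwise be bucket 10
    }
}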
/// Format interference pairs for the separator agent prompt
fn format_pairs_section(
    pairs: &[(String, String, f32)],
    store: &Store,
    graph: &Graph,
) -> String {
    let mut out = String::new();
    let communities = graph.communities();

    for (a, b, sim) in pairs {
        out.push_str(&format!("## Pair: similarity={:.3}\n", sim));

        let ca = communities.get(a).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
        let cb = communities.get(b).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());

        // Node A
        out.push_str(&format!("\n### {} ({})\n", a, ca));
        if let Some(node) = store.nodes.get(a) {
            let content = if node.content.len() > 500 {
                let end = node.content.floor_char_boundary(500);
                format!("{}...", &node.content[..end])
            } else {
                node.content.clone()
            };
            out.push_str(&format!("Weight: {:.2}\n{}\n",
                node.weight, content));
        }

        // Node B
        out.push_str(&format!("\n### {} ({})\n", b, cb));
        if let Some(node) = store.nodes.get(b) {
            let content = if node.content.len() > 500 {
                let end = node.content.floor_char_boundary(500);
                format!("{}...", &node.content[..end])
            } else {
                node.content.clone()
            };
            out.push_str(&format!("Weight: {:.2}\n{}\n",
                node.weight, content));
        }

        out.push_str("\n---\n\n");
    }
    out
}

/// Run agent consolidation on top-priority nodes
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
    let graph = store.build_graph();
    let items = replay_queue(store, count);

    if items.is_empty() {
        println!("No nodes to consolidate.");
        return Ok(());
    }

    let nodes_section = format_nodes_section(store, &items, &graph);

    if auto {
        let prompt = load_prompt("replay", &[("{{NODES}}", &nodes_section)])?;
        println!("{}", prompt);
    } else {
        // Interactive: show what needs attention and available agent types
        println!("Consolidation batch ({} nodes):\n", items.len());
        for item in &items {
            let node_type = store.nodes.get(&item.key)
                .map(|n| if matches!(n.node_type, crate::store::NodeType::EpisodicSession) { "episodic" } else { "semantic" })
                .unwrap_or("?");
            println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
                item.priority, item.key, item.cc, item.interval_days, node_type);
        }

        // Also show interference pairs
        let pairs = detect_interference(store, &graph, 0.6);
        if !pairs.is_empty() {
            println!("\nInterfering pairs ({}):", pairs.len());
            for (a, b, sim) in pairs.iter().take(5) {
                println!(" [{:.3}] {} ↔ {}", sim, a, b);
            }
        }

        println!("\nAgent prompts:");
        println!(" --auto Generate replay agent prompt");
        println!(" --agent replay Replay agent (schema assimilation)");
        println!(" --agent linker Linker agent (relational binding)");
        println!(" --agent separator Separator agent (pattern separation)");
        println!(" --agent transfer Transfer agent (CLS episodic→semantic)");
        println!(" --agent health Health agent (synaptic homeostasis)");
    }

    Ok(())
}

/// Generate a specific agent prompt with filled-in data
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String, String> {
    let graph = store.build_graph();
    let topology = format_topology_header(&graph);

    let emb = spectral::load_embedding().ok();

    match agent {
        "replay" => {
            let items = replay_queue_with_graph(store, count, &graph, emb.as_ref());
            let nodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
        }
        "linker" => {
            // Filter to episodic entries
            let mut items = replay_queue_with_graph(store, count * 2, &graph, emb.as_ref());
            items.retain(|item| {
                store.nodes.get(&item.key)
                    .map(|n| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
                    .unwrap_or(false)
            });
            items.truncate(count);
            let nodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("linker", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
        }
        "separator" => {
            let mut pairs = detect_interference(store, &graph, 0.5);
            pairs.truncate(count);
            let pairs_section = format_pairs_section(&pairs, store, &graph);
            load_prompt("separator", &[("{{TOPOLOGY}}", &topology), ("{{PAIRS}}", &pairs_section)])
        }
        "transfer" => {
            // Recent episodic entries
            let mut episodes: Vec<_> = store.nodes.iter()
                .filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
                .map(|(k, n)| (k.clone(), n.timestamp))
                .collect();
            episodes.sort_by(|a, b| b.1.cmp(&a.1));
            episodes.truncate(count);

            let episode_keys: Vec<_> = episodes.iter().map(|(k, _)| k.clone()).collect();
            let items: Vec<ReplayItem> = episode_keys.iter()
                .filter_map(|k| {
                    let node = store.nodes.get(k)?;
                    Some(ReplayItem {
                        key: k.clone(),
                        priority: consolidation_priority(store, k, &graph, None),
                        interval_days: node.spaced_repetition_interval,
                        emotion: node.emotion,
                        cc: graph.clustering_coefficient(k),
                        classification: "unknown",
                        outlier_score: 0.0,
                    })
                })
                .collect();
            let episodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("transfer", &[("{{TOPOLOGY}}", &topology), ("{{EPISODES}}", &episodes_section)])
        }
        "health" => {
            let health_section = format_health_section(store, &graph);
            load_prompt("health", &[("{{TOPOLOGY}}", &topology), ("{{HEALTH}}", &health_section)])
        }
        _ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health", agent)),
    }
}
363
poc-memory/src/neuro/rewrite.rs
Normal file
@@ -0,0 +1,363 @@
// Graph topology mutations: hub differentiation, triangle closure,
// orphan linking, and link refinement. These modify the store.

use crate::store::{Store, new_relation};
use crate::graph::Graph;
use crate::similarity;

/// Refine a link target: if the target is a file-level node with section
/// children, find the best-matching section by cosine similarity against
/// the source content. Returns the original key if no sections exist or
/// no section matches above threshold.
///
/// This prevents hub formation at link creation time — every new link
/// targets the most specific available node.
pub fn refine_target(store: &Store, source_content: &str, target_key: &str) -> String {
    // Only refine file-level nodes (no # in key)
    if target_key.contains('#') { return target_key.to_string(); }

    let prefix = format!("{}#", target_key);
    let sections: Vec<(&str, &str)> = store.nodes.iter()
        .filter(|(k, _)| k.starts_with(&prefix))
        .map(|(k, n)| (k.as_str(), n.content.as_str()))
        .collect();

    if sections.is_empty() { return target_key.to_string(); }

    let mut best_section = "";
    let mut best_sim = 0.0f32;

    for (section_key, section_content) in &sections {
        let sim = similarity::cosine_similarity(source_content, section_content);
        if sim > best_sim {
            best_sim = sim;
            best_section = section_key;
        }
    }

    // Threshold: only refine if there's a meaningful match
    if best_sim > 0.05 && !best_section.is_empty() {
        best_section.to_string()
    } else {
        target_key.to_string()
    }
}
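// Usage sketch (keys are hypothetical; the call shape matches refine_target):
//
//     // A new memory about error handling wants to link to the whole
//     // "rust/errors" file node.
//     let target = refine_target(&store, "notes on Result vs panic ...", "rust/errors");
//     // → "rust/errors#result-type" when that section is the closest match,
//     //   or "rust/errors" unchanged if no section clears the 0.05 threshold
//     //   or the file has no section children at all.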
/// A proposed link move: from hub→neighbor to section→neighbor
pub struct LinkMove {
    pub neighbor_key: String,
    pub from_hub: String,
    pub to_section: String,
    pub similarity: f32,
    pub neighbor_snippet: String,
}

/// Analyze a hub node and propose redistributing its links to child sections.
///
/// Returns None if the node isn't a hub or has no sections to redistribute to.
pub fn differentiate_hub(store: &Store, hub_key: &str) -> Option<Vec<LinkMove>> {
    let graph = store.build_graph();
    differentiate_hub_with_graph(store, hub_key, &graph)
}

/// Like differentiate_hub but uses a pre-built graph.
pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) -> Option<Vec<LinkMove>> {
    let degree = graph.degree(hub_key);

    // Only differentiate actual hubs
    if degree < 20 { return None; }

    // Only works on file-level nodes that have section children
    if hub_key.contains('#') { return None; }

    let prefix = format!("{}#", hub_key);
    let sections: Vec<(&str, &str)> = store.nodes.iter()
        .filter(|(k, _)| k.starts_with(&prefix))
        .map(|(k, n)| (k.as_str(), n.content.as_str()))
        .collect();

    if sections.is_empty() { return None; }

    // Get all neighbors of the hub
    let neighbors = graph.neighbors(hub_key);

    let mut moves = Vec::new();

    for (neighbor_key, _strength) in &neighbors {
        // Skip section children — they should stay linked to parent
        if neighbor_key.starts_with(&prefix) { continue; }

        let neighbor_content = match store.nodes.get(neighbor_key.as_str()) {
            Some(n) => &n.content,
            None => continue,
        };

        // Find best-matching section by content similarity
        let mut best_section = "";
        let mut best_sim = 0.0f32;

        for (section_key, section_content) in &sections {
            let sim = similarity::cosine_similarity(neighbor_content, section_content);
            if sim > best_sim {
                best_sim = sim;
                best_section = section_key;
            }
        }

        // Only propose move if there's a reasonable match
        if best_sim > 0.05 && !best_section.is_empty() {
            let snippet = neighbor_content.lines()
                .find(|l| !l.is_empty() && !l.starts_with("<!--") && !l.starts_with("##"))
                .unwrap_or("")
                .chars().take(80).collect::<String>();

            moves.push(LinkMove {
                neighbor_key: neighbor_key.to_string(),
                from_hub: hub_key.to_string(),
                to_section: best_section.to_string(),
                similarity: best_sim,
                neighbor_snippet: snippet,
            });
        }
    }

    moves.sort_by(|a, b| b.similarity.total_cmp(&a.similarity));
    Some(moves)
}

/// Apply link moves: soft-delete hub→neighbor, create section→neighbor.
pub fn apply_differentiation(
    store: &mut Store,
    moves: &[LinkMove],
) -> (usize, usize) {
    let mut applied = 0usize;
    let mut skipped = 0usize;

    for mv in moves {
        // Check that section→neighbor doesn't already exist
        let exists = store.relations.iter().any(|r|
            ((r.source_key == mv.to_section && r.target_key == mv.neighbor_key)
                || (r.source_key == mv.neighbor_key && r.target_key == mv.to_section))
            && !r.deleted
        );
        if exists { skipped += 1; continue; }

        let section_uuid = match store.nodes.get(&mv.to_section) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };
        let neighbor_uuid = match store.nodes.get(&mv.neighbor_key) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };

        // Soft-delete old hub→neighbor relation
        for rel in &mut store.relations {
            if ((rel.source_key == mv.from_hub && rel.target_key == mv.neighbor_key)
                || (rel.source_key == mv.neighbor_key && rel.target_key == mv.from_hub))
                && !rel.deleted
            {
                rel.deleted = true;
            }
        }

        // Create new section→neighbor relation
        let new_rel = new_relation(
            section_uuid, neighbor_uuid,
            crate::store::RelationType::Auto,
            0.5,
            &mv.to_section, &mv.neighbor_key,
        );
        if store.add_relation(new_rel).is_ok() {
            applied += 1;
        }
    }

    (applied, skipped)
}

/// Find all file-level hubs that have section children to split into.
pub fn find_differentiable_hubs(store: &Store) -> Vec<(String, usize, usize)> {
    let graph = store.build_graph();
    let threshold = graph.hub_threshold();

    let mut hubs = Vec::new();
    for key in graph.nodes() {
        let deg = graph.degree(key);
        if deg < threshold { continue; }
        if key.contains('#') { continue; }

        let prefix = format!("{}#", key);
        let section_count = store.nodes.keys()
            .filter(|k| k.starts_with(&prefix))
            .count();

        if section_count > 0 {
            hubs.push((key.clone(), deg, section_count));
        }
    }

    hubs.sort_by(|a, b| b.1.cmp(&a.1));
    hubs
}

/// Triangle closure: for each node with degree >= min_degree, find pairs
/// of its neighbors that aren't directly connected and have cosine
/// similarity above sim_threshold. Add links between them.
///
/// This turns hub-spoke patterns into triangles, directly improving
/// clustering coefficient and schema fit.
pub fn triangle_close(
    store: &mut Store,
    min_degree: usize,
    sim_threshold: f32,
    max_links_per_hub: usize,
) -> (usize, usize) {
    let graph = store.build_graph();
    let mut added = 0usize;
    let mut hubs_processed = 0usize;

    // Get nodes sorted by degree (highest first)
    let mut candidates: Vec<(String, usize)> = graph.nodes().iter()
        .map(|k| (k.clone(), graph.degree(k)))
        .filter(|(_, d)| *d >= min_degree)
        .collect();
    candidates.sort_by(|a, b| b.1.cmp(&a.1));

    for (hub_key, hub_deg) in &candidates {
        let neighbors = graph.neighbor_keys(hub_key);
        if neighbors.len() < 2 { continue; }

        // Collect neighbor content for similarity
        let neighbor_docs: Vec<(String, String)> = neighbors.iter()
            .filter_map(|&k| {
                store.nodes.get(k).map(|n| (k.to_string(), n.content.clone()))
            })
            .collect();

        // Find unconnected pairs with high similarity
        let mut pair_scores: Vec<(String, String, f32)> = Vec::new();
        for i in 0..neighbor_docs.len() {
            for j in (i + 1)..neighbor_docs.len() {
                // Check if already connected
                let n_i = graph.neighbor_keys(&neighbor_docs[i].0);
                if n_i.contains(neighbor_docs[j].0.as_str()) { continue; }

                let sim = similarity::cosine_similarity(
                    &neighbor_docs[i].1, &neighbor_docs[j].1);
                if sim >= sim_threshold {
                    pair_scores.push((
                        neighbor_docs[i].0.clone(),
                        neighbor_docs[j].0.clone(),
                        sim,
                    ));
                }
            }
        }

        pair_scores.sort_by(|a, b| b.2.total_cmp(&a.2));
        let to_add = pair_scores.len().min(max_links_per_hub);

        if to_add > 0 {
            println!(" {} (deg={}) — {} triangles to close (top {})",
                hub_key, hub_deg, pair_scores.len(), to_add);

            for (a, b, sim) in pair_scores.iter().take(to_add) {
                let uuid_a = match store.nodes.get(a) { Some(n) => n.uuid, None => continue };
                let uuid_b = match store.nodes.get(b) { Some(n) => n.uuid, None => continue };

                let rel = new_relation(
                    uuid_a, uuid_b,
                    crate::store::RelationType::Auto,
                    sim * 0.5, // scale by similarity
                    a, b,
                );
                if let Ok(()) = store.add_relation(rel) {
                    added += 1;
                }
            }
            hubs_processed += 1;
        }
    }

    if added > 0 {
        let _ = store.save();
    }
    (hubs_processed, added)
}

/// Link orphan nodes (degree < min_degree) to their most textually similar
/// connected nodes. For each orphan, finds top-K nearest neighbors by
/// cosine similarity and creates Auto links.
/// Returns (orphans_linked, total_links_added).
pub fn link_orphans(
    store: &mut Store,
    min_degree: usize,
    links_per_orphan: usize,
    sim_threshold: f32,
) -> (usize, usize) {
    let graph = store.build_graph();
    let mut added = 0usize;
    let mut orphans_linked = 0usize;

    // Separate orphans from connected nodes
    let orphans: Vec<String> = graph.nodes().iter()
        .filter(|k| graph.degree(k) < min_degree)
        .cloned()
        .collect();

    // Build candidate pool: connected nodes with their content
    let candidates: Vec<(String, String)> = graph.nodes().iter()
        .filter(|k| graph.degree(k) >= min_degree)
        .filter_map(|k| store.nodes.get(k).map(|n| (k.clone(), n.content.clone())))
        .collect();

    if candidates.is_empty() { return (0, 0); }

    for orphan_key in &orphans {
        let orphan_content = match store.nodes.get(orphan_key) {
            Some(n) => n.content.clone(),
            None => continue,
        };
        if orphan_content.len() < 20 { continue; } // skip near-empty nodes

        // Score against all candidates
        let mut scores: Vec<(usize, f32)> = candidates.iter()
            .enumerate()
            .map(|(i, (_, content))| {
                (i, similarity::cosine_similarity(&orphan_content, content))
            })
            .filter(|(_, s)| *s >= sim_threshold)
            .collect();

        scores.sort_by(|a, b| b.1.total_cmp(&a.1));
        let to_link = scores.len().min(links_per_orphan);
        if to_link == 0 { continue; }

        let orphan_uuid = store.nodes.get(orphan_key).unwrap().uuid;

        for &(idx, sim) in scores.iter().take(to_link) {
            let target_key = &candidates[idx].0;
            let target_uuid = match store.nodes.get(target_key) {
                Some(n) => n.uuid,
                None => continue,
            };

            let rel = new_relation(
                orphan_uuid, target_uuid,
                crate::store::RelationType::Auto,
                sim * 0.5,
                orphan_key, target_key,
            );
            if store.add_relation(rel).is_ok() {
                added += 1;
            }
        }
        orphans_linked += 1;
    }

    if added > 0 {
        let _ = store.save();
    }
    (orphans_linked, added)
}
390
poc-memory/src/neuro/scoring.rs
Normal file
@@ -0,0 +1,390 @@
// Consolidation scoring, replay queues, interference detection, and
// graph health metrics. Pure analysis — no store mutations.

use crate::store::{Store, now_epoch};
use crate::graph::{self, Graph};
use crate::spectral::{self, SpectralEmbedding, SpectralPosition};

use std::collections::HashMap;

const SECS_PER_DAY: f64 = 86400.0;

/// Consolidation priority: how urgently a node needs attention.
///
/// With spectral data:
/// priority = spectral_displacement × overdue × emotion
/// Without:
/// priority = (1 - cc) × overdue × emotion
///
/// Spectral displacement is the outlier_score clamped and normalized —
/// it measures how far a node sits from its community center in the
/// eigenspace. This is a global signal (considers all graph structure)
/// vs CC which is local (only immediate neighbors).
pub fn consolidation_priority(
    store: &Store,
    key: &str,
    graph: &Graph,
    spectral_outlier: Option<f64>,
) -> f64 {
    let node = match store.nodes.get(key) {
        Some(n) => n,
        None => return 0.0,
    };

    // Integration factor: how poorly integrated is this node?
    let displacement = if let Some(outlier) = spectral_outlier {
        // outlier_score = dist_to_center / median_dist_in_community
        // 1.0 = typical position, >2 = unusual, >5 = extreme outlier
        // Compress and clamp for dynamic range: the difference between
        // outlier=5 and outlier=10 matters less than 1 vs 2.
        (outlier / 3.0).min(3.0)
    } else {
        let cc = graph.clustering_coefficient(key) as f64;
        1.0 - cc
    };

    // Spaced repetition: how overdue is this node for replay?
    let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY;
    let time_since_replay = if node.last_replayed > 0 {
        (now_epoch() - node.last_replayed).max(0) as f64
    } else {
        interval_secs * 3.0
    };
    let overdue_ratio = (time_since_replay / interval_secs).min(5.0);

    // Emotional intensity: higher emotion = higher priority
    let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);

    displacement * overdue_ratio * emotion_factor
}
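// Worked example of the formula above (illustrative numbers): a node with
// spectral outlier 4.5, 14 days since replay against a 7-day interval, and
// emotion 2.0 scores
//   displacement = min(4.5 / 3.0, 3.0) = 1.5
//   overdue      = min(14 / 7,    5.0) = 2.0
//   emotion      = 1 + 2.0 / 10        = 1.2
//   priority     = 1.5 × 2.0 × 1.2     = 3.6
// while a typically-placed, on-schedule, neutral node scores about
// (1.0 / 3.0) × 1.0 × 1.0 ≈ 0.33 — an order of magnitude lower.
#[cfg(test)]
mod priority_math {
    #[test]
    fn clamps_match_the_worked_example() {
        // restates the two clamps used in consolidation_priority()
        let displacement = |outlier: f64| (outlier / 3.0).min(3.0);
        assert!((displacement(4.5) - 1.5).abs() < 1e-9);
        assert!((displacement(100.0) - 3.0).abs() < 1e-9);
        let overdue = |since_secs: f64, interval_secs: f64| (since_secs / interval_secs).min(5.0);
        assert!((overdue(14.0 * 86400.0, 7.0 * 86400.0) - 2.0).abs() < 1e-9);
        assert!((overdue(1000.0 * 86400.0, 7.0 * 86400.0) - 5.0).abs() < 1e-9);
    }
}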
/// Item in the replay queue
pub struct ReplayItem {
    pub key: String,
    pub priority: f64,
    pub interval_days: u32,
    pub emotion: f32,
    pub cc: f32,
    /// Spectral classification: "bridge", "outlier", "core", "peripheral"
    pub classification: &'static str,
    /// Raw spectral outlier score (distance / median)
    pub outlier_score: f64,
}

/// Generate the replay queue: nodes ordered by consolidation priority.
/// Automatically loads spectral embedding if available.
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
    let graph = store.build_graph();
    let emb = spectral::load_embedding().ok();
    replay_queue_with_graph(store, count, &graph, emb.as_ref())
}

/// Generate the replay queue using pre-built graph and optional spectral data.
pub fn replay_queue_with_graph(
    store: &Store,
    count: usize,
    graph: &Graph,
    emb: Option<&SpectralEmbedding>,
) -> Vec<ReplayItem> {
    // Build spectral position map if embedding is available
    let positions: HashMap<String, SpectralPosition> = if let Some(emb) = emb {
        let communities = graph.communities().clone();
        spectral::analyze_positions(emb, &communities)
            .into_iter()
            .map(|p| (p.key.clone(), p))
            .collect()
    } else {
        HashMap::new()
    };

    let mut items: Vec<ReplayItem> = store.nodes.iter()
        .map(|(key, node)| {
            let pos = positions.get(key);
            let outlier_score = pos.map(|p| p.outlier_score).unwrap_or(0.0);
            let classification = pos
                .map(|p| spectral::classify_position(p))
                .unwrap_or("unknown");

            let priority = consolidation_priority(
                store, key, graph,
                pos.map(|p| p.outlier_score),
            );
            ReplayItem {
                key: key.clone(),
                priority,
                interval_days: node.spaced_repetition_interval,
                emotion: node.emotion,
                cc: graph.clustering_coefficient(key),
                classification,
                outlier_score,
            }
        })
        .collect();

    items.sort_by(|a, b| b.priority.total_cmp(&a.priority));
    items.truncate(count);
    items
}

/// Detect interfering memory pairs: high text similarity but different communities
pub fn detect_interference(
    store: &Store,
    graph: &Graph,
    threshold: f32,
) -> Vec<(String, String, f32)> {
    use crate::similarity;

    let communities = graph.communities();

    // Only compare nodes within a reasonable set — take the most active ones
    let mut docs: Vec<(String, String)> = store.nodes.iter()
        .filter(|(_, n)| n.content.len() > 50) // skip tiny nodes
        .map(|(k, n)| (k.clone(), n.content.clone()))
        .collect();

    // For large stores, sample to keep pairwise comparison feasible
    if docs.len() > 200 {
        docs.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
        docs.truncate(200);
    }

    let similar = similarity::pairwise_similar(&docs, threshold);

    // Filter to pairs in different communities
    similar.into_iter()
        .filter(|(a, b, _)| {
            let ca = communities.get(a);
            let cb = communities.get(b);
            match (ca, cb) {
                (Some(a), Some(b)) => a != b,
                _ => true, // if community unknown, flag it
            }
        })
        .collect()
}

/// Schema assimilation scoring for a new node.
/// Returns how easily the node integrates into existing structure.
///
/// High fit (>0.5): auto-link, done
/// Medium fit (0.2-0.5): agent reviews, proposes links
/// Low fit (<0.2): deep examination needed — new schema seed, bridge, or noise?
pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
    let graph = store.build_graph();
    let fit = graph.clustering_coefficient(key);

    let recommendation = if fit > 0.5 {
        "auto-integrate"
    } else if fit > 0.2 {
        "agent-review"
    } else if graph.degree(key) > 0 {
        "deep-examine-bridge"
    } else {
        "deep-examine-orphan"
    };

    (fit, recommendation)
}

/// Agent allocation from the control loop
pub struct ConsolidationPlan {
    pub replay_count: usize,
    pub linker_count: usize,
    pub separator_count: usize,
    pub transfer_count: usize,
    pub run_health: bool,
    pub rationale: Vec<String>,
}

/// Analyze metrics and decide how much each agent needs to run.
///
/// This is the control loop: metrics → error signal → agent allocation.
/// Target values are based on healthy small-world networks.
pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
    let graph = store.build_graph();
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let avg_cc = graph.avg_clustering_coefficient();
    let interference_pairs = detect_interference(store, &graph, 0.5);
    let interference_count = interference_pairs.len();

    // Count episodic vs semantic nodes
    let episodic_count = store.nodes.iter()
        .filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
        .count();
    let _semantic_count = store.nodes.len() - episodic_count;
    let episodic_ratio = if store.nodes.is_empty() { 0.0 }
        else { episodic_count as f32 / store.nodes.len() as f32 };

    let mut plan = ConsolidationPlan {
        replay_count: 0,
        linker_count: 0,
        separator_count: 0,
        transfer_count: 0,
        run_health: true, // always run health first
        rationale: Vec::new(),
    };

    // Target: α ≥ 2.5 (healthy scale-free)
    if alpha < 2.0 {
        plan.replay_count += 10;
        plan.linker_count += 5;
        plan.rationale.push(format!(
            "α={:.2} (target ≥2.5): extreme hub dominance → 10 replay + 5 linker for lateral links",
            alpha));
    } else if alpha < 2.5 {
        plan.replay_count += 5;
        plan.linker_count += 3;
        plan.rationale.push(format!(
            "α={:.2} (target ≥2.5): moderate hub dominance → 5 replay + 3 linker",
            alpha));
    } else {
        plan.replay_count += 3;
        plan.rationale.push(format!(
            "α={:.2}: healthy — 3 replay for maintenance", alpha));
    }

    // Target: Gini ≤ 0.4
    if gini > 0.5 {
        plan.replay_count += 3;
        plan.rationale.push(format!(
            "Gini={:.3} (target ≤0.4): high inequality → +3 replay (lateral focus)",
            gini));
    }

    // Target: avg CC ≥ 0.2
    if avg_cc < 0.1 {
        plan.replay_count += 5;
        plan.rationale.push(format!(
            "CC={:.3} (target ≥0.2): very poor integration → +5 replay",
            avg_cc));
    } else if avg_cc < 0.2 {
        plan.replay_count += 2;
        plan.rationale.push(format!(
            "CC={:.3} (target ≥0.2): low integration → +2 replay",
            avg_cc));
    }

    // Interference: >100 pairs is a lot, <10 is clean
    if interference_count > 100 {
        plan.separator_count += 10;
        plan.rationale.push(format!(
            "Interference: {} pairs (target <50) → 10 separator",
            interference_count));
    } else if interference_count > 20 {
        plan.separator_count += 5;
        plan.rationale.push(format!(
            "Interference: {} pairs (target <50) → 5 separator",
            interference_count));
    } else if interference_count > 0 {
        plan.separator_count += interference_count.min(3);
        plan.rationale.push(format!(
            "Interference: {} pairs → {} separator",
            interference_count, plan.separator_count));
    }

    // Episodic → semantic transfer
    if episodic_ratio > 0.6 {
        plan.transfer_count += 10;
        plan.rationale.push(format!(
            "Episodic ratio: {:.0}% ({}/{}) → 10 transfer (knowledge extraction needed)",
            episodic_ratio * 100.0, episodic_count, store.nodes.len()));
    } else if episodic_ratio > 0.4 {
        plan.transfer_count += 5;
        plan.rationale.push(format!(
            "Episodic ratio: {:.0}% → 5 transfer",
            episodic_ratio * 100.0));
    }

    plan
}

/// Format the consolidation plan for display
pub fn format_plan(plan: &ConsolidationPlan) -> String {
    let mut out = String::from("Consolidation Plan\n==================\n\n");

    out.push_str("Analysis:\n");
    for r in &plan.rationale {
        out.push_str(&format!(" • {}\n", r));
    }

    out.push_str("\nAgent allocation:\n");
    if plan.run_health {
        out.push_str(" 1. health — system audit\n");
    }
    let mut step = 2;
    if plan.replay_count > 0 {
        out.push_str(&format!(" {}. replay ×{:2} — schema assimilation + lateral linking\n",
            step, plan.replay_count));
        step += 1;
    }
    if plan.linker_count > 0 {
        out.push_str(&format!(" {}. linker ×{:2} — relational binding from episodes\n",
            step, plan.linker_count));
        step += 1;
    }
    if plan.separator_count > 0 {
        out.push_str(&format!(" {}. separator ×{} — pattern separation\n",
            step, plan.separator_count));
        step += 1;
    }
    if plan.transfer_count > 0 {
        out.push_str(&format!(" {}. transfer ×{:2} — episodic→semantic extraction\n",
            step, plan.transfer_count));
    }

    let total = plan.replay_count + plan.linker_count
        + plan.separator_count + plan.transfer_count
        + if plan.run_health { 1 } else { 0 };
    out.push_str(&format!("\nTotal agent runs: {}\n", total));

    out
}

/// Brief daily check: compare current metrics to last snapshot
pub fn daily_check(store: &Store) -> String {
    let graph_obj = store.build_graph();
    let snap = graph::current_metrics(&graph_obj);

    let history = graph::load_metrics_history();
    let prev = history.last();

    let mut out = String::from("Memory daily check\n");

    // Current state
    out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
        snap.sigma, snap.alpha, snap.gini, snap.avg_cc));

    // Trend
    if let Some(p) = prev {
        let d_sigma = snap.sigma - p.sigma;
        let d_alpha = snap.alpha - p.alpha;
        let d_gini = snap.gini - p.gini;

        out.push_str(&format!(" Δσ={:+.1} Δα={:+.2} Δgini={:+.3}\n",
            d_sigma, d_alpha, d_gini));

        // Assessment
        let mut issues = Vec::new();
        if snap.alpha < 2.0 { issues.push("hub dominance critical"); }
        if snap.gini > 0.5 { issues.push("high inequality"); }
        if snap.avg_cc < 0.1 { issues.push("poor integration"); }
        if d_sigma < -5.0 { issues.push("σ declining"); }
        if d_alpha < -0.1 { issues.push("α declining"); }
        if d_gini > 0.02 { issues.push("inequality increasing"); }

        if issues.is_empty() {
            out.push_str(" Status: healthy\n");
        } else {
            out.push_str(&format!(" Status: needs attention — {}\n", issues.join(", ")));
            out.push_str(" Run: poc-memory consolidate-session\n");
        }
    } else {
        out.push_str(" (first snapshot, no trend data yet)\n");
    }

    // Persist the snapshot
    graph::save_metrics_snapshot(&snap);

    out
}
501
poc-memory/src/query.rs
Normal file
@@ -0,0 +1,501 @@
// query.rs — peg-based query language for the memory graph
//
// Grammar-driven: the peg definition IS the language spec.
// Evaluates against node properties, graph metrics, and edge attributes.
// Designed for ad-hoc exploration without memorizing 35+ subcommands.
//
// Syntax:
//   expr | stage | stage ...
//
// Stages (piped):
//   sort FIELD       sort descending (default for exploration)
//   sort FIELD asc   sort ascending
//   limit N          cap results
//   select F,F,...   output specific fields as TSV
//   count            just show count
//
// Examples:
//   degree > 15 | sort degree | limit 10
//   category = core | select degree,weight
//   neighbors('identity') WHERE strength > 0.5 | sort strength
//   key ~ 'journal.*' AND degree > 10 | count
//   * | sort weight asc | limit 20

use crate::store::{NodeType, Provenance, RelationType, Store};
use crate::graph::Graph;
use regex::Regex;
use std::collections::BTreeMap;

// -- AST types --

#[derive(Debug, Clone)]
pub enum Expr {
    All,
    Comparison { field: String, op: CmpOp, value: Value },
    And(Box<Expr>, Box<Expr>),
    Or(Box<Expr>, Box<Expr>),
    Not(Box<Expr>),
    Neighbors { key: String, filter: Option<Box<Expr>> },
}

#[derive(Debug, Clone)]
pub enum Value {
    Num(f64),
    Str(String),
    Ident(String),
    FnCall(FnCall),
}

#[derive(Debug, Clone)]
pub enum FnCall {
    Community(String),
    Degree(String),
}

#[derive(Debug, Clone, Copy)]
pub enum CmpOp {
    Gt, Lt, Ge, Le, Eq, Ne, Match,
}

#[derive(Debug, Clone)]
pub enum Stage {
    Sort { field: String, ascending: bool },
    Limit(usize),
    Select(Vec<String>),
    Count,
}

#[derive(Debug, Clone)]
pub struct Query {
    pub expr: Expr,
    pub stages: Vec<Stage>,
}

// -- PEG grammar --

peg::parser! {
    pub grammar query_parser() for str {
        rule _() = [' ' | '\t']*

        pub rule query() -> Query
            = e:expr() s:stages() { Query { expr: e, stages: s } }

        rule stages() -> Vec<Stage>
            = s:(_ "|" _ s:stage() { s })* { s }

        rule stage() -> Stage
            = "sort" _ f:field() _ a:asc_desc() { Stage::Sort { field: f, ascending: a } }
            / "limit" _ n:integer() { Stage::Limit(n) }
            / "select" _ f:field_list() { Stage::Select(f) }
            / "count" { Stage::Count }

        rule asc_desc() -> bool
            = "asc" { true }
            / "desc" { false }
            / { false } // default: descending

        rule field_list() -> Vec<String>
            = f:field() fs:(_ "," _ f:field() { f })* {
                let mut v = vec![f];
                v.extend(fs);
                v
            }

        rule integer() -> usize
            = n:$(['0'..='9']+) { n.parse().unwrap() }

        pub rule expr() -> Expr = precedence! {
            a:(@) _ "OR" _ b:@ { Expr::Or(Box::new(a), Box::new(b)) }
            --
            a:(@) _ "AND" _ b:@ { Expr::And(Box::new(a), Box::new(b)) }
            --
            "NOT" _ e:@ { Expr::Not(Box::new(e)) }
            --
            "neighbors" _ "(" _ k:string() _ ")" _ w:where_clause()? {
                Expr::Neighbors { key: k, filter: w.map(Box::new) }
            }
            f:field() _ op:cmp_op() _ v:value() {
                Expr::Comparison { field: f, op, value: v }
            }
            "*" { Expr::All }
            "(" _ e:expr() _ ")" { e }
        }

        rule where_clause() -> Expr
            = "WHERE" _ e:expr() { e }

        rule field() -> String
            = s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) {
                s.to_string()
            }

        rule cmp_op() -> CmpOp
            = ">=" { CmpOp::Ge }
            / "<=" { CmpOp::Le }
            / "!=" { CmpOp::Ne }
            / ">" { CmpOp::Gt }
            / "<" { CmpOp::Lt }
            / "=" { CmpOp::Eq }
            / "~" { CmpOp::Match }

        rule value() -> Value
            = f:fn_call() { Value::FnCall(f) }
            / n:number() { Value::Num(n) }
            / s:string() { Value::Str(s) }
            / i:ident() { Value::Ident(i) }

        rule fn_call() -> FnCall
            = "community" _ "(" _ k:string() _ ")" { FnCall::Community(k) }
            / "degree" _ "(" _ k:string() _ ")" { FnCall::Degree(k) }

        rule number() -> f64
            = n:$(['0'..='9']+ ("." ['0'..='9']+)?) {
                n.parse().unwrap()
            }

        rule string() -> String
            = "'" s:$([^ '\'']*) "'" { s.to_string() }

        rule ident() -> String
            = s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.']*) {
                s.to_string()
            }
    }
}
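// Quick check that the grammar accepts the examples from the header comment.
// Only parsing is exercised; evaluation needs a live Store and Graph.
#[cfg(test)]
mod parse_tests {
    use super::*;

    #[test]
    fn parses_header_examples() {
        let q = query_parser::query("degree > 15 | sort degree | limit 10").unwrap();
        assert_eq!(q.stages.len(), 2);
        assert!(matches!(q.stages[1], Stage::Limit(10)));

        assert!(query_parser::query("category = core | select degree,weight").is_ok());
        assert!(query_parser::query("neighbors('identity') WHERE strength > 0.5 | sort strength").is_ok());
        assert!(query_parser::query("key ~ 'journal.*' AND degree > 10 | count").is_ok());
        assert!(query_parser::query("* | sort weight asc | limit 20").is_ok());
    }
}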
// -- Field resolution --

/// Resolve a field value from a node + graph context, returning a comparable Value.
fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option<Value> {
    let node = store.nodes.get(key)?;
    match field {
        "key" => Some(Value::Str(key.to_string())),
        "weight" => Some(Value::Num(node.weight as f64)),
        "category" => None, // vestigial, kept for query compat
        "node_type" => Some(Value::Str(node_type_label(node.node_type).to_string())),
        "provenance" => Some(Value::Str(node.provenance.label().to_string())),
        "emotion" => Some(Value::Num(node.emotion as f64)),
        "retrievals" => Some(Value::Num(node.retrievals as f64)),
        "uses" => Some(Value::Num(node.uses as f64)),
        "wrongs" => Some(Value::Num(node.wrongs as f64)),
        "created" => Some(Value::Str(node.created.clone())),
        "content" => Some(Value::Str(node.content.clone())),
        "degree" => Some(Value::Num(graph.degree(key) as f64)),
        "community_id" => {
            graph.communities().get(key).map(|&c| Value::Num(c as f64))
        }
        "clustering_coefficient" | "schema_fit" | "cc" => {
            Some(Value::Num(graph.clustering_coefficient(key) as f64))
        }
        _ => None,
    }
}

fn node_type_label(nt: NodeType) -> &'static str {
    match nt {
        NodeType::EpisodicSession => "episodic_session",
        NodeType::EpisodicDaily => "episodic_daily",
        NodeType::EpisodicWeekly => "episodic_weekly",
        NodeType::EpisodicMonthly => "episodic_monthly",
        NodeType::Semantic => "semantic",
    }
}

fn rel_type_label(r: RelationType) -> &'static str {
    match r {
        RelationType::Link => "link",
        RelationType::Causal => "causal",
        RelationType::Auto => "auto",
    }
}

// -- Comparison logic --

fn as_num(v: &Value) -> Option<f64> {
    match v {
        Value::Num(n) => Some(*n),
        Value::Str(s) => s.parse().ok(),
        Value::Ident(s) => s.parse().ok(),
        Value::FnCall(_) => None,
    }
}

fn as_str(v: &Value) -> String {
    match v {
        Value::Str(s) | Value::Ident(s) => s.clone(),
        Value::Num(n) => format!("{}", n),
        Value::FnCall(_) => String::new(),
    }
}

fn compare(lhs: &Value, op: CmpOp, rhs: &Value) -> bool {
    if let CmpOp::Match = op {
        return Regex::new(&as_str(rhs))
            .map(|re| re.is_match(&as_str(lhs)))
            .unwrap_or(false);
    }

    // Numeric comparison if both parse, otherwise string
    let ord = match (as_num(lhs), as_num(rhs)) {
        (Some(a), Some(b)) => a.total_cmp(&b),
        _ => as_str(lhs).cmp(&as_str(rhs)),
    };

    match op {
        CmpOp::Eq => ord.is_eq(),
        CmpOp::Ne => !ord.is_eq(),
        CmpOp::Gt => ord.is_gt(),
        CmpOp::Lt => ord.is_lt(),
        CmpOp::Ge => !ord.is_lt(),
        CmpOp::Le => !ord.is_gt(),
        CmpOp::Match => unreachable!(),
    }
}
// -- Evaluator --
|
||||
|
||||
fn resolve_fn(f: &FnCall, store: &Store, graph: &Graph) -> Value {
|
||||
match f {
|
||||
FnCall::Community(key) => {
|
||||
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
|
||||
graph.communities().get(&resolved)
|
||||
.map(|&c| Value::Num(c as f64))
|
||||
.unwrap_or(Value::Num(f64::NAN))
|
||||
}
|
||||
FnCall::Degree(key) => {
|
||||
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
|
||||
Value::Num(graph.degree(&resolved) as f64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_value(v: &Value, store: &Store, graph: &Graph) -> Value {
|
||||
match v {
|
||||
Value::FnCall(f) => resolve_fn(f, store, graph),
|
||||
other => other.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate an expression against a field resolver.
|
||||
/// The resolver returns field values — different for nodes vs edges.
|
||||
fn eval(
|
||||
expr: &Expr,
|
||||
resolve: &dyn Fn(&str) -> Option<Value>,
|
||||
store: &Store,
|
||||
graph: &Graph,
|
||||
) -> bool {
|
||||
match expr {
|
||||
Expr::All => true,
|
||||
Expr::Comparison { field, op, value } => {
|
||||
let lhs = match resolve(field) {
|
||||
Some(v) => v,
|
||||
None => return false,
|
||||
};
|
||||
let rhs = resolve_value(value, store, graph);
|
||||
compare(&lhs, *op, &rhs)
|
||||
}
|
||||
Expr::And(a, b) => eval(a, resolve, store, graph) && eval(b, resolve, store, graph),
|
||||
Expr::Or(a, b) => eval(a, resolve, store, graph) || eval(b, resolve, store, graph),
|
||||
Expr::Not(e) => !eval(e, resolve, store, graph),
|
||||
Expr::Neighbors { .. } => false,
|
||||
}
|
||||
}
|
||||
|
||||
// -- Query result --
|
||||
|
||||
pub struct QueryResult {
|
||||
pub key: String,
|
||||
pub fields: BTreeMap<String, Value>,
|
||||
}
|
||||
|
||||
// -- Query executor --
|
||||
|
||||
pub fn execute_query(
|
||||
store: &Store,
|
||||
graph: &Graph,
|
||||
query_str: &str,
|
||||
) -> Result<Vec<QueryResult>, String> {
|
||||
let q = query_parser::query(query_str)
|
||||
.map_err(|e| format!("Parse error: {}", e))?;
|
||||
execute_parsed(store, graph, &q)
|
||||
}
|
||||
|
||||
fn execute_parsed(
|
||||
store: &Store,
|
||||
graph: &Graph,
|
||||
q: &Query,
|
||||
) -> Result<Vec<QueryResult>, String> {
|
||||
let mut results = match &q.expr {
|
||||
Expr::Neighbors { key, filter } => {
|
||||
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
|
||||
let edges = graph.edges_of(&resolved);
|
||||
let mut out = Vec::new();
|
||||
for edge in edges {
|
||||
let include = match filter {
|
||||
Some(f) => {
|
||||
let strength = edge.strength;
|
||||
let rt = edge.rel_type;
|
||||
let target = &edge.target;
|
||||
eval(f, &|field| match field {
|
||||
"strength" => Some(Value::Num(strength as f64)),
|
||||
"rel_type" => Some(Value::Str(rel_type_label(rt).to_string())),
|
||||
_ => resolve_field(field, target, store, graph),
|
||||
}, store, graph)
|
||||
}
|
||||
None => true,
|
||||
};
|
||||
if include {
|
||||
let mut fields = BTreeMap::new();
|
||||
fields.insert("strength".into(), Value::Num(edge.strength as f64));
|
||||
fields.insert("rel_type".into(),
|
||||
Value::Str(rel_type_label(edge.rel_type).to_string()));
|
||||
out.push(QueryResult { key: edge.target.clone(), fields });
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
_ => {
|
||||
let mut out = Vec::new();
|
||||
for key in store.nodes.keys() {
|
||||
if store.nodes[key].deleted { continue; }
|
||||
if eval(&q.expr, &|f| resolve_field(f, key, store, graph), store, graph) {
|
||||
out.push(QueryResult { key: key.clone(), fields: BTreeMap::new() });
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
};
|
||||
|
||||
// Collect fields needed by select/sort stages and resolve them once
|
||||
let needed: Vec<String> = {
|
||||
let mut set = Vec::new();
|
||||
for stage in &q.stages {
|
||||
match stage {
|
||||
Stage::Select(fields) => {
|
||||
for f in fields {
|
||||
if !set.contains(f) { set.push(f.clone()); }
|
||||
}
|
||||
}
|
||||
Stage::Sort { field, .. } => {
|
||||
if !set.contains(field) { set.push(field.clone()); }
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
set
|
||||
};
|
||||
|
||||
for r in &mut results {
|
||||
for f in &needed {
|
||||
if !r.fields.contains_key(f) {
|
||||
if let Some(v) = resolve_field(f, &r.key, store, graph) {
|
||||
r.fields.insert(f.clone(), v);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply pipeline stages
|
||||
let mut has_sort = false;
|
||||
for stage in &q.stages {
|
||||
match stage {
|
||||
Stage::Sort { field, ascending } => {
|
||||
has_sort = true;
|
||||
let asc = *ascending;
|
||||
results.sort_by(|a, b| {
|
||||
let va = a.fields.get(field).and_then(|v| as_num(v));
|
||||
let vb = b.fields.get(field).and_then(|v| as_num(v));
|
||||
let ord = match (va, vb) {
|
||||
(Some(a), Some(b)) => a.total_cmp(&b),
|
||||
_ => {
|
||||
let sa = a.fields.get(field).map(|v| as_str(v)).unwrap_or_default();
|
||||
let sb = b.fields.get(field).map(|v| as_str(v)).unwrap_or_default();
|
||||
sa.cmp(&sb)
|
||||
}
|
||||
};
|
||||
if asc { ord } else { ord.reverse() }
|
||||
});
|
||||
}
|
||||
Stage::Limit(n) => {
|
||||
results.truncate(*n);
|
||||
}
|
||||
Stage::Select(_) | Stage::Count => {} // handled in output
|
||||
}
|
||||
}
|
||||
|
||||
// Default sort by degree desc if no explicit sort
|
||||
if !has_sort {
|
||||
results.sort_by(|a, b| {
|
||||
let da = graph.degree(&a.key);
|
||||
let db = graph.degree(&b.key);
|
||||
db.cmp(&da)
|
||||
});
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Format a Value for display
|
||||
pub fn format_value(v: &Value) -> String {
|
||||
match v {
|
||||
Value::Num(n) => {
|
||||
if *n == n.floor() && n.abs() < 1e15 {
|
||||
format!("{}", *n as i64)
|
||||
} else {
|
||||
format!("{:.3}", n)
|
||||
}
|
||||
}
|
||||
Value::Str(s) => s.clone(),
|
||||
Value::Ident(s) => s.clone(),
|
||||
Value::FnCall(_) => "?".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute query and print formatted output.
|
||||
pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), String> {
|
||||
let q = query_parser::query(query_str)
|
||||
.map_err(|e| format!("Parse error: {}", e))?;
|
||||
|
||||
let results = execute_parsed(store, graph, &q)?;
|
||||
|
||||
// Count stage
|
||||
if q.stages.iter().any(|s| matches!(s, Stage::Count)) {
|
||||
println!("{}", results.len());
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if results.is_empty() {
|
||||
eprintln!("No results");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Select stage
|
||||
let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s {
|
||||
Stage::Select(f) => Some(f),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
if let Some(fields) = fields {
|
||||
let mut header = vec!["key".to_string()];
|
||||
header.extend(fields.iter().cloned());
|
||||
println!("{}", header.join("\t"));
|
||||
|
||||
for r in &results {
|
||||
let mut row = vec![r.key.clone()];
|
||||
for f in fields {
|
||||
row.push(match r.fields.get(f) {
|
||||
Some(v) => format_value(v),
|
||||
None => "-".to_string(),
|
||||
});
|
||||
}
|
||||
println!("{}", row.join("\t"));
|
||||
}
|
||||
} else {
|
||||
for r in &results {
|
||||
println!("{}", r.key);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
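// Illustrative sketch (not part of this commit): driving the executor from
// caller code with an already-loaded Store and Graph. Query strings follow
// the grammar defined above; keys and thresholds are placeholders.
#[allow(dead_code)]
fn example_run_queries(store: &Store, graph: &Graph) -> Result<(), String> {
    // Full scan; with no explicit SORT stage the default degree-descending sort applies.
    let hubs = execute_query(store, graph, "*")?;
    // Boolean filter over node fields.
    let hot = execute_query(store, graph, "weight >= 0.8 AND node_type = 'semantic'")?;
    // Neighborhood query with an edge filter.
    let near = execute_query(store, graph, "neighbors('identity') WHERE strength > 0.5")?;
    println!("{} total, {} hot, {} neighbors", hubs.len(), hot.len(), near.len());
    Ok(())
}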
|
||||
156
poc-memory/src/search.rs
Normal file
@@ -0,0 +1,156 @@
// Spreading activation search across the memory graph
|
||||
//
|
||||
// Same model as the old system but richer: uses graph edge strengths,
|
||||
// supports circumscription parameter for blending associative vs
|
||||
// causal walks, and benefits from community-aware result grouping.
|
||||
|
||||
use crate::store::StoreView;
|
||||
use crate::graph::Graph;
|
||||
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
pub struct SearchResult {
|
||||
pub key: String,
|
||||
pub activation: f64,
|
||||
pub is_direct: bool,
|
||||
pub snippet: Option<String>,
|
||||
}
|
||||
|
||||
/// Spreading activation with circumscription parameter.
|
||||
///
|
||||
/// circ = 0.0: field mode — all edges (default, broad resonance)
|
||||
/// circ = 1.0: causal mode — prefer causal edges
|
||||
fn spreading_activation(
|
||||
seeds: &[(String, f64)],
|
||||
graph: &Graph,
|
||||
store: &impl StoreView,
|
||||
_circumscription: f64,
|
||||
) -> Vec<(String, f64)> {
|
||||
let params = store.params();
|
||||
|
||||
let mut activation: HashMap<String, f64> = HashMap::new();
|
||||
let mut queue: VecDeque<(String, f64, u32)> = VecDeque::new();
|
||||
|
||||
for (key, act) in seeds {
|
||||
let current = activation.entry(key.clone()).or_insert(0.0);
|
||||
if *act > *current {
|
||||
*current = *act;
|
||||
queue.push_back((key.clone(), *act, 0));
|
||||
}
|
||||
}
|
||||
|
||||
while let Some((key, act, depth)) = queue.pop_front() {
|
||||
if depth >= params.max_hops { continue; }
|
||||
|
||||
for (neighbor, strength) in graph.neighbors(&key) {
|
||||
let neighbor_weight = store.node_weight(neighbor.as_str());
|
||||
let propagated = act * params.edge_decay * neighbor_weight * strength as f64;
|
||||
if propagated < params.min_activation { continue; }
|
||||
|
||||
let current = activation.entry(neighbor.clone()).or_insert(0.0);
|
||||
if propagated > *current {
|
||||
*current = propagated;
|
||||
queue.push_back((neighbor.clone(), propagated, depth + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut results: Vec<_> = activation.into_iter().collect();
|
||||
results.sort_by(|a, b| b.1.total_cmp(&a.1));
|
||||
results
|
||||
}
|
||||
|
||||
/// Full search: find direct hits, spread activation, return ranked results
|
||||
pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
|
||||
let graph = crate::graph::build_graph_fast(store);
|
||||
let query_lower = query.to_lowercase();
|
||||
let query_tokens: Vec<&str> = query_lower.split_whitespace().collect();
|
||||
|
||||
let mut seeds: Vec<(String, f64)> = Vec::new();
|
||||
let mut snippets: HashMap<String, String> = HashMap::new();
|
||||
|
||||
store.for_each_node(|key, content, weight| {
|
||||
let content_lower = content.to_lowercase();
|
||||
|
||||
let exact_match = content_lower.contains(&query_lower);
|
||||
let token_match = query_tokens.len() > 1
|
||||
&& query_tokens.iter().all(|t| content_lower.contains(t));
|
||||
|
||||
if exact_match || token_match {
|
||||
let activation = if exact_match { weight as f64 } else { weight as f64 * 0.85 };
|
||||
seeds.push((key.to_owned(), activation));
|
||||
|
||||
let snippet: String = content.lines()
|
||||
.filter(|l| {
|
||||
let ll = l.to_lowercase();
|
||||
if exact_match && ll.contains(&query_lower) { return true; }
|
||||
query_tokens.iter().any(|t| ll.contains(t))
|
||||
})
|
||||
.take(3)
|
||||
.map(|l| {
|
||||
let t = l.trim();
|
||||
if t.len() > 100 {
|
||||
let end = t.floor_char_boundary(97);
|
||||
format!("{}...", &t[..end])
|
||||
} else {
|
||||
t.to_string()
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n ");
|
||||
snippets.insert(key.to_owned(), snippet);
|
||||
}
|
||||
});
|
||||
|
||||
if seeds.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let direct_hits: HashSet<String> = seeds.iter().map(|(k, _)| k.clone()).collect();
|
||||
let raw_results = spreading_activation(&seeds, &graph, store, 0.0);
|
||||
|
||||
raw_results.into_iter().map(|(key, activation)| {
|
||||
let is_direct = direct_hits.contains(&key);
|
||||
let snippet = snippets.get(&key).cloned();
|
||||
SearchResult { key, activation, is_direct, snippet }
|
||||
}).collect()
|
||||
}
|
||||
|
||||
/// Extract meaningful search terms from natural language.
|
||||
/// Strips common English stop words, returns up to max_terms words.
|
||||
pub fn extract_query_terms(text: &str, max_terms: usize) -> String {
|
||||
const STOP_WORDS: &[&str] = &[
|
||||
"the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
|
||||
"have", "has", "had", "will", "would", "could", "should", "can",
|
||||
"may", "might", "shall", "been", "being", "to", "of", "in", "for",
|
||||
"on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
|
||||
"no", "if", "then", "than", "that", "this", "it", "its", "my",
|
||||
"your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
|
||||
"what", "how", "why", "when", "where", "about", "just", "let",
|
||||
"want", "tell", "show", "think", "know", "see", "look", "make",
|
||||
"get", "go", "some", "any", "all", "very", "really", "also", "too",
|
||||
"so", "up", "out", "here", "there",
|
||||
];
|
||||
|
||||
text.to_lowercase()
|
||||
.split(|c: char| !c.is_alphanumeric())
|
||||
.filter(|w| !w.is_empty() && w.len() > 2 && !STOP_WORDS.contains(w))
|
||||
.take(max_terms)
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}
|
||||
|
||||
/// Format search results as text lines (for hook consumption).
|
||||
pub fn format_results(results: &[SearchResult]) -> String {
|
||||
let mut out = String::new();
|
||||
for (i, r) in results.iter().enumerate().take(5) {
|
||||
let marker = if r.is_direct { "→" } else { " " };
|
||||
out.push_str(&format!("{}{:2}. [{:.2}/{:.2}] {}",
|
||||
marker, i + 1, r.activation, r.activation, r.key));
|
||||
out.push('\n');
|
||||
if let Some(ref snippet) = r.snippet {
|
||||
out.push_str(&format!(" {}\n", snippet));
|
||||
}
|
||||
}
|
||||
out
|
||||
}
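// Illustrative sketch (not part of this commit): the intended flow for hook
// consumers, given any caller-supplied StoreView implementation.
#[allow(dead_code)]
fn example_search(store: &impl StoreView, raw_prompt: &str) {
    // Reduce a natural-language prompt to a handful of content words,
    // then run spreading-activation search and print the top hits.
    let terms = extract_query_terms(raw_prompt, 4);
    let results = search(&terms, store);
    print!("{}", format_results(&results));
}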
|
||||
135
poc-memory/src/similarity.rs
Normal file
@@ -0,0 +1,135 @@
// Text similarity: Porter stemming + term-frequency cosine similarity
//
// Used for interference detection (similar content, different communities)
// and schema fit scoring. Intentionally simple — ~100 lines, no
// external dependencies.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Minimal Porter stemmer — handles the most common English suffixes.
|
||||
/// Not linguistically complete but good enough for similarity matching.
|
||||
pub fn stem(word: &str) -> String {
|
||||
let w = word.to_lowercase();
|
||||
if w.len() <= 3 { return w; }
|
||||
|
||||
let w = strip_suffix(&w, "ation", "ate");
|
||||
let w = strip_suffix(&w, "ness", "");
|
||||
let w = strip_suffix(&w, "ment", "");
|
||||
let w = strip_suffix(&w, "ting", "t");
|
||||
let w = strip_suffix(&w, "ling", "l");
|
||||
let w = strip_suffix(&w, "ring", "r");
|
||||
let w = strip_suffix(&w, "ning", "n");
|
||||
let w = strip_suffix(&w, "ding", "d");
|
||||
let w = strip_suffix(&w, "ping", "p");
|
||||
let w = strip_suffix(&w, "ging", "g");
|
||||
let w = strip_suffix(&w, "ying", "y");
|
||||
let w = strip_suffix(&w, "ied", "y");
|
||||
let w = strip_suffix(&w, "ies", "y");
|
||||
let w = strip_suffix(&w, "ing", "");
|
||||
let w = strip_suffix(&w, "ed", "");
|
||||
let w = strip_suffix(&w, "ly", "");
|
||||
let w = strip_suffix(&w, "er", "");
|
||||
let w = strip_suffix(&w, "al", "");
|
||||
strip_suffix(&w, "s", "")
|
||||
}
|
||||
|
||||
fn strip_suffix(word: &str, suffix: &str, replacement: &str) -> String {
|
||||
if word.len() > suffix.len() + 2 && word.ends_with(suffix) {
|
||||
let base = &word[..word.len() - suffix.len()];
|
||||
format!("{}{}", base, replacement)
|
||||
} else {
|
||||
word.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Tokenize and stem a text into a term frequency map
|
||||
pub fn term_frequencies(text: &str) -> HashMap<String, u32> {
|
||||
let mut tf = HashMap::new();
|
||||
for word in text.split(|c: char| !c.is_alphanumeric()) {
|
||||
if word.len() > 2 {
|
||||
let stemmed = stem(word);
|
||||
*tf.entry(stemmed).or_default() += 1;
|
||||
}
|
||||
}
|
||||
tf
|
||||
}
|
||||
|
||||
/// Cosine similarity between two documents using stemmed term frequencies.
|
||||
/// Returns 0.0 for disjoint vocabularies, 1.0 for identical content.
|
||||
pub fn cosine_similarity(doc_a: &str, doc_b: &str) -> f32 {
|
||||
let tf_a = term_frequencies(doc_a);
|
||||
let tf_b = term_frequencies(doc_b);
|
||||
|
||||
if tf_a.is_empty() || tf_b.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Dot product
|
||||
let mut dot = 0.0f64;
|
||||
for (term, &freq_a) in &tf_a {
|
||||
if let Some(&freq_b) = tf_b.get(term) {
|
||||
dot += freq_a as f64 * freq_b as f64;
|
||||
}
|
||||
}
|
||||
|
||||
// Magnitudes
|
||||
let mag_a: f64 = tf_a.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
|
||||
let mag_b: f64 = tf_b.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
|
||||
|
||||
if mag_a < 1e-10 || mag_b < 1e-10 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
(dot / (mag_a * mag_b)) as f32
|
||||
}
|
||||
|
||||
/// Compute pairwise similarity for a set of documents.
|
||||
/// Returns pairs with similarity above threshold.
|
||||
pub fn pairwise_similar(
|
||||
docs: &[(String, String)], // (key, content)
|
||||
threshold: f32,
|
||||
) -> Vec<(String, String, f32)> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for i in 0..docs.len() {
|
||||
for j in (i + 1)..docs.len() {
|
||||
let sim = cosine_similarity(&docs[i].1, &docs[j].1);
|
||||
if sim >= threshold {
|
||||
results.push((docs[i].0.clone(), docs[j].0.clone(), sim));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.sort_by(|a, b| b.2.total_cmp(&a.2));
|
||||
results
|
||||
}
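// Illustrative sketch (not part of this commit): flagging interference
// candidates among (key, content) documents. The 0.6 threshold is an
// assumption for the example, not a value taken from this codebase.
#[allow(dead_code)]
fn example_interference_report(docs: &[(String, String)]) {
    for (a, b, sim) in pairwise_similar(docs, 0.6) {
        println!("{} <-> {}  cosine={:.2}", a, b, sim);
    }
}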
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_stem() {
|
||||
assert_eq!(stem("running"), "runn"); // -ning → n
|
||||
assert_eq!(stem("talking"), "talk"); // not matched by specific consonant rules
|
||||
assert_eq!(stem("slowly"), "slow"); // -ly
|
||||
// The stemmer is minimal — it doesn't need to be perfect,
|
||||
// just consistent enough that related words collide.
|
||||
assert_eq!(stem("observations"), "observation"); // -s stripped, -ation stays (word too short after)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cosine_identical() {
|
||||
let text = "the quick brown fox jumps over the lazy dog";
|
||||
let sim = cosine_similarity(text, text);
|
||||
assert!((sim - 1.0).abs() < 0.01, "identical docs should have sim ~1.0, got {}", sim);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cosine_different() {
|
||||
let a = "kernel filesystem transaction restart handling";
|
||||
let b = "cooking recipe chocolate cake baking temperature";
|
||||
let sim = cosine_similarity(a, b);
|
||||
assert!(sim < 0.1, "unrelated docs should have low sim, got {}", sim);
|
||||
}
|
||||
}
|
||||
566
poc-memory/src/spectral.rs
Normal file
@@ -0,0 +1,566 @@
// Spectral decomposition of the memory graph.
|
||||
//
|
||||
// Computes eigenvalues and eigenvectors of the normalized graph Laplacian.
|
||||
// The eigenvectors provide natural coordinates for each node — connected
|
||||
// nodes land nearby, communities form clusters, bridges sit between clusters.
|
||||
//
|
||||
// The eigenvalue spectrum reveals:
|
||||
// - Number of connected components (count of zero eigenvalues)
|
||||
// - Number of natural communities (eigenvalues near zero, before the gap)
|
||||
// - How well-connected the graph is (Fiedler value = second eigenvalue)
|
||||
//
|
||||
// The eigenvectors provide:
|
||||
// - Spectral coordinates for each node (the embedding)
|
||||
// - Community membership (sign/magnitude of Fiedler vector)
|
||||
// - Natural projections (select which eigenvectors to include)
|
||||
|
||||
use crate::graph::Graph;
|
||||
|
||||
use faer::Mat;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub struct SpectralResult {
|
||||
/// Node keys in index order
|
||||
pub keys: Vec<String>,
|
||||
/// Eigenvalues in ascending order
|
||||
pub eigenvalues: Vec<f64>,
|
||||
/// Eigenvectors: eigvecs[k] is the k-th eigenvector (ascending eigenvalue order),
|
||||
/// with eigvecs[k][i] being the value for node keys[i]
|
||||
pub eigvecs: Vec<Vec<f64>>,
|
||||
}
|
||||
|
||||
/// Per-node spectral embedding, serializable to disk.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct SpectralEmbedding {
|
||||
/// Number of dimensions (eigenvectors)
|
||||
pub dims: usize,
|
||||
/// Eigenvalues for each dimension
|
||||
pub eigenvalues: Vec<f64>,
|
||||
/// Node key → coordinate vector
|
||||
pub coords: HashMap<String, Vec<f64>>,
|
||||
}
|
||||
|
||||
fn embedding_path() -> PathBuf {
|
||||
let home = std::env::var("HOME").unwrap_or_default();
|
||||
PathBuf::from(home).join(".claude/memory/spectral-embedding.json")
|
||||
}
|
||||
|
||||
/// Compute spectral decomposition of the memory graph.
|
||||
///
|
||||
/// Returns the smallest `k` eigenvalues and their eigenvectors of the
|
||||
/// normalized Laplacian L_sym = I - D^{-1/2} A D^{-1/2}.
|
||||
///
|
||||
/// We compute the full decomposition (it's only 2000×2000, takes <1s)
|
||||
/// and return the bottom k.
|
||||
pub fn decompose(graph: &Graph, k: usize) -> SpectralResult {
|
||||
// Only include nodes with edges (filter isolates)
|
||||
let mut keys: Vec<String> = graph.nodes().iter()
|
||||
.filter(|k| graph.degree(k) > 0)
|
||||
.cloned()
|
||||
.collect();
|
||||
keys.sort();
|
||||
let n = keys.len();
|
||||
let isolates = graph.nodes().len() - n;
|
||||
if isolates > 0 {
|
||||
eprintln!("note: filtered {} isolated nodes, decomposing {} connected nodes", isolates, n);
|
||||
}
|
||||
|
||||
let key_to_idx: HashMap<&str, usize> = keys.iter()
|
||||
.enumerate()
|
||||
.map(|(i, k)| (k.as_str(), i))
|
||||
.collect();
|
||||
|
||||
// Build weighted degree vector and adjacency
|
||||
let mut degree = vec![0.0f64; n];
|
||||
let mut adj_entries: Vec<(usize, usize, f64)> = Vec::new();
|
||||
|
||||
for (i, key) in keys.iter().enumerate() {
|
||||
for (neighbor, strength) in graph.neighbors(key) {
|
||||
if let Some(&j) = key_to_idx.get(neighbor.as_str()) {
|
||||
if j > i { // each edge once
|
||||
let w = strength as f64;
|
||||
adj_entries.push((i, j, w));
|
||||
degree[i] += w;
|
||||
degree[j] += w;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build normalized Laplacian: L_sym = I - D^{-1/2} A D^{-1/2}
|
||||
let mut laplacian = Mat::<f64>::zeros(n, n);
|
||||
|
||||
// Diagonal = 1 for nodes with edges, 0 for isolates
|
||||
for i in 0..n {
|
||||
if degree[i] > 0.0 {
|
||||
laplacian[(i, i)] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
// Off-diagonal: -w / sqrt(d_i * d_j)
|
||||
for &(i, j, w) in &adj_entries {
|
||||
if degree[i] > 0.0 && degree[j] > 0.0 {
|
||||
let val = -w / (degree[i] * degree[j]).sqrt();
|
||||
laplacian[(i, j)] = val;
|
||||
laplacian[(j, i)] = val;
|
||||
}
|
||||
}
|
||||
|
||||
// Eigendecompose
|
||||
let eig = laplacian.self_adjoint_eigen(faer::Side::Lower)
|
||||
.expect("eigendecomposition failed");
|
||||
let s = eig.S();
|
||||
let u = eig.U();
|
||||
|
||||
let k = k.min(n);
|
||||
let mut eigenvalues = Vec::with_capacity(k);
|
||||
let mut eigvecs = Vec::with_capacity(k);
|
||||
|
||||
let s_col = s.column_vector();
|
||||
for col in 0..k {
|
||||
eigenvalues.push(s_col[col]);
|
||||
let mut vec = Vec::with_capacity(n);
|
||||
for row in 0..n {
|
||||
vec.push(u[(row, col)]);
|
||||
}
|
||||
eigvecs.push(vec);
|
||||
}
|
||||
|
||||
SpectralResult { keys, eigenvalues, eigvecs }
|
||||
}
|
||||
|
||||
/// Print the spectral summary: eigenvalue spectrum, then each axis with
|
||||
/// its extreme nodes (what the axis "means").
|
||||
pub fn print_summary(result: &SpectralResult, graph: &Graph) {
|
||||
let n = result.keys.len();
|
||||
let k = result.eigenvalues.len();
|
||||
|
||||
println!("Spectral Decomposition — {} nodes, {} eigenpairs", n, k);
|
||||
println!("=========================================\n");
|
||||
|
||||
// Compact eigenvalue table
|
||||
println!("Eigenvalue spectrum:");
|
||||
for (i, &ev) in result.eigenvalues.iter().enumerate() {
|
||||
let gap = if i > 0 {
|
||||
ev - result.eigenvalues[i - 1]
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let gap_bar = if i > 0 {
|
||||
let bars = (gap * 500.0).min(40.0) as usize;
|
||||
"#".repeat(bars)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
println!(" λ_{:<2} = {:.6} {}", i, ev, gap_bar);
|
||||
}
|
||||
|
||||
// Connected components
|
||||
let near_zero = result.eigenvalues.iter()
|
||||
.filter(|&&v| v.abs() < 1e-6)
|
||||
.count();
|
||||
if near_zero > 1 {
|
||||
println!("\n {} eigenvalues near 0 = {} disconnected components", near_zero, near_zero);
|
||||
}
|
||||
|
||||
// Each axis: what are the extremes?
|
||||
println!("\n\nNatural axes of the knowledge space");
|
||||
println!("====================================");
|
||||
|
||||
for axis in 0..k {
|
||||
let ev = result.eigenvalues[axis];
|
||||
let vec = &result.eigvecs[axis];
|
||||
|
||||
// Sort nodes by their value on this axis
|
||||
let mut indexed: Vec<(usize, f64)> = vec.iter()
|
||||
.enumerate()
|
||||
.map(|(i, &v)| (i, v))
|
||||
.collect();
|
||||
indexed.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
|
||||
|
||||
// Compute the "spread" — how much this axis differentiates
|
||||
let min_val = indexed.first().map(|x| x.1).unwrap_or(0.0);
|
||||
let max_val = indexed.last().map(|x| x.1).unwrap_or(0.0);
|
||||
|
||||
println!("\n--- Axis {} (λ={:.6}, range={:.4}) ---", axis, ev, max_val - min_val);
|
||||
|
||||
// Show extremes: 5 most negative, 5 most positive
|
||||
let show = 5;
|
||||
println!(" Negative pole:");
|
||||
for &(idx, val) in indexed.iter().take(show) {
|
||||
let key = &result.keys[idx];
|
||||
// Shorten key for display: take last component
|
||||
let short = shorten_key(key);
|
||||
let deg = graph.degree(key);
|
||||
let comm = graph.communities().get(key).copied().unwrap_or(999);
|
||||
println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
|
||||
}
|
||||
|
||||
println!(" Positive pole:");
|
||||
for &(idx, val) in indexed.iter().rev().take(show) {
|
||||
let key = &result.keys[idx];
|
||||
let short = shorten_key(key);
|
||||
let deg = graph.degree(key);
|
||||
let comm = graph.communities().get(key).copied().unwrap_or(999);
|
||||
println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shorten a node key for display.
|
||||
fn shorten_key(key: &str) -> &str {
|
||||
if key.len() > 60 { &key[..60] } else { key }
|
||||
}
|
||||
|
||||
/// Convert SpectralResult to a per-node embedding (transposing the layout).
|
||||
pub fn to_embedding(result: &SpectralResult) -> SpectralEmbedding {
|
||||
let dims = result.eigvecs.len();
|
||||
let mut coords = HashMap::new();
|
||||
|
||||
for (i, key) in result.keys.iter().enumerate() {
|
||||
let mut vec = Vec::with_capacity(dims);
|
||||
for d in 0..dims {
|
||||
vec.push(result.eigvecs[d][i]);
|
||||
}
|
||||
coords.insert(key.clone(), vec);
|
||||
}
|
||||
|
||||
SpectralEmbedding {
|
||||
dims,
|
||||
eigenvalues: result.eigenvalues.clone(),
|
||||
coords,
|
||||
}
|
||||
}
|
||||
|
||||
/// Save embedding to disk.
|
||||
pub fn save_embedding(emb: &SpectralEmbedding) -> Result<(), String> {
|
||||
let path = embedding_path();
|
||||
let json = serde_json::to_string(emb)
|
||||
.map_err(|e| format!("serialize embedding: {}", e))?;
|
||||
std::fs::write(&path, json)
|
||||
.map_err(|e| format!("write {}: {}", path.display(), e))?;
|
||||
eprintln!("Saved {}-dim embedding for {} nodes to {}",
|
||||
emb.dims, emb.coords.len(), path.display());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load embedding from disk.
|
||||
pub fn load_embedding() -> Result<SpectralEmbedding, String> {
|
||||
let path = embedding_path();
|
||||
let data = std::fs::read_to_string(&path)
|
||||
.map_err(|e| format!("read {}: {}", path.display(), e))?;
|
||||
serde_json::from_str(&data)
|
||||
.map_err(|e| format!("parse embedding: {}", e))
|
||||
}
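// Illustrative sketch (not part of this commit): the full spectral pipeline
// from an existing Graph to a persisted embedding and a neighbor lookup.
// The eigenpair count (16) and the node key are placeholders.
#[allow(dead_code)]
fn example_spectral_pipeline(graph: &Graph) -> Result<(), String> {
    let result = decompose(graph, 16);
    let emb = to_embedding(&result);
    save_embedding(&emb)?; // writes to the embedding path defined above

    // Later, possibly in another process: reload and query.
    let emb = load_embedding()?;
    for (key, dist) in nearest_neighbors(&emb, "identity", 5) {
        println!("{:.4}  {}", dist, key);
    }
    Ok(())
}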
|
||||
|
||||
/// Find the k nearest neighbors to a node in spectral space.
|
||||
///
|
||||
/// Uses weighted euclidean distance where each dimension is weighted
|
||||
/// by 1/eigenvalue — lower eigenvalues (coarser structure) matter more.
|
||||
pub fn nearest_neighbors(
|
||||
emb: &SpectralEmbedding,
|
||||
key: &str,
|
||||
k: usize,
|
||||
) -> Vec<(String, f64)> {
|
||||
let target = match emb.coords.get(key) {
|
||||
Some(c) => c,
|
||||
None => return vec![],
|
||||
};
|
||||
|
||||
// Weight by inverse eigenvalue (coarser axes matter more)
|
||||
let weights: Vec<f64> = emb.eigenvalues.iter()
|
||||
.map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
|
||||
.collect();
|
||||
|
||||
let mut distances: Vec<(String, f64)> = emb.coords.iter()
|
||||
.filter(|(k, _)| k.as_str() != key)
|
||||
.map(|(k, coords)| {
|
||||
let dist: f64 = target.iter()
|
||||
.zip(coords.iter())
|
||||
.zip(weights.iter())
|
||||
.map(|((&a, &b), &w)| w * (a - b) * (a - b))
|
||||
.sum::<f64>()
|
||||
.sqrt();
|
||||
(k.clone(), dist)
|
||||
})
|
||||
.collect();
|
||||
|
||||
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
|
||||
distances.truncate(k);
|
||||
distances
|
||||
}
|
||||
|
||||
/// Find nearest neighbors to a set of seed nodes (multi-seed query).
|
||||
/// Returns nodes ranked by minimum distance to any seed.
|
||||
pub fn nearest_to_seeds(
|
||||
emb: &SpectralEmbedding,
|
||||
seeds: &[&str],
|
||||
k: usize,
|
||||
) -> Vec<(String, f64)> {
|
||||
let seed_set: std::collections::HashSet<&str> = seeds.iter().copied().collect();
|
||||
|
||||
let seed_coords: Vec<&Vec<f64>> = seeds.iter()
|
||||
.filter_map(|s| emb.coords.get(*s))
|
||||
.collect();
|
||||
if seed_coords.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let weights: Vec<f64> = emb.eigenvalues.iter()
|
||||
.map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
|
||||
.collect();
|
||||
|
||||
let mut distances: Vec<(String, f64)> = emb.coords.iter()
|
||||
.filter(|(k, _)| !seed_set.contains(k.as_str()))
|
||||
.map(|(k, coords)| {
|
||||
// Distance to nearest seed
|
||||
let min_dist = seed_coords.iter()
|
||||
.map(|sc| {
|
||||
coords.iter()
|
||||
.zip(sc.iter())
|
||||
.zip(weights.iter())
|
||||
.map(|((&a, &b), &w)| w * (a - b) * (a - b))
|
||||
.sum::<f64>()
|
||||
.sqrt()
|
||||
})
|
||||
.fold(f64::MAX, f64::min);
|
||||
(k.clone(), min_dist)
|
||||
})
|
||||
.collect();
|
||||
|
||||
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
|
||||
distances.truncate(k);
|
||||
distances
|
||||
}
|
||||
|
||||
/// Weighted euclidean distance in spectral space.
|
||||
/// Dimensions weighted by 1/eigenvalue — coarser structure matters more.
|
||||
fn weighted_distance(a: &[f64], b: &[f64], weights: &[f64]) -> f64 {
|
||||
a.iter()
|
||||
.zip(b.iter())
|
||||
.zip(weights.iter())
|
||||
.map(|((&x, &y), &w)| w * (x - y) * (x - y))
|
||||
.sum::<f64>()
|
||||
.sqrt()
|
||||
}
|
||||
|
||||
/// Compute eigenvalue-inverse weights for distance calculations.
|
||||
fn eigenvalue_weights(eigenvalues: &[f64]) -> Vec<f64> {
|
||||
eigenvalues.iter()
|
||||
.map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Compute cluster centers (centroids) in spectral space.
|
||||
pub fn cluster_centers(
|
||||
emb: &SpectralEmbedding,
|
||||
communities: &HashMap<String, u32>,
|
||||
) -> HashMap<u32, Vec<f64>> {
|
||||
let mut sums: HashMap<u32, (Vec<f64>, usize)> = HashMap::new();
|
||||
|
||||
for (key, coords) in &emb.coords {
|
||||
if let Some(&comm) = communities.get(key) {
|
||||
let entry = sums.entry(comm)
|
||||
.or_insert_with(|| (vec![0.0; emb.dims], 0));
|
||||
for (i, &c) in coords.iter().enumerate() {
|
||||
entry.0[i] += c;
|
||||
}
|
||||
entry.1 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
sums.into_iter()
|
||||
.map(|(comm, (sum, count))| {
|
||||
let center: Vec<f64> = sum.iter()
|
||||
.map(|s| s / count as f64)
|
||||
.collect();
|
||||
(comm, center)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Per-node analysis of spectral position relative to communities.
|
||||
pub struct SpectralPosition {
|
||||
pub key: String,
|
||||
pub community: u32,
|
||||
/// Distance to own community center
|
||||
pub dist_to_center: f64,
|
||||
/// Distance to nearest OTHER community center
|
||||
pub dist_to_nearest: f64,
|
||||
/// Which community is nearest (other than own)
|
||||
pub nearest_community: u32,
|
||||
/// dist_to_center / median_dist_in_community (>1 = outlier)
|
||||
pub outlier_score: f64,
|
||||
/// dist_to_center / dist_to_nearest (>1 = between clusters, potential bridge)
|
||||
pub bridge_score: f64,
|
||||
}
|
||||
|
||||
/// Analyze spectral positions for all nodes.
|
||||
///
|
||||
/// Returns positions sorted by outlier_score descending (most displaced first).
|
||||
pub fn analyze_positions(
|
||||
emb: &SpectralEmbedding,
|
||||
communities: &HashMap<String, u32>,
|
||||
) -> Vec<SpectralPosition> {
|
||||
let centers = cluster_centers(emb, communities);
|
||||
let weights = eigenvalue_weights(&emb.eigenvalues);
|
||||
|
||||
// Compute distances to own community center
|
||||
let mut by_community: HashMap<u32, Vec<f64>> = HashMap::new();
|
||||
let mut node_dists: Vec<(String, u32, f64)> = Vec::new();
|
||||
|
||||
for (key, coords) in &emb.coords {
|
||||
if let Some(&comm) = communities.get(key) {
|
||||
if let Some(center) = centers.get(&comm) {
|
||||
let dist = weighted_distance(coords, center, &weights);
|
||||
by_community.entry(comm).or_default().push(dist);
|
||||
node_dists.push((key.clone(), comm, dist));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Median distance per community for outlier scoring
|
||||
let medians: HashMap<u32, f64> = by_community.into_iter()
|
||||
.map(|(comm, mut dists)| {
|
||||
dists.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
let median = if dists.is_empty() {
|
||||
1.0
|
||||
} else if dists.len() % 2 == 0 {
|
||||
(dists[dists.len() / 2 - 1] + dists[dists.len() / 2]) / 2.0
|
||||
} else {
|
||||
dists[dists.len() / 2]
|
||||
};
|
||||
(comm, median.max(1e-6))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut positions: Vec<SpectralPosition> = node_dists.into_iter()
|
||||
.map(|(key, comm, dist_to_center)| {
|
||||
let coords = &emb.coords[&key];
|
||||
|
||||
let (nearest_community, dist_to_nearest) = centers.iter()
|
||||
.filter(|(&c, _)| c != comm)
|
||||
.map(|(&c, center)| (c, weighted_distance(coords, center, &weights)))
|
||||
.min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
|
||||
.unwrap_or((comm, f64::MAX));
|
||||
|
||||
let median = medians.get(&comm).copied().unwrap_or(1.0);
|
||||
let outlier_score = dist_to_center / median;
|
||||
let bridge_score = if dist_to_nearest > 1e-8 {
|
||||
dist_to_center / dist_to_nearest
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
SpectralPosition {
|
||||
key, community: comm,
|
||||
dist_to_center, dist_to_nearest, nearest_community,
|
||||
outlier_score, bridge_score,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
positions.sort_by(|a, b| b.outlier_score.partial_cmp(&a.outlier_score).unwrap());
|
||||
positions
|
||||
}
|
||||
|
||||
/// Find pairs of nodes that are spectrally close but not linked in the graph.
|
||||
///
|
||||
/// These are the most valuable candidates for extractor agents —
|
||||
/// the spectral structure says they should be related, but nobody
|
||||
/// has articulated why.
|
||||
pub fn unlinked_neighbors(
|
||||
emb: &SpectralEmbedding,
|
||||
linked_pairs: &HashSet<(String, String)>,
|
||||
max_pairs: usize,
|
||||
) -> Vec<(String, String, f64)> {
|
||||
let weights = eigenvalue_weights(&emb.eigenvalues);
|
||||
let keys: Vec<&String> = emb.coords.keys().collect();
|
||||
let mut pairs: Vec<(String, String, f64)> = Vec::new();
|
||||
|
||||
for (i, k1) in keys.iter().enumerate() {
|
||||
let c1 = &emb.coords[*k1];
|
||||
for k2 in keys.iter().skip(i + 1) {
|
||||
// Skip if already linked
|
||||
let pair_fwd = ((*k1).clone(), (*k2).clone());
|
||||
let pair_rev = ((*k2).clone(), (*k1).clone());
|
||||
if linked_pairs.contains(&pair_fwd) || linked_pairs.contains(&pair_rev) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let dist = weighted_distance(c1, &emb.coords[*k2], &weights);
|
||||
pairs.push(((*k1).clone(), (*k2).clone(), dist));
|
||||
}
|
||||
}
|
||||
|
||||
pairs.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap());
|
||||
pairs.truncate(max_pairs);
|
||||
pairs
|
||||
}
|
||||
|
||||
/// Approximate spectral coordinates for a new node using Nyström extension.
|
||||
///
|
||||
/// Given a new node's edges to existing nodes, estimate where it would
|
||||
/// land in spectral space without recomputing the full decomposition.
|
||||
/// Uses weighted average of neighbors' coordinates, weighted by edge strength.
|
||||
pub fn nystrom_project(
|
||||
emb: &SpectralEmbedding,
|
||||
neighbors: &[(&str, f32)], // (key, edge_strength)
|
||||
) -> Option<Vec<f64>> {
|
||||
let mut weighted_sum = vec![0.0f64; emb.dims];
|
||||
let mut total_weight = 0.0f64;
|
||||
|
||||
for &(key, strength) in neighbors {
|
||||
if let Some(coords) = emb.coords.get(key) {
|
||||
let w = strength as f64;
|
||||
for (i, &c) in coords.iter().enumerate() {
|
||||
weighted_sum[i] += w * c;
|
||||
}
|
||||
total_weight += w;
|
||||
}
|
||||
}
|
||||
|
||||
if total_weight < 1e-8 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(weighted_sum.iter().map(|s| s / total_weight).collect())
|
||||
}
|
||||
|
||||
/// Classify a spectral position: well-integrated, outlier, bridge, or orphan.
|
||||
pub fn classify_position(pos: &SpectralPosition) -> &'static str {
|
||||
if pos.bridge_score > 0.7 {
|
||||
"bridge" // between two communities
|
||||
} else if pos.outlier_score > 2.0 {
|
||||
"outlier" // far from own community center
|
||||
} else if pos.outlier_score < 0.5 {
|
||||
"core" // close to community center
|
||||
} else {
|
||||
"peripheral" // normal community member
|
||||
}
|
||||
}
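// Illustrative sketch (not part of this commit): combining position analysis
// with classification to surface the most displaced nodes first.
#[allow(dead_code)]
fn example_report_positions(emb: &SpectralEmbedding, communities: &HashMap<String, u32>) {
    // analyze_positions returns positions sorted by outlier_score descending.
    for pos in analyze_positions(emb, communities).iter().take(10) {
        println!("{:<10} outlier={:.2} bridge={:.2}  {}",
            classify_position(pos), pos.outlier_score, pos.bridge_score, pos.key);
    }
}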
|
||||
|
||||
/// Identify which spectral dimensions a set of nodes load on most heavily.
|
||||
/// Returns dimension indices sorted by total loading.
|
||||
pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize, f64)> {
|
||||
let coords: Vec<&Vec<f64>> = keys.iter()
|
||||
.filter_map(|k| emb.coords.get(*k))
|
||||
.collect();
|
||||
if coords.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let mut dim_loading: Vec<(usize, f64)> = (0..emb.dims)
|
||||
.map(|d| {
|
||||
let loading: f64 = coords.iter()
|
||||
.map(|c| c[d].abs())
|
||||
.sum();
|
||||
(d, loading)
|
||||
})
|
||||
.collect();
|
||||
|
||||
dim_loading.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
dim_loading
|
||||
}
|
||||
326
poc-memory/src/store/mod.rs
Normal file
@@ -0,0 +1,326 @@
// Append-only Cap'n Proto storage + derived KV cache
|
||||
//
|
||||
// Two log files are source of truth:
|
||||
// nodes.capnp - ContentNode messages
|
||||
// relations.capnp - Relation messages
|
||||
//
|
||||
// The Store struct is the derived cache: latest version per UUID,
|
||||
// rebuilt from logs when stale. Three-tier load strategy:
|
||||
// 1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
|
||||
// 2. bincode cache (state.bin) — ~10ms
|
||||
// 3. capnp log replay — ~40ms
|
||||
// Staleness: log file sizes embedded in cache headers.
|
||||
//
|
||||
// Module layout:
|
||||
// types.rs — Node, Relation, enums, capnp macros, path helpers
|
||||
// parse.rs — markdown → MemoryUnit parsing
|
||||
// view.rs — zero-copy read-only access (StoreView, MmapView)
|
||||
// persist.rs — load, save, replay, append, snapshot (all disk IO)
|
||||
// ops.rs — mutations (upsert, delete, decay, cap_degree, etc.)
|
||||
// mod.rs — re-exports, key resolution, ingestion, rendering
|
||||
|
||||
mod types;
|
||||
mod parse;
|
||||
mod view;
|
||||
mod persist;
|
||||
mod ops;
|
||||
|
||||
// Re-export everything callers need
|
||||
pub use types::*;
|
||||
pub use parse::{MemoryUnit, parse_units};
|
||||
pub use view::{StoreView, AnyView};
|
||||
pub use persist::fsck;
|
||||
pub use persist::strip_md_keys;
|
||||
|
||||
use crate::graph::{self, Graph};
|
||||
|
||||
use std::fs;
|
||||
use std::io::Write as IoWrite;
|
||||
use std::path::Path;
|
||||
|
||||
use parse::classify_filename;
|
||||
|
||||
/// Strip .md suffix from a key, handling both bare keys and section keys.
|
||||
/// "journal.md#j-2026" → "journal#j-2026", "identity.md" → "identity", "identity" → "identity"
|
||||
pub fn strip_md_suffix(key: &str) -> String {
|
||||
if let Some((file, section)) = key.split_once('#') {
|
||||
let bare = file.strip_suffix(".md").unwrap_or(file);
|
||||
format!("{}#{}", bare, section)
|
||||
} else {
|
||||
key.strip_suffix(".md").unwrap_or(key).to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Store {
|
||||
pub fn build_graph(&self) -> Graph {
|
||||
graph::build_graph(self)
|
||||
}
|
||||
|
||||
pub fn resolve_key(&self, target: &str) -> Result<String, String> {
|
||||
// Strip .md suffix if present — keys no longer use it
|
||||
let bare = strip_md_suffix(target);
|
||||
|
||||
if self.nodes.contains_key(&bare) {
|
||||
return Ok(bare);
|
||||
}
|
||||
|
||||
let matches: Vec<_> = self.nodes.keys()
|
||||
.filter(|k| k.to_lowercase().contains(&target.to_lowercase()))
|
||||
.cloned().collect();
|
||||
|
||||
match matches.len() {
|
||||
0 => Err(format!("No entry for '{}'. Run 'init'?", target)),
|
||||
1 => Ok(matches[0].clone()),
|
||||
n if n <= 10 => {
|
||||
let list = matches.join("\n ");
|
||||
Err(format!("Ambiguous '{}'. Matches:\n {}", target, list))
|
||||
}
|
||||
n => Err(format!("Too many matches for '{}' ({}). Be more specific.", target, n)),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Resolve a link target to (key, uuid).
|
||||
fn resolve_node_uuid(&self, target: &str) -> Option<(String, [u8; 16])> {
|
||||
let bare = strip_md_suffix(target);
|
||||
let n = self.nodes.get(&bare)?;
|
||||
Some((bare, n.uuid))
|
||||
}
|
||||
|
||||
/// Append retrieval event to retrieval.log without needing a Store instance.
|
||||
pub fn log_retrieval_static(query: &str, results: &[String]) {
|
||||
let path = memory_dir().join("retrieval.log");
|
||||
let line = format!("[{}] q=\"{}\" hits={}\n", today(), query, results.len());
|
||||
if let Ok(mut f) = fs::OpenOptions::new()
|
||||
.create(true).append(true).open(&path) {
|
||||
let _ = f.write_all(line.as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan markdown files and index all memory units
|
||||
pub fn init_from_markdown(&mut self) -> Result<usize, String> {
|
||||
let dir = memory_dir();
|
||||
let mut count = 0;
|
||||
if dir.exists() {
|
||||
count = self.scan_dir_for_init(&dir)?;
|
||||
}
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
fn scan_dir_for_init(&mut self, dir: &Path) -> Result<usize, String> {
|
||||
let mut count = 0;
|
||||
let entries = fs::read_dir(dir)
|
||||
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
|
||||
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
count += self.scan_dir_for_init(&path)?;
|
||||
continue;
|
||||
}
|
||||
let Some(ext) = path.extension() else { continue };
|
||||
if ext != "md" { continue }
|
||||
|
||||
let filename = path.file_name().unwrap().to_string_lossy().to_string();
|
||||
let content = fs::read_to_string(&path)
|
||||
.map_err(|e| format!("read {}: {}", path.display(), e))?;
|
||||
|
||||
let units = parse_units(&filename, &content);
|
||||
let (new_count, _) = self.ingest_units(&units, &filename)?;
|
||||
count += new_count;
|
||||
|
||||
// Create relations from links
|
||||
let mut new_relations = Vec::new();
|
||||
for unit in &units {
|
||||
let source_uuid = match self.nodes.get(&unit.key) {
|
||||
Some(n) => n.uuid,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
|
||||
let Some((key, uuid)) = self.resolve_node_uuid(link) else { continue };
|
||||
let exists = self.relations.iter().any(|r|
|
||||
(r.source == source_uuid && r.target == uuid) ||
|
||||
(r.source == uuid && r.target == source_uuid));
|
||||
if !exists {
|
||||
new_relations.push(new_relation(
|
||||
source_uuid, uuid, RelationType::Link, 1.0,
|
||||
&unit.key, &key,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
for cause in &unit.causes {
|
||||
let Some((key, uuid)) = self.resolve_node_uuid(cause) else { continue };
|
||||
let exists = self.relations.iter().any(|r|
|
||||
r.source == uuid && r.target == source_uuid
|
||||
&& r.rel_type == RelationType::Causal);
|
||||
if !exists {
|
||||
new_relations.push(new_relation(
|
||||
uuid, source_uuid, RelationType::Causal, 1.0,
|
||||
&key, &unit.key,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !new_relations.is_empty() {
|
||||
self.append_relations(&new_relations)?;
|
||||
self.relations.extend(new_relations);
|
||||
}
|
||||
}
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
/// Process parsed memory units: diff against existing nodes, persist changes.
|
||||
fn ingest_units(&mut self, units: &[MemoryUnit], filename: &str) -> Result<(usize, usize), String> {
|
||||
let node_type = classify_filename(filename);
|
||||
let mut new_nodes = Vec::new();
|
||||
let mut updated_nodes = Vec::new();
|
||||
|
||||
for (pos, unit) in units.iter().enumerate() {
|
||||
if let Some(existing) = self.nodes.get(&unit.key) {
|
||||
if existing.content != unit.content || existing.position != pos as u32 {
|
||||
let mut node = existing.clone();
|
||||
node.content = unit.content.clone();
|
||||
node.position = pos as u32;
|
||||
node.version += 1;
|
||||
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
|
||||
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
|
||||
updated_nodes.push(node);
|
||||
}
|
||||
} else {
|
||||
let mut node = new_node(&unit.key, &unit.content);
|
||||
node.node_type = node_type;
|
||||
node.position = pos as u32;
|
||||
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
|
||||
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
|
||||
new_nodes.push(node);
|
||||
}
|
||||
}
|
||||
|
||||
if !new_nodes.is_empty() {
|
||||
self.append_nodes(&new_nodes)?;
|
||||
for node in &new_nodes {
|
||||
self.uuid_to_key.insert(node.uuid, node.key.clone());
|
||||
self.nodes.insert(node.key.clone(), node.clone());
|
||||
}
|
||||
}
|
||||
if !updated_nodes.is_empty() {
|
||||
self.append_nodes(&updated_nodes)?;
|
||||
for node in &updated_nodes {
|
||||
self.nodes.insert(node.key.clone(), node.clone());
|
||||
}
|
||||
}
|
||||
|
||||
Ok((new_nodes.len(), updated_nodes.len()))
|
||||
}
|
||||
|
||||
/// Import a markdown file into the store, parsing it into nodes.
|
||||
pub fn import_file(&mut self, path: &Path) -> Result<(usize, usize), String> {
|
||||
let filename = path.file_name().unwrap().to_string_lossy().to_string();
|
||||
let content = fs::read_to_string(path)
|
||||
.map_err(|e| format!("read {}: {}", path.display(), e))?;
|
||||
let units = parse_units(&filename, &content);
|
||||
self.ingest_units(&units, &filename)
|
||||
}
|
||||
|
||||
/// Gather all sections for a file key, sorted by position.
|
||||
pub fn file_sections(&self, file_key: &str) -> Option<Vec<&Node>> {
|
||||
let prefix = format!("{}#", file_key);
|
||||
let mut sections: Vec<_> = self.nodes.values()
|
||||
.filter(|n| n.key == file_key || n.key.starts_with(&prefix))
|
||||
.collect();
|
||||
if sections.is_empty() {
|
||||
return None;
|
||||
}
|
||||
sections.sort_by_key(|n| n.position);
|
||||
Some(sections)
|
||||
}
|
||||
|
||||
/// Render a file key as plain content (no mem markers).
|
||||
pub fn render_file(&self, file_key: &str) -> Option<String> {
|
||||
let sections = self.file_sections(file_key)?;
|
||||
let mut output = String::new();
|
||||
for node in &sections {
|
||||
output.push_str(&node.content);
|
||||
if !node.content.ends_with('\n') {
|
||||
output.push('\n');
|
||||
}
|
||||
output.push('\n');
|
||||
}
|
||||
Some(output.trim_end().to_string())
|
||||
}
|
||||
|
||||
/// Render a file key back to markdown with reconstituted mem markers.
|
||||
pub fn export_to_markdown(&self, file_key: &str) -> Option<String> {
|
||||
let sections = self.file_sections(file_key)?;
|
||||
|
||||
let mut output = String::new();
|
||||
for node in &sections {
|
||||
if node.key.contains('#') {
|
||||
let section_id = node.key.rsplit_once('#').map_or("", |(_, s)| s);
|
||||
|
||||
let links: Vec<_> = self.relations.iter()
|
||||
.filter(|r| r.source_key == node.key && !r.deleted
|
||||
&& r.rel_type != RelationType::Causal)
|
||||
.map(|r| r.target_key.clone())
|
||||
.collect();
|
||||
let causes: Vec<_> = self.relations.iter()
|
||||
.filter(|r| r.target_key == node.key && !r.deleted
|
||||
&& r.rel_type == RelationType::Causal)
|
||||
.map(|r| r.source_key.clone())
|
||||
.collect();
|
||||
|
||||
let mut marker_parts = vec![format!("id={}", section_id)];
|
||||
if !links.is_empty() {
|
||||
marker_parts.push(format!("links={}", links.join(",")));
|
||||
}
|
||||
if !causes.is_empty() {
|
||||
marker_parts.push(format!("causes={}", causes.join(",")));
|
||||
}
|
||||
|
||||
output.push_str(&format!("<!-- mem: {} -->\n", marker_parts.join(" ")));
|
||||
}
|
||||
output.push_str(&node.content);
|
||||
if !node.content.ends_with('\n') {
|
||||
output.push('\n');
|
||||
}
|
||||
output.push('\n');
|
||||
}
|
||||
|
||||
Some(output.trim_end().to_string())
|
||||
}
|
||||
|
||||
/// Find the episodic node that best matches the given entry text.
|
||||
pub fn find_journal_node(&self, entry_text: &str) -> Option<String> {
|
||||
if entry_text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let words: Vec<&str> = entry_text.split_whitespace()
|
||||
.filter(|w| w.len() > 5)
|
||||
.take(5)
|
||||
.collect();
|
||||
|
||||
let mut best_key = None;
|
||||
let mut best_score = 0;
|
||||
|
||||
for (key, node) in &self.nodes {
|
||||
if node.node_type != NodeType::EpisodicSession {
|
||||
continue;
|
||||
}
|
||||
let content_lower = node.content.to_lowercase();
|
||||
let score: usize = words.iter()
|
||||
.filter(|w| content_lower.contains(&w.to_lowercase()))
|
||||
.count();
|
||||
if score > best_score {
|
||||
best_score = score;
|
||||
best_key = Some(key.clone());
|
||||
}
|
||||
}
|
||||
|
||||
best_key
|
||||
}
|
||||
}
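// Illustrative sketch (not part of this commit): importing a markdown file and
// rendering it back out on an already-constructed Store. The "identity" file
// key is a placeholder for the example.
#[allow(dead_code)]
fn example_roundtrip(store: &mut Store, path: &Path) -> Result<(), String> {
    let (created, updated) = store.import_file(path)?;
    eprintln!("imported: {} new, {} updated", created, updated);
    if let Some(md) = store.export_to_markdown("identity") {
        println!("{}", md);
    }
    Ok(())
}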
|
||||
265
poc-memory/src/store/ops.rs
Normal file
@@ -0,0 +1,265 @@
// Mutation operations on the store
|
||||
//
|
||||
// CRUD (upsert, delete, modify), feedback tracking (mark_used, mark_wrong),
|
||||
// maintenance (decay, fix_categories, cap_degree), and graph metrics.
|
||||
|
||||
use super::types::*;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
impl Store {
|
||||
/// Add or update a node (appends to log + updates cache)
|
||||
pub fn upsert_node(&mut self, mut node: Node) -> Result<(), String> {
|
||||
if let Some(existing) = self.nodes.get(&node.key) {
|
||||
node.uuid = existing.uuid;
|
||||
node.version = existing.version + 1;
|
||||
}
|
||||
self.append_nodes(&[node.clone()])?;
|
||||
self.uuid_to_key.insert(node.uuid, node.key.clone());
|
||||
self.nodes.insert(node.key.clone(), node);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a relation (appends to log + updates cache)
|
||||
pub fn add_relation(&mut self, rel: Relation) -> Result<(), String> {
|
||||
self.append_relations(std::slice::from_ref(&rel))?;
|
||||
self.relations.push(rel);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Upsert a node: update if exists (and content changed), create if not.
|
||||
/// Returns: "created", "updated", or "unchanged".
|
||||
///
|
||||
/// Provenance is determined by the POC_PROVENANCE env var if set,
|
||||
/// otherwise defaults to Manual.
|
||||
pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> {
|
||||
let prov = Provenance::from_env().unwrap_or(Provenance::Manual);
|
||||
self.upsert_provenance(key, content, prov)
|
||||
}
|
||||
|
||||
/// Upsert with explicit provenance (for agent-created nodes).
|
||||
pub fn upsert_provenance(&mut self, key: &str, content: &str, provenance: Provenance) -> Result<&'static str, String> {
|
||||
if let Some(existing) = self.nodes.get(key) {
|
||||
if existing.content == content {
|
||||
return Ok("unchanged");
|
||||
}
|
||||
let mut node = existing.clone();
|
||||
node.content = content.to_string();
|
||||
node.provenance = provenance;
|
||||
node.version += 1;
|
||||
self.append_nodes(std::slice::from_ref(&node))?;
|
||||
self.nodes.insert(key.to_string(), node);
|
||||
Ok("updated")
|
||||
} else {
|
||||
let mut node = new_node(key, content);
|
||||
node.provenance = provenance;
|
||||
self.append_nodes(std::slice::from_ref(&node))?;
|
||||
self.uuid_to_key.insert(node.uuid, node.key.clone());
|
||||
self.nodes.insert(key.to_string(), node);
|
||||
Ok("created")
|
||||
}
|
||||
}
|
||||
|
||||
/// Soft-delete a node (appends deleted version, removes from cache).
|
||||
pub fn delete_node(&mut self, key: &str) -> Result<(), String> {
|
||||
let node = self.nodes.get(key)
|
||||
.ok_or_else(|| format!("No node '{}'", key))?;
|
||||
let mut deleted = node.clone();
|
||||
deleted.deleted = true;
|
||||
deleted.version += 1;
|
||||
self.append_nodes(std::slice::from_ref(&deleted))?;
|
||||
self.nodes.remove(key);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Rename a node: change its key, update debug strings on all edges.
|
||||
///
|
||||
/// Graph edges (source/target UUIDs) are unaffected — they're already
|
||||
/// UUID-based. We update the human-readable source_key/target_key strings
|
||||
/// on relations, and created_at is preserved untouched.
|
||||
///
|
||||
/// Appends: (new_key, v+1) + (old_key, deleted, v+1) + updated relations.
|
||||
pub fn rename_node(&mut self, old_key: &str, new_key: &str) -> Result<(), String> {
|
||||
if old_key == new_key {
|
||||
return Ok(());
|
||||
}
|
||||
if self.nodes.contains_key(new_key) {
|
||||
return Err(format!("Key '{}' already exists", new_key));
|
||||
}
|
||||
let node = self.nodes.get(old_key)
|
||||
.ok_or_else(|| format!("No node '{}'", old_key))?
|
||||
.clone();
|
||||
|
||||
// New version under the new key
|
||||
let mut renamed = node.clone();
|
||||
renamed.key = new_key.to_string();
|
||||
renamed.version += 1;
|
||||
|
||||
// Deletion record for the old key (same UUID, independent version counter)
|
||||
let mut tombstone = node.clone();
|
||||
tombstone.deleted = true;
|
||||
tombstone.version += 1;
|
||||
|
||||
// Collect affected relations and update their debug key strings
|
||||
let updated_rels: Vec<_> = self.relations.iter()
|
||||
.filter(|r| r.source_key == old_key || r.target_key == old_key)
|
||||
.map(|r| {
|
||||
let mut r = r.clone();
|
||||
r.version += 1;
|
||||
if r.source_key == old_key { r.source_key = new_key.to_string(); }
|
||||
if r.target_key == old_key { r.target_key = new_key.to_string(); }
|
||||
r
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Persist (each append acquires its own file lock)
|
||||
self.append_nodes(&[renamed.clone(), tombstone])?;
|
||||
if !updated_rels.is_empty() {
|
||||
self.append_relations(&updated_rels)?;
|
||||
}
|
||||
|
||||
// Update in-memory cache
|
||||
self.nodes.remove(old_key);
|
||||
self.uuid_to_key.insert(renamed.uuid, new_key.to_string());
|
||||
self.nodes.insert(new_key.to_string(), renamed);
|
||||
for updated in &updated_rels {
|
||||
if let Some(r) = self.relations.iter_mut().find(|r| r.uuid == updated.uuid) {
|
||||
r.source_key = updated.source_key.clone();
|
||||
r.target_key = updated.target_key.clone();
|
||||
r.version = updated.version;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
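    // Illustrative sketch (not part of this commit): a typical mutation
    // lifecycle exercising the ops in this module. Keys and content are
    // placeholders for the example.
    #[allow(dead_code)]
    fn example_node_lifecycle(&mut self) -> Result<(), String> {
        self.upsert("scratch#note-1", "transaction restart seen during journal replay")?;
        self.mark_used("scratch#note-1");
        self.rename_node("scratch#note-1", "scratch#journal-replay-restart")?;
        self.delete_node("scratch#journal-replay-restart")?;
        Ok(())
    }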
|
||||
|
||||
/// Modify a node in-place, bump version, and persist to capnp log.
|
||||
fn modify_node(&mut self, key: &str, f: impl FnOnce(&mut Node)) -> Result<(), String> {
|
||||
let node = self.nodes.get_mut(key)
|
||||
.ok_or_else(|| format!("No node '{}'", key))?;
|
||||
f(node);
|
||||
node.version += 1;
|
||||
let node = node.clone();
|
||||
self.append_nodes(&[node])
|
||||
}
|
||||
|
||||
pub fn mark_used(&mut self, key: &str) {
|
||||
let boost = self.params.use_boost as f32;
|
||||
let _ = self.modify_node(key, |n| {
|
||||
n.uses += 1;
|
||||
n.weight = (n.weight + boost).min(1.0);
|
||||
if n.spaced_repetition_interval < 30 {
|
||||
n.spaced_repetition_interval = match n.spaced_repetition_interval {
|
||||
1 => 3, 3 => 7, 7 => 14, 14 => 30, _ => 30,
|
||||
};
|
||||
}
|
||||
n.last_replayed = now_epoch();
|
||||
});
|
||||
}
|
||||
|
||||
pub fn mark_wrong(&mut self, key: &str, _ctx: Option<&str>) {
|
||||
let _ = self.modify_node(key, |n| {
|
||||
n.wrongs += 1;
|
||||
n.weight = (n.weight - 0.1).max(0.0);
|
||||
n.spaced_repetition_interval = 1;
|
||||
});
|
||||
}
|
||||
|
||||
pub fn record_gap(&mut self, desc: &str) {
|
||||
self.gaps.push(GapRecord {
|
||||
description: desc.to_string(),
|
||||
timestamp: today(),
|
||||
});
|
||||
}
|
||||
|
||||
/// Cap node degree by soft-deleting edges from mega-hubs.
|
||||
pub fn cap_degree(&mut self, max_degree: usize) -> Result<(usize, usize), String> {
|
||||
let mut node_degree: HashMap<String, usize> = HashMap::new();
|
||||
for rel in &self.relations {
|
||||
if rel.deleted { continue; }
|
||||
*node_degree.entry(rel.source_key.clone()).or_default() += 1;
|
||||
*node_degree.entry(rel.target_key.clone()).or_default() += 1;
|
||||
}
|
||||
|
||||
let mut node_edges: HashMap<String, Vec<usize>> = HashMap::new();
|
||||
for (i, rel) in self.relations.iter().enumerate() {
|
||||
if rel.deleted { continue; }
|
||||
node_edges.entry(rel.source_key.clone()).or_default().push(i);
|
||||
node_edges.entry(rel.target_key.clone()).or_default().push(i);
|
||||
}
|
||||
|
||||
let mut to_delete: HashSet<usize> = HashSet::new();
|
||||
let mut hubs_capped = 0;
|
||||
|
||||
for (_key, edge_indices) in &node_edges {
|
||||
let active: Vec<usize> = edge_indices.iter()
|
||||
.filter(|&&i| !to_delete.contains(&i))
|
||||
.copied()
|
||||
.collect();
|
||||
if active.len() <= max_degree { continue; }
|
||||
|
||||
let mut auto_indices: Vec<(usize, f32)> = Vec::new();
|
||||
let mut link_indices: Vec<(usize, usize)> = Vec::new();
|
||||
for &i in &active {
|
||||
let rel = &self.relations[i];
|
||||
if rel.rel_type == RelationType::Auto {
|
||||
auto_indices.push((i, rel.strength));
|
||||
} else {
|
||||
let other = if &rel.source_key == _key {
|
||||
&rel.target_key
|
||||
} else {
|
||||
&rel.source_key
|
||||
};
|
||||
let other_deg = node_degree.get(other).copied().unwrap_or(0);
|
||||
link_indices.push((i, other_deg));
|
||||
}
|
||||
}
|
||||
|
||||
let excess = active.len() - max_degree;
|
||||
|
||||
auto_indices.sort_by(|a, b| a.1.total_cmp(&b.1));
|
||||
let auto_prune = excess.min(auto_indices.len());
|
||||
for &(i, _) in auto_indices.iter().take(auto_prune) {
|
||||
to_delete.insert(i);
|
||||
}
|
||||
|
||||
let remaining_excess = excess.saturating_sub(auto_prune);
|
||||
if remaining_excess > 0 {
|
||||
link_indices.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
let link_prune = remaining_excess.min(link_indices.len());
|
||||
for &(i, _) in link_indices.iter().take(link_prune) {
|
||||
to_delete.insert(i);
|
||||
}
|
||||
}
|
||||
|
||||
hubs_capped += 1;
|
||||
}
|
||||
|
||||
let mut pruned_rels = Vec::new();
|
||||
for &i in &to_delete {
|
||||
self.relations[i].deleted = true;
|
||||
self.relations[i].version += 1;
|
||||
pruned_rels.push(self.relations[i].clone());
|
||||
}
|
||||
|
||||
if !pruned_rels.is_empty() {
|
||||
self.append_relations(&pruned_rels)?;
|
||||
}
|
||||
|
||||
self.relations.retain(|r| !r.deleted);
|
||||
|
||||
Ok((hubs_capped, to_delete.len()))
|
||||
}
|
||||
|
||||
/// Update graph-derived fields on all nodes
|
||||
pub fn update_graph_metrics(&mut self) {
|
||||
let g = self.build_graph();
|
||||
let communities = g.communities();
|
||||
|
||||
for (key, node) in &mut self.nodes {
|
||||
node.community_id = communities.get(key).copied();
|
||||
node.clustering_coefficient = Some(g.clustering_coefficient(key));
|
||||
node.degree = Some(g.degree(key) as u32);
|
||||
}
|
||||
}
|
||||
}
|
||||
173
poc-memory/src/store/parse.rs
Normal file
173
poc-memory/src/store/parse.rs
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
// Markdown parsing for memory files
|
||||
//
|
||||
// Splits markdown files into MemoryUnit structs based on `<!-- mem: ... -->`
|
||||
// markers. Each marker starts a new section; content before the first marker
|
||||
// becomes the file-level unit. Links and causal edges are extracted from
|
||||
// both marker attributes and inline markdown links.
|
||||
|
||||
use super::NodeType;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
pub struct MemoryUnit {
|
||||
pub key: String,
|
||||
pub content: String,
|
||||
pub marker_links: Vec<String>,
|
||||
pub md_links: Vec<String>,
|
||||
pub causes: Vec<String>,
|
||||
pub state: Option<String>,
|
||||
pub source_ref: Option<String>,
|
||||
}
|
||||
|
||||
pub fn classify_filename(filename: &str) -> NodeType {
|
||||
let bare = filename.strip_suffix(".md").unwrap_or(filename);
|
||||
if bare.starts_with("daily-") { NodeType::EpisodicDaily }
|
||||
else if bare.starts_with("weekly-") { NodeType::EpisodicWeekly }
|
||||
else if bare.starts_with("monthly-") { NodeType::EpisodicMonthly }
|
||||
else if bare == "journal" { NodeType::EpisodicSession }
|
||||
else { NodeType::Semantic }
|
||||
}
|
||||
|
||||
pub fn parse_units(raw_filename: &str, content: &str) -> Vec<MemoryUnit> {
|
||||
let filename = raw_filename.strip_suffix(".md").unwrap_or(raw_filename);
|
||||
static MARKER_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static SOURCE_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static MD_LINK_RE: OnceLock<Regex> = OnceLock::new();
|
||||
|
||||
let marker_re = MARKER_RE.get_or_init(||
|
||||
Regex::new(r"<!--\s*mem:\s*((?:id|links|tags|causes|state)\s*=\s*[^\s].*?)-->").unwrap());
|
||||
let source_re = SOURCE_RE.get_or_init(||
|
||||
Regex::new(r"<!--\s*source:\s*(.+?)\s*-->").unwrap());
|
||||
let md_link_re = MD_LINK_RE.get_or_init(||
|
||||
Regex::new(r"\[[^\]]*\]\(([^):]+(?:#[^)]*)?)\)").unwrap());
|
||||
|
||||
let markers: Vec<_> = marker_re.captures_iter(content)
|
||||
.map(|cap| {
|
||||
let full_match = cap.get(0).unwrap();
|
||||
let attrs_str = &cap[1];
|
||||
(full_match.start(), full_match.end(), parse_marker_attrs(attrs_str))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let find_source = |text: &str| -> Option<String> {
|
||||
source_re.captures(text).map(|c| c[1].trim().to_string())
|
||||
};
|
||||
|
||||
if markers.is_empty() {
|
||||
let source_ref = find_source(content);
|
||||
let md_links = extract_md_links(content, md_link_re, filename);
|
||||
return vec![MemoryUnit {
|
||||
key: filename.to_string(),
|
||||
content: content.to_string(),
|
||||
marker_links: Vec::new(),
|
||||
md_links,
|
||||
causes: Vec::new(),
|
||||
state: None,
|
||||
source_ref,
|
||||
}];
|
||||
}
|
||||
|
||||
let mut units = Vec::new();
|
||||
|
||||
let first_start = markers[0].0;
|
||||
let pre_content = content[..first_start].trim();
|
||||
if !pre_content.is_empty() {
|
||||
let source_ref = find_source(pre_content);
|
||||
let md_links = extract_md_links(pre_content, md_link_re, filename);
|
||||
units.push(MemoryUnit {
|
||||
key: filename.to_string(),
|
||||
content: pre_content.to_string(),
|
||||
marker_links: Vec::new(),
|
||||
md_links,
|
||||
causes: Vec::new(),
|
||||
state: None,
|
||||
source_ref,
|
||||
});
|
||||
}
|
||||
|
||||
for (i, (_, end, attrs)) in markers.iter().enumerate() {
|
||||
let unit_end = if i + 1 < markers.len() {
|
||||
markers[i + 1].0
|
||||
} else {
|
||||
content.len()
|
||||
};
|
||||
let unit_content = content[*end..unit_end].trim();
|
||||
|
||||
let id = attrs.get("id").cloned().unwrap_or_default();
|
||||
let key = if id.is_empty() {
|
||||
format!("{}#unnamed-{}", filename, i)
|
||||
} else {
|
||||
format!("{}#{}", filename, id)
|
||||
};
|
||||
|
||||
let marker_links = attrs.get("links")
|
||||
.map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let causes = attrs.get("causes")
|
||||
.map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let state = attrs.get("state").cloned();
|
||||
let source_ref = find_source(unit_content);
|
||||
let md_links = extract_md_links(unit_content, md_link_re, filename);
|
||||
|
||||
units.push(MemoryUnit {
|
||||
key,
|
||||
content: unit_content.to_string(),
|
||||
marker_links,
|
||||
md_links,
|
||||
causes,
|
||||
state,
|
||||
source_ref,
|
||||
});
|
||||
}
|
||||
|
||||
units
|
||||
}
|
||||
|
||||
fn parse_marker_attrs(attrs_str: &str) -> HashMap<String, String> {
|
||||
static ATTR_RE: OnceLock<Regex> = OnceLock::new();
|
||||
let attr_re = ATTR_RE.get_or_init(|| Regex::new(r"(\w+)\s*=\s*(\S+)").unwrap());
|
||||
let mut attrs = HashMap::new();
|
||||
for cap in attr_re.captures_iter(attrs_str) {
|
||||
attrs.insert(cap[1].to_string(), cap[2].to_string());
|
||||
}
|
||||
attrs
|
||||
}
|
||||
|
||||
fn extract_md_links(content: &str, re: &Regex, source_file: &str) -> Vec<String> {
|
||||
re.captures_iter(content)
|
||||
.map(|cap| normalize_link(&cap[1], source_file))
|
||||
.filter(|link| !link.starts_with(source_file) || link.contains('#'))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn normalize_link(target: &str, source_file: &str) -> String {
|
||||
let source_bare = source_file.strip_suffix(".md").unwrap_or(source_file);
|
||||
|
||||
if target.starts_with('#') {
|
||||
return format!("{}{}", source_bare, target);
|
||||
}
|
||||
|
||||
let (path_part, fragment) = if let Some(hash_pos) = target.find('#') {
|
||||
(&target[..hash_pos], Some(&target[hash_pos..]))
|
||||
} else {
|
||||
(target, None)
|
||||
};
|
||||
|
||||
let basename = Path::new(path_part)
|
||||
.file_name()
|
||||
.map(|f| f.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| path_part.to_string());
|
||||
let bare = basename.strip_suffix(".md").unwrap_or(&basename);
|
||||
|
||||
match fragment {
|
||||
Some(frag) => format!("{}{}", bare, frag),
|
||||
None => bare.to_string(),
|
||||
}
|
||||
}
|
||||
591
poc-memory/src/store/persist.rs
Normal file
591
poc-memory/src/store/persist.rs
Normal file
|
|
@ -0,0 +1,591 @@
|
|||
// Persistence layer: load, save, replay, append, snapshot
|
||||
//
|
||||
// Three-tier loading strategy:
|
||||
// 1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
|
||||
// 2. bincode cache (state.bin) — ~10ms
|
||||
// 3. capnp log replay — ~40ms
|
||||
//
|
||||
// Logs are append-only; cache staleness uses log file sizes, not mtimes.
|
||||
|
||||
use super::types::*;
|
||||
|
||||
use crate::memory_capnp;
|
||||
|
||||
use capnp::message;
|
||||
use capnp::serialize;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::io::{BufReader, BufWriter, Seek, Write as IoWrite};
|
||||
use std::path::Path;
|
||||
|
||||
impl Store {
|
||||
/// Load store from state.bin cache if fresh, otherwise rebuild from capnp logs.
|
||||
///
|
||||
/// Staleness check uses log file sizes (not mtimes). Since logs are
|
||||
/// append-only, any write grows the file, invalidating the cache.
|
||||
/// This avoids the mtime race that caused data loss with concurrent
|
||||
/// writers (dream loop, link audit, journal enrichment).
|
||||
pub fn load() -> Result<Store, String> {
|
||||
// 1. Try rkyv mmap snapshot (~4ms with deserialize, <1ms zero-copy)
|
||||
match Self::load_snapshot_mmap() {
|
||||
Ok(Some(store)) => return Ok(store),
|
||||
Ok(None) => {},
|
||||
Err(e) => eprintln!("rkyv snapshot: {}", e),
|
||||
}
|
||||
|
||||
// 2. Try bincode state.bin cache (~10ms)
|
||||
let nodes_p = nodes_path();
|
||||
let rels_p = relations_path();
|
||||
let state_p = state_path();
|
||||
|
||||
let nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
|
||||
let rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
|
||||
|
||||
if let Ok(data) = fs::read(&state_p) {
|
||||
if data.len() >= CACHE_HEADER_LEN && data[..4] == CACHE_MAGIC {
|
||||
let cached_nodes = u64::from_le_bytes(data[4..12].try_into().unwrap());
|
||||
let cached_rels = u64::from_le_bytes(data[12..20].try_into().unwrap());
|
||||
|
||||
if cached_nodes == nodes_size && cached_rels == rels_size {
|
||||
if let Ok(mut store) = bincode::deserialize::<Store>(&data[CACHE_HEADER_LEN..]) {
|
||||
// Rebuild uuid_to_key (skipped by serde)
|
||||
for (key, node) in &store.nodes {
|
||||
store.uuid_to_key.insert(node.uuid, key.clone());
|
||||
}
|
||||
store.loaded_nodes_size = nodes_size;
|
||||
store.loaded_rels_size = rels_size;
|
||||
// Bootstrap: write rkyv snapshot if missing
|
||||
if !snapshot_path().exists() {
|
||||
if let Err(e) = store.save_snapshot(cached_nodes, cached_rels) {
|
||||
eprintln!("rkyv bootstrap: {}", e);
|
||||
}
|
||||
}
|
||||
return Ok(store);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stale or no cache — rebuild from capnp logs
|
||||
let mut store = Store::default();
|
||||
|
||||
if nodes_p.exists() {
|
||||
store.replay_nodes(&nodes_p)?;
|
||||
}
|
||||
if rels_p.exists() {
|
||||
store.replay_relations(&rels_p)?;
|
||||
}
|
||||
|
||||
// Record log sizes after replay — this is the state we reflect
|
||||
store.loaded_nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
|
||||
store.loaded_rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
|
||||
|
||||
// Drop edges referencing deleted/missing nodes
|
||||
store.relations.retain(|r|
|
||||
store.nodes.contains_key(&r.source_key) &&
|
||||
store.nodes.contains_key(&r.target_key)
|
||||
);
|
||||
|
||||
store.save()?;
|
||||
Ok(store)
|
||||
}
|
||||
|
||||
/// Replay node log, keeping latest version per UUID.
|
||||
/// Tracks all UUIDs seen per key to detect duplicates.
|
||||
fn replay_nodes(&mut self, path: &Path) -> Result<(), String> {
|
||||
let file = fs::File::open(path)
|
||||
.map_err(|e| format!("open {}: {}", path.display(), e))?;
|
||||
let mut reader = BufReader::new(file);
|
||||
|
||||
// Track all non-deleted UUIDs per key to detect duplicates
|
||||
let mut key_uuids: HashMap<String, Vec<[u8; 16]>> = HashMap::new();
|
||||
|
||||
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
|
||||
let log = msg.get_root::<memory_capnp::node_log::Reader>()
|
||||
.map_err(|e| format!("read node log: {}", e))?;
|
||||
for node_reader in log.get_nodes()
|
||||
.map_err(|e| format!("get nodes: {}", e))? {
|
||||
let node = Node::from_capnp(node_reader)?;
|
||||
let existing_version = self.nodes.get(&node.key)
|
||||
.map(|n| n.version)
|
||||
.unwrap_or(0);
|
||||
if node.version >= existing_version {
|
||||
if node.deleted {
|
||||
self.nodes.remove(&node.key);
|
||||
self.uuid_to_key.remove(&node.uuid);
|
||||
if let Some(uuids) = key_uuids.get_mut(&node.key) {
|
||||
uuids.retain(|u| *u != node.uuid);
|
||||
}
|
||||
} else {
|
||||
self.uuid_to_key.insert(node.uuid, node.key.clone());
|
||||
self.nodes.insert(node.key.clone(), node.clone());
|
||||
let uuids = key_uuids.entry(node.key).or_default();
|
||||
if !uuids.contains(&node.uuid) {
|
||||
uuids.push(node.uuid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Report duplicate keys
|
||||
for (key, uuids) in &key_uuids {
|
||||
if uuids.len() > 1 {
|
||||
eprintln!("WARNING: key '{}' has {} UUIDs (duplicate nodes)", key, uuids.len());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Replay relation log, keeping latest version per UUID
|
||||
fn replay_relations(&mut self, path: &Path) -> Result<(), String> {
|
||||
let file = fs::File::open(path)
|
||||
.map_err(|e| format!("open {}: {}", path.display(), e))?;
|
||||
let mut reader = BufReader::new(file);
|
||||
|
||||
// Collect all, then deduplicate by UUID keeping latest version
|
||||
let mut by_uuid: HashMap<[u8; 16], Relation> = HashMap::new();
|
||||
|
||||
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
|
||||
let log = msg.get_root::<memory_capnp::relation_log::Reader>()
|
||||
.map_err(|e| format!("read relation log: {}", e))?;
|
||||
for rel_reader in log.get_relations()
|
||||
.map_err(|e| format!("get relations: {}", e))? {
|
||||
let rel = Relation::from_capnp(rel_reader)?;
|
||||
let existing_version = by_uuid.get(&rel.uuid)
|
||||
.map(|r| r.version)
|
||||
.unwrap_or(0);
|
||||
if rel.version >= existing_version {
|
||||
by_uuid.insert(rel.uuid, rel);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.relations = by_uuid.into_values()
|
||||
.filter(|r| !r.deleted)
|
||||
.collect();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Append nodes to the log file.
|
||||
/// Serializes to a Vec first, then does a single write() syscall
|
||||
/// so the append is atomic with O_APPEND even without flock.
|
||||
pub fn append_nodes(&mut self, nodes: &[Node]) -> Result<(), String> {
|
||||
let _lock = StoreLock::acquire()?;
|
||||
|
||||
let mut msg = message::Builder::new_default();
|
||||
{
|
||||
let log = msg.init_root::<memory_capnp::node_log::Builder>();
|
||||
let mut list = log.init_nodes(nodes.len() as u32);
|
||||
for (i, node) in nodes.iter().enumerate() {
|
||||
node.to_capnp(list.reborrow().get(i as u32));
|
||||
}
|
||||
}
|
||||
let mut buf = Vec::new();
|
||||
serialize::write_message(&mut buf, &msg)
|
||||
.map_err(|e| format!("serialize nodes: {}", e))?;
|
||||
|
||||
let path = nodes_path();
|
||||
let file = fs::OpenOptions::new()
|
||||
.create(true).append(true).open(&path)
|
||||
.map_err(|e| format!("open {}: {}", path.display(), e))?;
|
||||
use std::io::Write;
|
||||
(&file).write_all(&buf)
|
||||
.map_err(|e| format!("write nodes: {}", e))?;
|
||||
|
||||
self.loaded_nodes_size = file.metadata().map(|m| m.len()).unwrap_or(0);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Append relations to the log file.
|
||||
/// Single write() syscall for atomic O_APPEND.
|
||||
pub fn append_relations(&mut self, relations: &[Relation]) -> Result<(), String> {
|
||||
let _lock = StoreLock::acquire()?;
|
||||
|
||||
let mut msg = message::Builder::new_default();
|
||||
{
|
||||
let log = msg.init_root::<memory_capnp::relation_log::Builder>();
|
||||
let mut list = log.init_relations(relations.len() as u32);
|
||||
for (i, rel) in relations.iter().enumerate() {
|
||||
rel.to_capnp(list.reborrow().get(i as u32));
|
||||
}
|
||||
}
|
||||
let mut buf = Vec::new();
|
||||
serialize::write_message(&mut buf, &msg)
|
||||
.map_err(|e| format!("serialize relations: {}", e))?;
|
||||
|
||||
let path = relations_path();
|
||||
let file = fs::OpenOptions::new()
|
||||
.create(true).append(true).open(&path)
|
||||
.map_err(|e| format!("open {}: {}", path.display(), e))?;
|
||||
use std::io::Write;
|
||||
(&file).write_all(&buf)
|
||||
.map_err(|e| format!("write relations: {}", e))?;
|
||||
|
||||
self.loaded_rels_size = file.metadata().map(|m| m.len()).unwrap_or(0);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Save the derived cache with log size header for staleness detection.
|
||||
/// Uses atomic write (tmp + rename) to prevent partial reads.
|
||||
pub fn save(&self) -> Result<(), String> {
|
||||
let _lock = StoreLock::acquire()?;
|
||||
|
||||
let path = state_path();
|
||||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent).ok();
|
||||
}
|
||||
|
||||
// Use log sizes from load time, not current filesystem sizes.
|
||||
// If another writer appended since we loaded, our recorded size
|
||||
// will be smaller than the actual log → next reader detects stale
|
||||
// cache and replays the (correct, append-only) log.
|
||||
let nodes_size = self.loaded_nodes_size;
|
||||
let rels_size = self.loaded_rels_size;
|
||||
|
||||
let bincode_data = bincode::serialize(self)
|
||||
.map_err(|e| format!("bincode serialize: {}", e))?;
|
||||
|
||||
let mut data = Vec::with_capacity(CACHE_HEADER_LEN + bincode_data.len());
|
||||
data.extend_from_slice(&CACHE_MAGIC);
|
||||
data.extend_from_slice(&nodes_size.to_le_bytes());
|
||||
data.extend_from_slice(&rels_size.to_le_bytes());
|
||||
data.extend_from_slice(&bincode_data);
|
||||
|
||||
// Atomic write: tmp file + rename
|
||||
let tmp_path = path.with_extension("bin.tmp");
|
||||
fs::write(&tmp_path, &data)
|
||||
.map_err(|e| format!("write {}: {}", tmp_path.display(), e))?;
|
||||
fs::rename(&tmp_path, &path)
|
||||
.map_err(|e| format!("rename {} → {}: {}", tmp_path.display(), path.display(), e))?;
|
||||
|
||||
// Also write rkyv snapshot (mmap-friendly)
|
||||
if let Err(e) = self.save_snapshot(nodes_size, rels_size) {
|
||||
eprintln!("rkyv snapshot save: {}", e);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Serialize store as rkyv snapshot with staleness header.
|
||||
/// Assumes StoreLock is already held by caller.
|
||||
fn save_snapshot(&self, nodes_size: u64, rels_size: u64) -> Result<(), String> {
|
||||
let snap = Snapshot {
|
||||
nodes: self.nodes.clone(),
|
||||
relations: self.relations.iter().filter(|r| !r.deleted).cloned().collect(),
|
||||
gaps: self.gaps.clone(),
|
||||
params: self.params,
|
||||
};
|
||||
|
||||
let rkyv_data = rkyv::to_bytes::<_, 256>(&snap)
|
||||
.map_err(|e| format!("rkyv serialize: {}", e))?;
|
||||
|
||||
let mut data = Vec::with_capacity(RKYV_HEADER_LEN + rkyv_data.len());
|
||||
data.extend_from_slice(&RKYV_MAGIC);
|
||||
data.extend_from_slice(&1u32.to_le_bytes()); // format version
|
||||
data.extend_from_slice(&nodes_size.to_le_bytes());
|
||||
data.extend_from_slice(&rels_size.to_le_bytes());
|
||||
data.extend_from_slice(&(rkyv_data.len() as u64).to_le_bytes());
|
||||
data.extend_from_slice(&rkyv_data);
|
||||
|
||||
let path = snapshot_path();
|
||||
let tmp_path = path.with_extension("rkyv.tmp");
|
||||
fs::write(&tmp_path, &data)
|
||||
.map_err(|e| format!("write {}: {}", tmp_path.display(), e))?;
|
||||
fs::rename(&tmp_path, &path)
|
||||
.map_err(|e| format!("rename: {}", e))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Try loading store from mmap'd rkyv snapshot.
|
||||
/// Returns None if snapshot is missing or stale (log sizes don't match).
|
||||
fn load_snapshot_mmap() -> Result<Option<Store>, String> {
|
||||
let path = snapshot_path();
|
||||
if !path.exists() { return Ok(None); }
|
||||
|
||||
let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
|
||||
let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);
|
||||
|
||||
let file = fs::File::open(&path)
|
||||
.map_err(|e| format!("open {}: {}", path.display(), e))?;
|
||||
|
||||
let mmap = unsafe { memmap2::Mmap::map(&file) }
|
||||
.map_err(|e| format!("mmap {}: {}", path.display(), e))?;
|
||||
|
||||
if mmap.len() < RKYV_HEADER_LEN { return Ok(None); }
|
||||
if mmap[..4] != RKYV_MAGIC { return Ok(None); }
|
||||
|
||||
// [4..8] = version, skip for now
|
||||
let cached_nodes = u64::from_le_bytes(mmap[8..16].try_into().unwrap());
|
||||
let cached_rels = u64::from_le_bytes(mmap[16..24].try_into().unwrap());
|
||||
let data_len = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize;
|
||||
|
||||
if cached_nodes != nodes_size || cached_rels != rels_size {
|
||||
return Ok(None); // stale
|
||||
}
|
||||
if mmap.len() < RKYV_HEADER_LEN + data_len {
|
||||
return Ok(None); // truncated
|
||||
}
|
||||
|
||||
let rkyv_data = &mmap[RKYV_HEADER_LEN..RKYV_HEADER_LEN + data_len];
|
||||
|
||||
// SAFETY: we wrote this file ourselves via save_snapshot().
|
||||
// Skip full validation (check_archived_root) — the staleness header
|
||||
// already confirms this snapshot matches the current log state.
|
||||
let archived = unsafe { rkyv::archived_root::<Snapshot>(rkyv_data) };
|
||||
|
||||
let snap: Snapshot = <ArchivedSnapshot as rkyv::Deserialize<Snapshot, rkyv::Infallible>>
|
||||
::deserialize(archived, &mut rkyv::Infallible).unwrap();
|
||||
|
||||
let mut store = Store {
|
||||
nodes: snap.nodes,
|
||||
relations: snap.relations,
|
||||
gaps: snap.gaps,
|
||||
params: snap.params,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Rebuild uuid_to_key (not serialized)
|
||||
for (key, node) in &store.nodes {
|
||||
store.uuid_to_key.insert(node.uuid, key.clone());
|
||||
}
|
||||
store.loaded_nodes_size = nodes_size;
|
||||
store.loaded_rels_size = rels_size;
|
||||
|
||||
Ok(Some(store))
|
||||
}
|
||||
}
|
||||
|
||||
/// Strip .md suffix from all node keys and relation key strings.
|
||||
/// Merges duplicates (bare key + .md key) by keeping the latest version.
|
||||
pub fn strip_md_keys() -> Result<(), String> {
|
||||
use super::strip_md_suffix;
|
||||
|
||||
let mut store = Store::load()?;
|
||||
let mut renamed_nodes = 0usize;
|
||||
let mut renamed_rels = 0usize;
|
||||
let mut merged = 0usize;
|
||||
|
||||
// Collect keys that need renaming
|
||||
let old_keys: Vec<String> = store.nodes.keys()
|
||||
.filter(|k| k.ends_with(".md") || k.contains(".md#"))
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
for old_key in &old_keys {
|
||||
let new_key = strip_md_suffix(old_key);
|
||||
if new_key == *old_key { continue; }
|
||||
|
||||
let mut node = store.nodes.remove(old_key).unwrap();
|
||||
store.uuid_to_key.remove(&node.uuid);
|
||||
|
||||
if let Some(existing) = store.nodes.get(&new_key) {
|
||||
// Merge: keep whichever has the higher version
|
||||
if existing.version >= node.version {
|
||||
eprintln!(" merge {} → {} (keeping existing v{})",
|
||||
old_key, new_key, existing.version);
|
||||
merged += 1;
|
||||
continue;
|
||||
}
|
||||
eprintln!(" merge {} → {} (replacing v{} with v{})",
|
||||
old_key, new_key, existing.version, node.version);
|
||||
merged += 1;
|
||||
}
|
||||
|
||||
node.key = new_key.clone();
|
||||
node.version += 1;
|
||||
store.uuid_to_key.insert(node.uuid, new_key.clone());
|
||||
store.nodes.insert(new_key, node);
|
||||
renamed_nodes += 1;
|
||||
}
|
||||
|
||||
// Fix relation key strings
|
||||
for rel in &mut store.relations {
|
||||
let new_source = strip_md_suffix(&rel.source_key);
|
||||
let new_target = strip_md_suffix(&rel.target_key);
|
||||
if new_source != rel.source_key || new_target != rel.target_key {
|
||||
rel.source_key = new_source;
|
||||
rel.target_key = new_target;
|
||||
rel.version += 1;
|
||||
renamed_rels += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if renamed_nodes == 0 && renamed_rels == 0 && merged == 0 {
|
||||
eprintln!("No .md suffixes found — store is clean");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
eprintln!("Renamed {} nodes, {} relations, merged {} duplicates",
|
||||
renamed_nodes, renamed_rels, merged);
|
||||
|
||||
// Write fresh logs from the migrated state
|
||||
rewrite_store(&store)?;
|
||||
|
||||
eprintln!("Store rewritten successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Rewrite the entire store from scratch (fresh logs + caches).
|
||||
/// Used after migrations that change keys across all nodes/relations.
|
||||
fn rewrite_store(store: &Store) -> Result<(), String> {
|
||||
let _lock = StoreLock::acquire()?;
|
||||
|
||||
// Write fresh node log
|
||||
let nodes: Vec<_> = store.nodes.values().cloned().collect();
|
||||
let nodes_path = nodes_path();
|
||||
{
|
||||
let file = fs::File::create(&nodes_path)
|
||||
.map_err(|e| format!("create {}: {}", nodes_path.display(), e))?;
|
||||
let mut writer = BufWriter::new(file);
|
||||
|
||||
// Write in chunks to keep message sizes reasonable
|
||||
for chunk in nodes.chunks(100) {
|
||||
let mut msg = message::Builder::new_default();
|
||||
{
|
||||
let log = msg.init_root::<memory_capnp::node_log::Builder>();
|
||||
let mut list = log.init_nodes(chunk.len() as u32);
|
||||
for (i, node) in chunk.iter().enumerate() {
|
||||
node.to_capnp(list.reborrow().get(i as u32));
|
||||
}
|
||||
}
|
||||
serialize::write_message(&mut writer, &msg)
|
||||
.map_err(|e| format!("write nodes: {}", e))?;
|
||||
}
|
||||
}
|
||||
|
||||
// Write fresh relation log
|
||||
let rels_path = relations_path();
|
||||
{
|
||||
let file = fs::File::create(&rels_path)
|
||||
.map_err(|e| format!("create {}: {}", rels_path.display(), e))?;
|
||||
let mut writer = BufWriter::new(file);
|
||||
|
||||
let rels: Vec<_> = store.relations.iter().filter(|r| !r.deleted).cloned().collect();
|
||||
if !rels.is_empty() {
|
||||
for chunk in rels.chunks(100) {
|
||||
let mut msg = message::Builder::new_default();
|
||||
{
|
||||
let log = msg.init_root::<memory_capnp::relation_log::Builder>();
|
||||
let mut list = log.init_relations(chunk.len() as u32);
|
||||
for (i, rel) in chunk.iter().enumerate() {
|
||||
rel.to_capnp(list.reborrow().get(i as u32));
|
||||
}
|
||||
}
|
||||
serialize::write_message(&mut writer, &msg)
|
||||
.map_err(|e| format!("write relations: {}", e))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Nuke caches so next load rebuilds from fresh logs
|
||||
for p in [state_path(), snapshot_path()] {
|
||||
if p.exists() {
|
||||
fs::remove_file(&p).ok();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check and repair corrupt capnp log files.
|
||||
///
|
||||
/// Reads each message sequentially, tracking file position. On the first
|
||||
/// corrupt message, truncates the file to the last good position. Also
|
||||
/// removes stale caches so the next load replays from the repaired log.
|
||||
pub fn fsck() -> Result<(), String> {
|
||||
let mut any_corrupt = false;
|
||||
|
||||
for (path, kind) in [
|
||||
(nodes_path(), "node"),
|
||||
(relations_path(), "relation"),
|
||||
] {
|
||||
if !path.exists() { continue; }
|
||||
|
||||
let file = fs::File::open(&path)
|
||||
.map_err(|e| format!("open {}: {}", path.display(), e))?;
|
||||
let file_len = file.metadata()
|
||||
.map_err(|e| format!("stat {}: {}", path.display(), e))?.len();
|
||||
let mut reader = BufReader::new(file);
|
||||
|
||||
let mut good_messages = 0u64;
|
||||
let mut last_good_pos = 0u64;
|
||||
|
||||
loop {
|
||||
let pos = reader.stream_position()
|
||||
.map_err(|e| format!("tell {}: {}", path.display(), e))?;
|
||||
|
||||
let msg = match serialize::read_message(&mut reader, message::ReaderOptions::new()) {
|
||||
Ok(m) => m,
|
||||
Err(_) => {
|
||||
// read_message fails at EOF (normal) or on corrupt framing
|
||||
if pos < file_len {
|
||||
// Not at EOF — corrupt framing
|
||||
eprintln!("{}: corrupt message at offset {}, truncating", kind, pos);
|
||||
any_corrupt = true;
|
||||
drop(reader);
|
||||
let file = fs::OpenOptions::new().write(true).open(&path)
|
||||
.map_err(|e| format!("open for truncate: {}", e))?;
|
||||
file.set_len(pos)
|
||||
.map_err(|e| format!("truncate {}: {}", path.display(), e))?;
|
||||
eprintln!("{}: truncated from {} to {} bytes ({} good messages)",
|
||||
kind, file_len, pos, good_messages);
|
||||
}
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
// Validate the message content too
|
||||
let valid = if kind == "node" {
|
||||
msg.get_root::<memory_capnp::node_log::Reader>()
|
||||
.and_then(|l| l.get_nodes().map(|_| ()))
|
||||
.is_ok()
|
||||
} else {
|
||||
msg.get_root::<memory_capnp::relation_log::Reader>()
|
||||
.and_then(|l| l.get_relations().map(|_| ()))
|
||||
.is_ok()
|
||||
};
|
||||
|
||||
if valid {
|
||||
good_messages += 1;
|
||||
last_good_pos = reader.stream_position()
|
||||
.map_err(|e| format!("tell {}: {}", path.display(), e))?;
|
||||
} else {
|
||||
eprintln!("{}: corrupt message content at offset {}, truncating to {}",
|
||||
kind, pos, last_good_pos);
|
||||
any_corrupt = true;
|
||||
drop(reader);
|
||||
let file = fs::OpenOptions::new().write(true).open(&path)
|
||||
.map_err(|e| format!("open for truncate: {}", e))?;
|
||||
file.set_len(last_good_pos)
|
||||
.map_err(|e| format!("truncate {}: {}", path.display(), e))?;
|
||||
eprintln!("{}: truncated from {} to {} bytes ({} good messages)",
|
||||
kind, file_len, last_good_pos, good_messages);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !any_corrupt {
|
||||
eprintln!("{}: {} messages, all clean", kind, good_messages);
|
||||
}
|
||||
}
|
||||
|
||||
if any_corrupt {
|
||||
// Nuke caches so next load replays from the repaired logs
|
||||
for p in [state_path(), snapshot_path()] {
|
||||
if p.exists() {
|
||||
fs::remove_file(&p)
|
||||
.map_err(|e| format!("remove {}: {}", p.display(), e))?;
|
||||
eprintln!("removed stale cache: {}", p.display());
|
||||
}
|
||||
}
|
||||
eprintln!("repair complete — run `poc-memory status` to verify");
|
||||
} else {
|
||||
eprintln!("store is clean");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
500
poc-memory/src/store/types.rs
Normal file
500
poc-memory/src/store/types.rs
Normal file
|
|
@ -0,0 +1,500 @@
|
|||
// Core types for the memory store
|
||||
//
|
||||
// Node, Relation, enums, Params, and supporting types. Also contains
|
||||
// the capnp serialization macros that generate bidirectional conversion.
|
||||
|
||||
use crate::memory_capnp;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use std::path::PathBuf;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Capnp serialization macros
|
||||
//
|
||||
// Declarative mapping between Rust types and capnp generated types.
|
||||
// Adding a field to the schema means adding it in one place below;
|
||||
// both read and write are generated from the same declaration.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Generate to_capnp/from_capnp conversion methods for an enum.
|
||||
macro_rules! capnp_enum {
|
||||
($rust_type:ident, $capnp_type:path, [$($variant:ident),+ $(,)?]) => {
|
||||
impl $rust_type {
|
||||
pub(crate) fn to_capnp(&self) -> $capnp_type {
|
||||
match self {
|
||||
$(Self::$variant => <$capnp_type>::$variant,)+
|
||||
}
|
||||
}
|
||||
pub(crate) fn from_capnp(v: $capnp_type) -> Self {
|
||||
match v {
|
||||
$(<$capnp_type>::$variant => Self::$variant,)+
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Generate from_capnp/to_capnp methods for a struct with capnp serialization.
|
||||
/// Fields are grouped by serialization kind:
|
||||
/// text - capnp Text fields (String in Rust)
|
||||
/// uuid - capnp Data fields ([u8; 16] in Rust)
|
||||
/// prim - copy types (u32, f32, f64, bool)
|
||||
/// enm - enums with to_capnp/from_capnp methods
|
||||
/// skip - Rust-only fields not in capnp (set to Default on read)
|
||||
macro_rules! capnp_message {
|
||||
(
|
||||
$struct:ident,
|
||||
reader: $reader:ty,
|
||||
builder: $builder:ty,
|
||||
text: [$($tf:ident),* $(,)?],
|
||||
uuid: [$($uf:ident),* $(,)?],
|
||||
prim: [$($pf:ident),* $(,)?],
|
||||
enm: [$($ef:ident: $et:ident),* $(,)?],
|
||||
skip: [$($sf:ident),* $(,)?] $(,)?
|
||||
) => {
|
||||
impl $struct {
|
||||
pub fn from_capnp(r: $reader) -> Result<Self, String> {
|
||||
paste::paste! {
|
||||
Ok(Self {
|
||||
$($tf: read_text(r.[<get_ $tf>]()),)*
|
||||
$($uf: read_uuid(r.[<get_ $uf>]()),)*
|
||||
$($pf: r.[<get_ $pf>](),)*
|
||||
$($ef: $et::from_capnp(
|
||||
r.[<get_ $ef>]().map_err(|_| concat!("bad ", stringify!($ef)))?
|
||||
),)*
|
||||
$($sf: Default::default(),)*
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn to_capnp(&self, mut b: $builder) {
|
||||
paste::paste! {
|
||||
$(b.[<set_ $tf>](&self.$tf);)*
|
||||
$(b.[<set_ $uf>](&self.$uf);)*
|
||||
$(b.[<set_ $pf>](self.$pf);)*
|
||||
$(b.[<set_ $ef>](self.$ef.to_capnp());)*
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub fn memory_dir() -> PathBuf {
|
||||
crate::config::get().data_dir.clone()
|
||||
}
|
||||
|
||||
pub fn nodes_path() -> PathBuf { memory_dir().join("nodes.capnp") }
|
||||
pub(crate) fn relations_path() -> PathBuf { memory_dir().join("relations.capnp") }
|
||||
pub(crate) fn state_path() -> PathBuf { memory_dir().join("state.bin") }
|
||||
pub(crate) fn snapshot_path() -> PathBuf { memory_dir().join("snapshot.rkyv") }
|
||||
fn lock_path() -> PathBuf { memory_dir().join(".store.lock") }
|
||||
|
||||
/// RAII file lock using flock(2). Dropped when scope exits.
|
||||
pub(crate) struct StoreLock {
|
||||
_file: fs::File,
|
||||
}
|
||||
|
||||
impl StoreLock {
|
||||
pub(crate) fn acquire() -> Result<Self, String> {
|
||||
let path = lock_path();
|
||||
let file = fs::OpenOptions::new()
|
||||
.create(true).truncate(false).write(true).open(&path)
|
||||
.map_err(|e| format!("open lock {}: {}", path.display(), e))?;
|
||||
|
||||
// Blocking exclusive lock
|
||||
let ret = unsafe { libc::flock(file.as_raw_fd(), libc::LOCK_EX) };
|
||||
if ret != 0 {
|
||||
return Err(format!("flock: {}", std::io::Error::last_os_error()));
|
||||
}
|
||||
Ok(StoreLock { _file: file })
|
||||
}
|
||||
// Lock released automatically when _file is dropped (flock semantics)
|
||||
}
|
||||
|
||||
pub fn now_epoch() -> i64 {
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs() as i64
|
||||
}
|
||||
|
||||
/// Convert epoch seconds to broken-down local time components.
|
||||
/// Returns (year, month, day, hour, minute, second).
|
||||
pub fn epoch_to_local(epoch: i64) -> (i32, u32, u32, u32, u32, u32) {
|
||||
use chrono::{Datelike, Local, TimeZone, Timelike};
|
||||
let dt = Local.timestamp_opt(epoch, 0).unwrap();
|
||||
(
|
||||
dt.year(),
|
||||
dt.month(),
|
||||
dt.day(),
|
||||
dt.hour(),
|
||||
dt.minute(),
|
||||
dt.second(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Format epoch as "YYYY-MM-DD"
|
||||
pub fn format_date(epoch: i64) -> String {
|
||||
let (y, m, d, _, _, _) = epoch_to_local(epoch);
|
||||
format!("{:04}-{:02}-{:02}", y, m, d)
|
||||
}
|
||||
|
||||
/// Format epoch as "YYYY-MM-DDTHH:MM"
|
||||
pub fn format_datetime(epoch: i64) -> String {
|
||||
let (y, m, d, h, min, _) = epoch_to_local(epoch);
|
||||
format!("{:04}-{:02}-{:02}T{:02}:{:02}", y, m, d, h, min)
|
||||
}
|
||||
|
||||
/// Format epoch as "YYYY-MM-DD HH:MM"
|
||||
pub fn format_datetime_space(epoch: i64) -> String {
|
||||
let (y, m, d, h, min, _) = epoch_to_local(epoch);
|
||||
format!("{:04}-{:02}-{:02} {:02}:{:02}", y, m, d, h, min)
|
||||
}
|
||||
|
||||
pub fn today() -> String {
|
||||
format_date(now_epoch())
|
||||
}
|
||||
|
||||
// In-memory node representation
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
#[archive(check_bytes)]
|
||||
pub struct Node {
|
||||
pub uuid: [u8; 16],
|
||||
pub version: u32,
|
||||
pub timestamp: i64,
|
||||
pub node_type: NodeType,
|
||||
pub provenance: Provenance,
|
||||
pub key: String,
|
||||
pub content: String,
|
||||
pub weight: f32,
|
||||
pub emotion: f32,
|
||||
pub deleted: bool,
|
||||
pub source_ref: String,
|
||||
pub created: String,
|
||||
pub retrievals: u32,
|
||||
pub uses: u32,
|
||||
pub wrongs: u32,
|
||||
pub state_tag: String,
|
||||
pub last_replayed: i64,
|
||||
pub spaced_repetition_interval: u32,
|
||||
|
||||
// Position within file (section index, for export ordering)
|
||||
#[serde(default)]
|
||||
pub position: u32,
|
||||
|
||||
// Stable creation timestamp (unix epoch seconds). Set once at creation;
|
||||
// never updated on rename or content update. Zero for legacy nodes.
|
||||
#[serde(default)]
|
||||
pub created_at: i64,
|
||||
|
||||
// Derived fields (not in capnp, computed from graph)
|
||||
#[serde(default)]
|
||||
pub community_id: Option<u32>,
|
||||
#[serde(default)]
|
||||
pub clustering_coefficient: Option<f32>,
|
||||
#[serde(default)]
|
||||
pub degree: Option<u32>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
#[archive(check_bytes)]
|
||||
pub struct Relation {
|
||||
pub uuid: [u8; 16],
|
||||
pub version: u32,
|
||||
pub timestamp: i64,
|
||||
pub source: [u8; 16],
|
||||
pub target: [u8; 16],
|
||||
pub rel_type: RelationType,
|
||||
pub strength: f32,
|
||||
pub provenance: Provenance,
|
||||
pub deleted: bool,
|
||||
pub source_key: String,
|
||||
pub target_key: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
#[archive(check_bytes)]
|
||||
pub enum NodeType {
|
||||
EpisodicSession,
|
||||
EpisodicDaily,
|
||||
EpisodicWeekly,
|
||||
Semantic,
|
||||
EpisodicMonthly,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
#[archive(check_bytes)]
|
||||
pub enum Provenance {
|
||||
Manual,
|
||||
Journal,
|
||||
Agent, // legacy catch-all, prefer specific variants below
|
||||
Dream,
|
||||
Derived,
|
||||
AgentExperienceMine,
|
||||
AgentKnowledgeObservation,
|
||||
AgentKnowledgePattern,
|
||||
AgentKnowledgeConnector,
|
||||
AgentKnowledgeChallenger,
|
||||
AgentConsolidate,
|
||||
AgentDigest,
|
||||
AgentFactMine,
|
||||
AgentDecay,
|
||||
}
|
||||
|
||||
impl Provenance {
|
||||
/// Parse from POC_PROVENANCE env var. Returns None if unset.
|
||||
pub fn from_env() -> Option<Self> {
|
||||
std::env::var("POC_PROVENANCE").ok().and_then(|s| Self::from_label(&s))
|
||||
}
|
||||
|
||||
pub fn from_label(s: &str) -> Option<Self> {
|
||||
Some(match s {
|
||||
"manual" => Self::Manual,
|
||||
"journal" => Self::Journal,
|
||||
"agent" => Self::Agent,
|
||||
"dream" => Self::Dream,
|
||||
"derived" => Self::Derived,
|
||||
"agent:experience-mine" => Self::AgentExperienceMine,
|
||||
"agent:knowledge-observation"=> Self::AgentKnowledgeObservation,
|
||||
"agent:knowledge-pattern" => Self::AgentKnowledgePattern,
|
||||
"agent:knowledge-connector" => Self::AgentKnowledgeConnector,
|
||||
"agent:knowledge-challenger" => Self::AgentKnowledgeChallenger,
|
||||
"agent:consolidate" => Self::AgentConsolidate,
|
||||
"agent:digest" => Self::AgentDigest,
|
||||
"agent:fact-mine" => Self::AgentFactMine,
|
||||
"agent:decay" => Self::AgentDecay,
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn label(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Manual => "manual",
|
||||
Self::Journal => "journal",
|
||||
Self::Agent => "agent",
|
||||
Self::Dream => "dream",
|
||||
Self::Derived => "derived",
|
||||
Self::AgentExperienceMine => "agent:experience-mine",
|
||||
Self::AgentKnowledgeObservation => "agent:knowledge-observation",
|
||||
Self::AgentKnowledgePattern => "agent:knowledge-pattern",
|
||||
Self::AgentKnowledgeConnector => "agent:knowledge-connector",
|
||||
Self::AgentKnowledgeChallenger => "agent:knowledge-challenger",
|
||||
Self::AgentConsolidate => "agent:consolidate",
|
||||
Self::AgentDigest => "agent:digest",
|
||||
Self::AgentFactMine => "agent:fact-mine",
|
||||
Self::AgentDecay => "agent:decay",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
#[archive(check_bytes)]
|
||||
pub enum RelationType {
|
||||
Link,
|
||||
Causal,
|
||||
Auto,
|
||||
}
|
||||
|
||||
capnp_enum!(NodeType, memory_capnp::NodeType,
|
||||
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic, EpisodicMonthly]);
|
||||
|
||||
capnp_enum!(Provenance, memory_capnp::Provenance,
|
||||
[Manual, Journal, Agent, Dream, Derived,
|
||||
AgentExperienceMine, AgentKnowledgeObservation, AgentKnowledgePattern,
|
||||
AgentKnowledgeConnector, AgentKnowledgeChallenger, AgentConsolidate,
|
||||
AgentDigest, AgentFactMine, AgentDecay]);
|
||||
|
||||
|
||||
capnp_enum!(RelationType, memory_capnp::RelationType,
|
||||
[Link, Causal, Auto]);
|
||||
|
||||
capnp_message!(Node,
|
||||
reader: memory_capnp::content_node::Reader<'_>,
|
||||
builder: memory_capnp::content_node::Builder<'_>,
|
||||
text: [key, content, source_ref, created, state_tag],
|
||||
uuid: [uuid],
|
||||
prim: [version, timestamp, weight, emotion, deleted,
|
||||
retrievals, uses, wrongs, last_replayed,
|
||||
spaced_repetition_interval, position, created_at],
|
||||
enm: [node_type: NodeType, provenance: Provenance],
|
||||
skip: [community_id, clustering_coefficient, degree],
|
||||
);
|
||||
|
||||
capnp_message!(Relation,
|
||||
reader: memory_capnp::relation::Reader<'_>,
|
||||
builder: memory_capnp::relation::Builder<'_>,
|
||||
text: [source_key, target_key],
|
||||
uuid: [uuid, source, target],
|
||||
prim: [version, timestamp, strength, deleted],
|
||||
enm: [rel_type: RelationType, provenance: Provenance],
|
||||
skip: [],
|
||||
);
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
#[archive(check_bytes)]
|
||||
pub struct RetrievalEvent {
|
||||
pub query: String,
|
||||
pub timestamp: String,
|
||||
pub results: Vec<String>,
|
||||
pub used: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
#[archive(check_bytes)]
|
||||
pub struct Params {
|
||||
pub default_weight: f64,
|
||||
pub decay_factor: f64,
|
||||
pub use_boost: f64,
|
||||
pub prune_threshold: f64,
|
||||
pub edge_decay: f64,
|
||||
pub max_hops: u32,
|
||||
pub min_activation: f64,
|
||||
}
|
||||
|
||||
impl Default for Params {
|
||||
fn default() -> Self {
|
||||
Params {
|
||||
default_weight: 0.7,
|
||||
decay_factor: 0.95,
|
||||
use_boost: 0.15,
|
||||
prune_threshold: 0.1,
|
||||
edge_decay: 0.3,
|
||||
max_hops: 3,
|
||||
min_activation: 0.05,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Gap record — something we looked for but didn't find
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
#[archive(check_bytes)]
|
||||
pub struct GapRecord {
|
||||
pub description: String,
|
||||
pub timestamp: String,
|
||||
}
|
||||
|
||||
// The full in-memory store
|
||||
#[derive(Default, Serialize, Deserialize)]
|
||||
pub struct Store {
|
||||
pub nodes: HashMap<String, Node>, // key → latest node
|
||||
#[serde(skip)]
|
||||
pub uuid_to_key: HashMap<[u8; 16], String>, // uuid → key (rebuilt from nodes)
|
||||
pub relations: Vec<Relation>, // all active relations
|
||||
pub retrieval_log: Vec<RetrievalEvent>,
|
||||
pub gaps: Vec<GapRecord>,
|
||||
pub params: Params,
|
||||
/// Log sizes at load time — used by save() to write correct staleness header.
|
||||
/// If another writer appended since we loaded, our cache will be marked stale
|
||||
/// (recorded size < actual size), forcing the next reader to replay the log.
|
||||
#[serde(skip)]
|
||||
pub(crate) loaded_nodes_size: u64,
|
||||
#[serde(skip)]
|
||||
pub(crate) loaded_rels_size: u64,
|
||||
}
|
||||
|
||||
/// Snapshot for mmap: full store state minus retrieval_log (which
|
||||
/// is append-only in retrieval.log). rkyv zero-copy serialization
|
||||
/// lets us mmap this and access archived data without deserialization.
|
||||
#[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
#[archive(check_bytes)]
|
||||
pub(crate) struct Snapshot {
|
||||
pub(crate) nodes: HashMap<String, Node>,
|
||||
pub(crate) relations: Vec<Relation>,
|
||||
pub(crate) gaps: Vec<GapRecord>,
|
||||
pub(crate) params: Params,
|
||||
}
|
||||
|
||||
// rkyv snapshot header: 32 bytes (multiple of 16 for alignment after mmap)
|
||||
// [0..4] magic "RKV\x01"
|
||||
// [4..8] format version (u32 LE)
|
||||
// [8..16] nodes.capnp file size (u64 LE) — staleness check
|
||||
// [16..24] relations.capnp file size (u64 LE)
|
||||
// [24..32] rkyv data length (u64 LE)
|
||||
pub(crate) const RKYV_MAGIC: [u8; 4] = *b"RKV\x01";
|
||||
pub(crate) const RKYV_HEADER_LEN: usize = 32;
|
||||
|
||||
// state.bin header: magic + log file sizes for staleness detection.
|
||||
// File sizes are race-free for append-only logs (they only grow),
|
||||
// unlike mtimes which race with concurrent writers.
|
||||
pub(crate) const CACHE_MAGIC: [u8; 4] = *b"POC\x01";
|
||||
pub(crate) const CACHE_HEADER_LEN: usize = 4 + 8 + 8; // magic + nodes_size + rels_size
|
||||
|
||||
// Cap'n Proto serialization helpers
|
||||
|
||||
/// Read a capnp text field, returning empty string on any error
|
||||
pub(crate) fn read_text(result: capnp::Result<capnp::text::Reader>) -> String {
|
||||
result.ok()
|
||||
.and_then(|t| t.to_str().ok())
|
||||
.unwrap_or("")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// Read a capnp data field as [u8; 16], zero-padded
|
||||
pub(crate) fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] {
|
||||
let mut out = [0u8; 16];
|
||||
if let Ok(data) = result {
|
||||
if data.len() >= 16 {
|
||||
out.copy_from_slice(&data[..16]);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Create a new node with defaults
|
||||
pub fn new_node(key: &str, content: &str) -> Node {
|
||||
Node {
|
||||
uuid: *Uuid::new_v4().as_bytes(),
|
||||
version: 1,
|
||||
timestamp: now_epoch(),
|
||||
node_type: NodeType::Semantic,
|
||||
provenance: Provenance::Manual,
|
||||
key: key.to_string(),
|
||||
content: content.to_string(),
|
||||
weight: 0.7,
|
||||
emotion: 0.0,
|
||||
deleted: false,
|
||||
source_ref: String::new(),
|
||||
created: today(),
|
||||
retrievals: 0,
|
||||
uses: 0,
|
||||
wrongs: 0,
|
||||
state_tag: String::new(),
|
||||
last_replayed: 0,
|
||||
spaced_repetition_interval: 1,
|
||||
position: 0,
|
||||
created_at: now_epoch(),
|
||||
community_id: None,
|
||||
clustering_coefficient: None,
|
||||
degree: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new relation
|
||||
pub fn new_relation(
|
||||
source_uuid: [u8; 16],
|
||||
target_uuid: [u8; 16],
|
||||
rel_type: RelationType,
|
||||
strength: f32,
|
||||
source_key: &str,
|
||||
target_key: &str,
|
||||
) -> Relation {
|
||||
Relation {
|
||||
uuid: *Uuid::new_v4().as_bytes(),
|
||||
version: 1,
|
||||
timestamp: now_epoch(),
|
||||
source: source_uuid,
|
||||
target: target_uuid,
|
||||
rel_type,
|
||||
strength,
|
||||
provenance: Provenance::Manual,
|
||||
deleted: false,
|
||||
source_key: source_key.to_string(),
|
||||
target_key: target_key.to_string(),
|
||||
}
|
||||
}
|
||||
191
poc-memory/src/store/view.rs
Normal file
191
poc-memory/src/store/view.rs
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
// Read-only access abstractions for the memory store
|
||||
//
|
||||
// StoreView: trait abstracting over owned Store and zero-copy MmapView.
|
||||
// MmapView: mmap'd rkyv snapshot for sub-millisecond read-only access.
|
||||
// AnyView: enum dispatch selecting fastest available view at runtime.
|
||||
|
||||
use super::types::*;
|
||||
|
||||
use std::fs;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// StoreView: read-only access trait for search and graph code.
|
||||
//
|
||||
// Abstracts over owned Store and zero-copy MmapView so the same
|
||||
// spreading-activation and graph code works with either.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub trait StoreView {
|
||||
/// Iterate all nodes. Callback receives (key, content, weight).
|
||||
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F);
|
||||
|
||||
/// Iterate all relations. Callback receives (source_key, target_key, strength, rel_type).
|
||||
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F);
|
||||
|
||||
/// Node weight by key, or the default weight if missing.
|
||||
fn node_weight(&self, key: &str) -> f64;
|
||||
|
||||
/// Node content by key.
|
||||
fn node_content(&self, key: &str) -> Option<&str>;
|
||||
|
||||
/// Search/graph parameters.
|
||||
fn params(&self) -> Params;
|
||||
}
|
||||
|
||||
impl StoreView for Store {
|
||||
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, mut f: F) {
|
||||
for (key, node) in &self.nodes {
|
||||
f(key, &node.content, node.weight);
|
||||
}
|
||||
}
|
||||
|
||||
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
|
||||
for rel in &self.relations {
|
||||
if rel.deleted { continue; }
|
||||
f(&rel.source_key, &rel.target_key, rel.strength, rel.rel_type);
|
||||
}
|
||||
}
|
||||
|
||||
fn node_weight(&self, key: &str) -> f64 {
|
||||
self.nodes.get(key).map(|n| n.weight as f64).unwrap_or(self.params.default_weight)
|
||||
}
|
||||
|
||||
fn node_content(&self, key: &str) -> Option<&str> {
|
||||
self.nodes.get(key).map(|n| n.content.as_str())
|
||||
}
|
||||
|
||||
fn params(&self) -> Params {
|
||||
self.params
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MmapView: zero-copy store access via mmap'd rkyv snapshot.
|
||||
//
|
||||
// Holds the mmap alive; all string reads go directly into the mapped
|
||||
// pages without allocation. Falls back to None if snapshot is stale.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub struct MmapView {
|
||||
mmap: memmap2::Mmap,
|
||||
_file: fs::File,
|
||||
data_offset: usize,
|
||||
data_len: usize,
|
||||
}
|
||||
|
||||
impl MmapView {
|
||||
/// Try to open a fresh rkyv snapshot. Returns None if missing or stale.
|
||||
pub fn open() -> Option<Self> {
|
||||
let path = snapshot_path();
|
||||
let file = fs::File::open(&path).ok()?;
|
||||
let mmap = unsafe { memmap2::Mmap::map(&file) }.ok()?;
|
||||
|
||||
if mmap.len() < RKYV_HEADER_LEN { return None; }
|
||||
if mmap[..4] != RKYV_MAGIC { return None; }
|
||||
|
||||
let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
|
||||
let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);
|
||||
|
||||
let cached_nodes = u64::from_le_bytes(mmap[8..16].try_into().unwrap());
|
||||
let cached_rels = u64::from_le_bytes(mmap[16..24].try_into().unwrap());
|
||||
let data_len = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize;
|
||||
|
||||
if cached_nodes != nodes_size || cached_rels != rels_size { return None; }
|
||||
if mmap.len() < RKYV_HEADER_LEN + data_len { return None; }
|
||||
|
||||
Some(MmapView { mmap, _file: file, data_offset: RKYV_HEADER_LEN, data_len })
|
||||
}
|
||||
|
||||
fn snapshot(&self) -> &ArchivedSnapshot {
|
||||
let data = &self.mmap[self.data_offset..self.data_offset + self.data_len];
|
||||
unsafe { rkyv::archived_root::<Snapshot>(data) }
|
||||
}
|
||||
}
|
||||
|
||||
impl StoreView for MmapView {
|
||||
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, mut f: F) {
|
||||
let snap = self.snapshot();
|
||||
for (key, node) in snap.nodes.iter() {
|
||||
f(&key, &node.content, node.weight);
|
||||
}
|
||||
}
|
||||
|
||||
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
|
||||
let snap = self.snapshot();
|
||||
for rel in snap.relations.iter() {
|
||||
if rel.deleted { continue; }
|
||||
let rt = match rel.rel_type {
|
||||
ArchivedRelationType::Link => RelationType::Link,
|
||||
ArchivedRelationType::Causal => RelationType::Causal,
|
||||
ArchivedRelationType::Auto => RelationType::Auto,
|
||||
};
|
||||
f(&rel.source_key, &rel.target_key, rel.strength, rt);
|
||||
}
|
||||
}
|
||||
|
||||
fn node_weight(&self, key: &str) -> f64 {
|
||||
let snap = self.snapshot();
|
||||
snap.nodes.get(key)
|
||||
.map(|n| n.weight as f64)
|
||||
.unwrap_or(snap.params.default_weight)
|
||||
}
|
||||
|
||||
fn node_content(&self, key: &str) -> Option<&str> {
|
||||
let snap = self.snapshot();
|
||||
snap.nodes.get(key).map(|n| &*n.content)
|
||||
}
|
||||
|
||||
fn params(&self) -> Params {
|
||||
let p = &self.snapshot().params;
|
||||
Params {
|
||||
default_weight: p.default_weight,
|
||||
decay_factor: p.decay_factor,
|
||||
use_boost: p.use_boost,
|
||||
prune_threshold: p.prune_threshold,
|
||||
edge_decay: p.edge_decay,
|
||||
max_hops: p.max_hops,
|
||||
min_activation: p.min_activation,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AnyView: enum dispatch for read-only access.
|
||||
//
|
||||
// MmapView when the snapshot is fresh, owned Store as fallback.
|
||||
// The match on each call is a single predicted branch — zero overhead.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub enum AnyView {
|
||||
Mmap(MmapView),
|
||||
Owned(Store),
|
||||
}
|
||||
|
||||
impl AnyView {
|
||||
/// Load the fastest available view: mmap snapshot or owned store.
|
||||
pub fn load() -> Result<Self, String> {
|
||||
if let Some(mv) = MmapView::open() {
|
||||
Ok(AnyView::Mmap(mv))
|
||||
} else {
|
||||
Ok(AnyView::Owned(Store::load()?))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl StoreView for AnyView {
|
||||
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F) {
|
||||
match self { AnyView::Mmap(v) => v.for_each_node(f), AnyView::Owned(s) => s.for_each_node(f) }
|
||||
}
|
||||
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F) {
|
||||
match self { AnyView::Mmap(v) => v.for_each_relation(f), AnyView::Owned(s) => s.for_each_relation(f) }
|
||||
}
|
||||
fn node_weight(&self, key: &str) -> f64 {
|
||||
match self { AnyView::Mmap(v) => v.node_weight(key), AnyView::Owned(s) => s.node_weight(key) }
|
||||
}
|
||||
fn node_content(&self, key: &str) -> Option<&str> {
|
||||
match self { AnyView::Mmap(v) => v.node_content(key), AnyView::Owned(s) => s.node_content(key) }
|
||||
}
|
||||
fn params(&self) -> Params {
|
||||
match self { AnyView::Mmap(v) => v.params(), AnyView::Owned(s) => s.params() }
|
||||
}
|
||||
}
|
||||
14
poc-memory/src/util.rs
Normal file
14
poc-memory/src/util.rs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
// Shared utilities
|
||||
|
||||
use crate::store;
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Ensure a subdirectory of the memory dir exists and return its path.
|
||||
pub fn memory_subdir(name: &str) -> Result<PathBuf, String> {
|
||||
let dir = store::memory_dir().join(name);
|
||||
fs::create_dir_all(&dir)
|
||||
.map_err(|e| format!("create {}: {}", dir.display(), e))?;
|
||||
Ok(dir)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue