restructure: hippocampus/ for memory, subconscious/ for agents

hippocampus/ — memory storage, retrieval, and consolidation:
  store, graph, query, similarity, spectral, neuro, counters,
  config, transcript, memory_search, lookups, cursor, migrate

subconscious/ — autonomous agents that process without being asked:
  reflect, surface, consolidate, digest, audit, etc.

All existing crate::X paths preserved via re-exports in lib.rs.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-25 01:04:13 -04:00 committed by Kent Overstreet
parent cfed85bd20
commit d5c0e86700
39 changed files with 87 additions and 32 deletions

304
src/hippocampus/config.rs Normal file
View file

@ -0,0 +1,304 @@
// Configuration for poc-memory
//
// Primary config: ~/.config/poc-agent/config.json5 (shared with poc-agent)
// Memory-specific settings live in the "memory" section.
// API backend resolved from the shared "models" + backend configs.
//
// Fallback: ~/.config/poc-memory/config.jsonl (legacy, still supported)
// Env override: POC_MEMORY_CONFIG
//
// The shared config eliminates API credential duplication between
// poc-memory and poc-agent.
use std::path::PathBuf;
use std::sync::{Arc, OnceLock, RwLock};
static CONFIG: OnceLock<RwLock<Arc<Config>>> = OnceLock::new();
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
#[derive(Default)]
pub enum ContextSource {
#[serde(alias = "")]
#[default]
Store,
File,
Journal,
}
#[derive(Debug, Clone, serde::Deserialize)]
pub struct ContextGroup {
pub label: String,
#[serde(default)]
pub keys: Vec<String>,
#[serde(default)]
pub source: ContextSource,
/// Include this group in agent context (default true)
#[serde(default = "default_true")]
pub agent: bool,
}
fn default_true() -> bool { true }
#[derive(Debug, Clone, serde::Deserialize)]
#[serde(default)]
pub struct Config {
pub user_name: String,
pub assistant_name: String,
#[serde(deserialize_with = "deserialize_path")]
pub data_dir: PathBuf,
#[serde(deserialize_with = "deserialize_path")]
pub projects_dir: PathBuf,
pub core_nodes: Vec<String>,
pub journal_days: u32,
pub journal_max: usize,
pub context_groups: Vec<ContextGroup>,
pub llm_concurrency: usize,
pub agent_budget: usize,
#[serde(deserialize_with = "deserialize_path")]
pub prompts_dir: PathBuf,
#[serde(default, deserialize_with = "deserialize_path_opt")]
pub agent_config_dir: Option<PathBuf>,
/// Resolved from agent_model → models → backend (not in config directly)
#[serde(skip)]
pub api_base_url: Option<String>,
#[serde(skip)]
pub api_key: Option<String>,
#[serde(skip)]
pub api_model: Option<String>,
/// Used to resolve API settings, not stored on Config
#[serde(default)]
agent_model: Option<String>,
pub api_reasoning: String,
pub agent_types: Vec<String>,
/// Surface agent timeout in seconds. Kill if running longer than this.
#[serde(default)]
pub surface_timeout_secs: Option<u32>,
/// Hook events that trigger the surface agent (e.g. ["UserPromptSubmit"]).
/// Empty list disables surface agent.
#[serde(default)]
pub surface_hooks: Vec<String>,
}
impl Default for Config {
fn default() -> Self {
let home = PathBuf::from(std::env::var("HOME").expect("HOME not set"));
Self {
user_name: "User".to_string(),
assistant_name: "Assistant".to_string(),
data_dir: home.join(".claude/memory"),
projects_dir: home.join(".claude/projects"),
core_nodes: vec!["identity".to_string(), "core-practices".to_string()],
journal_days: 7,
journal_max: 20,
context_groups: vec![
ContextGroup {
label: "identity".into(),
keys: vec!["identity".into()],
source: ContextSource::Store,
agent: true,
},
ContextGroup {
label: "core-practices".into(),
keys: vec!["core-practices".into()],
source: ContextSource::Store,
agent: true,
},
],
llm_concurrency: 1,
agent_budget: 1000,
prompts_dir: home.join("poc/memory/prompts"),
agent_config_dir: None,
api_base_url: None,
api_key: None,
api_model: None,
agent_model: None,
api_reasoning: "high".to_string(),
agent_types: vec![
"linker".into(), "organize".into(), "distill".into(),
"separator".into(), "split".into(),
],
surface_timeout_secs: None,
surface_hooks: vec![],
}
}
}
impl Config {
fn load_from_file() -> Self {
// Try shared config first, then legacy JSONL
if let Some(config) = Self::try_load_shared() {
return config;
}
Self::load_legacy_jsonl()
}
/// Load from shared poc-agent config (~/.config/poc-agent/config.json5).
/// Memory settings live in the "memory" section; API settings are
/// resolved from the shared model/backend configuration.
fn try_load_shared() -> Option<Self> {
let path = PathBuf::from(std::env::var("HOME").ok()?)
.join(".config/poc-agent/config.json5");
let content = std::fs::read_to_string(&path).ok()?;
let root: serde_json::Value = json5::from_str(&content).ok()?;
let mem_value = root.get("memory")?;
let mut config: Config = serde_json::from_value(mem_value.clone()).ok()?;
config.llm_concurrency = config.llm_concurrency.max(1);
// Resolve API settings: agent_model → models → backend
if let Some(model_name) = &config.agent_model
&& let Some(model_cfg) = root.get("models").and_then(|m| m.get(model_name.as_str())) {
let backend_name = model_cfg.get("backend").and_then(|v| v.as_str()).unwrap_or("");
let model_id = model_cfg.get("model_id").and_then(|v| v.as_str()).unwrap_or("");
if let Some(backend) = root.get(backend_name) {
config.api_base_url = backend.get("base_url")
.and_then(|v| v.as_str()).map(String::from);
config.api_key = backend.get("api_key")
.and_then(|v| v.as_str()).map(String::from);
}
config.api_model = Some(model_id.to_string());
}
Some(config)
}
/// Load from legacy JSONL config (~/.config/poc-memory/config.jsonl).
fn load_legacy_jsonl() -> Self {
let path = std::env::var("POC_MEMORY_CONFIG")
.map(PathBuf::from)
.unwrap_or_else(|_| {
PathBuf::from(std::env::var("HOME").expect("HOME not set"))
.join(".config/poc-memory/config.jsonl")
});
let mut config = Config::default();
let Ok(content) = std::fs::read_to_string(&path) else {
return config;
};
let mut context_groups: Vec<ContextGroup> = Vec::new();
let stream = serde_json::Deserializer::from_str(&content)
.into_iter::<serde_json::Value>();
for result in stream {
let Ok(obj) = result else { continue };
if let Some(cfg) = obj.get("config") {
if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) {
config.user_name = s.to_string();
}
if let Some(s) = cfg.get("assistant_name").and_then(|v| v.as_str()) {
config.assistant_name = s.to_string();
}
if let Some(s) = cfg.get("data_dir").and_then(|v| v.as_str()) {
config.data_dir = expand_home(s);
}
if let Some(s) = cfg.get("projects_dir").and_then(|v| v.as_str()) {
config.projects_dir = expand_home(s);
}
if let Some(arr) = cfg.get("core_nodes").and_then(|v| v.as_array()) {
config.core_nodes = arr.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect();
}
if let Some(d) = cfg.get("journal_days").and_then(|v| v.as_u64()) {
config.journal_days = d as u32;
}
if let Some(m) = cfg.get("journal_max").and_then(|v| v.as_u64()) {
config.journal_max = m as usize;
}
if let Some(n) = cfg.get("llm_concurrency").and_then(|v| v.as_u64()) {
config.llm_concurrency = n.max(1) as usize;
}
if let Some(n) = cfg.get("agent_budget").and_then(|v| v.as_u64()) {
config.agent_budget = n as usize;
}
if let Some(s) = cfg.get("prompts_dir").and_then(|v| v.as_str()) {
config.prompts_dir = expand_home(s);
}
if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) {
config.agent_config_dir = Some(expand_home(s));
}
if let Some(s) = cfg.get("api_base_url").and_then(|v| v.as_str()) {
config.api_base_url = Some(s.to_string());
}
if let Some(s) = cfg.get("api_key").and_then(|v| v.as_str()) {
config.api_key = Some(s.to_string());
}
if let Some(s) = cfg.get("api_model").and_then(|v| v.as_str()) {
config.api_model = Some(s.to_string());
}
continue;
}
if let Some(label) = obj.get("group").and_then(|v| v.as_str()) {
let keys = obj.get("keys")
.and_then(|v| v.as_array())
.map(|arr| arr.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect())
.unwrap_or_default();
let source = match obj.get("source").and_then(|v| v.as_str()) {
Some("file") => ContextSource::File,
Some("journal") => ContextSource::Journal,
_ => ContextSource::Store,
};
let agent = obj.get("agent").and_then(|v| v.as_bool()).unwrap_or(true);
context_groups.push(ContextGroup { label: label.to_string(), keys, source, agent });
}
}
if !context_groups.is_empty() {
config.context_groups = context_groups;
}
config
}
}
fn deserialize_path<'de, D: serde::Deserializer<'de>>(d: D) -> Result<PathBuf, D::Error> {
let s: String = serde::Deserialize::deserialize(d)?;
Ok(expand_home(&s))
}
fn deserialize_path_opt<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Option<PathBuf>, D::Error> {
let s: Option<String> = serde::Deserialize::deserialize(d)?;
Ok(s.map(|s| expand_home(&s)))
}
fn expand_home(path: &str) -> PathBuf {
if let Some(rest) = path.strip_prefix("~/") {
PathBuf::from(std::env::var("HOME").expect("HOME not set")).join(rest)
} else {
PathBuf::from(path)
}
}
/// Get the global config (cheap Arc clone).
pub fn get() -> Arc<Config> {
CONFIG
.get_or_init(|| RwLock::new(Arc::new(Config::load_from_file())))
.read()
.unwrap()
.clone()
}
/// Reload the config from disk. Returns true if changed.
pub fn reload() -> bool {
let lock = CONFIG.get_or_init(|| RwLock::new(Arc::new(Config::load_from_file())));
let new = Config::load_from_file();
let mut current = lock.write().unwrap();
let changed = format!("{:?}", **current) != format!("{:?}", new);
if changed {
*current = Arc::new(new);
}
changed
}

116
src/hippocampus/counters.rs Normal file
View file

@ -0,0 +1,116 @@
// counters.rs — persistent counters backed by redb
//
// Tracks search hits, visit counts, and other per-key metrics that
// need fast increment/read without loading the full capnp store.
//
// Tables:
// search_hits: key → u64 (how often memory-search found this node)
// last_hit_ts: key → i64 (unix timestamp of last search hit)
use redb::{Database, ReadableTable, TableDefinition};
use std::path::PathBuf;
const SEARCH_HITS: TableDefinition<&str, u64> = TableDefinition::new("search_hits");
const LAST_HIT_TS: TableDefinition<&str, i64> = TableDefinition::new("last_hit_ts");
fn db_path() -> PathBuf {
crate::config::get().data_dir.join("counters.redb")
}
/// Open (or create) the counters database.
pub fn open() -> Result<Database, String> {
Database::create(db_path()).map_err(|e| format!("open counters db: {}", e))
}
/// Increment search hit count for a set of keys.
pub fn record_search_hits(keys: &[&str]) -> Result<(), String> {
let db = open()?;
let ts = chrono::Utc::now().timestamp();
let txn = db.begin_write().map_err(|e| format!("begin write: {}", e))?;
{
let mut hits = txn.open_table(SEARCH_HITS).map_err(|e| format!("open table: {}", e))?;
let mut ts_table = txn.open_table(LAST_HIT_TS).map_err(|e| format!("open table: {}", e))?;
for key in keys {
let count = hits.get(*key).map_err(|e| format!("get: {}", e))?
.map(|v| v.value())
.unwrap_or(0);
hits.insert(*key, count + 1).map_err(|e| format!("insert: {}", e))?;
ts_table.insert(*key, ts).map_err(|e| format!("insert ts: {}", e))?;
}
}
txn.commit().map_err(|e| format!("commit: {}", e))?;
Ok(())
}
/// Get search hit count for a key.
pub fn search_hit_count(key: &str) -> u64 {
let db = match open() {
Ok(db) => db,
Err(_) => return 0,
};
let txn = match db.begin_read() {
Ok(t) => t,
Err(_) => return 0,
};
let table = match txn.open_table(SEARCH_HITS) {
Ok(t) => t,
Err(_) => return 0,
};
table.get(key).ok().flatten().map(|v| v.value()).unwrap_or(0)
}
/// Get all search hit counts (for rename agent).
/// Returns keys sorted by count descending.
pub fn all_search_hits() -> Vec<(String, u64)> {
let db = match open() {
Ok(db) => db,
Err(_) => return Vec::new(),
};
let txn = match db.begin_read() {
Ok(t) => t,
Err(_) => return Vec::new(),
};
let table = match txn.open_table(SEARCH_HITS) {
Ok(t) => t,
Err(_) => return Vec::new(),
};
let mut results: Vec<(String, u64)> = match table.iter() {
Ok(iter) => iter
.flatten()
.map(|(k, v)| (k.value().to_string(), v.value()))
.collect(),
Err(_) => return Vec::new(),
};
results.sort_by(|a, b| b.1.cmp(&a.1));
results
}
/// Decay all counters by a factor (e.g. 0.9 = 10% decay).
/// Removes entries that drop to zero.
pub fn decay_all(factor: f64) -> Result<usize, String> {
let db = open()?;
let txn = db.begin_write().map_err(|e| format!("begin write: {}", e))?;
let mut removed = 0;
{
let mut table = txn.open_table(SEARCH_HITS).map_err(|e| format!("open table: {}", e))?;
// Collect keys first to avoid borrow conflict
let entries: Vec<(String, u64)> = table.iter()
.map_err(|e| format!("iter: {}", e))?
.flatten()
.map(|(k, v)| (k.value().to_string(), v.value()))
.collect();
for (key, count) in entries {
let new_count = (count as f64 * factor) as u64;
if new_count == 0 {
table.remove(key.as_str()).ok();
removed += 1;
} else {
table.insert(key.as_str(), new_count).ok();
}
}
}
txn.commit().map_err(|e| format!("commit: {}", e))?;
Ok(removed)
}

329
src/hippocampus/cursor.rs Normal file
View file

@ -0,0 +1,329 @@
// Spatial memory cursor — a persistent pointer into the knowledge graph.
//
// The cursor maintains a "you are here" position that persists across
// sessions. Navigation moves through three dimensions:
// - Temporal: forward/back among same-type nodes by timestamp
// - Hierarchical: up/down the digest tree (journal→daily→weekly→monthly)
// - Spatial: sideways along graph edges to linked nodes
//
// This is the beginning of place cells — the hippocampus doesn't just
// store, it maintains a map. The cursor is the map's current position.
use crate::store::{self, Node, Store};
use std::path::PathBuf;
fn cursor_path() -> PathBuf {
store::memory_dir().join("cursor")
}
/// Read the current cursor position (node key), if any.
pub fn get() -> Option<String> {
std::fs::read_to_string(cursor_path())
.ok()
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
}
/// Set the cursor to a node key.
pub fn set(key: &str) -> Result<(), String> {
std::fs::write(cursor_path(), format!("{}\n", key))
.map_err(|e| format!("write cursor: {}", e))
}
/// Clear the cursor.
pub fn clear() -> Result<(), String> {
let p = cursor_path();
if p.exists() {
std::fs::remove_file(&p)
.map_err(|e| format!("clear cursor: {}", e))?;
}
Ok(())
}
/// Temporal neighbors: nodes of the same type, sorted by timestamp.
/// Returns (prev, next) keys relative to the given node.
pub fn temporal_neighbors(store: &Store, key: &str) -> (Option<String>, Option<String>) {
let Some(node) = store.nodes.get(key) else { return (None, None) };
let node_type = node.node_type;
let mut same_type: Vec<(&str, i64)> = store.nodes.iter()
.filter(|(_, n)| !n.deleted && n.node_type == node_type && n.timestamp > 0)
.map(|(k, n)| (k.as_str(), n.timestamp))
.collect();
same_type.sort_by_key(|(_, t)| *t);
let pos = same_type.iter().position(|(k, _)| *k == key);
let prev = pos.and_then(|i| if i > 0 { Some(same_type[i - 1].0.to_string()) } else { None });
let next = pos.and_then(|i| same_type.get(i + 1).map(|(k, _)| k.to_string()));
(prev, next)
}
/// Digest hierarchy: find the parent digest for a node.
/// Journal → daily, daily → weekly, weekly → monthly.
pub fn digest_parent(store: &Store, key: &str) -> Option<String> {
let node = store.nodes.get(key)?;
let parent_type = match node.node_type {
store::NodeType::EpisodicSession => store::NodeType::EpisodicDaily,
store::NodeType::EpisodicDaily => store::NodeType::EpisodicWeekly,
store::NodeType::EpisodicWeekly => store::NodeType::EpisodicMonthly,
_ => return None,
};
// Look for structural links first (digest:structural provenance)
for r in &store.relations {
if r.deleted { continue; }
if r.source_key == key
&& let Some(target) = store.nodes.get(&r.target_key)
&& target.node_type == parent_type {
return Some(r.target_key.clone());
}
}
// Fallback: match by date for journal→daily
if node.node_type == store::NodeType::EpisodicSession {
// Try extracting date from timestamp first, then from key
let mut dates = Vec::new();
if node.timestamp > 0 {
dates.push(store::format_date(node.timestamp));
}
// Extract date from key patterns like "journal#2026-03-03-..." or "journal#j-2026-03-13t..."
if let Some(rest) = key.strip_prefix("journal#j-").or_else(|| key.strip_prefix("journal#"))
&& rest.len() >= 10 {
let candidate = &rest[..10];
if candidate.chars().nth(4) == Some('-') {
let date = candidate.to_string();
if !dates.contains(&date) {
dates.push(date);
}
}
}
for date in &dates {
for prefix in [&format!("daily-{}", date), &format!("digest#daily#{}", date)] {
for (k, n) in &store.nodes {
if !n.deleted && n.node_type == parent_type && k.starts_with(prefix.as_str()) {
return Some(k.clone());
}
}
}
}
}
None
}
/// Digest children: find nodes that feed into this digest.
/// Monthly → weeklies, weekly → dailies, daily → journal entries.
pub fn digest_children(store: &Store, key: &str) -> Vec<String> {
let Some(node) = store.nodes.get(key) else { return vec![] };
let child_type = match node.node_type {
store::NodeType::EpisodicDaily => store::NodeType::EpisodicSession,
store::NodeType::EpisodicWeekly => store::NodeType::EpisodicDaily,
store::NodeType::EpisodicMonthly => store::NodeType::EpisodicWeekly,
_ => return vec![],
};
// Look for structural links (source → this digest)
let mut children: Vec<(String, i64)> = Vec::new();
for r in &store.relations {
if r.deleted { continue; }
if r.target_key == key
&& let Some(source) = store.nodes.get(&r.source_key)
&& source.node_type == child_type {
children.push((r.source_key.clone(), source.timestamp));
}
}
// Fallback for daily → journal: extract date from key and match
if children.is_empty() && node.node_type == store::NodeType::EpisodicDaily {
// Extract date from keys like "daily-2026-03-13" or "daily-2026-03-13-suffix"
let date = key.strip_prefix("daily-")
.or_else(|| key.strip_prefix("digest#daily#"))
.and_then(|rest| rest.get(..10)); // "YYYY-MM-DD"
if let Some(date) = date {
for (k, n) in &store.nodes {
if n.deleted { continue; }
if n.node_type == store::NodeType::EpisodicSession
&& n.timestamp > 0
&& store::format_date(n.timestamp) == date
{
children.push((k.clone(), n.timestamp));
}
}
}
}
children.sort_by_key(|(_, t)| *t);
children.into_iter().map(|(k, _)| k).collect()
}
/// Graph neighbors sorted by edge strength.
pub fn graph_neighbors(store: &Store, key: &str) -> Vec<(String, f32)> {
let mut neighbors: Vec<(String, f32)> = Vec::new();
for r in &store.relations {
if r.deleted { continue; }
if r.source_key == key {
neighbors.push((r.target_key.clone(), r.strength));
} else if r.target_key == key {
neighbors.push((r.source_key.clone(), r.strength));
}
}
neighbors.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
neighbors.dedup_by(|a, b| a.0 == b.0);
neighbors
}
/// Format a one-line summary of a node for context display.
fn node_summary(node: &Node) -> String {
let ts = if node.timestamp > 0 {
store::format_datetime(node.timestamp)
} else {
"no-date".to_string()
};
let type_tag = match node.node_type {
store::NodeType::EpisodicSession => "journal",
store::NodeType::EpisodicDaily => "daily",
store::NodeType::EpisodicWeekly => "weekly",
store::NodeType::EpisodicMonthly => "monthly",
store::NodeType::Semantic => "semantic",
};
// First line of content, truncated
let first_line = node.content.lines().next().unwrap_or("")
.chars().take(80).collect::<String>();
format!("[{}] ({}) {}", ts, type_tag, first_line)
}
/// Display the cursor position with full context.
pub fn show(store: &Store) -> Result<(), String> {
let key = get().ok_or_else(|| "No cursor set. Use `poc-memory cursor set KEY`".to_string())?;
let node = store.nodes.get(&key)
.ok_or_else(|| format!("Cursor points to missing node: {}", key))?;
// Header
let type_tag = match node.node_type {
store::NodeType::EpisodicSession => "journal",
store::NodeType::EpisodicDaily => "daily",
store::NodeType::EpisodicWeekly => "weekly",
store::NodeType::EpisodicMonthly => "monthly",
store::NodeType::Semantic => "semantic",
};
if node.timestamp > 0 {
eprintln!("@ {} [{}]", key, type_tag);
eprintln!(" {}", store::format_datetime(node.timestamp));
} else {
eprintln!("@ {} [{}]", key, type_tag);
}
// Temporal context
let (prev, next) = temporal_neighbors(store, &key);
eprintln!();
if let Some(ref p) = prev
&& let Some(pn) = store.nodes.get(p) {
eprintln!("{}", node_summary(pn));
eprintln!(" `cursor back`");
}
if let Some(ref n) = next
&& let Some(nn) = store.nodes.get(n) {
eprintln!("{}", node_summary(nn));
eprintln!(" `cursor forward`");
}
// Hierarchy
if let Some(ref parent) = digest_parent(store, &key)
&& let Some(pn) = store.nodes.get(parent) {
eprintln!("{}", node_summary(pn));
eprintln!(" `cursor up`");
}
let children = digest_children(store, &key);
if !children.is_empty() {
let count = children.len();
if let Some(first) = children.first().and_then(|k| store.nodes.get(k)) {
eprintln!("{} children — first: {}", count, node_summary(first));
eprintln!(" `cursor down`");
}
}
// Graph neighbors (non-temporal)
let neighbors = graph_neighbors(store, &key);
let semantic: Vec<_> = neighbors.iter()
.filter(|(k, _)| {
store.nodes.get(k)
.map(|n| n.node_type == store::NodeType::Semantic)
.unwrap_or(false)
})
.take(8)
.collect();
if !semantic.is_empty() {
eprintln!();
eprintln!(" Linked:");
for (k, strength) in &semantic {
eprintln!(" [{:.1}] {}", strength, k);
}
}
eprintln!();
eprintln!("---");
// Content
print!("{}", node.content);
Ok(())
}
/// Move cursor in a temporal direction.
pub fn move_temporal(store: &Store, forward: bool) -> Result<(), String> {
let key = get().ok_or("No cursor set")?;
let _ = store.nodes.get(&key)
.ok_or_else(|| format!("Cursor points to missing node: {}", key))?;
let (prev, next) = temporal_neighbors(store, &key);
let target = if forward { next } else { prev };
match target {
Some(k) => {
set(&k)?;
show(store)
}
None => {
let dir = if forward { "forward" } else { "back" };
Err(format!("No {} neighbor from {}", dir, key))
}
}
}
/// Move cursor up the digest hierarchy.
pub fn move_up(store: &Store) -> Result<(), String> {
let key = get().ok_or("No cursor set")?;
match digest_parent(store, &key) {
Some(parent) => {
set(&parent)?;
show(store)
}
None => Err(format!("No parent digest for {}", key)),
}
}
/// Move cursor down the digest hierarchy (to first child).
pub fn move_down(store: &Store) -> Result<(), String> {
let key = get().ok_or("No cursor set")?;
let children = digest_children(store, &key);
match children.first() {
Some(child) => {
set(child)?;
show(store)
}
None => Err(format!("No children for {}", key)),
}
}
/// Move cursor to a graph neighbor by index (from the neighbors list).
pub fn move_to_neighbor(store: &Store, index: usize) -> Result<(), String> {
let key = get().ok_or("No cursor set")?;
let neighbors = graph_neighbors(store, &key);
let (target, _) = neighbors.get(index)
.ok_or_else(|| format!("Neighbor index {} out of range (have {})", index, neighbors.len()))?;
set(target)?;
show(store)
}

992
src/hippocampus/graph.rs Normal file
View file

@ -0,0 +1,992 @@
// Graph algorithms: clustering coefficient, community detection (label
// propagation), schema fit scoring, small-world metrics, consolidation
// priority scoring.
//
// The Graph is built from the Store's nodes + relations. Edges are
// undirected for clustering/community (even causal edges count as
// connections), but relation type and direction are preserved for
// specific queries.
use crate::store::{Store, RelationType, StoreView};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
/// Community info for reporting
#[derive(Clone, Debug)]
pub struct CommunityInfo {
pub id: u32,
pub members: Vec<String>,
pub size: usize,
pub isolation: f32,
pub cross_edges: usize,
}
/// Weighted edge in the graph
#[derive(Clone, Debug)]
pub struct Edge {
pub target: String,
pub strength: f32,
pub rel_type: RelationType,
}
/// The in-memory graph built from store nodes + relations
pub struct Graph {
/// Adjacency list: node key → list of edges
adj: HashMap<String, Vec<Edge>>,
/// All node keys
keys: HashSet<String>,
/// Community labels (from label propagation)
communities: HashMap<String, u32>,
}
impl Graph {
pub fn nodes(&self) -> &HashSet<String> {
&self.keys
}
pub fn degree(&self, key: &str) -> usize {
self.adj.get(key).map(|e| e.len()).unwrap_or(0)
}
pub fn edge_count(&self) -> usize {
self.adj.values().map(|e| e.len()).sum::<usize>() / 2
}
/// All edges for a node (full Edge data including rel_type)
pub fn edges_of(&self, key: &str) -> &[Edge] {
self.adj.get(key)
.map(|v| v.as_slice())
.unwrap_or(&[])
}
/// All neighbor keys with strengths
pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> {
self.adj.get(key)
.map(|edges| edges.iter().map(|e| (&e.target, e.strength)).collect())
.unwrap_or_default()
}
/// Just neighbor keys
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
self.adj.get(key)
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
.unwrap_or_default()
}
/// Jaccard similarity between two nodes' neighborhoods.
/// Measures overlap: |intersection| / |union| of their neighbor sets.
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
let na = self.neighbor_keys(a);
let nb = self.neighbor_keys(b);
let intersection = na.intersection(&nb).count();
let union = na.union(&nb).count();
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
}
/// Compute Jaccard-based strength for every edge in the graph.
/// Returns (source_key, target_key, jaccard_strength) triples.
/// Scales raw Jaccard (typically 0.0-0.3) to a useful range.
pub fn jaccard_strengths(&self) -> Vec<(String, String, f32)> {
let mut result = Vec::new();
let mut seen = HashSet::new();
for (key, edges) in &self.adj {
for edge in edges {
// Deduplicate undirected edges
let pair = if key < &edge.target {
(key.as_str(), edge.target.as_str())
} else {
(edge.target.as_str(), key.as_str())
};
if !seen.insert((pair.0.to_string(), pair.1.to_string())) {
continue;
}
let j = self.jaccard(key, &edge.target);
// Scale: raw Jaccard 0.05 → 0.15, 0.15 → 0.45, 0.30 → 0.90
// Formula: clamp(j * 3, 0.1, 1.0)
let strength = (j * 3.0).clamp(0.1, 1.0);
result.push((key.clone(), edge.target.clone(), strength));
}
}
result
}
pub fn community_count(&self) -> usize {
let labels: HashSet<_> = self.communities.values().collect();
labels.len()
}
pub fn communities(&self) -> &HashMap<String, u32> {
&self.communities
}
/// Community isolation scores: for each community, what fraction of its
/// total edge weight is internal (vs cross-community). Returns community_id → score
/// where 1.0 = fully isolated (no external edges), 0.0 = all edges external.
/// Singleton communities (1 node, no edges) get score 1.0.
pub fn community_isolation(&self) -> HashMap<u32, f32> {
// Accumulate internal and total edge weight per community
let mut internal: HashMap<u32, f32> = HashMap::new();
let mut total: HashMap<u32, f32> = HashMap::new();
for (key, edges) in &self.adj {
let Some(&my_comm) = self.communities.get(key) else { continue };
for edge in edges {
let nbr_comm = self.communities.get(&edge.target).copied().unwrap_or(u32::MAX);
*total.entry(my_comm).or_default() += edge.strength;
if my_comm == nbr_comm {
*internal.entry(my_comm).or_default() += edge.strength;
}
}
}
let mut scores = HashMap::new();
let all_communities: HashSet<u32> = self.communities.values().copied().collect();
for &comm in &all_communities {
let t = total.get(&comm).copied().unwrap_or(0.0);
if t < 0.001 {
scores.insert(comm, 1.0); // no edges = fully isolated
} else {
let i = internal.get(&comm).copied().unwrap_or(0.0);
scores.insert(comm, i / t);
}
}
scores
}
/// Community info: id → (member keys, size, isolation score, cross-community edge count)
pub fn community_info(&self) -> Vec<CommunityInfo> {
let isolation = self.community_isolation();
// Group members by community
let mut members: HashMap<u32, Vec<String>> = HashMap::new();
for (key, &comm) in &self.communities {
members.entry(comm).or_default().push(key.clone());
}
// Count cross-community edges per community
let mut cross_edges: HashMap<u32, usize> = HashMap::new();
for (key, edges) in &self.adj {
let Some(&my_comm) = self.communities.get(key) else { continue };
for edge in edges {
let nbr_comm = self.communities.get(&edge.target).copied().unwrap_or(u32::MAX);
if my_comm != nbr_comm {
*cross_edges.entry(my_comm).or_default() += 1;
}
}
}
let mut result: Vec<CommunityInfo> = members.into_iter()
.map(|(id, mut keys)| {
keys.sort();
let size = keys.len();
let iso = isolation.get(&id).copied().unwrap_or(1.0);
let cross = cross_edges.get(&id).copied().unwrap_or(0) / 2; // undirected
CommunityInfo { id, members: keys, size, isolation: iso, cross_edges: cross }
})
.collect();
result.sort_by(|a, b| b.isolation.total_cmp(&a.isolation));
result
}
/// Hub degree threshold: top 5% by degree
pub fn hub_threshold(&self) -> usize {
let mut degrees: Vec<usize> = self.keys.iter()
.map(|k| self.degree(k))
.collect();
degrees.sort_unstable();
if degrees.len() >= 20 {
degrees[degrees.len() * 95 / 100]
} else {
usize::MAX
}
}
/// Local clustering coefficient: fraction of a node's neighbors
/// that are also neighbors of each other.
/// cc(v) = 2E / (deg * (deg - 1))
pub fn clustering_coefficient(&self, key: &str) -> f32 {
let neighbors = self.neighbor_keys(key);
let deg = neighbors.len();
if deg < 2 {
return 0.0;
}
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
let mut triangles = 0u32;
for i in 0..neighbor_vec.len() {
for j in (i + 1)..neighbor_vec.len() {
let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
if ni_neighbors.contains(neighbor_vec[j]) {
triangles += 1;
}
}
}
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
}
/// Average clustering coefficient across all nodes with deg >= 2
pub fn avg_clustering_coefficient(&self) -> f32 {
let mut sum = 0.0f32;
let mut count = 0u32;
for key in &self.keys {
if self.degree(key) >= 2 {
sum += self.clustering_coefficient(key);
count += 1;
}
}
if count == 0 { 0.0 } else { sum / count as f32 }
}
/// Average shortest path length (sampled BFS from up to 100 nodes)
pub fn avg_path_length(&self) -> f32 {
let sample: Vec<&String> = self.keys.iter().take(100).collect();
if sample.is_empty() { return 0.0; }
let mut total_dist = 0u64;
let mut total_pairs = 0u64;
for &start in &sample {
let dists = self.bfs_distances(start);
for d in dists.values() {
if *d > 0 {
total_dist += *d as u64;
total_pairs += 1;
}
}
}
if total_pairs == 0 { 0.0 } else { total_dist as f32 / total_pairs as f32 }
}
fn bfs_distances(&self, start: &str) -> HashMap<String, u32> {
let mut dist = HashMap::new();
let mut queue = VecDeque::new();
dist.insert(start.to_string(), 0u32);
queue.push_back(start.to_string());
while let Some(node) = queue.pop_front() {
let d = dist[&node];
for neighbor in self.neighbor_keys(&node) {
if !dist.contains_key(neighbor) {
dist.insert(neighbor.to_string(), d + 1);
queue.push_back(neighbor.to_string());
}
}
}
dist
}
/// Power-law exponent α of the degree distribution.
///
/// Estimated via MLE: α = 1 + n / Σ ln(k_i / (k_min - 0.5))
/// α ≈ 2: extreme hub dominance (fragile)
/// α ≈ 3: healthy scale-free
/// α > 3: approaching random graph (egalitarian)
pub fn degree_power_law_exponent(&self) -> f32 {
let mut degrees: Vec<usize> = self.keys.iter()
.map(|k| self.degree(k))
.filter(|&d| d > 0) // exclude isolates
.collect();
if degrees.len() < 10 { return 0.0; } // not enough data
degrees.sort_unstable();
let k_min = degrees[0] as f64;
if k_min < 1.0 { return 0.0; }
let n = degrees.len() as f64;
let sum_ln: f64 = degrees.iter()
.map(|&k| (k as f64 / (k_min - 0.5)).ln())
.sum();
if sum_ln <= 0.0 { return 0.0; }
(1.0 + n / sum_ln) as f32
}
/// Gini coefficient of the degree distribution.
///
/// 0 = perfectly egalitarian (all nodes same degree)
/// 1 = maximally unequal (one node has all edges)
/// Measures hub concentration independent of distribution shape.
pub fn degree_gini(&self) -> f32 {
let mut degrees: Vec<f64> = self.keys.iter()
.map(|k| self.degree(k) as f64)
.collect();
let n = degrees.len();
if n < 2 { return 0.0; }
degrees.sort_by(|a, b| a.total_cmp(b));
let mean = degrees.iter().sum::<f64>() / n as f64;
if mean < 1e-10 { return 0.0; }
// Gini = (2 Σ i·x_i) / (n Σ x_i) - (n+1)/n
let weighted_sum: f64 = degrees.iter().enumerate()
.map(|(i, &d)| (i as f64 + 1.0) * d)
.sum();
let total = degrees.iter().sum::<f64>();
let gini = (2.0 * weighted_sum) / (n as f64 * total) - (n as f64 + 1.0) / n as f64;
gini.max(0.0) as f32
}
/// Small-world coefficient σ = (C/C_rand) / (L/L_rand)
/// C_rand ≈ <k>/n, L_rand ≈ ln(n)/ln(<k>)
pub fn small_world_sigma(&self) -> f32 {
let n = self.keys.len() as f32;
if n < 10.0 { return 0.0; }
let avg_degree = self.adj.values()
.map(|e| e.len() as f32)
.sum::<f32>() / n;
if avg_degree < 1.0 { return 0.0; }
let c = self.avg_clustering_coefficient();
let l = self.avg_path_length();
let c_rand = avg_degree / n;
let l_rand = n.ln() / avg_degree.ln();
if c_rand < 1e-10 || l_rand < 1e-10 || l < 1e-10 {
return 0.0;
}
(c / c_rand) / (l / l_rand)
}
}
/// Impact of adding a hypothetical edge
#[derive(Debug)]
pub struct LinkImpact {
pub source: String,
pub target: String,
pub source_deg: usize,
pub target_deg: usize,
/// Is this a hub link? (either endpoint in top 5% by degree)
pub is_hub_link: bool,
/// Are both endpoints in the same community?
pub same_community: bool,
/// Change in clustering coefficient for source
pub delta_cc_source: f32,
/// Change in clustering coefficient for target
pub delta_cc_target: f32,
/// Change in degree Gini (positive = more hub-dominated)
pub delta_gini: f32,
/// Qualitative assessment
pub assessment: &'static str,
}
impl Graph {
/// Simulate adding an edge and report impact on topology metrics.
///
/// Doesn't modify the graph — computes what would change if the
/// edge were added.
pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact {
let source_deg = self.degree(source);
let target_deg = self.degree(target);
let hub_threshold = self.hub_threshold();
let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold;
// Community check
let sc = self.communities.get(source);
let tc = self.communities.get(target);
let same_community = match (sc, tc) {
(Some(a), Some(b)) => a == b,
_ => false,
};
// CC change for source: adding target as neighbor changes the
// triangle count. New triangles form for each node that's a
// neighbor of BOTH source and target.
let source_neighbors = self.neighbor_keys(source);
let target_neighbors = self.neighbor_keys(target);
let shared_neighbors = source_neighbors.intersection(&target_neighbors).count();
let cc_before_source = self.clustering_coefficient(source);
let cc_before_target = self.clustering_coefficient(target);
// Estimate new CC for source after adding edge
let new_source_deg = source_deg + 1;
let new_source_triangles = if source_deg >= 2 {
// Current triangles + new ones from shared neighbors
let current_triangles = (cc_before_source
* source_deg as f32 * (source_deg as f32 - 1.0) / 2.0) as u32;
current_triangles + shared_neighbors as u32
} else {
shared_neighbors as u32
};
let cc_after_source = if new_source_deg >= 2 {
(2.0 * new_source_triangles as f32)
/ (new_source_deg as f32 * (new_source_deg as f32 - 1.0))
} else {
0.0
};
let new_target_deg = target_deg + 1;
let new_target_triangles = if target_deg >= 2 {
let current_triangles = (cc_before_target
* target_deg as f32 * (target_deg as f32 - 1.0) / 2.0) as u32;
current_triangles + shared_neighbors as u32
} else {
shared_neighbors as u32
};
let cc_after_target = if new_target_deg >= 2 {
(2.0 * new_target_triangles as f32)
/ (new_target_deg as f32 * (new_target_deg as f32 - 1.0))
} else {
0.0
};
// Gini change via influence function:
// IF(x; Gini, F) = (2F(x) - 1) * x/μ - Gini - 1
// Adding an edge increments two degrees. The net ΔGini is the sum
// of influence contributions from both endpoints shifting up by 1.
let gini_before = self.degree_gini();
let n = self.keys.len();
let total_degree: f64 = self.keys.iter()
.map(|k| self.degree(k) as f64)
.sum();
let mean_deg = if n > 0 { total_degree / n as f64 } else { 1.0 };
// CDF at each endpoint's degree: fraction of nodes with degree ≤ d
let delta_gini = if mean_deg > 1e-10 && n >= 2 {
// Count nodes with degree ≤ source_deg and ≤ target_deg
let f_source = self.keys.iter()
.filter(|k| self.degree(k) <= source_deg)
.count() as f64 / n as f64;
let f_target = self.keys.iter()
.filter(|k| self.degree(k) <= target_deg)
.count() as f64 / n as f64;
// Influence of incrementing source's degree by 1
let new_source = (source_deg + 1) as f64;
let if_source = (2.0 * f_source - 1.0) * new_source / mean_deg
- gini_before as f64 - 1.0;
// Influence of incrementing target's degree by 1
let new_target = (target_deg + 1) as f64;
let if_target = (2.0 * f_target - 1.0) * new_target / mean_deg
- gini_before as f64 - 1.0;
// Scale: each point contributes 1/n to the distribution
((if_source + if_target) / n as f64) as f32
} else {
0.0f32
};
// Qualitative assessment
let assessment = if is_hub_link && same_community {
"hub-reinforcing: strengthens existing star topology"
} else if is_hub_link && !same_community {
"hub-bridging: cross-community but through a hub"
} else if !is_hub_link && same_community && shared_neighbors > 0 {
"lateral-clustering: strengthens local mesh topology"
} else if !is_hub_link && !same_community {
"lateral-bridging: best kind — cross-community lateral link"
} else if !is_hub_link && same_community {
"lateral-local: connects peripheral nodes in same community"
} else {
"neutral"
};
LinkImpact {
source: source.to_string(),
target: target.to_string(),
source_deg,
target_deg,
is_hub_link,
same_community,
delta_cc_source: cc_after_source - cc_before_source,
delta_cc_target: cc_after_target - cc_before_target,
delta_gini,
assessment,
}
}
}
/// Build graph from store data (with community detection)
pub fn build_graph(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store);
let communities = label_propagation(&keys, &adj, 20);
Graph { adj, keys, communities }
}
/// Build graph without community detection — for spreading activation
/// searches where we only need the adjacency list.
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store);
Graph { adj, keys, communities: HashMap::new() }
}
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
let mut keys: HashSet<String> = HashSet::new();
store.for_each_node(|key, _, _| {
keys.insert(key.to_owned());
});
store.for_each_relation(|source_key, target_key, strength, rel_type| {
if !keys.contains(source_key) || !keys.contains(target_key) {
return;
}
adj.entry(source_key.to_owned()).or_default().push(Edge {
target: target_key.to_owned(),
strength,
rel_type,
});
adj.entry(target_key.to_owned()).or_default().push(Edge {
target: source_key.to_owned(),
strength,
rel_type,
});
});
add_implicit_temporal_edges(store, &keys, &mut adj);
(adj, keys)
}
/// Add implicit edges for the temporal/digest hierarchy.
///
/// These edges are derived from node types and dates — they don't
/// need to be stored. Two kinds:
/// - parent/child: session→daily→weekly→monthly (by date containment)
/// - prev/next: chronological ordering within each level
///
/// Sessions use their timestamp for date. Digest nodes (daily/weekly/monthly)
/// extract the date they *cover* from the key name, since their timestamp
/// is when the digest was created, not what period it covers.
fn add_implicit_temporal_edges(
store: &impl StoreView,
keys: &HashSet<String>,
adj: &mut HashMap<String, Vec<Edge>>,
) {
use crate::store::NodeType::*;
use chrono::{Datelike, DateTime, NaiveDate};
// Extract the covered date from a key name.
// Patterns: "daily-2026-03-06", "daily-2026-03-06-identity",
// "weekly-2026-W09", "monthly-2026-02"
// "journal#j-2026-03-13t...", "journal#2026-03-13-..."
fn date_from_key(key: &str) -> Option<NaiveDate> {
// Try extracting YYYY-MM-DD after known prefixes
for prefix in ["daily-", "journal#j-", "journal#"] {
if let Some(rest) = key.strip_prefix(prefix)
&& rest.len() >= 10
&& let Ok(d) = NaiveDate::parse_from_str(&rest[..10], "%Y-%m-%d") {
return Some(d);
}
}
None
}
fn week_from_key(key: &str) -> Option<(i32, u32)> {
// "weekly-2026-W09" → (2026, 9)
let rest = key.strip_prefix("weekly-")?;
let (year_str, w_str) = rest.split_once("-W")?;
let year: i32 = year_str.parse().ok()?;
// Week string might have a suffix like "-foo"
let week_str = w_str.split('-').next()?;
let week: u32 = week_str.parse().ok()?;
Some((year, week))
}
fn month_from_key(key: &str) -> Option<(i32, u32)> {
// "monthly-2026-02" → (2026, 2)
let rest = key.strip_prefix("monthly-")?;
let (year_str, month_str) = rest.split_once('-')?;
let year: i32 = year_str.parse().ok()?;
let month_str = month_str.split('-').next()?;
let month: u32 = month_str.parse().ok()?;
Some((year, month))
}
// Collect episodic nodes by type
struct Dated { key: String, ts: i64, date: NaiveDate }
let mut sessions: Vec<Dated> = Vec::new();
let mut dailies: Vec<(String, NaiveDate)> = Vec::new();
let mut weeklies: Vec<(String, (i32, u32))> = Vec::new();
let mut monthlies: Vec<(String, (i32, u32))> = Vec::new();
store.for_each_node_meta(|key, node_type, ts| {
if !keys.contains(key) { return; }
match node_type {
EpisodicSession => {
// Prefer date from key (local time) over timestamp (UTC)
// to avoid timezone mismatches
let date = date_from_key(key).or_else(|| {
DateTime::from_timestamp(ts, 0).map(|dt| dt.date_naive())
});
if let Some(date) = date {
sessions.push(Dated { key: key.to_owned(), ts, date });
}
}
EpisodicDaily => {
if let Some(date) = date_from_key(key) {
dailies.push((key.to_owned(), date));
}
}
EpisodicWeekly => {
if let Some(yw) = week_from_key(key) {
weeklies.push((key.to_owned(), yw));
}
}
EpisodicMonthly => {
if let Some(ym) = month_from_key(key) {
monthlies.push((key.to_owned(), ym));
}
}
_ => {}
}
});
sessions.sort_by_key(|d| d.ts);
dailies.sort_by_key(|(_, d)| *d);
weeklies.sort_by_key(|(_, yw)| *yw);
monthlies.sort_by_key(|(_, ym)| *ym);
let add_edge = |adj: &mut HashMap<String, Vec<Edge>>, a: &str, b: &str| {
if let Some(edges) = adj.get(a)
&& edges.iter().any(|e| e.target == b) { return; }
adj.entry(a.to_owned()).or_default().push(Edge {
target: b.to_owned(),
strength: 1.0,
rel_type: RelationType::Auto,
});
adj.entry(b.to_owned()).or_default().push(Edge {
target: a.to_owned(),
strength: 1.0,
rel_type: RelationType::Auto,
});
};
// Build indexes: date→dailies, (year,week)→weekly, (year,month)→monthly
// Note: multiple dailies can share a date (e.g. daily-2026-03-06-identity,
// daily-2026-03-06-technical), so we collect all of them.
let mut date_to_dailies: HashMap<NaiveDate, Vec<String>> = HashMap::new();
for (key, date) in &dailies {
date_to_dailies.entry(*date).or_default().push(key.clone());
}
let mut yw_to_weekly: HashMap<(i32, u32), String> = HashMap::new();
for (key, yw) in &weeklies {
yw_to_weekly.insert(*yw, key.clone());
}
let mut ym_to_monthly: HashMap<(i32, u32), String> = HashMap::new();
for (key, ym) in &monthlies {
ym_to_monthly.insert(*ym, key.clone());
}
// Session → Daily (parent): each session links to all dailies for its date
for sess in &sessions {
if let Some(daily_keys) = date_to_dailies.get(&sess.date) {
for daily in daily_keys {
add_edge(adj, &sess.key, daily);
}
}
}
// Daily → Weekly (parent)
for (key, date) in &dailies {
let yw = (date.iso_week().year(), date.iso_week().week());
if let Some(weekly) = yw_to_weekly.get(&yw) {
add_edge(adj, key, weekly);
}
}
// Weekly → Monthly (parent)
for (key, yw) in &weeklies {
// A week can span two months; use the Thursday date (ISO week convention)
let thursday = NaiveDate::from_isoywd_opt(yw.0, yw.1, chrono::Weekday::Thu);
if let Some(d) = thursday {
let ym = (d.year(), d.month());
if let Some(monthly) = ym_to_monthly.get(&ym) {
add_edge(adj, key, monthly);
}
}
}
// Prev/next within each level
for pair in sessions.windows(2) {
add_edge(adj, &pair[0].key, &pair[1].key);
}
for pair in dailies.windows(2) {
add_edge(adj, &pair[0].0, &pair[1].0);
}
for pair in weeklies.windows(2) {
add_edge(adj, &pair[0].0, &pair[1].0);
}
for pair in monthlies.windows(2) {
add_edge(adj, &pair[0].0, &pair[1].0);
}
}
/// Label propagation community detection.
///
/// Each node starts with its own label. Each iteration: adopt the most
/// common label among neighbors (weighted by edge strength). Iterate
/// until stable or max_iterations.
fn label_propagation(
keys: &HashSet<String>,
adj: &HashMap<String, Vec<Edge>>,
max_iterations: u32,
) -> HashMap<String, u32> {
// Only consider edges above this strength for community votes.
// Weak auto-links from triangle closure (0.15-0.35) bridge
// unrelated clusters — filtering them lets natural communities emerge.
let min_strength: f32 = 0.3;
// Initialize: each node gets its own label
let key_vec: Vec<String> = keys.iter().cloned().collect();
let mut labels: HashMap<String, u32> = key_vec.iter()
.enumerate()
.map(|(i, k)| (k.clone(), i as u32))
.collect();
for _iter in 0..max_iterations {
let mut changed = false;
for key in &key_vec {
let edges = match adj.get(key) {
Some(e) => e,
None => continue,
};
if edges.is_empty() { continue; }
// Count weighted votes for each label (skip weak edges)
let mut votes: HashMap<u32, f32> = HashMap::new();
for edge in edges {
if edge.strength < min_strength { continue; }
if let Some(&label) = labels.get(&edge.target) {
*votes.entry(label).or_default() += edge.strength;
}
}
// Adopt the label with most votes
if let Some((&best_label, _)) = votes.iter()
.max_by(|a, b| a.1.total_cmp(b.1))
{
let current = labels[key];
if best_label != current {
labels.insert(key.clone(), best_label);
changed = true;
}
}
}
if !changed { break; }
}
// Compact labels to 0..n
let mut label_map: HashMap<u32, u32> = HashMap::new();
let mut next_id = 0;
for label in labels.values_mut() {
let new_label = *label_map.entry(*label).or_insert_with(|| {
let id = next_id;
next_id += 1;
id
});
*label = new_label;
}
labels
}
/// A snapshot of graph topology metrics, for tracking evolution over time
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetricsSnapshot {
pub timestamp: i64,
pub date: String,
pub nodes: usize,
pub edges: usize,
pub communities: usize,
pub sigma: f32,
pub alpha: f32,
pub gini: f32,
pub avg_cc: f32,
pub avg_path_length: f32,
// Removed: avg_schema_fit was identical to avg_cc.
// Old snapshots with the field still deserialize (serde ignores unknown fields by default).
}
fn metrics_log_path() -> std::path::PathBuf {
crate::store::memory_dir().join("metrics.jsonl")
}
/// Load previous metrics snapshots
pub fn load_metrics_history() -> Vec<MetricsSnapshot> {
crate::util::jsonl_load(&metrics_log_path())
}
/// Append a metrics snapshot to the log
pub fn save_metrics_snapshot(snap: &MetricsSnapshot) {
let _ = crate::util::jsonl_append(&metrics_log_path(), snap);
}
/// Compute current graph metrics as a snapshot (no side effects).
pub fn current_metrics(graph: &Graph) -> MetricsSnapshot {
let now = crate::store::now_epoch();
let date = crate::store::format_datetime_space(now);
MetricsSnapshot {
timestamp: now,
date,
nodes: graph.nodes().len(),
edges: graph.edge_count(),
communities: graph.community_count(),
sigma: graph.small_world_sigma(),
alpha: graph.degree_power_law_exponent(),
gini: graph.degree_gini(),
avg_cc: graph.avg_clustering_coefficient(),
avg_path_length: graph.avg_path_length(),
}
}
/// Health report: summary of graph metrics.
/// Saves a metrics snapshot as a side effect (callers who want pure
/// computation should use `current_metrics` + `save_metrics_snapshot`).
pub fn health_report(graph: &Graph, store: &Store) -> String {
let snap = current_metrics(graph);
save_metrics_snapshot(&snap);
let n = snap.nodes;
let e = snap.edges;
let avg_cc = snap.avg_cc;
let avg_pl = snap.avg_path_length;
let sigma = snap.sigma;
let alpha = snap.alpha;
let gini = snap.gini;
let communities = snap.communities;
// Community sizes
let mut comm_sizes: HashMap<u32, usize> = HashMap::new();
for label in graph.communities().values() {
*comm_sizes.entry(*label).or_default() += 1;
}
let mut sizes: Vec<usize> = comm_sizes.values().copied().collect();
sizes.sort_unstable_by(|a, b| b.cmp(a));
// Degree distribution
let mut degrees: Vec<usize> = graph.nodes().iter()
.map(|k| graph.degree(k))
.collect();
degrees.sort_unstable();
let max_deg = degrees.last().copied().unwrap_or(0);
let median_deg = if degrees.is_empty() { 0 } else { degrees[degrees.len() / 2] };
let avg_deg = if n == 0 { 0.0 } else {
degrees.iter().sum::<usize>() as f64 / n as f64
};
// Low-CC nodes: poorly integrated
let low_cc = graph.nodes().iter()
.filter(|k| graph.clustering_coefficient(k) < 0.1)
.count();
// Orphan edges: relations referencing non-existent nodes
let mut orphan_edges = 0usize;
let mut missing_nodes: HashSet<String> = HashSet::new();
for rel in &store.relations {
if rel.deleted { continue; }
let s_missing = !store.nodes.contains_key(&rel.source_key);
let t_missing = !store.nodes.contains_key(&rel.target_key);
if s_missing || t_missing {
orphan_edges += 1;
if s_missing { missing_nodes.insert(rel.source_key.clone()); }
if t_missing { missing_nodes.insert(rel.target_key.clone()); }
}
}
// NodeType breakdown
let mut type_counts: HashMap<&str, usize> = HashMap::new();
for node in store.nodes.values() {
let label = match node.node_type {
crate::store::NodeType::EpisodicSession => "episodic",
crate::store::NodeType::EpisodicDaily => "daily",
crate::store::NodeType::EpisodicWeekly => "weekly",
crate::store::NodeType::EpisodicMonthly => "monthly",
crate::store::NodeType::Semantic => "semantic",
};
*type_counts.entry(label).or_default() += 1;
}
// Load history for deltas
let history = load_metrics_history();
let prev = if history.len() >= 2 {
Some(&history[history.len() - 2]) // second-to-last (last is the one we just wrote)
} else {
None
};
fn delta(current: f32, prev: Option<f32>) -> String {
match prev {
Some(p) => {
let d = current - p;
if d.abs() < 0.001 { String::new() }
else { format!("{:+.3})", d) }
}
None => String::new(),
}
}
let sigma_d = delta(sigma, prev.map(|p| p.sigma));
let alpha_d = delta(alpha, prev.map(|p| p.alpha));
let gini_d = delta(gini, prev.map(|p| p.gini));
let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
let mut report = format!(
"Memory Health Report
====================
Nodes: {n} Relations: {e} Communities: {communities}
Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
Clustering coefficient (avg): {avg_cc:.4}{cc_d} low-CC (<0.1): {low_cc} nodes
Average path length: {avg_pl:.2}
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)
Community sizes (top 5): {top5}
Types: semantic={semantic} episodic={episodic} daily={daily} weekly={weekly} monthly={monthly}",
top5 = sizes.iter().take(5)
.map(|s| s.to_string())
.collect::<Vec<_>>()
.join(", "),
semantic = type_counts.get("semantic").unwrap_or(&0),
episodic = type_counts.get("episodic").unwrap_or(&0),
daily = type_counts.get("daily").unwrap_or(&0),
weekly = type_counts.get("weekly").unwrap_or(&0),
monthly = type_counts.get("monthly").unwrap_or(&0),
);
// Orphan edges
if orphan_edges == 0 {
report.push_str("\n\nBroken links: 0");
} else {
report.push_str(&format!(
"\n\nBroken links: {} edges reference {} missing nodes",
orphan_edges, missing_nodes.len()));
let mut sorted: Vec<_> = missing_nodes.iter().collect();
sorted.sort();
for key in sorted.iter().take(10) {
report.push_str(&format!("\n - {}", key));
}
if sorted.len() > 10 {
report.push_str(&format!("\n ... and {} more", sorted.len() - 10));
}
}
// Show history trend if we have enough data points
if history.len() >= 3 {
report.push_str("\n\nMetrics history (last 5):\n");
for snap in &history[history.len().saturating_sub(5)..] {
report.push_str(&format!(" {}σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
}
}
report
}

204
src/hippocampus/lookups.rs Normal file
View file

@ -0,0 +1,204 @@
// Daily lookup counters — mmap'd open-addressing hash table.
//
// Records which memory keys are retrieved each day. The knowledge loop
// uses this to focus extraction on actively-used graph neighborhoods,
// like hippocampal replay preferentially consolidating recent experience.
//
// Format: 16-byte header + 4096 entries of (u64 hash, u32 count).
// Total: ~49KB per day. Fast path: hash key → probe → bump counter.
// No store loading required.
use std::fs;
use std::path::PathBuf;
use crate::util::memory_subdir;
const MAGIC: [u8; 4] = *b"LKUP";
const VERSION: u32 = 1;
const CAPACITY: u32 = 4096;
const HEADER_SIZE: usize = 16;
const ENTRY_SIZE: usize = 12; // u64 hash + u32 count
const FILE_SIZE: usize = HEADER_SIZE + CAPACITY as usize * ENTRY_SIZE;
// FNV-1a hash — simple, fast, no dependencies
fn fnv1a(key: &str) -> u64 {
let mut h: u64 = 0xcbf29ce484222325;
for b in key.as_bytes() {
h ^= *b as u64;
h = h.wrapping_mul(0x100000001b3);
}
h
}
fn daily_path(date: &str) -> Result<PathBuf, String> {
let dir = memory_subdir("lookups")?;
Ok(dir.join(format!("{}.bin", date)))
}
fn today() -> String {
chrono::Local::now().format("%Y-%m-%d").to_string()
}
/// Read or create the daily file, returning its contents as a mutable Vec.
fn load_file(date: &str) -> Result<Vec<u8>, String> {
let path = daily_path(date)?;
if path.exists() {
let data = fs::read(&path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
if data.len() == FILE_SIZE && data[0..4] == MAGIC {
return Ok(data);
}
// Corrupt or wrong size — reinitialize
}
// Create fresh file
let mut buf = vec![0u8; FILE_SIZE];
buf[0..4].copy_from_slice(&MAGIC);
buf[4..8].copy_from_slice(&VERSION.to_le_bytes());
buf[8..12].copy_from_slice(&CAPACITY.to_le_bytes());
// count = 0 (already zero)
Ok(buf)
}
fn write_file(date: &str, data: &[u8]) -> Result<(), String> {
let path = daily_path(date)?;
fs::write(&path, data)
.map_err(|e| format!("write {}: {}", path.display(), e))
}
fn entry_offset(slot: usize) -> usize {
HEADER_SIZE + slot * ENTRY_SIZE
}
fn read_entry(data: &[u8], slot: usize) -> (u64, u32) {
let off = entry_offset(slot);
let hash = u64::from_le_bytes(data[off..off + 8].try_into().unwrap());
let count = u32::from_le_bytes(data[off + 8..off + 12].try_into().unwrap());
(hash, count)
}
fn write_entry(data: &mut [u8], slot: usize, hash: u64, count: u32) {
let off = entry_offset(slot);
data[off..off + 8].copy_from_slice(&hash.to_le_bytes());
data[off + 8..off + 12].copy_from_slice(&count.to_le_bytes());
}
fn read_count(data: &[u8]) -> u32 {
u32::from_le_bytes(data[12..16].try_into().unwrap())
}
fn write_count(data: &mut [u8], count: u32) {
data[12..16].copy_from_slice(&count.to_le_bytes());
}
/// Bump the counter for a key. Fast path — no store needed.
pub fn bump(key: &str) -> Result<(), String> {
let date = today();
let mut data = load_file(&date)?;
let hash = fnv1a(key);
let cap = CAPACITY as usize;
let mut slot = (hash as usize) % cap;
for _ in 0..cap {
let (h, c) = read_entry(&data, slot);
if h == hash {
// Existing entry — bump
write_entry(&mut data, slot, hash, c + 1);
write_file(&date, &data)?;
return Ok(());
}
if h == 0 && c == 0 {
// Empty slot — new entry
write_entry(&mut data, slot, hash, 1);
let c = read_count(&data);
write_count(&mut data, c + 1);
write_file(&date, &data)?;
return Ok(());
}
slot = (slot + 1) % cap;
}
// Table full (shouldn't happen with 4096 slots)
Err("lookup table full".into())
}
/// Bump counters for multiple keys at once (single file read/write).
pub fn bump_many(keys: &[&str]) -> Result<(), String> {
if keys.is_empty() { return Ok(()); }
let date = today();
let mut data = load_file(&date)?;
let cap = CAPACITY as usize;
let mut used = read_count(&data);
for key in keys {
let hash = fnv1a(key);
let mut slot = (hash as usize) % cap;
let mut found = false;
for _ in 0..cap {
let (h, c) = read_entry(&data, slot);
if h == hash {
write_entry(&mut data, slot, hash, c + 1);
found = true;
break;
}
if h == 0 && c == 0 {
write_entry(&mut data, slot, hash, 1);
used += 1;
found = true;
break;
}
slot = (slot + 1) % cap;
}
if !found {
// Table full — stop, don't lose what we have
break;
}
}
write_count(&mut data, used);
write_file(&date, &data)
}
/// Dump all lookups for a date as (hash, count) pairs, sorted by count descending.
pub fn dump_raw(date: &str) -> Result<Vec<(u64, u32)>, String> {
let data = load_file(date)?;
let mut entries = Vec::new();
for slot in 0..CAPACITY as usize {
let (hash, count) = read_entry(&data, slot);
if hash != 0 || count != 0 {
entries.push((hash, count));
}
}
entries.sort_by(|a, b| b.1.cmp(&a.1));
Ok(entries)
}
/// Resolve hashes to keys by scanning the store. Returns (key, count) pairs.
pub fn dump_resolved(date: &str, keys: &[String]) -> Result<Vec<(String, u32)>, String> {
let raw = dump_raw(date)?;
// Build hash → key map from known keys
let hash_map: std::collections::HashMap<u64, &str> = keys.iter()
.map(|k| (fnv1a(k), k.as_str()))
.collect();
let mut resolved = Vec::new();
for (hash, count) in raw {
let key = hash_map.get(&hash)
.map(|k| k.to_string())
.unwrap_or_else(|| format!("#{:016x}", hash));
resolved.push((key, count));
}
Ok(resolved)
}
/// Hash a key (exposed for testing/external use).
pub fn hash_key(key: &str) -> u64 {
fnv1a(key)
}

View file

@ -0,0 +1,360 @@
// memory-search: context loading + ambient memory retrieval
//
// Core hook logic lives here as a library module so poc-hook can call
// it directly (no subprocess). The memory-search binary is a thin CLI
// wrapper with --hook for debugging and show_seen for inspection.
use std::collections::HashSet;
use std::fs;
use std::fs::File;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
fn now_secs() -> u64 {
SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs()
}
/// Max bytes per context chunk (hook output limit is ~10K chars)
const CHUNK_SIZE: usize = 9000;
pub struct Session {
pub session_id: String,
pub transcript_path: String,
pub hook_event: String,
pub state_dir: PathBuf,
}
impl Session {
pub fn from_json(input: &str) -> Option<Self> {
let state_dir = PathBuf::from("/tmp/claude-memory-search");
fs::create_dir_all(&state_dir).ok();
let json: serde_json::Value = serde_json::from_str(input).ok()?;
let session_id = json["session_id"].as_str().unwrap_or("").to_string();
if session_id.is_empty() { return None; }
let transcript_path = json["transcript_path"].as_str().unwrap_or("").to_string();
let hook_event = json["hook_event_name"].as_str().unwrap_or("").to_string();
Some(Session { session_id, transcript_path, hook_event, state_dir })
}
pub fn path(&self, prefix: &str) -> PathBuf {
self.state_dir.join(format!("{}-{}", prefix, self.session_id))
}
/// Load from POC_SESSION_ID environment variable
pub fn from_env() -> Option<Self> {
let session_id = std::env::var("POC_SESSION_ID").ok()?;
if session_id.is_empty() { return None; }
let state_dir = PathBuf::from("/tmp/claude-memory-search");
Some(Session {
session_id,
transcript_path: String::new(),
hook_event: String::new(),
state_dir,
})
}
/// Get the seen set for this session
pub fn seen(&self) -> HashSet<String> {
load_seen(&self.state_dir, &self.session_id)
}
}
/// Run the hook logic on parsed JSON input. Returns output to inject.
pub fn run_hook(input: &str) -> String {
// Daemon agent calls set POC_AGENT=1 — skip memory search.
if std::env::var("POC_AGENT").is_ok() { return String::new(); }
let Some(session) = Session::from_json(input) else { return String::new() };
hook(&session)
}
/// Split context output into chunks of approximately `max_bytes`, breaking
/// at section boundaries ("--- KEY (group) ---" lines).
fn chunk_context(ctx: &str, max_bytes: usize) -> Vec<String> {
let mut sections: Vec<String> = Vec::new();
let mut current = String::new();
for line in ctx.lines() {
if line.starts_with("--- ") && line.ends_with(" ---") && !current.is_empty() {
sections.push(std::mem::take(&mut current));
}
if !current.is_empty() {
current.push('\n');
}
current.push_str(line);
}
if !current.is_empty() {
sections.push(current);
}
let mut chunks: Vec<String> = Vec::new();
let mut chunk = String::new();
for section in sections {
if !chunk.is_empty() && chunk.len() + section.len() + 1 > max_bytes {
chunks.push(std::mem::take(&mut chunk));
}
if !chunk.is_empty() {
chunk.push('\n');
}
chunk.push_str(&section);
}
if !chunk.is_empty() {
chunks.push(chunk);
}
chunks
}
fn save_pending_chunks(dir: &Path, session_id: &str, chunks: &[String]) {
let chunks_dir = dir.join(format!("chunks-{}", session_id));
let _ = fs::remove_dir_all(&chunks_dir);
if chunks.is_empty() { return; }
fs::create_dir_all(&chunks_dir).ok();
for (i, chunk) in chunks.iter().enumerate() {
let path = chunks_dir.join(format!("{:04}", i));
fs::write(path, chunk).ok();
}
}
fn pop_pending_chunk(dir: &Path, session_id: &str) -> Option<String> {
let chunks_dir = dir.join(format!("chunks-{}", session_id));
if !chunks_dir.exists() { return None; }
let mut entries: Vec<_> = fs::read_dir(&chunks_dir).ok()?
.flatten()
.filter(|e| e.file_type().map(|t| t.is_file()).unwrap_or(false))
.collect();
entries.sort_by_key(|e| e.file_name());
let first = entries.first()?;
let content = fs::read_to_string(first.path()).ok()?;
fs::remove_file(first.path()).ok();
if fs::read_dir(&chunks_dir).ok().map(|mut d| d.next().is_none()).unwrap_or(true) {
fs::remove_dir(&chunks_dir).ok();
}
Some(content)
}
fn generate_cookie() -> String {
uuid::Uuid::new_v4().as_simple().to_string()[..12].to_string()
}
fn parse_seen_line(line: &str) -> &str {
line.split_once('\t').map(|(_, key)| key).unwrap_or(line)
}
fn load_seen(dir: &Path, session_id: &str) -> HashSet<String> {
let path = dir.join(format!("seen-{}", session_id));
if path.exists() {
fs::read_to_string(&path)
.unwrap_or_default()
.lines()
.filter(|s| !s.is_empty())
.map(|s| parse_seen_line(s).to_string())
.collect()
} else {
HashSet::new()
}
}
fn mark_seen(dir: &Path, session_id: &str, key: &str, seen: &mut HashSet<String>) {
if !seen.insert(key.to_string()) { return; }
let path = dir.join(format!("seen-{}", session_id));
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
writeln!(f, "{}\t{}", ts, key).ok();
}
}
fn surface_agent_cycle(session: &Session, out: &mut String, log_f: &mut File) {
let result_path = session.state_dir.join(format!("surface-result-{}", session.session_id));
let pid_path = session.state_dir.join(format!("surface-pid-{}", session.session_id));
let surface_timeout = crate::config::get()
.surface_timeout_secs
.unwrap_or(120) as u64;
let agent_done = match fs::read_to_string(&pid_path) {
Ok(content) => {
let parts: Vec<&str> = content.split('\t').collect();
let pid: u32 = parts.first().and_then(|s| s.trim().parse().ok()).unwrap_or(0);
let start_ts: u64 = parts.get(1).and_then(|s| s.trim().parse().ok()).unwrap_or(0);
if pid == 0 { true }
else {
let alive = unsafe { libc::kill(pid as i32, 0) == 0 };
if !alive { true }
else if now_secs().saturating_sub(start_ts) > surface_timeout {
unsafe { libc::kill(pid as i32, libc::SIGTERM); }
true
} else { false }
}
}
Err(_) => true,
};
let _ = writeln!(log_f, "agent_done {agent_done}");
if !agent_done { return; }
if let Ok(result) = fs::read_to_string(&result_path) {
if !result.trim().is_empty() {
let tail_lines: Vec<&str> = result.lines().rev()
.filter(|l| !l.trim().is_empty()).take(8).collect();
let has_new = tail_lines.iter().any(|l| l.starts_with("NEW RELEVANT MEMORIES:"));
let has_none = tail_lines.iter().any(|l| l.starts_with("NO NEW RELEVANT MEMORIES"));
let _ = writeln!(log_f, "has_new {has_new} has_none {has_none}");
if has_new {
let after_marker = result.rsplit_once("NEW RELEVANT MEMORIES:")
.map(|(_, rest)| rest).unwrap_or("");
let keys: Vec<String> = after_marker.lines()
.map(|l| l.trim().trim_start_matches("- ").trim().to_string())
.filter(|l| !l.is_empty() && !l.starts_with("```")).collect();
let _ = writeln!(log_f, "keys {:?}", keys);
let Ok(store) = crate::store::Store::load() else { return; };
let mut seen = session.seen();
let seen_path = session.path("seen");
for key in &keys {
if !seen.insert(key.clone()) {
let _ = writeln!(log_f, " skip (seen): {}", key);
continue;
}
if let Some(content) = crate::cli::node::render_node(&store, key) {
if !content.trim().is_empty() {
use std::fmt::Write as _;
writeln!(out, "--- {} (surfaced) ---", key).ok();
write!(out, "{}", content).ok();
let _ = writeln!(log_f, " rendered {}: {} bytes, out now {} bytes", key, content.len(), out.len());
if let Ok(mut f) = fs::OpenOptions::new()
.create(true).append(true).open(&seen_path) {
let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
writeln!(f, "{}\t{}", ts, key).ok();
}
}
}
}
} else if !has_none {
let log_dir = crate::store::memory_dir().join("logs");
fs::create_dir_all(&log_dir).ok();
let log_path = log_dir.join("surface-errors.log");
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(&log_path) {
let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
let last = tail_lines.first().unwrap_or(&"");
let _ = writeln!(f, "[{}] unexpected surface output: {}", ts, last);
}
}
}
}
fs::remove_file(&result_path).ok();
fs::remove_file(&pid_path).ok();
if let Ok(output_file) = fs::File::create(&result_path) {
if let Ok(child) = Command::new("poc-memory")
.args(["agent", "run", "surface", "--count", "1", "--local"])
.env("POC_SESSION_ID", &session.session_id)
.stdout(output_file)
.stderr(std::process::Stdio::null())
.spawn()
{
let pid = child.id();
let ts = now_secs();
if let Ok(mut f) = fs::File::create(&pid_path) {
write!(f, "{}\t{}", pid, ts).ok();
}
}
}
}
fn cleanup_stale_files(dir: &Path, max_age: Duration) {
let entries = match fs::read_dir(dir) {
Ok(e) => e,
Err(_) => return,
};
let cutoff = SystemTime::now() - max_age;
for entry in entries.flatten() {
if let Ok(meta) = entry.metadata() {
if let Ok(modified) = meta.modified() {
if modified < cutoff {
fs::remove_file(entry.path()).ok();
}
}
}
}
}
fn hook(session: &Session) -> String {
let mut out = String::new();
let is_compaction = crate::transcript::detect_new_compaction(
&session.state_dir, &session.session_id, &session.transcript_path,
);
let cookie_path = session.path("cookie");
let is_first = !cookie_path.exists();
let log_path = session.state_dir.join(format!("hook-log-{}", session.session_id));
let Ok(mut log_f) = fs::OpenOptions::new().create(true).append(true).open(log_path) else { return Default::default(); };
let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
let _ = writeln!(log_f, "\n=== {} ({}) {} bytes ===", ts, session.hook_event, out.len());
let _ = writeln!(log_f, "is_first {is_first} is_compaction {is_compaction}");
if is_first || is_compaction {
if is_compaction {
fs::rename(&session.path("seen"), &session.path("seen-prev")).ok();
} else {
fs::remove_file(&session.path("seen")).ok();
fs::remove_file(&session.path("seen-prev")).ok();
}
fs::remove_file(&session.path("returned")).ok();
if is_first {
fs::write(&cookie_path, generate_cookie()).ok();
}
if let Ok(output) = Command::new("poc-memory").args(["admin", "load-context"]).output() {
if output.status.success() {
let ctx = String::from_utf8_lossy(&output.stdout).to_string();
if !ctx.trim().is_empty() {
let mut ctx_seen = session.seen();
for line in ctx.lines() {
if line.starts_with("--- ") && line.ends_with(" ---") {
let inner = &line[4..line.len() - 4];
if let Some(paren) = inner.rfind(" (") {
let key = inner[..paren].trim();
mark_seen(&session.state_dir, &session.session_id, key, &mut ctx_seen);
}
}
}
let chunks = chunk_context(&ctx, CHUNK_SIZE);
if let Some(first) = chunks.first() {
out.push_str(first);
}
save_pending_chunks(&session.state_dir, &session.session_id, &chunks[1..]);
}
}
}
}
if let Some(chunk) = pop_pending_chunk(&session.state_dir, &session.session_id) {
out.push_str(&chunk);
} else {
let cfg = crate::config::get();
if cfg.surface_hooks.iter().any(|h| h == &session.hook_event) {
surface_agent_cycle(session, &mut out, &mut log_f);
}
}
cleanup_stale_files(&session.state_dir, Duration::from_secs(86400));
let _ = write!(log_f, "{}", out);
out
}

368
src/hippocampus/migrate.rs Normal file
View file

@ -0,0 +1,368 @@
// Migration from old weights.json + markdown marker system
//
// Reads:
// ~/.claude/memory/weights.json (1,874 entries with metrics)
// ~/.claude/memory/*.md (content + mem markers + edges)
//
// Emits:
// ~/.claude/memory/nodes.capnp (all nodes with preserved metadata)
// ~/.claude/memory/relations.capnp (all edges from markers + md links)
// ~/.claude/memory/state.json (derived cache)
//
// Old files are preserved as backup. Run once.
use crate::store::{
self, Store, Node, NodeType, RelationType,
parse_units, new_relation,
};
use serde::Deserialize;
use uuid::Uuid;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
fn home() -> PathBuf {
PathBuf::from(env::var("HOME").expect("HOME not set"))
}
// Old system data structures (just enough for deserialization)
#[derive(Deserialize)]
struct OldStore {
#[serde(default)]
entries: HashMap<String, OldEntry>,
#[serde(default)]
retrieval_log: Vec<OldRetrievalEvent>,
#[serde(default)]
params: OldParams,
}
#[derive(Deserialize)]
#[allow(dead_code)] // fields needed for deserialization of old format
struct OldEntry {
weight: f64,
created: String,
#[serde(default)]
last_retrieved: Option<String>,
#[serde(default)]
last_used: Option<String>,
#[serde(default)]
retrievals: u32,
#[serde(default)]
uses: u32,
#[serde(default)]
wrongs: u32,
#[serde(default = "default_category")]
category: String,
}
fn default_category() -> String { "General".to_string() }
#[derive(Deserialize)]
struct OldRetrievalEvent {
query: String,
timestamp: String,
results: Vec<String>,
#[serde(default)]
used: Option<Vec<String>>,
}
#[derive(Deserialize)]
struct OldParams {
#[serde(default = "default_0_7")]
default_weight: f64,
#[serde(default = "default_0_95")]
decay_factor: f64,
#[serde(default = "default_0_15")]
use_boost: f64,
#[serde(default = "default_0_1")]
prune_threshold: f64,
#[serde(default = "default_0_3")]
edge_decay: f64,
#[serde(default = "default_3")]
max_hops: u32,
#[serde(default = "default_0_05")]
min_activation: f64,
}
impl Default for OldParams {
fn default() -> Self {
OldParams {
default_weight: 0.7,
decay_factor: 0.95,
use_boost: 0.15,
prune_threshold: 0.1,
edge_decay: 0.3,
max_hops: 3,
min_activation: 0.05,
}
}
}
fn default_0_7() -> f64 { 0.7 }
fn default_0_95() -> f64 { 0.95 }
fn default_0_15() -> f64 { 0.15 }
fn default_0_1() -> f64 { 0.1 }
fn default_0_3() -> f64 { 0.3 }
fn default_3() -> u32 { 3 }
fn default_0_05() -> f64 { 0.05 }
pub fn migrate() -> Result<(), String> {
let weights_path = home().join(".claude/memory/weights.json");
let memory_dir = home().join(".claude/memory");
let nodes_path = memory_dir.join("nodes.capnp");
let rels_path = memory_dir.join("relations.capnp");
// Safety check
if nodes_path.exists() || rels_path.exists() {
return Err("nodes.capnp or relations.capnp already exist. \
Remove them first if you want to re-migrate.".into());
}
// Load old store
let old_store: OldStore = if weights_path.exists() {
let data = fs::read_to_string(&weights_path)
.map_err(|e| format!("read weights.json: {}", e))?;
serde_json::from_str(&data)
.map_err(|e| format!("parse weights.json: {}", e))?
} else {
eprintln!("Warning: no weights.json found, migrating markdown only");
OldStore {
entries: HashMap::new(),
retrieval_log: Vec::new(),
params: OldParams::default(),
}
};
eprintln!("Old store: {} entries, {} retrieval events",
old_store.entries.len(), old_store.retrieval_log.len());
// Scan markdown files to get content + edges
let mut units_by_key: HashMap<String, store::MemoryUnit> = HashMap::new();
scan_markdown_dir(&memory_dir, &mut units_by_key)?;
eprintln!("Scanned {} markdown units", units_by_key.len());
// Create new store
let mut store = Store::default();
// Migrate params
store.params.default_weight = old_store.params.default_weight;
store.params.decay_factor = old_store.params.decay_factor;
store.params.use_boost = old_store.params.use_boost;
store.params.prune_threshold = old_store.params.prune_threshold;
store.params.edge_decay = old_store.params.edge_decay;
store.params.max_hops = old_store.params.max_hops;
store.params.min_activation = old_store.params.min_activation;
// Migrate retrieval log
store.retrieval_log = old_store.retrieval_log.iter().map(|e| {
store::RetrievalEvent {
query: e.query.clone(),
timestamp: e.timestamp.clone(),
results: e.results.clone(),
used: e.used.clone(),
}
}).collect();
// Phase 1: Create nodes
// Merge old entries (weight metadata) with markdown units (content)
let mut all_nodes: Vec<Node> = Vec::new();
let mut key_to_uuid: HashMap<String, [u8; 16]> = HashMap::new();
// First, all entries from the old store
for (key, old_entry) in &old_store.entries {
let uuid = *Uuid::new_v4().as_bytes();
key_to_uuid.insert(key.clone(), uuid);
let content = units_by_key.get(key)
.map(|u| u.content.clone())
.unwrap_or_default();
let state_tag = units_by_key.get(key)
.and_then(|u| u.state.clone())
.unwrap_or_default();
let node = Node {
uuid,
version: 1,
timestamp: store::now_epoch(),
node_type: if key.contains("journal") {
NodeType::EpisodicSession
} else {
NodeType::Semantic
},
provenance: "manual".to_string(),
key: key.clone(),
content,
weight: old_entry.weight as f32,
emotion: 0.0,
deleted: false,
source_ref: String::new(),
created: old_entry.created.clone(),
retrievals: old_entry.retrievals,
uses: old_entry.uses,
wrongs: old_entry.wrongs,
state_tag,
last_replayed: 0,
spaced_repetition_interval: 1,
position: 0,
created_at: 0,
community_id: None,
clustering_coefficient: None,
degree: None,
};
all_nodes.push(node);
}
// Then, any markdown units not in the old store
for (key, unit) in &units_by_key {
if key_to_uuid.contains_key(key) { continue; }
let uuid = *Uuid::new_v4().as_bytes();
key_to_uuid.insert(key.clone(), uuid);
let node = Node {
uuid,
version: 1,
timestamp: store::now_epoch(),
node_type: if key.contains("journal") {
NodeType::EpisodicSession
} else {
NodeType::Semantic
},
provenance: "manual".to_string(),
key: key.clone(),
content: unit.content.clone(),
weight: 0.7,
emotion: 0.0,
deleted: false,
source_ref: String::new(),
created: String::new(),
retrievals: 0,
uses: 0,
wrongs: 0,
state_tag: unit.state.clone().unwrap_or_default(),
last_replayed: 0,
spaced_repetition_interval: 1,
position: 0,
created_at: 0,
community_id: None,
clustering_coefficient: None,
degree: None,
};
all_nodes.push(node);
}
// Write nodes to capnp log
store.append_nodes(&all_nodes)?;
for node in &all_nodes {
store.uuid_to_key.insert(node.uuid, node.key.clone());
store.nodes.insert(node.key.clone(), node.clone());
}
eprintln!("Migrated {} nodes", all_nodes.len());
// Phase 2: Create relations from markdown links + causal edges
let mut all_relations = Vec::new();
for (key, unit) in &units_by_key {
let source_uuid = match key_to_uuid.get(key) {
Some(u) => *u,
None => continue,
};
// Association links (bidirectional)
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
let target_uuid = match key_to_uuid.get(link) {
Some(u) => *u,
None => continue,
};
// Avoid duplicate relations
let exists = all_relations.iter().any(|r: &store::Relation|
(r.source == source_uuid && r.target == target_uuid) ||
(r.source == target_uuid && r.target == source_uuid));
if exists { continue; }
all_relations.push(new_relation(
source_uuid, target_uuid,
RelationType::Link, 1.0,
key, link,
));
}
// Causal edges (directed)
for cause in &unit.causes {
let cause_uuid = match key_to_uuid.get(cause) {
Some(u) => *u,
None => continue,
};
all_relations.push(new_relation(
cause_uuid, source_uuid,
RelationType::Causal, 1.0,
cause, key,
));
}
}
// Write relations to capnp log
store.append_relations(&all_relations)?;
store.relations = all_relations;
eprintln!("Migrated {} relations", store.relations.len());
// Phase 3: Compute graph metrics
store.update_graph_metrics();
// Save derived cache
store.save()?;
eprintln!("Migration complete. Files:");
eprintln!(" {}", nodes_path.display());
eprintln!(" {}", rels_path.display());
eprintln!(" {}", memory_dir.join("state.json").display());
// Verify
let g = store.build_graph();
eprintln!("\nVerification:");
eprintln!(" Nodes: {}", store.nodes.len());
eprintln!(" Relations: {}", store.relations.len());
eprintln!(" Graph edges: {}", g.edge_count());
eprintln!(" Communities: {}", g.community_count());
eprintln!(" Avg CC: {:.4}", g.avg_clustering_coefficient());
Ok(())
}
fn scan_markdown_dir(
dir: &Path,
units: &mut HashMap<String, store::MemoryUnit>,
) -> Result<(), String> {
let entries = fs::read_dir(dir)
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
scan_markdown_dir(&path, units)?;
continue;
}
let Some(ext) = path.extension() else { continue };
if ext != "md" { continue }
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = match fs::read_to_string(&path) {
Ok(c) => c,
Err(_) => continue,
};
for unit in parse_units(&filename, &content) {
units.insert(unit.key.clone(), unit);
}
}
Ok(())
}

20
src/hippocampus/mod.rs Normal file
View file

@ -0,0 +1,20 @@
// hippocampus — memory storage, retrieval, and consolidation
//
// The graph-structured memory system: nodes, relations, queries,
// similarity scoring, spectral analysis, and neuroscience-inspired
// consolidation (spaced repetition, interference detection, schema
// assimilation).
pub mod store;
pub mod graph;
pub mod lookups;
pub mod cursor;
pub mod query;
pub mod similarity;
pub mod spectral;
pub mod neuro;
pub mod counters;
pub mod migrate;
pub mod config;
pub mod transcript;
pub mod memory_search;

View file

@ -0,0 +1,25 @@
// Neuroscience-inspired memory algorithms, split by concern:
//
// scoring — pure analysis: priority, replay queues, interference, plans
// prompts — agent prompt generation and formatting
// rewrite — graph topology mutations: differentiation, closure, linking
mod scoring;
mod rewrite;
pub use scoring::{
ReplayItem,
ConsolidationPlan,
consolidation_priority,
replay_queue, replay_queue_with_graph,
detect_interference,
consolidation_plan, consolidation_plan_quick, format_plan,
daily_check,
};
pub use rewrite::{
refine_target, LinkMove,
differentiate_hub,
apply_differentiation, find_differentiable_hubs,
triangle_close, link_orphans,
};

View file

@ -0,0 +1,348 @@
// Graph topology mutations: hub differentiation, triangle closure,
// orphan linking, and link refinement. These modify the store.
use crate::store::{Store, new_relation};
use crate::graph::Graph;
use crate::similarity;
/// Collect (key, content) pairs for all section children of a file-level node.
fn section_children<'a>(store: &'a Store, file_key: &str) -> Vec<(&'a str, &'a str)> {
let prefix = format!("{}#", file_key);
store.nodes.iter()
.filter(|(k, _)| k.starts_with(&prefix))
.map(|(k, n)| (k.as_str(), n.content.as_str()))
.collect()
}
/// Find the best matching candidate by cosine similarity against content.
/// Returns (key, similarity) if any candidate exceeds threshold.
fn best_match(candidates: &[(&str, &str)], content: &str, threshold: f32) -> Option<(String, f32)> {
let (best_key, best_sim) = candidates.iter()
.map(|(key, text)| (*key, similarity::cosine_similarity(content, text)))
.max_by(|a, b| a.1.total_cmp(&b.1))?;
if best_sim > threshold {
Some((best_key.to_string(), best_sim))
} else {
None
}
}
/// Refine a link target: if the target is a file-level node with section
/// children, find the best-matching section by cosine similarity against
/// the source content. Returns the original key if no sections exist or
/// no section matches above threshold.
///
/// This prevents hub formation at link creation time — every new link
/// targets the most specific available node.
pub fn refine_target(store: &Store, source_content: &str, target_key: &str) -> String {
// Only refine file-level nodes (no # in key)
if target_key.contains('#') { return target_key.to_string(); }
let sections = section_children(store, target_key);
if sections.is_empty() { return target_key.to_string(); }
best_match(&sections, source_content, 0.05)
.map(|(key, _)| key)
.unwrap_or_else(|| target_key.to_string())
}
/// A proposed link move: from hub→neighbor to section→neighbor
pub struct LinkMove {
pub neighbor_key: String,
pub from_hub: String,
pub to_section: String,
pub similarity: f32,
pub neighbor_snippet: String,
}
/// Analyze a hub node and propose redistributing its links to child sections.
///
/// Returns None if the node isn't a hub or has no sections to redistribute to.
pub fn differentiate_hub(store: &Store, hub_key: &str) -> Option<Vec<LinkMove>> {
let graph = store.build_graph();
differentiate_hub_with_graph(store, hub_key, &graph)
}
/// Like differentiate_hub but uses a pre-built graph.
pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) -> Option<Vec<LinkMove>> {
let degree = graph.degree(hub_key);
// Only differentiate actual hubs
if degree < 20 { return None; }
// Only works on file-level nodes that have section children
if hub_key.contains('#') { return None; }
let sections = section_children(store, hub_key);
if sections.is_empty() { return None; }
// Get all neighbors of the hub
let neighbors = graph.neighbors(hub_key);
let prefix = format!("{}#", hub_key);
let mut moves = Vec::new();
for (neighbor_key, _strength) in &neighbors {
// Skip section children — they should stay linked to parent
if neighbor_key.starts_with(&prefix) { continue; }
let neighbor_content = match store.nodes.get(neighbor_key.as_str()) {
Some(n) => &n.content,
None => continue,
};
// Find best-matching section by content similarity
if let Some((best_section, best_sim)) = best_match(&sections, neighbor_content, 0.05) {
let snippet = crate::util::first_n_chars(
neighbor_content.lines()
.find(|l| !l.is_empty() && !l.starts_with("<!--") && !l.starts_with("##"))
.unwrap_or(""),
80);
moves.push(LinkMove {
neighbor_key: neighbor_key.to_string(),
from_hub: hub_key.to_string(),
to_section: best_section,
similarity: best_sim,
neighbor_snippet: snippet,
});
}
}
moves.sort_by(|a, b| b.similarity.total_cmp(&a.similarity));
Some(moves)
}
/// Apply link moves: soft-delete hub→neighbor, create section→neighbor.
pub fn apply_differentiation(
store: &mut Store,
moves: &[LinkMove],
) -> (usize, usize) {
let mut applied = 0usize;
let mut skipped = 0usize;
for mv in moves {
// Check that section→neighbor doesn't already exist
let exists = store.relations.iter().any(|r|
((r.source_key == mv.to_section && r.target_key == mv.neighbor_key)
|| (r.source_key == mv.neighbor_key && r.target_key == mv.to_section))
&& !r.deleted
);
if exists { skipped += 1; continue; }
let section_uuid = match store.nodes.get(&mv.to_section) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
let neighbor_uuid = match store.nodes.get(&mv.neighbor_key) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
// Soft-delete old hub→neighbor relation
for rel in &mut store.relations {
if ((rel.source_key == mv.from_hub && rel.target_key == mv.neighbor_key)
|| (rel.source_key == mv.neighbor_key && rel.target_key == mv.from_hub))
&& !rel.deleted
{
rel.deleted = true;
}
}
// Create new section→neighbor relation
let new_rel = new_relation(
section_uuid, neighbor_uuid,
crate::store::RelationType::Auto,
0.5,
&mv.to_section, &mv.neighbor_key,
);
if store.add_relation(new_rel).is_ok() {
applied += 1;
}
}
(applied, skipped)
}
/// Find all file-level hubs that have section children to split into.
pub fn find_differentiable_hubs(store: &Store) -> Vec<(String, usize, usize)> {
let graph = store.build_graph();
let threshold = graph.hub_threshold();
let mut hubs = Vec::new();
for key in graph.nodes() {
let deg = graph.degree(key);
if deg < threshold { continue; }
if key.contains('#') { continue; }
let section_count = section_children(store, key).len();
if section_count > 0 {
hubs.push((key.clone(), deg, section_count));
}
}
hubs.sort_by(|a, b| b.1.cmp(&a.1));
hubs
}
/// Triangle closure: for each node with degree >= min_degree, find pairs
/// of its neighbors that aren't directly connected and have cosine
/// similarity above sim_threshold. Add links between them.
///
/// This turns hub-spoke patterns into triangles, directly improving
/// clustering coefficient and schema fit.
pub fn triangle_close(
store: &mut Store,
min_degree: usize,
sim_threshold: f32,
max_links_per_hub: usize,
) -> (usize, usize) {
let graph = store.build_graph();
let mut added = 0usize;
let mut hubs_processed = 0usize;
// Get nodes sorted by degree (highest first)
let mut candidates: Vec<(String, usize)> = graph.nodes().iter()
.map(|k| (k.clone(), graph.degree(k)))
.filter(|(_, d)| *d >= min_degree)
.collect();
candidates.sort_by(|a, b| b.1.cmp(&a.1));
for (hub_key, hub_deg) in &candidates {
let neighbors = graph.neighbor_keys(hub_key);
if neighbors.len() < 2 { continue; }
// Collect neighbor content for similarity
let neighbor_docs: Vec<(String, String)> = neighbors.iter()
.filter_map(|&k| {
store.nodes.get(k).map(|n| (k.to_string(), n.content.clone()))
})
.collect();
// Find unconnected pairs with high similarity
let mut pair_scores: Vec<(String, String, f32)> = Vec::new();
for i in 0..neighbor_docs.len() {
for j in (i + 1)..neighbor_docs.len() {
// Check if already connected
let n_i = graph.neighbor_keys(&neighbor_docs[i].0);
if n_i.contains(neighbor_docs[j].0.as_str()) { continue; }
let sim = similarity::cosine_similarity(
&neighbor_docs[i].1, &neighbor_docs[j].1);
if sim >= sim_threshold {
pair_scores.push((
neighbor_docs[i].0.clone(),
neighbor_docs[j].0.clone(),
sim,
));
}
}
}
pair_scores.sort_by(|a, b| b.2.total_cmp(&a.2));
let to_add = pair_scores.len().min(max_links_per_hub);
if to_add > 0 {
println!(" {} (deg={}) — {} triangles to close (top {})",
hub_key, hub_deg, pair_scores.len(), to_add);
for (a, b, sim) in pair_scores.iter().take(to_add) {
let uuid_a = match store.nodes.get(a) { Some(n) => n.uuid, None => continue };
let uuid_b = match store.nodes.get(b) { Some(n) => n.uuid, None => continue };
let rel = new_relation(
uuid_a, uuid_b,
crate::store::RelationType::Auto,
sim * 0.5, // scale by similarity
a, b,
);
if let Ok(()) = store.add_relation(rel) {
added += 1;
}
}
hubs_processed += 1;
}
}
if added > 0 {
let _ = store.save();
}
(hubs_processed, added)
}
/// Link orphan nodes (degree < min_degree) to their most textually similar
/// connected nodes. For each orphan, finds top-K nearest neighbors by
/// cosine similarity and creates Auto links.
/// Returns (orphans_linked, total_links_added).
pub fn link_orphans(
store: &mut Store,
min_degree: usize,
links_per_orphan: usize,
sim_threshold: f32,
) -> (usize, usize) {
let graph = store.build_graph();
let mut added = 0usize;
let mut orphans_linked = 0usize;
// Separate orphans from connected nodes
let orphans: Vec<String> = graph.nodes().iter()
.filter(|k| graph.degree(k) < min_degree)
.cloned()
.collect();
// Build candidate pool: connected nodes with their content
let candidates: Vec<(String, String)> = graph.nodes().iter()
.filter(|k| graph.degree(k) >= min_degree)
.filter_map(|k| store.nodes.get(k).map(|n| (k.clone(), n.content.clone())))
.collect();
if candidates.is_empty() { return (0, 0); }
for orphan_key in &orphans {
let orphan_content = match store.nodes.get(orphan_key) {
Some(n) => n.content.clone(),
None => continue,
};
if orphan_content.len() < 20 { continue; } // skip near-empty nodes
// Score against all candidates
let mut scores: Vec<(usize, f32)> = candidates.iter()
.enumerate()
.map(|(i, (_, content))| {
(i, similarity::cosine_similarity(&orphan_content, content))
})
.filter(|(_, s)| *s >= sim_threshold)
.collect();
scores.sort_by(|a, b| b.1.total_cmp(&a.1));
let to_link = scores.len().min(links_per_orphan);
if to_link == 0 { continue; }
let orphan_uuid = store.nodes.get(orphan_key).unwrap().uuid;
for &(idx, sim) in scores.iter().take(to_link) {
let target_key = &candidates[idx].0;
let target_uuid = match store.nodes.get(target_key) {
Some(n) => n.uuid,
None => continue,
};
let rel = new_relation(
orphan_uuid, target_uuid,
crate::store::RelationType::Auto,
sim * 0.5,
orphan_key, target_key,
);
if store.add_relation(rel).is_ok() {
added += 1;
}
}
orphans_linked += 1;
}
if added > 0 {
let _ = store.save();
}
(orphans_linked, added)
}

View file

@ -0,0 +1,446 @@
// Consolidation scoring, replay queues, interference detection, and
// graph health metrics. Pure analysis — no store mutations.
use crate::store::{Store, now_epoch};
use crate::graph::{self, Graph};
use crate::spectral::{self, SpectralEmbedding, SpectralPosition};
use std::collections::HashMap;
const SECS_PER_DAY: f64 = 86400.0;
/// Consolidation priority: how urgently a node needs attention.
///
/// With spectral data:
/// priority = spectral_displacement × overdue × emotion
/// Without:
/// priority = (1 - cc) × overdue × emotion
///
/// Spectral displacement is the outlier_score clamped and normalized —
/// it measures how far a node sits from its community center in the
/// eigenspace. This is a global signal (considers all graph structure)
/// vs CC which is local (only immediate neighbors).
pub fn consolidation_priority(
store: &Store,
key: &str,
graph: &Graph,
spectral_outlier: Option<f64>,
) -> f64 {
let node = match store.nodes.get(key) {
Some(n) => n,
None => return 0.0,
};
// Integration factor: how poorly integrated is this node?
let displacement = if let Some(outlier) = spectral_outlier {
// outlier_score = dist_to_center / median_dist_in_community
// 1.0 = typical position, >2 = unusual, >5 = extreme outlier
// Use log scale for dynamic range: the difference between
// outlier=5 and outlier=10 matters less than 1 vs 2.
(outlier / 3.0).min(3.0)
} else {
let cc = graph.clustering_coefficient(key) as f64;
1.0 - cc
};
// Spaced repetition: how overdue is this node for replay?
let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY;
let time_since_replay = if node.last_replayed > 0 {
(now_epoch() - node.last_replayed).max(0) as f64
} else {
interval_secs * 3.0
};
let overdue_ratio = (time_since_replay / interval_secs).min(5.0);
// Emotional intensity: higher emotion = higher priority
let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);
displacement * overdue_ratio * emotion_factor
}
/// Item in the replay queue
pub struct ReplayItem {
pub key: String,
pub priority: f64,
pub interval_days: u32,
pub emotion: f32,
pub cc: f32,
/// Spectral classification: "bridge", "outlier", "core", "peripheral"
pub classification: &'static str,
/// Raw spectral outlier score (distance / median)
pub outlier_score: f64,
}
/// Generate the replay queue: nodes ordered by consolidation priority.
/// Automatically loads spectral embedding if available.
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
let graph = store.build_graph();
let emb = spectral::load_embedding().ok();
replay_queue_with_graph(store, count, &graph, emb.as_ref())
}
/// Generate the replay queue using pre-built graph and optional spectral data.
pub fn replay_queue_with_graph(
store: &Store,
count: usize,
graph: &Graph,
emb: Option<&SpectralEmbedding>,
) -> Vec<ReplayItem> {
// Build spectral position map if embedding is available
let positions: HashMap<String, SpectralPosition> = if let Some(emb) = emb {
let communities = graph.communities().clone();
spectral::analyze_positions(emb, &communities)
.into_iter()
.map(|p| (p.key.clone(), p))
.collect()
} else {
HashMap::new()
};
let mut items: Vec<ReplayItem> = store.nodes.iter()
.map(|(key, node)| {
let pos = positions.get(key);
let outlier_score = pos.map(|p| p.outlier_score).unwrap_or(0.0);
let classification = pos
.map(spectral::classify_position)
.unwrap_or("unknown");
let priority = consolidation_priority(
store, key, graph,
pos.map(|p| p.outlier_score),
);
ReplayItem {
key: key.clone(),
priority,
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
cc: graph.clustering_coefficient(key),
classification,
outlier_score,
}
})
.collect();
items.sort_by(|a, b| b.priority.total_cmp(&a.priority));
items.truncate(count);
items
}
/// Detect interfering memory pairs: high text similarity but different communities
pub fn detect_interference(
store: &Store,
graph: &Graph,
threshold: f32,
) -> Vec<(String, String, f32)> {
use crate::similarity;
let communities = graph.communities();
// Only compare nodes within a reasonable set — take the most active ones
let mut docs: Vec<(String, String)> = store.nodes.iter()
.filter(|(_, n)| n.content.len() > 50) // skip tiny nodes
.map(|(k, n)| (k.clone(), n.content.clone()))
.collect();
// For large stores, sample to keep pairwise comparison feasible
if docs.len() > 200 {
docs.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
docs.truncate(200);
}
let similar = similarity::pairwise_similar(&docs, threshold);
// Filter to pairs in different communities
similar.into_iter()
.filter(|(a, b, _)| {
let ca = communities.get(a);
let cb = communities.get(b);
match (ca, cb) {
(Some(a), Some(b)) => a != b,
_ => true, // if community unknown, flag it
}
})
.collect()
}
/// Agent allocation from the control loop.
/// Agent types and counts are data-driven — add agents by adding
/// entries to the counts map.
#[derive(Default)]
pub struct ConsolidationPlan {
/// agent_name → run count
pub counts: std::collections::HashMap<String, usize>,
pub run_health: bool,
pub rationale: Vec<String>,
}
impl ConsolidationPlan {
pub fn count(&self, agent: &str) -> usize {
self.counts.get(agent).copied().unwrap_or(0)
}
pub fn set(&mut self, agent: &str, count: usize) {
self.counts.insert(agent.to_string(), count);
}
pub fn add(&mut self, agent: &str, count: usize) {
*self.counts.entry(agent.to_string()).or_default() += count;
}
pub fn total(&self) -> usize {
self.counts.values().sum::<usize>() + if self.run_health { 1 } else { 0 }
}
/// Expand the plan into a flat list of (agent_name, batch_size) runs.
/// Interleaves agent types so different types alternate.
pub fn to_agent_runs(&self, batch_size: usize) -> Vec<(String, usize)> {
let mut runs = Vec::new();
if self.run_health {
runs.push(("health".to_string(), 0));
}
// Sort by count descending so high-volume agents interleave well
let mut types: Vec<(&String, &usize)> = self.counts.iter()
.filter(|(_, c)| **c > 0)
.collect();
types.sort_by(|a, b| b.1.cmp(a.1));
let mut queues: Vec<Vec<(String, usize)>> = types.iter().map(|(name, count)| {
let mut q = Vec::new();
let mut remaining = **count;
while remaining > 0 {
let batch = remaining.min(batch_size);
q.push((name.to_string(), batch));
remaining -= batch;
}
q
}).collect();
// Round-robin interleave
loop {
let mut added = false;
for q in &mut queues {
if let Some(run) = q.first() {
runs.push(run.clone());
q.remove(0);
added = true;
}
}
if !added { break; }
}
runs
}
}
/// Analyze metrics and decide how much each agent needs to run.
///
/// This is the control loop: metrics → error signal → agent allocation.
/// Target values are based on healthy small-world networks.
pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
consolidation_plan_inner(store, true)
}
/// Cheap version: skip O(n²) interference detection (for daemon status).
pub fn consolidation_plan_quick(store: &Store) -> ConsolidationPlan {
consolidation_plan_inner(store, false)
}
fn consolidation_plan_inner(store: &Store, detect_interf: bool) -> ConsolidationPlan {
let graph = store.build_graph();
let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini();
let _avg_cc = graph.avg_clustering_coefficient();
let interference_count = if detect_interf {
detect_interference(store, &graph, 0.5).len()
} else {
0
};
let episodic_count = store.nodes.iter()
.filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
.count();
let _episodic_ratio = if store.nodes.is_empty() { 0.0 }
else { episodic_count as f32 / store.nodes.len() as f32 };
let mut plan = ConsolidationPlan {
counts: std::collections::HashMap::new(),
run_health: true,
rationale: Vec::new(),
};
// Active agent types from config
let config = crate::config::get();
let agent_types: Vec<&str> = config.agent_types.iter().map(|s| s.as_str()).collect();
// Target: α ≥ 2.5 (healthy scale-free)
if alpha < 2.0 {
plan.add("linker", 100);
plan.rationale.push(format!(
"α={:.2} (target ≥2.5): extreme hub dominance → 100 linker", alpha));
} else if alpha < 2.5 {
plan.add("linker", 50);
plan.rationale.push(format!(
"α={:.2} (target ≥2.5): moderate hub dominance → 50 linker", alpha));
} else {
plan.add("linker", 20);
plan.rationale.push(format!(
"α={:.2}: healthy — 20 linker for maintenance", alpha));
}
// Target: Gini ≤ 0.4
if gini > 0.5 {
plan.add("linker", 50);
plan.rationale.push(format!(
"Gini={:.3} (target ≤0.4): high inequality → +50 linker", gini));
}
// Interference: separator disambiguates confusable nodes
if interference_count > 100 {
plan.add("separator", 10);
plan.rationale.push(format!(
"Interference: {} pairs (target <50) → 10 separator", interference_count));
} else if interference_count > 20 {
plan.add("separator", 5);
plan.rationale.push(format!(
"Interference: {} pairs → 5 separator", interference_count));
} else if interference_count > 0 {
plan.add("separator", interference_count.min(3));
}
// Organize: proportional to linker — synthesizes what linker connects
let linker = plan.count("linker");
plan.set("organize", linker / 2);
plan.rationale.push(format!(
"Organize: {} (half of linker count)", plan.count("organize")));
// Distill: core concept maintenance
let organize = plan.count("organize");
let mut distill = organize;
if gini > 0.4 { distill += 20; }
if alpha < 2.0 { distill += 20; }
plan.set("distill", distill);
plan.rationale.push(format!(
"Distill: {} (synthesize hub content)", plan.count("distill")));
// Split: handle oversized nodes
plan.set("split", 5);
// Distribute agent budget using Elo ratings
let budget = crate::config::get().agent_budget;
let elo_path = crate::config::get().data_dir.join("agent-elo.json");
if let Ok(elo_json) = std::fs::read_to_string(&elo_path) {
if let Ok(ratings) = serde_json::from_str::<std::collections::HashMap<String, f64>>(&elo_json) {
let elos: Vec<f64> = agent_types.iter()
.map(|t| ratings.get(*t).copied().unwrap_or(1000.0))
.collect();
let min_elo = elos.iter().copied().fold(f64::MAX, f64::min);
let weights: Vec<f64> = elos.iter()
.map(|e| {
let shifted = e - min_elo + 50.0;
shifted * shifted
})
.collect();
let total_weight: f64 = weights.iter().sum();
let allocate = |w: f64| -> usize {
((w / total_weight * budget as f64).round() as usize).max(2)
};
for (i, agent) in agent_types.iter().enumerate() {
plan.set(agent, allocate(weights[i]));
}
let summary: Vec<String> = agent_types.iter()
.map(|a| format!("{}={}", a, plan.count(a)))
.collect();
plan.rationale.push(format!(
"Elo allocation (budget={}): {}", budget, summary.join(" ")));
}
} else {
// No Elo file — use budget with equal distribution
let per_type = budget / agent_types.len();
for agent in &agent_types {
plan.set(agent, per_type);
}
plan.rationale.push(format!(
"No Elo ratings — equal distribution ({} each, budget={})", per_type, budget));
}
plan
}
/// Format the consolidation plan for display
pub fn format_plan(plan: &ConsolidationPlan) -> String {
let mut out = String::from("Consolidation Plan\n==================\n\n");
out.push_str("Analysis:\n");
for r in &plan.rationale {
out.push_str(&format!("{}\n", r));
}
out.push_str("\nAgent allocation:\n");
if plan.run_health {
out.push_str(" 1. health — system audit\n");
}
let mut step = 2;
let mut sorted: Vec<_> = plan.counts.iter()
.filter(|(_, c)| **c > 0)
.collect();
sorted.sort_by(|a, b| b.1.cmp(a.1));
for (agent, count) in &sorted {
out.push_str(&format!(" {}. {} ×{}\n", step, agent, count));
step += 1;
}
out.push_str(&format!("\nTotal agent runs: {}\n", plan.total()));
out
}
/// Brief daily check: compare current metrics to last snapshot
pub fn daily_check(store: &Store) -> String {
let graph_obj = store.build_graph();
let snap = graph::current_metrics(&graph_obj);
let history = graph::load_metrics_history();
let prev = history.last();
let mut out = String::from("Memory daily check\n");
// Current state
out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
// Trend
if let Some(p) = prev {
let d_sigma = snap.sigma - p.sigma;
let d_alpha = snap.alpha - p.alpha;
let d_gini = snap.gini - p.gini;
out.push_str(&format!(" Δσ={:+.1} Δα={:+.2} Δgini={:+.3}\n",
d_sigma, d_alpha, d_gini));
// Assessment
let mut issues = Vec::new();
if snap.alpha < 2.0 { issues.push("hub dominance critical"); }
if snap.gini > 0.5 { issues.push("high inequality"); }
if snap.avg_cc < 0.1 { issues.push("poor integration"); }
if d_sigma < -5.0 { issues.push("σ declining"); }
if d_alpha < -0.1 { issues.push("α declining"); }
if d_gini > 0.02 { issues.push("inequality increasing"); }
if issues.is_empty() {
out.push_str(" Status: healthy\n");
} else {
out.push_str(&format!(" Status: needs attention — {}\n", issues.join(", ")));
out.push_str(" Run: poc-memory consolidate-session\n");
}
} else {
out.push_str(" (first snapshot, no trend data yet)\n");
}
// Persist the snapshot
graph::save_metrics_snapshot(&snap);
out
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,13 @@
// query/ — query parsing, search algorithms, and pipeline execution
//
// parser.rs — PEG-based query language (key ~ 'foo' | sort degree | limit 10)
// engine.rs — search algorithms: spreading activation, spectral, geodesic,
// manifold, confluence. Query DSL execution. Seed matching.
pub mod parser;
pub mod engine;
// Re-export parser's run_query as the main query entry point
// (engine::run_query is the internal search pipeline, accessed via crate::search)
pub use parser::run_query;
pub use parser::execute_query;

View file

@ -0,0 +1,637 @@
// query.rs — peg-based query language for the memory graph
//
// Grammar-driven: the peg definition IS the language spec.
// Evaluates against node properties, graph metrics, and edge attributes.
// Designed for ad-hoc exploration without memorizing 35+ subcommands.
//
// Syntax:
// expr | stage | stage ...
//
// Stages (piped):
// sort FIELD sort descending (default for exploration)
// sort FIELD asc sort ascending
// limit N cap results
// select F,F,... output specific fields as TSV
// count just show count
//
// Examples:
// degree > 15 | sort degree | limit 10
// category = core | select degree,weight
// neighbors('identity') WHERE strength > 0.5 | sort strength
// key ~ 'journal.*' AND degree > 10 | count
// * | sort weight asc | limit 20
use crate::store::{NodeType, RelationType, Store};
use crate::graph::Graph;
use regex::Regex;
use std::collections::BTreeMap;
// -- AST types --
#[derive(Debug, Clone)]
pub enum Expr {
All,
Comparison { field: String, op: CmpOp, value: Value },
And(Box<Expr>, Box<Expr>),
Or(Box<Expr>, Box<Expr>),
Not(Box<Expr>),
Neighbors { key: String, filter: Option<Box<Expr>> },
}
#[derive(Debug, Clone)]
pub enum Value {
Num(f64),
Str(String),
Ident(String),
FnCall(FnCall),
}
#[derive(Debug, Clone)]
pub enum FnCall {
Community(String),
Degree(String),
}
#[derive(Debug, Clone, Copy)]
pub enum CmpOp {
Gt, Lt, Ge, Le, Eq, Ne, Match,
}
#[derive(Debug, Clone)]
pub enum Stage {
Sort { field: String, ascending: bool },
Limit(usize),
Select(Vec<String>),
Count,
Connectivity,
DominatingSet,
}
#[derive(Debug, Clone)]
pub struct Query {
pub expr: Expr,
pub stages: Vec<Stage>,
}
// -- PEG grammar --
peg::parser! {
pub grammar query_parser() for str {
rule _() = [' ' | '\t']*
pub rule query() -> Query
= e:expr() s:stages() { Query { expr: e, stages: s } }
rule stages() -> Vec<Stage>
= s:(_ "|" _ s:stage() { s })* { s }
rule stage() -> Stage
= "sort" _ f:field() _ a:asc_desc() { Stage::Sort { field: f, ascending: a } }
/ "limit" _ n:integer() { Stage::Limit(n) }
/ "select" _ f:field_list() { Stage::Select(f) }
/ "count" { Stage::Count }
/ "connectivity" { Stage::Connectivity }
/ "dominating-set" { Stage::DominatingSet }
rule asc_desc() -> bool
= "asc" { true }
/ "desc" { false }
/ { false } // default: descending
rule field_list() -> Vec<String>
= f:field() fs:(_ "," _ f:field() { f })* {
let mut v = vec![f];
v.extend(fs);
v
}
rule integer() -> usize
= n:$(['0'..='9']+) { n.parse().unwrap() }
pub rule expr() -> Expr = precedence! {
a:(@) _ "OR" _ b:@ { Expr::Or(Box::new(a), Box::new(b)) }
--
a:(@) _ "AND" _ b:@ { Expr::And(Box::new(a), Box::new(b)) }
--
"NOT" _ e:@ { Expr::Not(Box::new(e)) }
--
"neighbors" _ "(" _ k:string() _ ")" _ w:where_clause()? {
Expr::Neighbors { key: k, filter: w.map(Box::new) }
}
f:field() _ op:cmp_op() _ v:value() {
Expr::Comparison { field: f, op, value: v }
}
"*" { Expr::All }
"(" _ e:expr() _ ")" { e }
}
rule where_clause() -> Expr
= "WHERE" _ e:expr() { e }
rule field() -> String
= s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-']*) {
s.to_string()
}
rule cmp_op() -> CmpOp
= ">=" { CmpOp::Ge }
/ "<=" { CmpOp::Le }
/ "!=" { CmpOp::Ne }
/ ">" { CmpOp::Gt }
/ "<" { CmpOp::Lt }
/ "=" { CmpOp::Eq }
/ "~" { CmpOp::Match }
rule value() -> Value
= f:fn_call() { Value::FnCall(f) }
/ n:number() { Value::Num(n) }
/ s:string() { Value::Str(s) }
/ i:ident() { Value::Ident(i) }
rule fn_call() -> FnCall
= "community" _ "(" _ k:string() _ ")" { FnCall::Community(k) }
/ "degree" _ "(" _ k:string() _ ")" { FnCall::Degree(k) }
rule number() -> f64
= n:$(['0'..='9']+ ("." ['0'..='9']+)?) {
n.parse().unwrap()
}
rule string() -> String
= "'" s:$([^ '\'']*) "'" { s.to_string() }
rule ident() -> String
= s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.']*) {
s.to_string()
}
}
}
// -- Field resolution --
/// Resolve a field value from a node + graph context, returning a comparable Value.
fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option<Value> {
let node = store.nodes.get(key)?;
match field {
"key" => Some(Value::Str(key.to_string())),
"weight" => Some(Value::Num(node.weight as f64)),
"category" => None, // vestigial, kept for query compat
"node_type" => Some(Value::Str(node_type_label(node.node_type).to_string())),
"provenance" => Some(Value::Str(node.provenance.clone())),
"emotion" => Some(Value::Num(node.emotion as f64)),
"retrievals" => Some(Value::Num(node.retrievals as f64)),
"uses" => Some(Value::Num(node.uses as f64)),
"wrongs" => Some(Value::Num(node.wrongs as f64)),
"created" => Some(Value::Num(node.created_at as f64)),
"timestamp" => Some(Value::Num(node.timestamp as f64)),
"content" => Some(Value::Str(node.content.clone())),
"degree" => Some(Value::Num(graph.degree(key) as f64)),
"community_id" => {
graph.communities().get(key).map(|&c| Value::Num(c as f64))
}
"clustering_coefficient" | "schema_fit" | "cc" => {
Some(Value::Num(graph.clustering_coefficient(key) as f64))
}
_ => None,
}
}
fn node_type_label(nt: NodeType) -> &'static str {
match nt {
NodeType::EpisodicSession => "episodic_session",
NodeType::EpisodicDaily => "episodic_daily",
NodeType::EpisodicWeekly => "episodic_weekly",
NodeType::EpisodicMonthly => "episodic_monthly",
NodeType::Semantic => "semantic",
}
}
fn rel_type_label(r: RelationType) -> &'static str {
match r {
RelationType::Link => "link",
RelationType::Causal => "causal",
RelationType::Auto => "auto",
}
}
// -- Comparison logic --
fn as_num(v: &Value) -> Option<f64> {
match v {
Value::Num(n) => Some(*n),
Value::Str(s) => s.parse().ok(),
Value::Ident(s) => s.parse().ok(),
Value::FnCall(_) => None,
}
}
fn as_str(v: &Value) -> String {
match v {
Value::Str(s) | Value::Ident(s) => s.clone(),
Value::Num(n) => format!("{}", n),
Value::FnCall(_) => String::new(),
}
}
fn compare(lhs: &Value, op: CmpOp, rhs: &Value) -> bool {
if let CmpOp::Match = op {
return Regex::new(&as_str(rhs))
.map(|re| re.is_match(&as_str(lhs)))
.unwrap_or(false);
}
// Numeric comparison if both parse, otherwise string
let ord = match (as_num(lhs), as_num(rhs)) {
(Some(a), Some(b)) => a.total_cmp(&b),
_ => as_str(lhs).cmp(&as_str(rhs)),
};
match op {
CmpOp::Eq => ord.is_eq(),
CmpOp::Ne => !ord.is_eq(),
CmpOp::Gt => ord.is_gt(),
CmpOp::Lt => ord.is_lt(),
CmpOp::Ge => !ord.is_lt(),
CmpOp::Le => !ord.is_gt(),
CmpOp::Match => unreachable!(),
}
}
// -- Evaluator --
fn resolve_fn(f: &FnCall, store: &Store, graph: &Graph) -> Value {
match f {
FnCall::Community(key) => {
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
graph.communities().get(&resolved)
.map(|&c| Value::Num(c as f64))
.unwrap_or(Value::Num(f64::NAN))
}
FnCall::Degree(key) => {
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
Value::Num(graph.degree(&resolved) as f64)
}
}
}
fn resolve_value(v: &Value, store: &Store, graph: &Graph) -> Value {
match v {
Value::FnCall(f) => resolve_fn(f, store, graph),
other => other.clone(),
}
}
/// Evaluate an expression against a field resolver.
/// The resolver returns field values — different for nodes vs edges.
fn eval(
expr: &Expr,
resolve: &dyn Fn(&str) -> Option<Value>,
store: &Store,
graph: &Graph,
) -> bool {
match expr {
Expr::All => true,
Expr::Comparison { field, op, value } => {
let lhs = match resolve(field) {
Some(v) => v,
None => return false,
};
let rhs = resolve_value(value, store, graph);
compare(&lhs, *op, &rhs)
}
Expr::And(a, b) => eval(a, resolve, store, graph) && eval(b, resolve, store, graph),
Expr::Or(a, b) => eval(a, resolve, store, graph) || eval(b, resolve, store, graph),
Expr::Not(e) => !eval(e, resolve, store, graph),
Expr::Neighbors { .. } => false,
}
}
// -- Query result --
pub struct QueryResult {
pub key: String,
pub fields: BTreeMap<String, Value>,
}
// -- Query executor --
pub fn execute_query(
store: &Store,
graph: &Graph,
query_str: &str,
) -> Result<Vec<QueryResult>, String> {
let q = query_parser::query(query_str)
.map_err(|e| format!("Parse error: {}", e))?;
execute_parsed(store, graph, &q)
}
fn execute_parsed(
store: &Store,
graph: &Graph,
q: &Query,
) -> Result<Vec<QueryResult>, String> {
let mut results = match &q.expr {
Expr::Neighbors { key, filter } => {
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
let edges = graph.edges_of(&resolved);
let mut out = Vec::new();
for edge in edges {
let include = match filter {
Some(f) => {
let strength = edge.strength;
let rt = edge.rel_type;
let target = &edge.target;
eval(f, &|field| match field {
"strength" => Some(Value::Num(strength as f64)),
"rel_type" => Some(Value::Str(rel_type_label(rt).to_string())),
_ => resolve_field(field, target, store, graph),
}, store, graph)
}
None => true,
};
if include {
let mut fields = BTreeMap::new();
fields.insert("strength".into(), Value::Num(edge.strength as f64));
fields.insert("rel_type".into(),
Value::Str(rel_type_label(edge.rel_type).to_string()));
out.push(QueryResult { key: edge.target.clone(), fields });
}
}
out
}
_ => {
let mut out = Vec::new();
for key in store.nodes.keys() {
if store.nodes[key].deleted { continue; }
if eval(&q.expr, &|f| resolve_field(f, key, store, graph), store, graph) {
out.push(QueryResult { key: key.clone(), fields: BTreeMap::new() });
}
}
out
}
};
// Collect fields needed by select/sort stages and resolve them once
let needed: Vec<String> = {
let mut set = Vec::new();
for stage in &q.stages {
match stage {
Stage::Select(fields) => {
for f in fields {
if !set.contains(f) { set.push(f.clone()); }
}
}
Stage::Sort { field, .. } => {
if !set.contains(field) { set.push(field.clone()); }
}
_ => {}
}
}
set
};
for r in &mut results {
for f in &needed {
if !r.fields.contains_key(f)
&& let Some(v) = resolve_field(f, &r.key, store, graph) {
r.fields.insert(f.clone(), v);
}
}
}
// Apply pipeline stages
let mut has_sort = false;
for stage in &q.stages {
match stage {
Stage::Sort { field, ascending } => {
has_sort = true;
let asc = *ascending;
results.sort_by(|a, b| {
let va = a.fields.get(field).and_then(as_num);
let vb = b.fields.get(field).and_then(as_num);
let ord = match (va, vb) {
(Some(a), Some(b)) => a.total_cmp(&b),
_ => {
let sa = a.fields.get(field).map(as_str).unwrap_or_default();
let sb = b.fields.get(field).map(as_str).unwrap_or_default();
sa.cmp(&sb)
}
};
if asc { ord } else { ord.reverse() }
});
}
Stage::Limit(n) => {
results.truncate(*n);
}
Stage::Connectivity => {} // handled in output
Stage::Select(_) | Stage::Count => {} // handled in output
Stage::DominatingSet => {
let mut items: Vec<(String, f64)> = results.iter()
.map(|r| (r.key.clone(), graph.degree(&r.key) as f64))
.collect();
let xform = super::engine::Transform::DominatingSet;
items = super::engine::run_transform(&xform, items, store, graph);
let keep: std::collections::HashSet<String> = items.into_iter().map(|(k, _)| k).collect();
results.retain(|r| keep.contains(&r.key));
}
}
}
// Default sort by degree desc if no explicit sort
if !has_sort {
results.sort_by(|a, b| {
let da = graph.degree(&a.key);
let db = graph.degree(&b.key);
db.cmp(&da)
});
}
Ok(results)
}
/// Format a Value for display
pub fn format_value(v: &Value) -> String {
match v {
Value::Num(n) => {
if *n == n.floor() && n.abs() < 1e15 {
format!("{}", *n as i64)
} else {
format!("{:.3}", n)
}
}
Value::Str(s) => s.clone(),
Value::Ident(s) => s.clone(),
Value::FnCall(_) => "?".to_string(),
}
}
/// Execute query and print formatted output.
pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), String> {
let q = query_parser::query(query_str)
.map_err(|e| format!("Parse error: {}", e))?;
let results = execute_parsed(store, graph, &q)?;
// Count stage
if q.stages.iter().any(|s| matches!(s, Stage::Count)) {
println!("{}", results.len());
return Ok(());
}
if results.is_empty() {
eprintln!("No results");
return Ok(());
}
// Connectivity stage
if q.stages.iter().any(|s| matches!(s, Stage::Connectivity)) {
print_connectivity(&results, graph);
return Ok(());
}
// Select stage
let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s {
Stage::Select(f) => Some(f),
_ => None,
});
if let Some(fields) = fields {
let mut header = vec!["key".to_string()];
header.extend(fields.iter().cloned());
println!("{}", header.join("\t"));
for r in &results {
let mut row = vec![r.key.clone()];
for f in fields {
row.push(match r.fields.get(f) {
Some(v) => format_value(v),
None => "-".to_string(),
});
}
println!("{}", row.join("\t"));
}
} else {
for r in &results {
println!("{}", r.key);
}
}
Ok(())
}
// -- Connectivity analysis --
/// BFS shortest path between two nodes, max_hops limit.
fn bfs_path(graph: &Graph, from: &str, to: &str, max_hops: usize) -> Option<Vec<String>> {
use std::collections::{VecDeque, HashMap};
if from == to { return Some(vec![from.to_string()]); }
let mut parent: HashMap<String, String> = HashMap::new();
parent.insert(from.to_string(), String::new());
let mut queue: VecDeque<(String, usize)> = VecDeque::new();
queue.push_back((from.to_string(), 0));
while let Some((current, depth)) = queue.pop_front() {
if depth >= max_hops { continue; }
for (neighbor, _) in graph.neighbors(&current) {
if parent.contains_key(neighbor.as_str()) { continue; }
parent.insert(neighbor.clone(), current.clone());
if neighbor == to {
let mut path = vec![to.to_string()];
let mut node = to.to_string();
while let Some(p) = parent.get(&node) {
if p.is_empty() { break; }
path.push(p.clone());
node = p.clone();
}
path.reverse();
return Some(path);
}
queue.push_back((neighbor.clone(), depth + 1));
}
}
None
}
/// Find connected components among result nodes via BFS through the full graph.
fn find_components(keys: &[&str], graph: &Graph, max_hops: usize) -> Vec<Vec<String>> {
use std::collections::HashSet;
let mut assigned: HashSet<&str> = HashSet::new();
let mut components: Vec<Vec<String>> = Vec::new();
for &start in keys {
if assigned.contains(start) { continue; }
let mut component = vec![start.to_string()];
assigned.insert(start);
for &other in keys {
if assigned.contains(other) { continue; }
if bfs_path(graph, start, other, max_hops).is_some() {
component.push(other.to_string());
assigned.insert(other);
}
}
components.push(component);
}
components
}
/// Print connectivity report for query results.
fn print_connectivity(results: &[QueryResult], graph: &Graph) {
let max_hops = 4;
let keys: Vec<&str> = results.iter().map(|r| r.key.as_str()).collect();
let components = find_components(&keys, graph, max_hops);
println!("Connectivity: {} nodes, {} components (max {} hops)\n",
results.len(), components.len(), max_hops);
let result_set: std::collections::HashSet<&str> = keys.iter().copied().collect();
// Find the largest cluster to use as link-add target for islands
let largest_cluster = components.iter()
.max_by_key(|c| c.len())
.and_then(|c| if c.len() > 1 {
// Pick highest-degree node in largest cluster as link target
c.iter().max_by_key(|k| graph.degree(k)).cloned()
} else { None });
let mut islands: Vec<&str> = Vec::new();
for (i, component) in components.iter().enumerate() {
if component.len() == 1 {
println!(" island: {}", component[0]);
islands.push(&component[0]);
} else {
println!(" cluster {} ({} nodes):", i + 1, component.len());
for node in component {
println!(" {} (degree {})", node, graph.degree(node));
}
// Show a sample path between first two nodes
if component.len() >= 2
&& let Some(path) = bfs_path(graph, &component[0], &component[1], max_hops) {
print!(" path: ");
for (j, step) in path.iter().enumerate() {
if j > 0 { print!(""); }
if result_set.contains(step.as_str()) {
print!("{}", step);
} else {
print!("[{}]", step);
}
}
println!();
}
}
}
// Suggest link-add commands for islands
if !islands.is_empty()
&& let Some(ref hub) = largest_cluster {
println!("\nFix islands:");
for island in &islands {
println!(" poc-memory graph link-add {} {}", island, hub);
}
}
}

View file

@ -0,0 +1,140 @@
// Text similarity: Porter stemming + BM25
//
// Used for interference detection (similar content, different communities)
// and schema fit scoring. Intentionally simple — ~100 lines, no
// external dependencies.
use std::collections::HashMap;
/// Minimal Porter stemmer — handles the most common English suffixes.
/// Not linguistically complete but good enough for similarity matching.
/// Single allocation: works on one String buffer throughout.
///
/// If this is still a hot spot, replace the sequential suffix checks
/// with a reversed-suffix trie: single pass from the end of the word
/// matches the longest applicable suffix in O(suffix_len) instead of
/// O(n_rules).
pub fn stem(word: &str) -> String {
let mut w = word.to_lowercase();
if w.len() <= 3 { return w; }
strip_suffix_inplace(&mut w, "ation", "ate");
strip_suffix_inplace(&mut w, "ness", "");
strip_suffix_inplace(&mut w, "ment", "");
strip_suffix_inplace(&mut w, "ting", "t");
strip_suffix_inplace(&mut w, "ling", "l");
strip_suffix_inplace(&mut w, "ring", "r");
strip_suffix_inplace(&mut w, "ning", "n");
strip_suffix_inplace(&mut w, "ding", "d");
strip_suffix_inplace(&mut w, "ping", "p");
strip_suffix_inplace(&mut w, "ging", "g");
strip_suffix_inplace(&mut w, "ying", "y");
strip_suffix_inplace(&mut w, "ied", "y");
strip_suffix_inplace(&mut w, "ies", "y");
strip_suffix_inplace(&mut w, "ing", "");
strip_suffix_inplace(&mut w, "ed", "");
strip_suffix_inplace(&mut w, "ly", "");
strip_suffix_inplace(&mut w, "er", "");
strip_suffix_inplace(&mut w, "al", "");
strip_suffix_inplace(&mut w, "s", "");
w
}
fn strip_suffix_inplace(word: &mut String, suffix: &str, replacement: &str) {
if word.len() > suffix.len() + 2 && word.ends_with(suffix) {
word.truncate(word.len() - suffix.len());
word.push_str(replacement);
}
}
/// Tokenize and stem a text into a term frequency map
pub fn term_frequencies(text: &str) -> HashMap<String, u32> {
let mut tf = HashMap::new();
for word in text.split(|c: char| !c.is_alphanumeric()) {
if word.len() > 2 {
let stemmed = stem(word);
*tf.entry(stemmed).or_default() += 1;
}
}
tf
}
/// Cosine similarity between two documents using stemmed term frequencies.
/// Returns 0.0 for disjoint vocabularies, 1.0 for identical content.
pub fn cosine_similarity(doc_a: &str, doc_b: &str) -> f32 {
let tf_a = term_frequencies(doc_a);
let tf_b = term_frequencies(doc_b);
if tf_a.is_empty() || tf_b.is_empty() {
return 0.0;
}
// Dot product
let mut dot = 0.0f64;
for (term, &freq_a) in &tf_a {
if let Some(&freq_b) = tf_b.get(term) {
dot += freq_a as f64 * freq_b as f64;
}
}
// Magnitudes
let mag_a: f64 = tf_a.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
let mag_b: f64 = tf_b.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
if mag_a < 1e-10 || mag_b < 1e-10 {
return 0.0;
}
(dot / (mag_a * mag_b)) as f32
}
/// Compute pairwise similarity for a set of documents.
/// Returns pairs with similarity above threshold.
pub fn pairwise_similar(
docs: &[(String, String)], // (key, content)
threshold: f32,
) -> Vec<(String, String, f32)> {
let mut results = Vec::new();
for i in 0..docs.len() {
for j in (i + 1)..docs.len() {
let sim = cosine_similarity(&docs[i].1, &docs[j].1);
if sim >= threshold {
results.push((docs[i].0.clone(), docs[j].0.clone(), sim));
}
}
}
results.sort_by(|a, b| b.2.total_cmp(&a.2));
results
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_stem() {
assert_eq!(stem("running"), "runn"); // -ning → n
assert_eq!(stem("talking"), "talk"); // not matched by specific consonant rules
assert_eq!(stem("slowly"), "slow"); // -ly
// The stemmer is minimal — it doesn't need to be perfect,
// just consistent enough that related words collide.
assert_eq!(stem("observations"), "observation"); // -s stripped, -ation stays (word too short after)
}
#[test]
fn test_cosine_identical() {
let text = "the quick brown fox jumps over the lazy dog";
let sim = cosine_similarity(text, text);
assert!((sim - 1.0).abs() < 0.01, "identical docs should have sim ~1.0, got {}", sim);
}
#[test]
fn test_cosine_different() {
let a = "kernel filesystem transaction restart handling";
let b = "cooking recipe chocolate cake baking temperature";
let sim = cosine_similarity(a, b);
assert!(sim < 0.1, "unrelated docs should have low sim, got {}", sim);
}
}

597
src/hippocampus/spectral.rs Normal file
View file

@ -0,0 +1,597 @@
// Spectral decomposition of the memory graph.
//
// Computes eigenvalues and eigenvectors of the normalized graph Laplacian.
// The eigenvectors provide natural coordinates for each node — connected
// nodes land nearby, communities form clusters, bridges sit between clusters.
//
// The eigenvalue spectrum reveals:
// - Number of connected components (count of zero eigenvalues)
// - Number of natural communities (eigenvalues near zero, before the gap)
// - How well-connected the graph is (Fiedler value = second eigenvalue)
//
// The eigenvectors provide:
// - Spectral coordinates for each node (the embedding)
// - Community membership (sign/magnitude of Fiedler vector)
// - Natural projections (select which eigenvectors to include)
use crate::graph::Graph;
use faer::Mat;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
pub struct SpectralResult {
/// Node keys in index order
pub keys: Vec<String>,
/// Eigenvalues in ascending order
pub eigenvalues: Vec<f64>,
/// Eigenvectors: eigvecs[k] is the k-th eigenvector (ascending eigenvalue order),
/// with eigvecs[k][i] being the value for node keys[i]
pub eigvecs: Vec<Vec<f64>>,
}
/// Per-node spectral embedding, serializable to disk.
#[derive(Serialize, Deserialize)]
pub struct SpectralEmbedding {
/// Number of dimensions (eigenvectors)
pub dims: usize,
/// Eigenvalues for each dimension
pub eigenvalues: Vec<f64>,
/// Node key → coordinate vector
pub coords: HashMap<String, Vec<f64>>,
}
pub fn embedding_path() -> PathBuf {
crate::store::memory_dir().join("spectral-embedding.json")
}
/// Compute spectral decomposition of the memory graph.
///
/// Returns the smallest `k` eigenvalues and their eigenvectors of the
/// normalized Laplacian L_sym = I - D^{-1/2} A D^{-1/2}.
///
/// We compute the full decomposition (it's only 2000×2000, takes <1s)
/// and return the bottom k.
pub fn decompose(graph: &Graph, k: usize) -> SpectralResult {
// Only include nodes with edges (filter isolates)
let mut keys: Vec<String> = graph.nodes().iter()
.filter(|k| graph.degree(k) > 0)
.cloned()
.collect();
keys.sort();
let n = keys.len();
let isolates = graph.nodes().len() - n;
if isolates > 0 {
eprintln!("note: filtered {} isolated nodes, decomposing {} connected nodes", isolates, n);
}
let key_to_idx: HashMap<&str, usize> = keys.iter()
.enumerate()
.map(|(i, k)| (k.as_str(), i))
.collect();
// Build weighted degree vector and adjacency
let mut degree = vec![0.0f64; n];
let mut adj_entries: Vec<(usize, usize, f64)> = Vec::new();
for (i, key) in keys.iter().enumerate() {
for (neighbor, strength) in graph.neighbors(key) {
if let Some(&j) = key_to_idx.get(neighbor.as_str())
&& j > i { // each edge once
let w = strength as f64;
adj_entries.push((i, j, w));
degree[i] += w;
degree[j] += w;
}
}
}
// Build normalized Laplacian: L_sym = I - D^{-1/2} A D^{-1/2}
let mut laplacian = Mat::<f64>::zeros(n, n);
// Diagonal = 1 for nodes with edges, 0 for isolates
for i in 0..n {
if degree[i] > 0.0 {
laplacian[(i, i)] = 1.0;
}
}
// Off-diagonal: -w / sqrt(d_i * d_j)
for &(i, j, w) in &adj_entries {
if degree[i] > 0.0 && degree[j] > 0.0 {
let val = -w / (degree[i] * degree[j]).sqrt();
laplacian[(i, j)] = val;
laplacian[(j, i)] = val;
}
}
// Eigendecompose
let eig = laplacian.self_adjoint_eigen(faer::Side::Lower)
.expect("eigendecomposition failed");
let s = eig.S();
let u = eig.U();
let mut eigenvalues = Vec::with_capacity(k);
let mut eigvecs = Vec::with_capacity(k);
let s_col = s.column_vector();
// Skip trivial eigenvalues (near-zero = null space from disconnected components).
// The number of zero eigenvalues equals the number of connected components.
let mut start = 0;
while start < n && s_col[start].abs() < 1e-8 {
start += 1;
}
let k = k.min(n.saturating_sub(start));
for col in start..start + k {
eigenvalues.push(s_col[col]);
let mut vec = Vec::with_capacity(n);
for row in 0..n {
vec.push(u[(row, col)]);
}
eigvecs.push(vec);
}
SpectralResult { keys, eigenvalues, eigvecs }
}
/// Print the spectral summary: eigenvalue spectrum, then each axis with
/// its extreme nodes (what the axis "means").
pub fn print_summary(result: &SpectralResult, graph: &Graph) {
let n = result.keys.len();
let k = result.eigenvalues.len();
println!("Spectral Decomposition — {} nodes, {} eigenpairs", n, k);
println!("=========================================\n");
// Compact eigenvalue table
println!("Eigenvalue spectrum:");
for (i, &ev) in result.eigenvalues.iter().enumerate() {
let gap = if i > 0 {
ev - result.eigenvalues[i - 1]
} else {
0.0
};
let gap_bar = if i > 0 {
let bars = (gap * 500.0).min(40.0) as usize;
"#".repeat(bars)
} else {
String::new()
};
println!(" λ_{:<2} = {:.6} {}", i, ev, gap_bar);
}
// Connected components
let near_zero = result.eigenvalues.iter()
.filter(|&&v| v.abs() < 1e-6)
.count();
if near_zero > 1 {
println!("\n {} eigenvalues near 0 = {} disconnected components", near_zero, near_zero);
}
// Each axis: what are the extremes?
println!("\n\nNatural axes of the knowledge space");
println!("====================================");
for axis in 0..k {
let ev = result.eigenvalues[axis];
let vec = &result.eigvecs[axis];
// Sort nodes by their value on this axis
let mut indexed: Vec<(usize, f64)> = vec.iter()
.enumerate()
.map(|(i, &v)| (i, v))
.collect();
indexed.sort_by(|a, b| a.1.total_cmp(&b.1));
// Compute the "spread" — how much this axis differentiates
let min_val = indexed.first().map(|x| x.1).unwrap_or(0.0);
let max_val = indexed.last().map(|x| x.1).unwrap_or(0.0);
println!("\n--- Axis {} (λ={:.6}, range={:.4}) ---", axis, ev, max_val - min_val);
// Show extremes: 5 most negative, 5 most positive
let show = 5;
println!(" Negative pole:");
for &(idx, val) in indexed.iter().take(show) {
let key = &result.keys[idx];
// Shorten key for display: take last component
let short = shorten_key(key);
let deg = graph.degree(key);
let comm = graph.communities().get(key).copied().unwrap_or(999);
println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
}
println!(" Positive pole:");
for &(idx, val) in indexed.iter().rev().take(show) {
let key = &result.keys[idx];
let short = shorten_key(key);
let deg = graph.degree(key);
let comm = graph.communities().get(key).copied().unwrap_or(999);
println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
}
}
}
/// Shorten a node key for display.
fn shorten_key(key: &str) -> &str {
if key.len() > 60 { &key[..60] } else { key }
}
/// Convert SpectralResult to a per-node embedding (transposing the layout).
pub fn to_embedding(result: &SpectralResult) -> SpectralEmbedding {
let dims = result.eigvecs.len();
let mut coords = HashMap::new();
for (i, key) in result.keys.iter().enumerate() {
let mut vec = Vec::with_capacity(dims);
for d in 0..dims {
vec.push(result.eigvecs[d][i]);
}
coords.insert(key.clone(), vec);
}
SpectralEmbedding {
dims,
eigenvalues: result.eigenvalues.clone(),
coords,
}
}
/// Save embedding to disk.
pub fn save_embedding(emb: &SpectralEmbedding) -> Result<(), String> {
let path = embedding_path();
let json = serde_json::to_string(emb)
.map_err(|e| format!("serialize embedding: {}", e))?;
std::fs::write(&path, json)
.map_err(|e| format!("write {}: {}", path.display(), e))?;
eprintln!("Saved {}-dim embedding for {} nodes to {}",
emb.dims, emb.coords.len(), path.display());
Ok(())
}
/// Load embedding from disk.
pub fn load_embedding() -> Result<SpectralEmbedding, String> {
let path = embedding_path();
let data = std::fs::read_to_string(&path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
serde_json::from_str(&data)
.map_err(|e| format!("parse embedding: {}", e))
}
/// Find the k nearest neighbors to a node in spectral space.
///
/// Uses weighted euclidean distance where each dimension is weighted
/// by 1/eigenvalue — lower eigenvalues (coarser structure) matter more.
pub fn nearest_neighbors(
emb: &SpectralEmbedding,
key: &str,
k: usize,
) -> Vec<(String, f64)> {
let target = match emb.coords.get(key) {
Some(c) => c,
None => return vec![],
};
let weights = eigenvalue_weights(&emb.eigenvalues);
let mut distances: Vec<(String, f64)> = emb.coords.iter()
.filter(|(k, _)| k.as_str() != key)
.map(|(k, coords)| (k.clone(), weighted_distance(target, coords, &weights)))
.collect();
distances.sort_by(|a, b| a.1.total_cmp(&b.1));
distances.truncate(k);
distances
}
/// Find nearest neighbors to a set of seed nodes (multi-seed query).
/// Returns nodes ranked by minimum distance to any seed.
pub fn nearest_to_seeds(
emb: &SpectralEmbedding,
seeds: &[&str],
k: usize,
) -> Vec<(String, f64)> {
nearest_to_seeds_weighted(emb, &seeds.iter().map(|&s| (s, 1.0)).collect::<Vec<_>>(), None, k)
}
/// Find nearest neighbors to weighted seed nodes, using link weights.
///
/// Each seed has a weight (from query term weighting). For candidates
/// directly linked to a seed, the spectral distance is scaled by
/// 1/link_strength — strong links make effective distance shorter.
/// Seed weight scales the contribution: high-weight seeds pull harder.
///
/// Returns (key, effective_distance) sorted by distance ascending.
pub fn nearest_to_seeds_weighted(
emb: &SpectralEmbedding,
seeds: &[(&str, f64)], // (key, seed_weight)
graph: Option<&crate::graph::Graph>,
k: usize,
) -> Vec<(String, f64)> {
let seed_set: HashSet<&str> = seeds.iter().map(|(s, _)| *s).collect();
let seed_data: Vec<(&str, &Vec<f64>, f64)> = seeds.iter()
.filter_map(|(s, w)| {
emb.coords.get(*s)
.filter(|c| c.iter().any(|&v| v.abs() > 1e-12)) // skip degenerate seeds
.map(|c| (*s, c, *w))
})
.collect();
if seed_data.is_empty() {
return vec![];
}
// Build seed→neighbor link strength lookup
let link_strengths: HashMap<(&str, &str), f32> = if let Some(g) = graph {
let mut map = HashMap::new();
for &(seed_key, _) in seeds {
for (neighbor, strength) in g.neighbors(seed_key) {
map.insert((seed_key, neighbor.as_str()), strength);
}
}
map
} else {
HashMap::new()
};
let dim_weights = eigenvalue_weights(&emb.eigenvalues);
let mut distances: Vec<(String, f64)> = emb.coords.iter()
.filter(|(k, coords)| {
!seed_set.contains(k.as_str())
&& coords.iter().any(|&v| v.abs() > 1e-12) // skip degenerate zero-coord nodes
})
.map(|(candidate_key, coords)| {
let min_dist = seed_data.iter()
.map(|(seed_key, sc, seed_weight)| {
let raw_dist = weighted_distance(coords, sc, &dim_weights);
// Scale by link strength if directly connected
let link_scale = link_strengths
.get(&(*seed_key, candidate_key.as_str()))
.map(|&s| 1.0 / (1.0 + s as f64)) // strong link → smaller distance
.unwrap_or(1.0);
raw_dist * link_scale / seed_weight
})
.fold(f64::MAX, f64::min);
(candidate_key.clone(), min_dist)
})
.collect();
distances.sort_by(|a, b| a.1.total_cmp(&b.1));
distances.truncate(k);
distances
}
/// Weighted euclidean distance in spectral space.
/// Dimensions weighted by 1/eigenvalue — coarser structure matters more.
fn weighted_distance(a: &[f64], b: &[f64], weights: &[f64]) -> f64 {
a.iter()
.zip(b.iter())
.zip(weights.iter())
.map(|((&x, &y), &w)| w * (x - y) * (x - y))
.sum::<f64>()
.sqrt()
}
/// Compute eigenvalue-inverse weights for distance calculations.
fn eigenvalue_weights(eigenvalues: &[f64]) -> Vec<f64> {
eigenvalues.iter()
.map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
.collect()
}
/// Compute cluster centers (centroids) in spectral space.
pub fn cluster_centers(
emb: &SpectralEmbedding,
communities: &HashMap<String, u32>,
) -> HashMap<u32, Vec<f64>> {
let mut sums: HashMap<u32, (Vec<f64>, usize)> = HashMap::new();
for (key, coords) in &emb.coords {
if let Some(&comm) = communities.get(key) {
let entry = sums.entry(comm)
.or_insert_with(|| (vec![0.0; emb.dims], 0));
for (i, &c) in coords.iter().enumerate() {
entry.0[i] += c;
}
entry.1 += 1;
}
}
sums.into_iter()
.map(|(comm, (sum, count))| {
let center: Vec<f64> = sum.iter()
.map(|s| s / count as f64)
.collect();
(comm, center)
})
.collect()
}
/// Per-node analysis of spectral position relative to communities.
pub struct SpectralPosition {
pub key: String,
pub community: u32,
/// Distance to own community center
pub dist_to_center: f64,
/// Distance to nearest OTHER community center
pub dist_to_nearest: f64,
/// Which community is nearest (other than own)
pub nearest_community: u32,
/// dist_to_center / median_dist_in_community (>1 = outlier)
pub outlier_score: f64,
/// dist_to_center / dist_to_nearest (>1 = between clusters, potential bridge)
pub bridge_score: f64,
}
/// Analyze spectral positions for all nodes.
///
/// Returns positions sorted by outlier_score descending (most displaced first).
pub fn analyze_positions(
emb: &SpectralEmbedding,
communities: &HashMap<String, u32>,
) -> Vec<SpectralPosition> {
let centers = cluster_centers(emb, communities);
let weights = eigenvalue_weights(&emb.eigenvalues);
// Compute distances to own community center
let mut by_community: HashMap<u32, Vec<f64>> = HashMap::new();
let mut node_dists: Vec<(String, u32, f64)> = Vec::new();
for (key, coords) in &emb.coords {
if let Some(&comm) = communities.get(key)
&& let Some(center) = centers.get(&comm) {
let dist = weighted_distance(coords, center, &weights);
by_community.entry(comm).or_default().push(dist);
node_dists.push((key.clone(), comm, dist));
}
}
// Median distance per community for outlier scoring
let medians: HashMap<u32, f64> = by_community.into_iter()
.map(|(comm, mut dists)| {
dists.sort_by(|a, b| a.total_cmp(b));
let median = if dists.is_empty() {
1.0
} else if dists.len() % 2 == 0 {
(dists[dists.len() / 2 - 1] + dists[dists.len() / 2]) / 2.0
} else {
dists[dists.len() / 2]
};
(comm, median.max(1e-6))
})
.collect();
let mut positions: Vec<SpectralPosition> = node_dists.into_iter()
.map(|(key, comm, dist_to_center)| {
let coords = &emb.coords[&key];
let (nearest_community, dist_to_nearest) = centers.iter()
.filter(|&(&c, _)| c != comm)
.map(|(&c, center)| (c, weighted_distance(coords, center, &weights)))
.min_by(|a, b| a.1.total_cmp(&b.1))
.unwrap_or((comm, f64::MAX));
let median = medians.get(&comm).copied().unwrap_or(1.0);
let outlier_score = dist_to_center / median;
let bridge_score = if dist_to_nearest > 1e-8 {
dist_to_center / dist_to_nearest
} else {
0.0
};
SpectralPosition {
key, community: comm,
dist_to_center, dist_to_nearest, nearest_community,
outlier_score, bridge_score,
}
})
.collect();
positions.sort_by(|a, b| b.outlier_score.total_cmp(&a.outlier_score));
positions
}
/// Find pairs of nodes that are spectrally close but not linked in the graph.
///
/// These are the most valuable candidates for extractor agents —
/// the spectral structure says they should be related, but nobody
/// has articulated why.
pub fn unlinked_neighbors(
emb: &SpectralEmbedding,
linked_pairs: &HashSet<(String, String)>,
max_pairs: usize,
) -> Vec<(String, String, f64)> {
let weights = eigenvalue_weights(&emb.eigenvalues);
let keys: Vec<&String> = emb.coords.keys().collect();
let mut pairs: Vec<(String, String, f64)> = Vec::new();
for (i, k1) in keys.iter().enumerate() {
let c1 = &emb.coords[*k1];
for k2 in keys.iter().skip(i + 1) {
// Skip if already linked
let pair_fwd = ((*k1).clone(), (*k2).clone());
let pair_rev = ((*k2).clone(), (*k1).clone());
if linked_pairs.contains(&pair_fwd) || linked_pairs.contains(&pair_rev) {
continue;
}
let dist = weighted_distance(c1, &emb.coords[*k2], &weights);
pairs.push(((*k1).clone(), (*k2).clone(), dist));
}
}
pairs.sort_by(|a, b| a.2.total_cmp(&b.2));
pairs.truncate(max_pairs);
pairs
}
/// Approximate spectral coordinates for a new node using Nyström extension.
///
/// Given a new node's edges to existing nodes, estimate where it would
/// land in spectral space without recomputing the full decomposition.
/// Uses weighted average of neighbors' coordinates, weighted by edge strength.
pub fn nystrom_project(
emb: &SpectralEmbedding,
neighbors: &[(&str, f32)], // (key, edge_strength)
) -> Option<Vec<f64>> {
let mut weighted_sum = vec![0.0f64; emb.dims];
let mut total_weight = 0.0f64;
for &(key, strength) in neighbors {
if let Some(coords) = emb.coords.get(key) {
let w = strength as f64;
for (i, &c) in coords.iter().enumerate() {
weighted_sum[i] += w * c;
}
total_weight += w;
}
}
if total_weight < 1e-8 {
return None;
}
Some(weighted_sum.iter().map(|s| s / total_weight).collect())
}
/// Classify a spectral position: well-integrated, outlier, bridge, or orphan.
pub fn classify_position(pos: &SpectralPosition) -> &'static str {
if pos.bridge_score > 0.7 {
"bridge" // between two communities
} else if pos.outlier_score > 2.0 {
"outlier" // far from own community center
} else if pos.outlier_score < 0.5 {
"core" // close to community center
} else {
"peripheral" // normal community member
}
}
/// Identify which spectral dimensions a set of nodes load on most heavily.
/// Returns dimension indices sorted by total loading.
pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize, f64)> {
let coords: Vec<&Vec<f64>> = keys.iter()
.filter_map(|k| emb.coords.get(*k))
.collect();
if coords.is_empty() {
return vec![];
}
let mut dim_loading: Vec<(usize, f64)> = (0..emb.dims)
.map(|d| {
let loading: f64 = coords.iter()
.map(|c| c[d].abs())
.sum();
(d, loading)
})
.collect();
dim_loading.sort_by(|a, b| b.1.total_cmp(&a.1));
dim_loading
}

View file

@ -0,0 +1,348 @@
// Append-only Cap'n Proto storage + derived KV cache
//
// Two log files are source of truth:
// nodes.capnp - ContentNode messages
// relations.capnp - Relation messages
//
// The Store struct is the derived cache: latest version per UUID,
// rebuilt from logs when stale. Three-tier load strategy:
// 1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
// 2. bincode cache (state.bin) — ~10ms
// 3. capnp log replay — ~40ms
// Staleness: log file sizes embedded in cache headers.
//
// Module layout:
// types.rs — Node, Relation, enums, capnp macros, path helpers
// parse.rs — markdown → MemoryUnit parsing
// view.rs — zero-copy read-only access (StoreView, MmapView)
// persist.rs — load, save, replay, append, snapshot (all disk IO)
// ops.rs — mutations (upsert, delete, decay, cap_degree, etc.)
// mod.rs — re-exports, key resolution, ingestion, rendering
mod types;
mod parse;
mod view;
mod persist;
mod ops;
// Re-export everything callers need
pub use types::{
memory_dir, nodes_path,
now_epoch, epoch_to_local, format_date, format_datetime, format_datetime_space, compact_timestamp, today,
Node, Relation, NodeType, Provenance, RelationType,
RetrievalEvent, Params, GapRecord, Store,
new_node, new_relation,
};
pub use parse::{MemoryUnit, parse_units};
pub use view::{StoreView, AnyView};
pub use persist::fsck;
pub use persist::strip_md_keys;
pub use ops::TASK_PROVENANCE;
use crate::graph::{self, Graph};
use std::fs;
use std::io::Write as IoWrite;
use std::path::Path;
use parse::classify_filename;
/// Strip .md suffix from a key, handling both bare keys and section keys.
/// "journal.md#j-2026" → "journal#j-2026", "identity.md" → "identity", "identity" → "identity"
pub fn strip_md_suffix(key: &str) -> String {
if let Some((file, section)) = key.split_once('#') {
let bare = file.strip_suffix(".md").unwrap_or(file);
format!("{}#{}", bare, section)
} else {
key.strip_suffix(".md").unwrap_or(key).to_string()
}
}
impl Store {
pub fn build_graph(&self) -> Graph {
graph::build_graph(self)
}
pub fn resolve_key(&self, target: &str) -> Result<String, String> {
// Strip .md suffix if present — keys no longer use it
let bare = strip_md_suffix(target);
if self.nodes.contains_key(&bare) {
return Ok(bare);
}
let matches: Vec<_> = self.nodes.keys()
.filter(|k| k.to_lowercase().contains(&target.to_lowercase()))
.cloned().collect();
match matches.len() {
0 => Err(format!("No entry for '{}'. Run 'init'?", target)),
1 => Ok(matches[0].clone()),
n if n <= 10 => {
let list = matches.join("\n ");
Err(format!("Ambiguous '{}'. Matches:\n {}", target, list))
}
n => Err(format!("Too many matches for '{}' ({}). Be more specific.", target, n)),
}
}
/// Resolve a link target to (key, uuid).
fn resolve_node_uuid(&self, target: &str) -> Option<(String, [u8; 16])> {
let bare = strip_md_suffix(target);
let n = self.nodes.get(&bare)?;
Some((bare, n.uuid))
}
/// Append retrieval event to retrieval.log without needing a Store instance.
pub fn log_retrieval_static(query: &str, results: &[String]) {
let path = memory_dir().join("retrieval.log");
let line = format!("[{}] q=\"{}\" hits={}\n", today(), query, results.len());
if let Ok(mut f) = fs::OpenOptions::new()
.create(true).append(true).open(&path) {
let _ = f.write_all(line.as_bytes());
}
}
/// Scan markdown files and index all memory units
pub fn init_from_markdown(&mut self) -> Result<usize, String> {
let dir = memory_dir();
let mut count = 0;
if dir.exists() {
// Build edge set for O(1) dedup during ingestion
let mut edge_set = self.build_edge_set();
count = self.scan_dir_for_init(&dir, &mut edge_set)?;
}
Ok(count)
}
/// Build a HashSet of existing (source, target) UUID pairs for O(1) dedup.
fn build_edge_set(&self) -> std::collections::HashSet<([u8; 16], [u8; 16])> {
let mut set = std::collections::HashSet::with_capacity(self.relations.len() * 2);
for r in &self.relations {
set.insert((r.source, r.target));
set.insert((r.target, r.source));
}
set
}
fn scan_dir_for_init(
&mut self,
dir: &Path,
edge_set: &mut std::collections::HashSet<([u8; 16], [u8; 16])>,
) -> Result<usize, String> {
let mut count = 0;
let entries = fs::read_dir(dir)
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
count += self.scan_dir_for_init(&path, edge_set)?;
continue;
}
let Some(ext) = path.extension() else { continue };
if ext != "md" { continue }
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = fs::read_to_string(&path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
let units = parse_units(&filename, &content);
let (new_count, _) = self.ingest_units(&units, &filename)?;
count += new_count;
// Create relations from links
let mut new_relations = Vec::new();
for unit in &units {
let source_uuid = match self.nodes.get(&unit.key) {
Some(n) => n.uuid,
None => continue,
};
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
let Some((key, uuid)) = self.resolve_node_uuid(link) else { continue };
if !edge_set.contains(&(source_uuid, uuid)) {
edge_set.insert((source_uuid, uuid));
edge_set.insert((uuid, source_uuid));
new_relations.push(new_relation(
source_uuid, uuid, RelationType::Link, 1.0,
&unit.key, &key,
));
}
}
for cause in &unit.causes {
let Some((key, uuid)) = self.resolve_node_uuid(cause) else { continue };
if !edge_set.contains(&(uuid, source_uuid)) {
edge_set.insert((uuid, source_uuid));
new_relations.push(new_relation(
uuid, source_uuid, RelationType::Causal, 1.0,
&key, &unit.key,
));
}
}
}
if !new_relations.is_empty() {
self.append_relations(&new_relations)?;
self.relations.extend(new_relations);
}
}
Ok(count)
}
/// Process parsed memory units: diff against existing nodes, persist changes.
/// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs.
fn ingest_units(&mut self, units: &[MemoryUnit], filename: &str) -> Result<(usize, usize), String> {
let _lock = types::StoreLock::acquire()?;
self.refresh_nodes()?;
let node_type = classify_filename(filename);
let mut new_nodes = Vec::new();
let mut updated_nodes = Vec::new();
for (pos, unit) in units.iter().enumerate() {
if let Some(existing) = self.nodes.get(&unit.key) {
if existing.content != unit.content || existing.position != pos as u32 {
let mut node = existing.clone();
node.content = unit.content.clone();
node.position = pos as u32;
node.version += 1;
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
updated_nodes.push(node);
}
} else {
let mut node = new_node(&unit.key, &unit.content);
node.node_type = node_type;
node.position = pos as u32;
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
new_nodes.push(node);
}
}
if !new_nodes.is_empty() {
self.append_nodes_unlocked(&new_nodes)?;
for node in &new_nodes {
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node.clone());
}
}
if !updated_nodes.is_empty() {
self.append_nodes_unlocked(&updated_nodes)?;
for node in &updated_nodes {
self.nodes.insert(node.key.clone(), node.clone());
}
}
Ok((new_nodes.len(), updated_nodes.len()))
}
/// Import a markdown file into the store, parsing it into nodes.
pub fn import_file(&mut self, path: &Path) -> Result<(usize, usize), String> {
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = fs::read_to_string(path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
let units = parse_units(&filename, &content);
self.ingest_units(&units, &filename)
}
/// Gather all sections for a file key, sorted by position.
pub fn file_sections(&self, file_key: &str) -> Option<Vec<&Node>> {
let prefix = format!("{}#", file_key);
let mut sections: Vec<_> = self.nodes.values()
.filter(|n| n.key == file_key || n.key.starts_with(&prefix))
.collect();
if sections.is_empty() {
return None;
}
sections.sort_by_key(|n| n.position);
Some(sections)
}
/// Render a file key as plain content (no mem markers).
pub fn render_file(&self, file_key: &str) -> Option<String> {
let sections = self.file_sections(file_key)?;
let mut output = String::new();
for node in &sections {
output.push_str(&node.content);
if !node.content.ends_with('\n') {
output.push('\n');
}
output.push('\n');
}
Some(output.trim_end().to_string())
}
/// Render a file key back to markdown with reconstituted mem markers.
pub fn export_to_markdown(&self, file_key: &str) -> Option<String> {
let sections = self.file_sections(file_key)?;
let mut output = String::new();
for node in &sections {
if node.key.contains('#') {
let section_id = node.key.rsplit_once('#').map_or("", |(_, s)| s);
let links: Vec<_> = self.relations.iter()
.filter(|r| r.source_key == node.key && !r.deleted
&& r.rel_type != RelationType::Causal)
.map(|r| r.target_key.clone())
.collect();
let causes: Vec<_> = self.relations.iter()
.filter(|r| r.target_key == node.key && !r.deleted
&& r.rel_type == RelationType::Causal)
.map(|r| r.source_key.clone())
.collect();
let mut marker_parts = vec![format!("id={}", section_id)];
if !links.is_empty() {
marker_parts.push(format!("links={}", links.join(",")));
}
if !causes.is_empty() {
marker_parts.push(format!("causes={}", causes.join(",")));
}
output.push_str(&format!("<!-- mem: {} -->\n", marker_parts.join(" ")));
}
output.push_str(&node.content);
if !node.content.ends_with('\n') {
output.push('\n');
}
output.push('\n');
}
Some(output.trim_end().to_string())
}
/// Find the episodic node that best matches the given entry text.
pub fn find_journal_node(&self, entry_text: &str) -> Option<String> {
if entry_text.is_empty() {
return None;
}
let words: Vec<&str> = entry_text.split_whitespace()
.filter(|w| w.len() > 5)
.take(5)
.collect();
let mut best_key = None;
let mut best_score = 0;
for (key, node) in &self.nodes {
if node.node_type != NodeType::EpisodicSession {
continue;
}
let content_lower = node.content.to_lowercase();
let score: usize = words.iter()
.filter(|w| content_lower.contains(&w.to_lowercase()))
.count();
if score > best_score {
best_score = score;
best_key = Some(key.clone());
}
}
best_key
}
}

View file

@ -0,0 +1,328 @@
// Mutation operations on the store
//
// CRUD (upsert, delete, modify), feedback tracking (mark_used, mark_wrong),
// maintenance (decay, fix_categories, cap_degree), and graph metrics.
use super::types::*;
use std::collections::{HashMap, HashSet};
tokio::task_local! {
/// Task-scoped provenance for agent writes. Set by the daemon before
/// running an agent's tool calls, so all writes within that task are
/// automatically attributed to the agent.
pub static TASK_PROVENANCE: String;
}
/// Provenance priority: task_local (agent context) > env var > "manual".
fn current_provenance() -> String {
TASK_PROVENANCE.try_with(|p| p.clone())
.or_else(|_| std::env::var("POC_PROVENANCE").map_err(|_| ()))
.unwrap_or_else(|_| "manual".to_string())
}
impl Store {
/// Add or update a node (appends to log + updates cache).
/// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs.
pub fn upsert_node(&mut self, mut node: Node) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
self.refresh_nodes()?;
if let Some(existing) = self.nodes.get(&node.key) {
node.uuid = existing.uuid;
node.version = existing.version + 1;
}
self.append_nodes_unlocked(&[node.clone()])?;
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node);
Ok(())
}
/// Add a relation (appends to log + updates cache)
pub fn add_relation(&mut self, rel: Relation) -> Result<(), String> {
self.append_relations(std::slice::from_ref(&rel))?;
self.relations.push(rel);
Ok(())
}
/// Upsert a node: update if exists (and content changed), create if not.
/// Returns: "created", "updated", or "unchanged".
///
/// Provenance is determined by the POC_PROVENANCE env var if set,
/// otherwise defaults to Manual.
pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> {
let prov = current_provenance();
self.upsert_provenance(key, content, &prov)
}
/// Upsert with explicit provenance (for agent-created nodes).
/// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs.
pub fn upsert_provenance(&mut self, key: &str, content: &str, provenance: &str) -> Result<&'static str, String> {
let _lock = StoreLock::acquire()?;
self.refresh_nodes()?;
if let Some(existing) = self.nodes.get(key) {
if existing.content == content {
return Ok("unchanged");
}
let mut node = existing.clone();
node.content = content.to_string();
node.provenance = provenance.to_string();
node.timestamp = now_epoch();
node.version += 1;
self.append_nodes_unlocked(std::slice::from_ref(&node))?;
self.nodes.insert(key.to_string(), node);
Ok("updated")
} else {
let mut node = new_node(key, content);
node.provenance = provenance.to_string();
self.append_nodes_unlocked(std::slice::from_ref(&node))?;
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(key.to_string(), node);
Ok("created")
}
}
/// Soft-delete a node (appends deleted version, removes from cache).
/// Holds StoreLock across refresh + write to see concurrent creates.
pub fn delete_node(&mut self, key: &str) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
self.refresh_nodes()?;
let prov = current_provenance();
let node = self.nodes.get(key)
.ok_or_else(|| format!("No node '{}'", key))?;
let mut deleted = node.clone();
deleted.deleted = true;
deleted.version += 1;
deleted.provenance = prov;
deleted.timestamp = now_epoch();
self.append_nodes_unlocked(std::slice::from_ref(&deleted))?;
self.nodes.remove(key);
Ok(())
}
/// Rename a node: change its key, update debug strings on all edges.
///
/// Graph edges (source/target UUIDs) are unaffected — they're already
/// UUID-based. We update the human-readable source_key/target_key strings
/// on relations, and created_at is preserved untouched.
///
/// Appends: (new_key, v+1) + (old_key, deleted, v+1) + updated relations.
/// Holds StoreLock across refresh + write to prevent races.
pub fn rename_node(&mut self, old_key: &str, new_key: &str) -> Result<(), String> {
if old_key == new_key {
return Ok(());
}
let _lock = StoreLock::acquire()?;
self.refresh_nodes()?;
if self.nodes.contains_key(new_key) {
return Err(format!("Key '{}' already exists", new_key));
}
let node = self.nodes.get(old_key)
.ok_or_else(|| format!("No node '{}'", old_key))?
.clone();
let prov = current_provenance();
// New version under the new key
let mut renamed = node.clone();
renamed.key = new_key.to_string();
renamed.version += 1;
renamed.provenance = prov.clone();
renamed.timestamp = now_epoch();
// Deletion record for the old key (same UUID, independent version counter)
let mut tombstone = node.clone();
tombstone.deleted = true;
tombstone.version += 1;
tombstone.provenance = prov;
tombstone.timestamp = now_epoch();
// Collect affected relations and update their debug key strings
let updated_rels: Vec<_> = self.relations.iter()
.filter(|r| r.source_key == old_key || r.target_key == old_key)
.map(|r| {
let mut r = r.clone();
r.version += 1;
if r.source_key == old_key { r.source_key = new_key.to_string(); }
if r.target_key == old_key { r.target_key = new_key.to_string(); }
r
})
.collect();
// Persist under single lock
self.append_nodes_unlocked(&[renamed.clone(), tombstone])?;
if !updated_rels.is_empty() {
self.append_relations_unlocked(&updated_rels)?;
}
// Update in-memory cache
self.nodes.remove(old_key);
self.uuid_to_key.insert(renamed.uuid, new_key.to_string());
self.nodes.insert(new_key.to_string(), renamed);
for updated in &updated_rels {
if let Some(r) = self.relations.iter_mut().find(|r| r.uuid == updated.uuid) {
r.source_key = updated.source_key.clone();
r.target_key = updated.target_key.clone();
r.version = updated.version;
}
}
Ok(())
}
/// Modify a node in-place, bump version, and persist to capnp log.
fn modify_node(&mut self, key: &str, f: impl FnOnce(&mut Node)) -> Result<(), String> {
let node = self.nodes.get_mut(key)
.ok_or_else(|| format!("No node '{}'", key))?;
f(node);
node.version += 1;
let node = node.clone();
self.append_nodes(&[node])
}
pub fn mark_used(&mut self, key: &str) {
let boost = self.params.use_boost as f32;
let _ = self.modify_node(key, |n| {
n.uses += 1;
n.weight = (n.weight + boost).min(1.0);
if n.spaced_repetition_interval < 30 {
n.spaced_repetition_interval = match n.spaced_repetition_interval {
1 => 3, 3 => 7, 7 => 14, 14 => 30, _ => 30,
};
}
n.last_replayed = now_epoch();
});
}
pub fn mark_wrong(&mut self, key: &str, _ctx: Option<&str>) {
let _ = self.modify_node(key, |n| {
n.wrongs += 1;
n.weight = (n.weight - 0.1).max(0.0);
n.spaced_repetition_interval = 1;
});
}
/// Adjust edge strength between two nodes by a delta.
/// Clamps to [0.05, 0.95]. Returns (old_strength, new_strength, edges_modified).
pub fn adjust_edge_strength(&mut self, key_a: &str, key_b: &str, delta: f32) -> (f32, f32, usize) {
let mut old = 0.0f32;
let mut new = 0.0f32;
let mut count = 0;
for rel in &mut self.relations {
if rel.deleted { continue; }
if (rel.source_key == key_a && rel.target_key == key_b)
|| (rel.source_key == key_b && rel.target_key == key_a)
{
old = rel.strength;
rel.strength = (rel.strength + delta).clamp(0.05, 0.95);
new = rel.strength;
rel.version += 1;
count += 1;
}
}
(old, new, count)
}
pub fn record_gap(&mut self, desc: &str) {
self.gaps.push(GapRecord {
description: desc.to_string(),
timestamp: today(),
});
}
/// Cap node degree by soft-deleting edges from mega-hubs.
pub fn cap_degree(&mut self, max_degree: usize) -> Result<(usize, usize), String> {
let mut node_degree: HashMap<String, usize> = HashMap::new();
for rel in &self.relations {
if rel.deleted { continue; }
*node_degree.entry(rel.source_key.clone()).or_default() += 1;
*node_degree.entry(rel.target_key.clone()).or_default() += 1;
}
let mut node_edges: HashMap<String, Vec<usize>> = HashMap::new();
for (i, rel) in self.relations.iter().enumerate() {
if rel.deleted { continue; }
node_edges.entry(rel.source_key.clone()).or_default().push(i);
node_edges.entry(rel.target_key.clone()).or_default().push(i);
}
let mut to_delete: HashSet<usize> = HashSet::new();
let mut hubs_capped = 0;
for (_key, edge_indices) in &node_edges {
let active: Vec<usize> = edge_indices.iter()
.filter(|&&i| !to_delete.contains(&i))
.copied()
.collect();
if active.len() <= max_degree { continue; }
let mut auto_indices: Vec<(usize, f32)> = Vec::new();
let mut link_indices: Vec<(usize, usize)> = Vec::new();
for &i in &active {
let rel = &self.relations[i];
if rel.rel_type == RelationType::Auto {
auto_indices.push((i, rel.strength));
} else {
let other = if &rel.source_key == _key {
&rel.target_key
} else {
&rel.source_key
};
let other_deg = node_degree.get(other).copied().unwrap_or(0);
link_indices.push((i, other_deg));
}
}
let excess = active.len() - max_degree;
auto_indices.sort_by(|a, b| a.1.total_cmp(&b.1));
let auto_prune = excess.min(auto_indices.len());
for &(i, _) in auto_indices.iter().take(auto_prune) {
to_delete.insert(i);
}
let remaining_excess = excess.saturating_sub(auto_prune);
if remaining_excess > 0 {
link_indices.sort_by(|a, b| b.1.cmp(&a.1));
let link_prune = remaining_excess.min(link_indices.len());
for &(i, _) in link_indices.iter().take(link_prune) {
to_delete.insert(i);
}
}
hubs_capped += 1;
}
let mut pruned_rels = Vec::new();
for &i in &to_delete {
self.relations[i].deleted = true;
self.relations[i].version += 1;
pruned_rels.push(self.relations[i].clone());
}
if !pruned_rels.is_empty() {
self.append_relations(&pruned_rels)?;
}
self.relations.retain(|r| !r.deleted);
Ok((hubs_capped, to_delete.len()))
}
/// Update graph-derived fields on all nodes
pub fn update_graph_metrics(&mut self) {
let g = self.build_graph();
let communities = g.communities();
for (key, node) in &mut self.nodes {
node.community_id = communities.get(key).copied();
node.clustering_coefficient = Some(g.clustering_coefficient(key));
node.degree = Some(g.degree(key) as u32);
}
}
}

View file

@ -0,0 +1,173 @@
// Markdown parsing for memory files
//
// Splits markdown files into MemoryUnit structs based on `<!-- mem: ... -->`
// markers. Each marker starts a new section; content before the first marker
// becomes the file-level unit. Links and causal edges are extracted from
// both marker attributes and inline markdown links.
use super::NodeType;
use regex::Regex;
use std::collections::HashMap;
use std::path::Path;
use std::sync::OnceLock;
pub struct MemoryUnit {
pub key: String,
pub content: String,
pub marker_links: Vec<String>,
pub md_links: Vec<String>,
pub causes: Vec<String>,
pub state: Option<String>,
pub source_ref: Option<String>,
}
pub fn classify_filename(filename: &str) -> NodeType {
let bare = filename.strip_suffix(".md").unwrap_or(filename);
if bare.starts_with("daily-") { NodeType::EpisodicDaily }
else if bare.starts_with("weekly-") { NodeType::EpisodicWeekly }
else if bare.starts_with("monthly-") { NodeType::EpisodicMonthly }
else if bare == "journal" { NodeType::EpisodicSession }
else { NodeType::Semantic }
}
pub fn parse_units(raw_filename: &str, content: &str) -> Vec<MemoryUnit> {
let filename = raw_filename.strip_suffix(".md").unwrap_or(raw_filename);
static MARKER_RE: OnceLock<Regex> = OnceLock::new();
static SOURCE_RE: OnceLock<Regex> = OnceLock::new();
static MD_LINK_RE: OnceLock<Regex> = OnceLock::new();
let marker_re = MARKER_RE.get_or_init(||
Regex::new(r"<!--\s*mem:\s*((?:id|links|tags|causes|state)\s*=\s*[^\s].*?)-->").unwrap());
let source_re = SOURCE_RE.get_or_init(||
Regex::new(r"<!--\s*source:\s*(.+?)\s*-->").unwrap());
let md_link_re = MD_LINK_RE.get_or_init(||
Regex::new(r"\[[^\]]*\]\(([^):]+(?:#[^)]*)?)\)").unwrap());
let markers: Vec<_> = marker_re.captures_iter(content)
.map(|cap| {
let full_match = cap.get(0).unwrap();
let attrs_str = &cap[1];
(full_match.start(), full_match.end(), parse_marker_attrs(attrs_str))
})
.collect();
let find_source = |text: &str| -> Option<String> {
source_re.captures(text).map(|c| c[1].trim().to_string())
};
if markers.is_empty() {
let source_ref = find_source(content);
let md_links = extract_md_links(content, md_link_re, filename);
return vec![MemoryUnit {
key: filename.to_string(),
content: content.to_string(),
marker_links: Vec::new(),
md_links,
causes: Vec::new(),
state: None,
source_ref,
}];
}
let mut units = Vec::new();
let first_start = markers[0].0;
let pre_content = content[..first_start].trim();
if !pre_content.is_empty() {
let source_ref = find_source(pre_content);
let md_links = extract_md_links(pre_content, md_link_re, filename);
units.push(MemoryUnit {
key: filename.to_string(),
content: pre_content.to_string(),
marker_links: Vec::new(),
md_links,
causes: Vec::new(),
state: None,
source_ref,
});
}
for (i, (_, end, attrs)) in markers.iter().enumerate() {
let unit_end = if i + 1 < markers.len() {
markers[i + 1].0
} else {
content.len()
};
let unit_content = content[*end..unit_end].trim();
let id = attrs.get("id").cloned().unwrap_or_default();
let key = if id.is_empty() {
format!("{}#unnamed-{}", filename, i)
} else {
format!("{}#{}", filename, id)
};
let marker_links = attrs.get("links")
.map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
.unwrap_or_default();
let causes = attrs.get("causes")
.map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
.unwrap_or_default();
let state = attrs.get("state").cloned();
let source_ref = find_source(unit_content);
let md_links = extract_md_links(unit_content, md_link_re, filename);
units.push(MemoryUnit {
key,
content: unit_content.to_string(),
marker_links,
md_links,
causes,
state,
source_ref,
});
}
units
}
fn parse_marker_attrs(attrs_str: &str) -> HashMap<String, String> {
static ATTR_RE: OnceLock<Regex> = OnceLock::new();
let attr_re = ATTR_RE.get_or_init(|| Regex::new(r"(\w+)\s*=\s*(\S+)").unwrap());
let mut attrs = HashMap::new();
for cap in attr_re.captures_iter(attrs_str) {
attrs.insert(cap[1].to_string(), cap[2].to_string());
}
attrs
}
fn extract_md_links(content: &str, re: &Regex, source_file: &str) -> Vec<String> {
re.captures_iter(content)
.map(|cap| normalize_link(&cap[1], source_file))
.filter(|link| !link.starts_with(source_file) || link.contains('#'))
.collect()
}
pub fn normalize_link(target: &str, source_file: &str) -> String {
let source_bare = source_file.strip_suffix(".md").unwrap_or(source_file);
if target.starts_with('#') {
return format!("{}{}", source_bare, target);
}
let (path_part, fragment) = if let Some(hash_pos) = target.find('#') {
(&target[..hash_pos], Some(&target[hash_pos..]))
} else {
(target, None)
};
let basename = Path::new(path_part)
.file_name()
.map(|f| f.to_string_lossy().to_string())
.unwrap_or_else(|| path_part.to_string());
let bare = basename.strip_suffix(".md").unwrap_or(&basename);
match fragment {
Some(frag) => format!("{}{}", bare, frag),
None => bare.to_string(),
}
}

View file

@ -0,0 +1,939 @@
// Persistence layer: load, save, replay, append, snapshot
//
// Three-tier loading strategy:
// 1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
// 2. bincode cache (state.bin) — ~10ms
// 3. capnp log replay — ~40ms
//
// Logs are append-only; cache staleness uses log file sizes, not mtimes.
use super::types::*;
use crate::memory_capnp;
use capnp::message;
use capnp::serialize;
use std::collections::HashMap;
use std::fs;
use std::io::{BufReader, Seek};
use std::path::Path;
impl Store {
/// Load store from state.bin cache if fresh, otherwise rebuild from capnp logs.
///
/// Staleness check uses log file sizes (not mtimes). Since logs are
/// append-only, any write grows the file, invalidating the cache.
/// This avoids the mtime race that caused data loss with concurrent
/// writers (dream loop, link audit, journal enrichment).
pub fn load() -> Result<Store, String> {
// 1. Try rkyv mmap snapshot (~4ms with deserialize, <1ms zero-copy)
match Self::load_snapshot_mmap() {
Ok(Some(mut store)) => {
// rkyv snapshot doesn't include visits — replay from log
let visits_p = visits_path();
if visits_p.exists() {
store.replay_visits(&visits_p).ok();
}
let tp_p = transcript_progress_path();
if tp_p.exists() {
store.replay_transcript_progress(&tp_p).ok();
}
return Ok(store);
},
Ok(None) => {},
Err(e) => eprintln!("rkyv snapshot: {}", e),
}
// 2. Try bincode state.bin cache (~10ms)
let nodes_p = nodes_path();
let rels_p = relations_path();
let state_p = state_path();
let nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
if let Ok(data) = fs::read(&state_p)
&& data.len() >= CACHE_HEADER_LEN && data[..4] == CACHE_MAGIC {
let cached_nodes = u64::from_le_bytes(data[4..12].try_into().unwrap());
let cached_rels = u64::from_le_bytes(data[12..20].try_into().unwrap());
if cached_nodes == nodes_size && cached_rels == rels_size
&& let Ok(mut store) = bincode::deserialize::<Store>(&data[CACHE_HEADER_LEN..]) {
// Rebuild uuid_to_key (skipped by serde)
for (key, node) in &store.nodes {
store.uuid_to_key.insert(node.uuid, key.clone());
}
store.loaded_nodes_size = nodes_size;
store.loaded_rels_size = rels_size;
// Bootstrap: write rkyv snapshot if missing
if !snapshot_path().exists()
&& let Err(e) = store.save_snapshot(cached_nodes, cached_rels) {
eprintln!("rkyv bootstrap: {}", e);
}
return Ok(store);
}
}
// Stale or no cache — rebuild from capnp logs
let mut store = Store::default();
if nodes_p.exists() {
store.replay_nodes(&nodes_p)?;
}
if rels_p.exists() {
store.replay_relations(&rels_p)?;
}
let visits_p = visits_path();
if visits_p.exists() {
store.replay_visits(&visits_p)?;
}
let tp_p = transcript_progress_path();
if tp_p.exists() {
store.replay_transcript_progress(&tp_p)?;
}
// Record log sizes after replay — this is the state we reflect
store.loaded_nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
store.loaded_rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
// Drop edges referencing deleted/missing nodes
store.relations.retain(|r|
store.nodes.contains_key(&r.source_key) &&
store.nodes.contains_key(&r.target_key)
);
store.save()?;
Ok(store)
}
/// Load store directly from capnp logs, bypassing all caches.
/// Used by fsck to verify cache consistency.
pub fn load_from_logs() -> Result<Store, String> {
let nodes_p = nodes_path();
let rels_p = relations_path();
let mut store = Store::default();
if nodes_p.exists() {
store.replay_nodes(&nodes_p)?;
}
if rels_p.exists() {
store.replay_relations(&rels_p)?;
}
let visits_p = visits_path();
if visits_p.exists() {
store.replay_visits(&visits_p)?;
}
let tp_p = transcript_progress_path();
if tp_p.exists() {
store.replay_transcript_progress(&tp_p)?;
}
Ok(store)
}
/// Replay node log, keeping latest version per UUID.
/// Tracks all UUIDs seen per key to detect duplicates.
fn replay_nodes(&mut self, path: &Path) -> Result<(), String> {
let file = fs::File::open(path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
// Track all non-deleted UUIDs per key to detect duplicates
let mut key_uuids: HashMap<String, Vec<[u8; 16]>> = HashMap::new();
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
let log = msg.get_root::<memory_capnp::node_log::Reader>()
.map_err(|e| format!("read node log: {}", e))?;
for node_reader in log.get_nodes()
.map_err(|e| format!("get nodes: {}", e))? {
let node = Node::from_capnp_migrate(node_reader)?;
let existing_version = self.nodes.get(&node.key)
.map(|n| n.version)
.unwrap_or(0);
if node.version >= existing_version {
if node.deleted {
self.nodes.remove(&node.key);
self.uuid_to_key.remove(&node.uuid);
if let Some(uuids) = key_uuids.get_mut(&node.key) {
uuids.retain(|u| *u != node.uuid);
}
} else {
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node.clone());
let uuids = key_uuids.entry(node.key).or_default();
if !uuids.contains(&node.uuid) {
uuids.push(node.uuid);
}
}
}
}
}
// Report duplicate keys
for (key, uuids) in &key_uuids {
if uuids.len() > 1 {
eprintln!("WARNING: key '{}' has {} UUIDs (duplicate nodes)", key, uuids.len());
}
}
Ok(())
}
/// Replay relation log, keeping latest version per UUID
fn replay_relations(&mut self, path: &Path) -> Result<(), String> {
let file = fs::File::open(path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
// Collect all, then deduplicate by UUID keeping latest version
let mut by_uuid: HashMap<[u8; 16], Relation> = HashMap::new();
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
let log = msg.get_root::<memory_capnp::relation_log::Reader>()
.map_err(|e| format!("read relation log: {}", e))?;
for rel_reader in log.get_relations()
.map_err(|e| format!("get relations: {}", e))? {
let rel = Relation::from_capnp_migrate(rel_reader)?;
let existing_version = by_uuid.get(&rel.uuid)
.map(|r| r.version)
.unwrap_or(0);
if rel.version >= existing_version {
by_uuid.insert(rel.uuid, rel);
}
}
}
self.relations = by_uuid.into_values()
.filter(|r| !r.deleted)
.collect();
Ok(())
}
/// Find all duplicate keys: keys with multiple live UUIDs in the log.
/// Returns a map from key → vec of all live Node versions (one per UUID).
/// The "winner" in self.nodes is always one of them.
pub fn find_duplicates(&self) -> Result<HashMap<String, Vec<Node>>, String> {
let path = nodes_path();
if !path.exists() { return Ok(HashMap::new()); }
let file = fs::File::open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
// Track latest version of each UUID
let mut by_uuid: HashMap<[u8; 16], Node> = HashMap::new();
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
let log = msg.get_root::<memory_capnp::node_log::Reader>()
.map_err(|e| format!("read node log: {}", e))?;
for node_reader in log.get_nodes()
.map_err(|e| format!("get nodes: {}", e))? {
let node = Node::from_capnp_migrate(node_reader)?;
let dominated = by_uuid.get(&node.uuid)
.map(|n| node.version >= n.version)
.unwrap_or(true);
if dominated {
by_uuid.insert(node.uuid, node);
}
}
}
// Group live (non-deleted) nodes by key
let mut by_key: HashMap<String, Vec<Node>> = HashMap::new();
for node in by_uuid.into_values() {
if !node.deleted {
by_key.entry(node.key.clone()).or_default().push(node);
}
}
// Keep only duplicates
by_key.retain(|_, nodes| nodes.len() > 1);
Ok(by_key)
}
/// Append nodes to the log file.
/// Serializes to a Vec first, then does a single write() syscall
/// so the append is atomic with O_APPEND even without flock.
pub fn append_nodes(&mut self, nodes: &[Node]) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
self.append_nodes_unlocked(nodes)
}
/// Append nodes without acquiring the lock. Caller must hold StoreLock.
pub(crate) fn append_nodes_unlocked(&mut self, nodes: &[Node]) -> Result<(), String> {
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::node_log::Builder>();
let mut list = log.init_nodes(nodes.len() as u32);
for (i, node) in nodes.iter().enumerate() {
node.to_capnp(list.reborrow().get(i as u32));
}
}
let mut buf = Vec::new();
serialize::write_message(&mut buf, &msg)
.map_err(|e| format!("serialize nodes: {}", e))?;
let path = nodes_path();
let file = fs::OpenOptions::new()
.create(true).append(true).open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
use std::io::Write;
(&file).write_all(&buf)
.map_err(|e| format!("write nodes: {}", e))?;
self.loaded_nodes_size = file.metadata().map(|m| m.len()).unwrap_or(0);
Ok(())
}
/// Replay only new entries appended to the node log since we last loaded.
/// Call under StoreLock to catch writes from concurrent processes.
pub(crate) fn refresh_nodes(&mut self) -> Result<(), String> {
let path = nodes_path();
let current_size = fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
if current_size <= self.loaded_nodes_size {
return Ok(()); // no new data
}
let file = fs::File::open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
reader.seek(std::io::SeekFrom::Start(self.loaded_nodes_size))
.map_err(|e| format!("seek nodes log: {}", e))?;
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
let log = msg.get_root::<memory_capnp::node_log::Reader>()
.map_err(|e| format!("read node log delta: {}", e))?;
for node_reader in log.get_nodes()
.map_err(|e| format!("get nodes delta: {}", e))? {
let node = Node::from_capnp_migrate(node_reader)?;
let dominated = self.nodes.get(&node.key)
.map(|n| node.version >= n.version)
.unwrap_or(true);
if dominated {
if node.deleted {
self.nodes.remove(&node.key);
self.uuid_to_key.remove(&node.uuid);
} else {
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node);
}
}
}
}
self.loaded_nodes_size = current_size;
Ok(())
}
/// Append relations to the log file.
/// Single write() syscall for atomic O_APPEND.
pub fn append_relations(&mut self, relations: &[Relation]) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
self.append_relations_unlocked(relations)
}
/// Append relations without acquiring the lock. Caller must hold StoreLock.
pub(crate) fn append_relations_unlocked(&mut self, relations: &[Relation]) -> Result<(), String> {
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::relation_log::Builder>();
let mut list = log.init_relations(relations.len() as u32);
for (i, rel) in relations.iter().enumerate() {
rel.to_capnp(list.reborrow().get(i as u32));
}
}
let mut buf = Vec::new();
serialize::write_message(&mut buf, &msg)
.map_err(|e| format!("serialize relations: {}", e))?;
let path = relations_path();
let file = fs::OpenOptions::new()
.create(true).append(true).open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
use std::io::Write;
(&file).write_all(&buf)
.map_err(|e| format!("write relations: {}", e))?;
self.loaded_rels_size = file.metadata().map(|m| m.len()).unwrap_or(0);
Ok(())
}
/// Append agent visit records to the visits log.
pub fn append_visits(&mut self, visits: &[AgentVisit]) -> Result<(), String> {
if visits.is_empty() { return Ok(()); }
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::agent_visit_log::Builder>();
let mut list = log.init_visits(visits.len() as u32);
for (i, visit) in visits.iter().enumerate() {
visit.to_capnp(list.reborrow().get(i as u32));
}
}
let mut buf = Vec::new();
serialize::write_message(&mut buf, &msg)
.map_err(|e| format!("serialize visits: {}", e))?;
let path = visits_path();
let file = fs::OpenOptions::new()
.create(true).append(true).open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
use std::io::Write;
(&file).write_all(&buf)
.map_err(|e| format!("write visits: {}", e))?;
// Update in-memory index
for v in visits {
self.visits
.entry(v.node_key.clone())
.or_default()
.insert(v.agent.clone(), v.timestamp);
}
Ok(())
}
/// Replay visits log to rebuild in-memory index.
fn replay_visits(&mut self, path: &Path) -> Result<(), String> {
let file = fs::File::open(path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
while reader.stream_position().map_err(|e| e.to_string())?
< fs::metadata(path).map_err(|e| e.to_string())?.len()
{
let msg = match serialize::read_message(&mut reader, Default::default()) {
Ok(m) => m,
Err(_) => break,
};
let log = msg.get_root::<memory_capnp::agent_visit_log::Reader>()
.map_err(|e| format!("read visit log: {}", e))?;
for visit in log.get_visits().map_err(|e| e.to_string())? {
let key = visit.get_node_key().ok()
.and_then(|t| t.to_str().ok())
.unwrap_or("")
.to_string();
let agent = visit.get_agent().ok()
.and_then(|t| t.to_str().ok())
.unwrap_or("")
.to_string();
let ts = visit.get_timestamp();
if !key.is_empty() && !agent.is_empty() {
let entry = self.visits.entry(key).or_default();
// Keep latest timestamp per agent
let existing = entry.entry(agent).or_insert(0);
if ts > *existing {
*existing = ts;
}
}
}
}
Ok(())
}
/// Append transcript segment progress records.
pub fn append_transcript_progress(&mut self, segments: &[TranscriptSegment]) -> Result<(), String> {
if segments.is_empty() { return Ok(()); }
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::transcript_progress_log::Builder>();
let mut list = log.init_segments(segments.len() as u32);
for (i, seg) in segments.iter().enumerate() {
seg.to_capnp(list.reborrow().get(i as u32));
}
}
let mut buf = Vec::new();
serialize::write_message(&mut buf, &msg)
.map_err(|e| format!("serialize transcript progress: {}", e))?;
let path = transcript_progress_path();
let file = fs::OpenOptions::new()
.create(true).append(true).open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
use std::io::Write;
(&file).write_all(&buf)
.map_err(|e| format!("write transcript progress: {}", e))?;
// Update in-memory index
for seg in segments {
self.transcript_progress
.entry((seg.transcript_id.clone(), seg.segment_index))
.or_default()
.insert(seg.agent.clone());
}
Ok(())
}
/// Replay transcript progress log to rebuild in-memory index.
fn replay_transcript_progress(&mut self, path: &Path) -> Result<(), String> {
let file = fs::File::open(path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
while reader.stream_position().map_err(|e| e.to_string())?
< fs::metadata(path).map_err(|e| e.to_string())?.len()
{
let msg = match serialize::read_message(&mut reader, Default::default()) {
Ok(m) => m,
Err(_) => break,
};
let log = msg.get_root::<memory_capnp::transcript_progress_log::Reader>()
.map_err(|e| format!("read transcript progress: {}", e))?;
for seg in log.get_segments().map_err(|e| e.to_string())? {
let id = seg.get_transcript_id().ok()
.and_then(|t| t.to_str().ok())
.unwrap_or("")
.to_string();
let agent = seg.get_agent().ok()
.and_then(|t| t.to_str().ok())
.unwrap_or("")
.to_string();
let idx = seg.get_segment_index();
if !id.is_empty() && !agent.is_empty() {
self.transcript_progress
.entry((id, idx))
.or_default()
.insert(agent);
}
}
}
Ok(())
}
/// Check if a transcript segment has been processed by a given agent.
pub fn is_segment_mined(&self, transcript_id: &str, segment_index: u32, agent: &str) -> bool {
self.transcript_progress
.get(&(transcript_id.to_string(), segment_index))
.is_some_and(|agents| agents.contains(agent))
}
/// Mark a transcript segment as successfully processed.
pub fn mark_segment_mined(&mut self, transcript_id: &str, segment_index: u32, agent: &str) -> Result<(), String> {
let seg = new_transcript_segment(transcript_id, segment_index, agent);
self.append_transcript_progress(&[seg])
}
/// Migrate old stub-node transcript markers into the new progress log.
/// Reads _observed-transcripts-f-*, _mined-transcripts#f-*, and _facts-* keys,
/// extracts transcript_id and segment_index, writes to transcript-progress.capnp,
/// then deletes the stub nodes.
pub fn migrate_transcript_progress(&mut self) -> Result<usize, String> {
let mut segments = Vec::new();
for key in self.nodes.keys() {
// _observed-transcripts-f-{UUID}.{segment}
if let Some(rest) = key.strip_prefix("_observed-transcripts-f-") {
if let Some((uuid, seg_str)) = rest.rsplit_once('.')
&& let Ok(seg) = seg_str.parse::<u32>() {
segments.push(new_transcript_segment(uuid, seg, "observation"));
}
}
// _mined-transcripts#f-{UUID}.{segment}
else if let Some(rest) = key.strip_prefix("_mined-transcripts#f-") {
if let Some((uuid, seg_str)) = rest.rsplit_once('.')
&& let Ok(seg) = seg_str.parse::<u32>() {
segments.push(new_transcript_segment(uuid, seg, "experience"));
}
}
// _mined-transcripts-f-{UUID}.{segment}
else if let Some(rest) = key.strip_prefix("_mined-transcripts-f-") {
if let Some((uuid, seg_str)) = rest.rsplit_once('.')
&& let Ok(seg) = seg_str.parse::<u32>() {
segments.push(new_transcript_segment(uuid, seg, "experience"));
}
}
// _facts-{UUID} (whole-file, segment 0)
else if let Some(uuid) = key.strip_prefix("_facts-") {
if !uuid.contains('-') || uuid.len() < 30 { continue; } // skip non-UUID
segments.push(new_transcript_segment(uuid, 0, "fact"));
}
}
let count = segments.len();
if count > 0 {
self.append_transcript_progress(&segments)?;
}
// Soft-delete the old stub nodes
let keys_to_delete: Vec<String> = self.nodes.keys()
.filter(|k| k.starts_with("_observed-transcripts-")
|| k.starts_with("_mined-transcripts")
|| (k.starts_with("_facts-") && !k.contains("fact_mine")))
.cloned()
.collect();
for key in &keys_to_delete {
if let Some(node) = self.nodes.get_mut(key) {
node.deleted = true;
}
}
if !keys_to_delete.is_empty() {
self.save()?;
}
Ok(count)
}
/// Record visits for a batch of node keys from a successful agent run.
pub fn record_agent_visits(&mut self, node_keys: &[String], agent: &str) -> Result<(), String> {
let visits: Vec<AgentVisit> = node_keys.iter()
.filter_map(|key| {
let node = self.nodes.get(key)?;
Some(new_visit(node.uuid, key, agent, "processed"))
})
.collect();
self.append_visits(&visits)
}
/// Get the last time an agent visited a node. Returns 0 if never visited.
pub fn last_visited(&self, node_key: &str, agent: &str) -> i64 {
self.visits.get(node_key)
.and_then(|agents| agents.get(agent))
.copied()
.unwrap_or(0)
}
/// Save the derived cache with log size header for staleness detection.
/// Uses atomic write (tmp + rename) to prevent partial reads.
pub fn save(&self) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
let path = state_path();
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).ok();
}
// Use log sizes from load time, not current filesystem sizes.
// If another writer appended since we loaded, our recorded size
// will be smaller than the actual log → next reader detects stale
// cache and replays the (correct, append-only) log.
let nodes_size = self.loaded_nodes_size;
let rels_size = self.loaded_rels_size;
let bincode_data = bincode::serialize(self)
.map_err(|e| format!("bincode serialize: {}", e))?;
let mut data = Vec::with_capacity(CACHE_HEADER_LEN + bincode_data.len());
data.extend_from_slice(&CACHE_MAGIC);
data.extend_from_slice(&nodes_size.to_le_bytes());
data.extend_from_slice(&rels_size.to_le_bytes());
data.extend_from_slice(&bincode_data);
// Atomic write: tmp file + rename
let tmp_path = path.with_extension("bin.tmp");
fs::write(&tmp_path, &data)
.map_err(|e| format!("write {}: {}", tmp_path.display(), e))?;
fs::rename(&tmp_path, &path)
.map_err(|e| format!("rename {}{}: {}", tmp_path.display(), path.display(), e))?;
// Also write rkyv snapshot (mmap-friendly)
if let Err(e) = self.save_snapshot(nodes_size, rels_size) {
eprintln!("rkyv snapshot save: {}", e);
}
Ok(())
}
/// Serialize store as rkyv snapshot with staleness header.
/// Assumes StoreLock is already held by caller.
fn save_snapshot(&self, nodes_size: u64, rels_size: u64) -> Result<(), String> {
let snap = Snapshot {
nodes: self.nodes.clone(),
relations: self.relations.iter().filter(|r| !r.deleted).cloned().collect(),
gaps: self.gaps.clone(),
params: self.params,
};
let rkyv_data = rkyv::to_bytes::<_, 256>(&snap)
.map_err(|e| format!("rkyv serialize: {}", e))?;
let mut data = Vec::with_capacity(RKYV_HEADER_LEN + rkyv_data.len());
data.extend_from_slice(&RKYV_MAGIC);
data.extend_from_slice(&1u32.to_le_bytes()); // format version
data.extend_from_slice(&nodes_size.to_le_bytes());
data.extend_from_slice(&rels_size.to_le_bytes());
data.extend_from_slice(&(rkyv_data.len() as u64).to_le_bytes());
data.extend_from_slice(&rkyv_data);
let path = snapshot_path();
let tmp_path = path.with_extension("rkyv.tmp");
fs::write(&tmp_path, &data)
.map_err(|e| format!("write {}: {}", tmp_path.display(), e))?;
fs::rename(&tmp_path, &path)
.map_err(|e| format!("rename: {}", e))?;
Ok(())
}
/// Try loading store from mmap'd rkyv snapshot.
/// Returns None if snapshot is missing or stale (log sizes don't match).
fn load_snapshot_mmap() -> Result<Option<Store>, String> {
let path = snapshot_path();
if !path.exists() { return Ok(None); }
let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);
let file = fs::File::open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mmap = unsafe { memmap2::Mmap::map(&file) }
.map_err(|e| format!("mmap {}: {}", path.display(), e))?;
if mmap.len() < RKYV_HEADER_LEN { return Ok(None); }
if mmap[..4] != RKYV_MAGIC { return Ok(None); }
// [4..8] = version, skip for now
let cached_nodes = u64::from_le_bytes(mmap[8..16].try_into().unwrap());
let cached_rels = u64::from_le_bytes(mmap[16..24].try_into().unwrap());
let data_len = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize;
if cached_nodes != nodes_size || cached_rels != rels_size {
return Ok(None); // stale
}
if mmap.len() < RKYV_HEADER_LEN + data_len {
return Ok(None); // truncated
}
let rkyv_data = &mmap[RKYV_HEADER_LEN..RKYV_HEADER_LEN + data_len];
// SAFETY: we wrote this file ourselves via save_snapshot().
// Skip full validation (check_archived_root) — the staleness header
// already confirms this snapshot matches the current log state.
let archived = unsafe { rkyv::archived_root::<Snapshot>(rkyv_data) };
let snap: Snapshot = <ArchivedSnapshot as rkyv::Deserialize<Snapshot, rkyv::Infallible>>
::deserialize(archived, &mut rkyv::Infallible).unwrap();
let mut store = Store {
nodes: snap.nodes,
relations: snap.relations,
gaps: snap.gaps,
params: snap.params,
..Default::default()
};
// Rebuild uuid_to_key (not serialized)
for (key, node) in &store.nodes {
store.uuid_to_key.insert(node.uuid, key.clone());
}
store.loaded_nodes_size = nodes_size;
store.loaded_rels_size = rels_size;
Ok(Some(store))
}
}
/// Strip .md suffix from all node keys and relation key strings.
/// Merges duplicates (bare key + .md key) by keeping the latest version.
pub fn strip_md_keys() -> Result<(), String> {
use super::strip_md_suffix;
let mut store = Store::load()?;
let mut renamed_nodes = 0usize;
let mut renamed_rels = 0usize;
let mut merged = 0usize;
// Collect keys that need renaming
let old_keys: Vec<String> = store.nodes.keys()
.filter(|k| k.ends_with(".md") || k.contains(".md#"))
.cloned()
.collect();
for old_key in &old_keys {
let new_key = strip_md_suffix(old_key);
if new_key == *old_key { continue; }
let mut node = store.nodes.remove(old_key).unwrap();
store.uuid_to_key.remove(&node.uuid);
if let Some(existing) = store.nodes.get(&new_key) {
// Merge: keep whichever has the higher version
if existing.version >= node.version {
eprintln!(" merge {}{} (keeping existing v{})",
old_key, new_key, existing.version);
merged += 1;
continue;
}
eprintln!(" merge {}{} (replacing v{} with v{})",
old_key, new_key, existing.version, node.version);
merged += 1;
}
node.key = new_key.clone();
node.version += 1;
store.uuid_to_key.insert(node.uuid, new_key.clone());
store.nodes.insert(new_key, node);
renamed_nodes += 1;
}
// Fix relation key strings
for rel in &mut store.relations {
let new_source = strip_md_suffix(&rel.source_key);
let new_target = strip_md_suffix(&rel.target_key);
if new_source != rel.source_key || new_target != rel.target_key {
rel.source_key = new_source;
rel.target_key = new_target;
rel.version += 1;
renamed_rels += 1;
}
}
if renamed_nodes == 0 && renamed_rels == 0 && merged == 0 {
eprintln!("No .md suffixes found — store is clean");
return Ok(());
}
eprintln!("Renamed {} nodes, {} relations, merged {} duplicates",
renamed_nodes, renamed_rels, merged);
// Append migrated nodes/relations to the log (preserving history)
let changed_nodes: Vec<_> = old_keys.iter()
.filter_map(|old_key| {
let new_key = strip_md_suffix(old_key);
store.nodes.get(&new_key).cloned()
})
.collect();
if !changed_nodes.is_empty() {
store.append_nodes(&changed_nodes)?;
}
// Invalidate caches so next load replays from logs
for p in [state_path(), snapshot_path()] {
if p.exists() {
fs::remove_file(&p).ok();
}
}
eprintln!("Migration complete (appended to existing logs)");
Ok(())
}
// DO NOT USE. This function destroyed the append-only log history on
// 2026-03-14 when strip_md_keys() called it. It:
//
// 1. Truncates nodes.capnp via File::create() — all historical
// versions of every node are permanently lost
// 2. Writes only from the in-memory store — so any node missing
// due to a loading bug is also permanently lost
// 3. Makes no backup of the old log before overwriting
// 4. Filters out deleted relations, destroying deletion history
//
// The correct approach for migrations is to APPEND new versions
// (with updated keys) and delete markers (for old keys) to the
// existing log, preserving all history.
//
// This function is kept (dead) so the comment survives as a warning.
// If you need log compaction in the future, design it properly:
// back up first, preserve history, and never write from a potentially
// incomplete in-memory snapshot.
#[allow(dead_code)]
fn _rewrite_store_disabled(_store: &Store) -> Result<(), String> {
panic!("rewrite_store is disabled — see comment above");
}
/// Check and repair corrupt capnp log files.
///
/// Reads each message sequentially, tracking file position. On the first
/// corrupt message, truncates the file to the last good position. Also
/// removes stale caches so the next load replays from the repaired log.
pub fn fsck() -> Result<(), String> {
let mut any_corrupt = false;
for (path, kind) in [
(nodes_path(), "node"),
(relations_path(), "relation"),
] {
if !path.exists() { continue; }
let file = fs::File::open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let file_len = file.metadata()
.map_err(|e| format!("stat {}: {}", path.display(), e))?.len();
let mut reader = BufReader::new(file);
let mut good_messages = 0u64;
let mut last_good_pos = 0u64;
loop {
let pos = reader.stream_position()
.map_err(|e| format!("tell {}: {}", path.display(), e))?;
let msg = match serialize::read_message(&mut reader, message::ReaderOptions::new()) {
Ok(m) => m,
Err(_) => {
// read_message fails at EOF (normal) or on corrupt framing
if pos < file_len {
// Not at EOF — corrupt framing
eprintln!("{}: corrupt message at offset {}, truncating", kind, pos);
any_corrupt = true;
drop(reader);
let file = fs::OpenOptions::new().write(true).open(&path)
.map_err(|e| format!("open for truncate: {}", e))?;
file.set_len(pos)
.map_err(|e| format!("truncate {}: {}", path.display(), e))?;
eprintln!("{}: truncated from {} to {} bytes ({} good messages)",
kind, file_len, pos, good_messages);
}
break;
}
};
// Validate the message content too
let valid = if kind == "node" {
msg.get_root::<memory_capnp::node_log::Reader>()
.and_then(|l| l.get_nodes().map(|_| ()))
.is_ok()
} else {
msg.get_root::<memory_capnp::relation_log::Reader>()
.and_then(|l| l.get_relations().map(|_| ()))
.is_ok()
};
if valid {
good_messages += 1;
last_good_pos = reader.stream_position()
.map_err(|e| format!("tell {}: {}", path.display(), e))?;
} else {
eprintln!("{}: corrupt message content at offset {}, truncating to {}",
kind, pos, last_good_pos);
any_corrupt = true;
drop(reader);
let file = fs::OpenOptions::new().write(true).open(&path)
.map_err(|e| format!("open for truncate: {}", e))?;
file.set_len(last_good_pos)
.map_err(|e| format!("truncate {}: {}", path.display(), e))?;
eprintln!("{}: truncated from {} to {} bytes ({} good messages)",
kind, file_len, last_good_pos, good_messages);
break;
}
}
if !any_corrupt {
eprintln!("{}: {} messages, all clean", kind, good_messages);
}
}
if any_corrupt {
// Nuke caches so next load replays from the repaired logs
for p in [state_path(), snapshot_path()] {
if p.exists() {
fs::remove_file(&p)
.map_err(|e| format!("remove {}: {}", p.display(), e))?;
eprintln!("removed stale cache: {}", p.display());
}
}
eprintln!("repair complete — run `poc-memory status` to verify");
} else {
eprintln!("store is clean");
}
Ok(())
}

View file

@ -0,0 +1,628 @@
// Core types for the memory store
//
// Node, Relation, enums, Params, and supporting types. Also contains
// the capnp serialization macros that generate bidirectional conversion.
use crate::memory_capnp;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::os::unix::io::AsRawFd;
use std::path::PathBuf;
use std::time::{SystemTime, UNIX_EPOCH};
// ---------------------------------------------------------------------------
// Capnp serialization macros
//
// Declarative mapping between Rust types and capnp generated types.
// Adding a field to the schema means adding it in one place below;
// both read and write are generated from the same declaration.
// ---------------------------------------------------------------------------
/// Generate to_capnp/from_capnp conversion methods for an enum.
macro_rules! capnp_enum {
($rust_type:ident, $capnp_type:path, [$($variant:ident),+ $(,)?]) => {
impl $rust_type {
#[allow(clippy::wrong_self_convention, dead_code)]
pub(crate) fn to_capnp(&self) -> $capnp_type {
match self {
$(Self::$variant => <$capnp_type>::$variant,)+
}
}
pub(crate) fn from_capnp(v: $capnp_type) -> Self {
match v {
$(<$capnp_type>::$variant => Self::$variant,)+
}
}
}
};
}
/// Generate from_capnp/to_capnp methods for a struct with capnp serialization.
/// Fields are grouped by serialization kind:
/// text - capnp Text fields (String in Rust)
/// uuid - capnp Data fields ([u8; 16] in Rust)
/// prim - copy types (u32, f32, f64, bool)
/// enm - enums with to_capnp/from_capnp methods
/// skip - Rust-only fields not in capnp (set to Default on read)
macro_rules! capnp_message {
(
$struct:ident,
reader: $reader:ty,
builder: $builder:ty,
text: [$($tf:ident),* $(,)?],
uuid: [$($uf:ident),* $(,)?],
prim: [$($pf:ident),* $(,)?],
enm: [$($ef:ident: $et:ident),* $(,)?],
skip: [$($sf:ident),* $(,)?] $(,)?
) => {
impl $struct {
pub fn from_capnp(r: $reader) -> Result<Self, String> {
paste::paste! {
Ok(Self {
$($tf: read_text(r.[<get_ $tf>]()),)*
$($uf: read_uuid(r.[<get_ $uf>]()),)*
$($pf: r.[<get_ $pf>](),)*
$($ef: $et::from_capnp(
r.[<get_ $ef>]().map_err(|_| concat!("bad ", stringify!($ef)))?
),)*
$($sf: Default::default(),)*
})
}
}
pub fn to_capnp(&self, mut b: $builder) {
paste::paste! {
$(b.[<set_ $tf>](&self.$tf);)*
$(b.[<set_ $uf>](&self.$uf);)*
$(b.[<set_ $pf>](self.$pf);)*
$(b.[<set_ $ef>](self.$ef.to_capnp());)*
}
}
}
};
}
pub fn memory_dir() -> PathBuf {
crate::config::get().data_dir.clone()
}
pub fn nodes_path() -> PathBuf { memory_dir().join("nodes.capnp") }
pub(crate) fn relations_path() -> PathBuf { memory_dir().join("relations.capnp") }
pub(crate) fn state_path() -> PathBuf { memory_dir().join("state.bin") }
pub(crate) fn snapshot_path() -> PathBuf { memory_dir().join("snapshot.rkyv") }
fn lock_path() -> PathBuf { memory_dir().join(".store.lock") }
/// RAII file lock using flock(2). Dropped when scope exits.
pub(crate) struct StoreLock {
_file: fs::File,
}
impl StoreLock {
pub(crate) fn acquire() -> Result<Self, String> {
let path = lock_path();
let file = fs::OpenOptions::new()
.create(true).truncate(false).write(true).open(&path)
.map_err(|e| format!("open lock {}: {}", path.display(), e))?;
// Blocking exclusive lock
let ret = unsafe { libc::flock(file.as_raw_fd(), libc::LOCK_EX) };
if ret != 0 {
return Err(format!("flock: {}", std::io::Error::last_os_error()));
}
Ok(StoreLock { _file: file })
}
// Lock released automatically when _file is dropped (flock semantics)
}
pub fn now_epoch() -> i64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs() as i64
}
/// Convert epoch seconds to broken-down local time components.
/// Returns (year, month, day, hour, minute, second).
pub fn epoch_to_local(epoch: i64) -> (i32, u32, u32, u32, u32, u32) {
use chrono::{Datelike, Local, TimeZone, Timelike};
let dt = match Local.timestamp_opt(epoch, 0) {
chrono::LocalResult::Single(dt) => dt,
chrono::LocalResult::Ambiguous(dt, _) => dt,
chrono::LocalResult::None => {
// DST gap or invalid — try shifting, then fall back to UTC
Local.timestamp_opt(epoch + 3600, 0)
.earliest()
.or_else(|| chrono::Utc.timestamp_opt(epoch, 0).earliest()
.map(|dt| dt.with_timezone(&Local)))
.unwrap_or_else(|| {
// Completely invalid timestamp — use epoch 0
chrono::Utc.timestamp_opt(0, 0).unwrap().with_timezone(&Local)
})
}
};
(
dt.year(),
dt.month(),
dt.day(),
dt.hour(),
dt.minute(),
dt.second(),
)
}
/// Format epoch as "YYYY-MM-DD"
pub fn format_date(epoch: i64) -> String {
let (y, m, d, _, _, _) = epoch_to_local(epoch);
format!("{:04}-{:02}-{:02}", y, m, d)
}
/// Format epoch as "YYYY-MM-DDTHH:MM"
pub fn format_datetime(epoch: i64) -> String {
let (y, m, d, h, min, _) = epoch_to_local(epoch);
format!("{:04}-{:02}-{:02}T{:02}:{:02}", y, m, d, h, min)
}
/// Format epoch as "YYYY-MM-DD HH:MM"
pub fn format_datetime_space(epoch: i64) -> String {
let (y, m, d, h, min, _) = epoch_to_local(epoch);
format!("{:04}-{:02}-{:02} {:02}:{:02}", y, m, d, h, min)
}
/// Compact timestamp for use in keys: "YYYYMMDDTHHMMss"
pub fn compact_timestamp() -> String {
let (y, m, d, h, min, s) = epoch_to_local(now_epoch());
format!("{:04}{:02}{:02}T{:02}{:02}{:02}", y, m, d, h, min, s)
}
pub fn today() -> String {
format_date(now_epoch())
}
// In-memory node representation
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct Node {
pub uuid: [u8; 16],
pub version: u32,
pub timestamp: i64,
pub node_type: NodeType,
pub provenance: String,
pub key: String,
pub content: String,
pub weight: f32,
pub emotion: f32,
pub deleted: bool,
pub source_ref: String,
pub created: String,
pub retrievals: u32,
pub uses: u32,
pub wrongs: u32,
pub state_tag: String,
pub last_replayed: i64,
pub spaced_repetition_interval: u32,
// Position within file (section index, for export ordering)
#[serde(default)]
pub position: u32,
// Stable creation timestamp (unix epoch seconds). Set once at creation;
// never updated on rename or content update. Zero for legacy nodes.
#[serde(default)]
pub created_at: i64,
// Derived fields (not in capnp, computed from graph)
#[serde(default)]
pub community_id: Option<u32>,
#[serde(default)]
pub clustering_coefficient: Option<f32>,
#[serde(default)]
pub degree: Option<u32>,
}
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct Relation {
pub uuid: [u8; 16],
pub version: u32,
pub timestamp: i64,
pub source: [u8; 16],
pub target: [u8; 16],
pub rel_type: RelationType,
pub strength: f32,
pub provenance: String,
pub deleted: bool,
pub source_key: String,
pub target_key: String,
}
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub enum NodeType {
EpisodicSession,
EpisodicDaily,
EpisodicWeekly,
Semantic,
EpisodicMonthly,
}
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub enum Provenance {
Manual,
Journal,
Agent, // legacy catch-all, prefer specific variants below
Dream,
Derived,
AgentExperienceMine,
AgentKnowledgeObservation,
AgentKnowledgePattern,
AgentKnowledgeConnector,
AgentKnowledgeChallenger,
AgentConsolidate,
AgentDigest,
AgentFactMine,
AgentDecay,
}
impl Provenance {
/// Parse from POC_PROVENANCE env var. Returns None if unset.
pub fn from_env() -> Option<Self> {
std::env::var("POC_PROVENANCE").ok().and_then(|s| Self::from_label(&s))
}
pub fn from_label(s: &str) -> Option<Self> {
Some(match s {
"manual" => Self::Manual,
"journal" => Self::Journal,
"agent" => Self::Agent,
"dream" => Self::Dream,
"derived" => Self::Derived,
"agent:experience-mine" => Self::AgentExperienceMine,
"agent:knowledge-observation"=> Self::AgentKnowledgeObservation,
"agent:knowledge-pattern" => Self::AgentKnowledgePattern,
"agent:knowledge-connector" => Self::AgentKnowledgeConnector,
"agent:knowledge-challenger" => Self::AgentKnowledgeChallenger,
"agent:consolidate" => Self::AgentConsolidate,
"agent:digest" => Self::AgentDigest,
"agent:fact-mine" => Self::AgentFactMine,
"agent:decay" => Self::AgentDecay,
_ => return None,
})
}
pub fn label(&self) -> &'static str {
match self {
Self::Manual => "manual",
Self::Journal => "journal",
Self::Agent => "agent",
Self::Dream => "dream",
Self::Derived => "derived",
Self::AgentExperienceMine => "agent:experience-mine",
Self::AgentKnowledgeObservation => "agent:knowledge-observation",
Self::AgentKnowledgePattern => "agent:knowledge-pattern",
Self::AgentKnowledgeConnector => "agent:knowledge-connector",
Self::AgentKnowledgeChallenger => "agent:knowledge-challenger",
Self::AgentConsolidate => "agent:consolidate",
Self::AgentDigest => "agent:digest",
Self::AgentFactMine => "agent:fact-mine",
Self::AgentDecay => "agent:decay",
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub enum RelationType {
Link,
Causal,
Auto,
}
capnp_enum!(NodeType, memory_capnp::NodeType,
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic, EpisodicMonthly]);
capnp_enum!(Provenance, memory_capnp::Provenance,
[Manual, Journal, Agent, Dream, Derived,
AgentExperienceMine, AgentKnowledgeObservation, AgentKnowledgePattern,
AgentKnowledgeConnector, AgentKnowledgeChallenger, AgentConsolidate,
AgentDigest, AgentFactMine, AgentDecay]);
capnp_enum!(RelationType, memory_capnp::RelationType,
[Link, Causal, Auto]);
capnp_message!(Node,
reader: memory_capnp::content_node::Reader<'_>,
builder: memory_capnp::content_node::Builder<'_>,
text: [key, content, source_ref, created, state_tag, provenance],
uuid: [uuid],
prim: [version, timestamp, weight, emotion, deleted,
retrievals, uses, wrongs, last_replayed,
spaced_repetition_interval, position, created_at],
enm: [node_type: NodeType],
skip: [community_id, clustering_coefficient, degree],
);
impl Node {
/// Read from capnp with migration: if the new provenance text field
/// is empty (old record), fall back to the deprecated provenanceOld enum.
pub fn from_capnp_migrate(r: memory_capnp::content_node::Reader<'_>) -> Result<Self, String> {
let mut node = Self::from_capnp(r)?;
if node.provenance.is_empty()
&& let Ok(old) = r.get_provenance_old() {
node.provenance = Provenance::from_capnp(old).label().to_string();
}
// Sanitize timestamps: old capnp records have raw offsets instead
// of unix epoch. Anything past year 2100 (~4102444800) is bogus.
const MAX_SANE_EPOCH: i64 = 4_102_444_800;
if node.timestamp > MAX_SANE_EPOCH || node.timestamp < 0 {
node.timestamp = node.created_at;
}
if node.created_at > MAX_SANE_EPOCH || node.created_at < 0 {
node.created_at = node.timestamp.min(MAX_SANE_EPOCH);
}
Ok(node)
}
}
capnp_message!(Relation,
reader: memory_capnp::relation::Reader<'_>,
builder: memory_capnp::relation::Builder<'_>,
text: [source_key, target_key, provenance],
uuid: [uuid, source, target],
prim: [version, timestamp, strength, deleted],
enm: [rel_type: RelationType],
skip: [],
);
impl Relation {
pub fn from_capnp_migrate(r: memory_capnp::relation::Reader<'_>) -> Result<Self, String> {
let mut rel = Self::from_capnp(r)?;
if rel.provenance.is_empty()
&& let Ok(old) = r.get_provenance_old() {
rel.provenance = Provenance::from_capnp(old).label().to_string();
}
Ok(rel)
}
}
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct RetrievalEvent {
pub query: String,
pub timestamp: String,
pub results: Vec<String>,
pub used: Option<Vec<String>>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct Params {
pub default_weight: f64,
pub decay_factor: f64,
pub use_boost: f64,
pub prune_threshold: f64,
pub edge_decay: f64,
pub max_hops: u32,
pub min_activation: f64,
}
impl Default for Params {
fn default() -> Self {
Params {
default_weight: 0.7,
decay_factor: 0.95,
use_boost: 0.15,
prune_threshold: 0.1,
edge_decay: 0.3,
max_hops: 3,
min_activation: 0.05,
}
}
}
// Gap record — something we looked for but didn't find
#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub struct GapRecord {
pub description: String,
pub timestamp: String,
}
/// Per-node agent visit index: node_key → (agent_type → last_visit_timestamp)
pub type VisitIndex = HashMap<String, HashMap<String, i64>>;
// The full in-memory store
#[derive(Default, Serialize, Deserialize)]
pub struct Store {
pub nodes: HashMap<String, Node>, // key → latest node
#[serde(skip)]
pub uuid_to_key: HashMap<[u8; 16], String>, // uuid → key (rebuilt from nodes)
pub relations: Vec<Relation>, // all active relations
pub retrieval_log: Vec<RetrievalEvent>,
pub gaps: Vec<GapRecord>,
pub params: Params,
/// Agent visit tracking: node_key → (agent_type → last_visit_epoch)
#[serde(default)]
pub visits: VisitIndex,
/// Transcript mining progress: (transcript_id, segment_index) → set of agents that processed it
#[serde(default)]
pub transcript_progress: HashMap<(String, u32), HashSet<String>>,
/// Log sizes at load time — used by save() to write correct staleness header.
/// If another writer appended since we loaded, our cache will be marked stale
/// (recorded size < actual size), forcing the next reader to replay the log.
#[serde(skip)]
pub(crate) loaded_nodes_size: u64,
#[serde(skip)]
pub(crate) loaded_rels_size: u64,
}
/// Snapshot for mmap: full store state minus retrieval_log (which
/// is append-only in retrieval.log). rkyv zero-copy serialization
/// lets us mmap this and access archived data without deserialization.
#[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[archive(check_bytes)]
pub(crate) struct Snapshot {
pub(crate) nodes: HashMap<String, Node>,
pub(crate) relations: Vec<Relation>,
pub(crate) gaps: Vec<GapRecord>,
pub(crate) params: Params,
}
// rkyv snapshot header: 32 bytes (multiple of 16 for alignment after mmap)
// [0..4] magic "RKV\x01"
// [4..8] format version (u32 LE)
// [8..16] nodes.capnp file size (u64 LE) — staleness check
// [16..24] relations.capnp file size (u64 LE)
// [24..32] rkyv data length (u64 LE)
pub(crate) const RKYV_MAGIC: [u8; 4] = *b"RKV\x01";
pub(crate) const RKYV_HEADER_LEN: usize = 32;
// state.bin header: magic + log file sizes for staleness detection.
// File sizes are race-free for append-only logs (they only grow),
// unlike mtimes which race with concurrent writers.
pub(crate) const CACHE_MAGIC: [u8; 4] = *b"POC\x01";
pub(crate) const CACHE_HEADER_LEN: usize = 4 + 8 + 8; // magic + nodes_size + rels_size
// Cap'n Proto serialization helpers
/// Read a capnp text field, returning empty string on any error
pub(crate) fn read_text(result: capnp::Result<capnp::text::Reader>) -> String {
result.ok()
.and_then(|t| t.to_str().ok())
.unwrap_or("")
.to_string()
}
/// Read a capnp data field as [u8; 16], zero-padded
pub(crate) fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] {
let mut out = [0u8; 16];
if let Ok(data) = result
&& data.len() >= 16 {
out.copy_from_slice(&data[..16]);
}
out
}
/// Create a new node with defaults
pub fn new_node(key: &str, content: &str) -> Node {
Node {
uuid: *Uuid::new_v4().as_bytes(),
version: 1,
timestamp: now_epoch(),
node_type: NodeType::Semantic,
provenance: "manual".to_string(),
key: key.to_string(),
content: content.to_string(),
weight: 0.7,
emotion: 0.0,
deleted: false,
source_ref: String::new(),
created: today(),
retrievals: 0,
uses: 0,
wrongs: 0,
state_tag: String::new(),
last_replayed: 0,
spaced_repetition_interval: 1,
position: 0,
created_at: now_epoch(),
community_id: None,
clustering_coefficient: None,
degree: None,
}
}
/// Agent visit record — tracks when an agent successfully processed a node
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AgentVisit {
pub node_uuid: [u8; 16],
pub node_key: String,
pub agent: String,
pub timestamp: i64,
pub outcome: String,
}
capnp_message!(AgentVisit,
reader: memory_capnp::agent_visit::Reader<'_>,
builder: memory_capnp::agent_visit::Builder<'_>,
text: [node_key, agent, outcome],
uuid: [node_uuid],
prim: [timestamp],
enm: [],
skip: [],
);
pub fn new_visit(node_uuid: [u8; 16], node_key: &str, agent: &str, outcome: &str) -> AgentVisit {
AgentVisit {
node_uuid,
node_key: node_key.to_string(),
agent: agent.to_string(),
timestamp: now_epoch(),
outcome: outcome.to_string(),
}
}
pub(crate) fn visits_path() -> PathBuf { memory_dir().join("visits.capnp") }
/// Transcript mining progress — tracks which segments have been processed
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TranscriptSegment {
pub transcript_id: String,
pub segment_index: u32,
pub agent: String,
pub timestamp: i64,
}
capnp_message!(TranscriptSegment,
reader: memory_capnp::transcript_segment::Reader<'_>,
builder: memory_capnp::transcript_segment::Builder<'_>,
text: [transcript_id, agent],
uuid: [],
prim: [segment_index, timestamp],
enm: [],
skip: [],
);
pub fn new_transcript_segment(transcript_id: &str, segment_index: u32, agent: &str) -> TranscriptSegment {
TranscriptSegment {
transcript_id: transcript_id.to_string(),
segment_index,
agent: agent.to_string(),
timestamp: now_epoch(),
}
}
pub(crate) fn transcript_progress_path() -> PathBuf { memory_dir().join("transcript-progress.capnp") }
/// Create a new relation.
/// Provenance is set from POC_PROVENANCE env var if present, else "manual".
pub fn new_relation(
source_uuid: [u8; 16],
target_uuid: [u8; 16],
rel_type: RelationType,
strength: f32,
source_key: &str,
target_key: &str,
) -> Relation {
// Use raw env var for provenance — agent names are dynamic
let provenance = std::env::var("POC_PROVENANCE")
.unwrap_or_else(|_| "manual".to_string());
Relation {
uuid: *Uuid::new_v4().as_bytes(),
version: 1,
timestamp: now_epoch(),
source: source_uuid,
target: target_uuid,
rel_type,
strength,
provenance,
deleted: false,
source_key: source_key.to_string(),
target_key: target_key.to_string(),
}
}

View file

@ -0,0 +1,217 @@
// Read-only access abstractions for the memory store
//
// StoreView: trait abstracting over owned Store and zero-copy MmapView.
// MmapView: mmap'd rkyv snapshot for sub-millisecond read-only access.
// AnyView: enum dispatch selecting fastest available view at runtime.
use super::types::*;
use std::fs;
// ---------------------------------------------------------------------------
// StoreView: read-only access trait for search and graph code.
//
// Abstracts over owned Store and zero-copy MmapView so the same
// spreading-activation and graph code works with either.
// ---------------------------------------------------------------------------
pub trait StoreView {
/// Iterate all nodes. Callback receives (key, content, weight).
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F);
/// Iterate all nodes with metadata. Callback receives (key, node_type, timestamp).
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, f: F);
/// Iterate all relations. Callback receives (source_key, target_key, strength, rel_type).
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F);
/// Node weight by key, or the default weight if missing.
fn node_weight(&self, key: &str) -> f64;
/// Node content by key.
fn node_content(&self, key: &str) -> Option<&str>;
/// Search/graph parameters.
fn params(&self) -> Params;
}
impl StoreView for Store {
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, mut f: F) {
for (key, node) in &self.nodes {
f(key, &node.content, node.weight);
}
}
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, mut f: F) {
for (key, node) in &self.nodes {
f(key, node.node_type, node.timestamp);
}
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
for rel in &self.relations {
if rel.deleted { continue; }
f(&rel.source_key, &rel.target_key, rel.strength, rel.rel_type);
}
}
fn node_weight(&self, key: &str) -> f64 {
self.nodes.get(key).map(|n| n.weight as f64).unwrap_or(self.params.default_weight)
}
fn node_content(&self, key: &str) -> Option<&str> {
self.nodes.get(key).map(|n| n.content.as_str())
}
fn params(&self) -> Params {
self.params
}
}
// ---------------------------------------------------------------------------
// MmapView: zero-copy store access via mmap'd rkyv snapshot.
//
// Holds the mmap alive; all string reads go directly into the mapped
// pages without allocation. Falls back to None if snapshot is stale.
// ---------------------------------------------------------------------------
pub struct MmapView {
mmap: memmap2::Mmap,
_file: fs::File,
data_offset: usize,
data_len: usize,
}
impl MmapView {
/// Try to open a fresh rkyv snapshot. Returns None if missing or stale.
pub fn open() -> Option<Self> {
let path = snapshot_path();
let file = fs::File::open(&path).ok()?;
let mmap = unsafe { memmap2::Mmap::map(&file) }.ok()?;
if mmap.len() < RKYV_HEADER_LEN { return None; }
if mmap[..4] != RKYV_MAGIC { return None; }
let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);
let cached_nodes = u64::from_le_bytes(mmap[8..16].try_into().unwrap());
let cached_rels = u64::from_le_bytes(mmap[16..24].try_into().unwrap());
let data_len = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize;
if cached_nodes != nodes_size || cached_rels != rels_size { return None; }
if mmap.len() < RKYV_HEADER_LEN + data_len { return None; }
Some(MmapView { mmap, _file: file, data_offset: RKYV_HEADER_LEN, data_len })
}
fn snapshot(&self) -> &ArchivedSnapshot {
let data = &self.mmap[self.data_offset..self.data_offset + self.data_len];
unsafe { rkyv::archived_root::<Snapshot>(data) }
}
}
impl StoreView for MmapView {
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, mut f: F) {
let snap = self.snapshot();
for (key, node) in snap.nodes.iter() {
f(key, &node.content, node.weight);
}
}
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, mut f: F) {
let snap = self.snapshot();
for (key, node) in snap.nodes.iter() {
let nt = match node.node_type {
ArchivedNodeType::EpisodicSession => NodeType::EpisodicSession,
ArchivedNodeType::EpisodicDaily => NodeType::EpisodicDaily,
ArchivedNodeType::EpisodicWeekly => NodeType::EpisodicWeekly,
ArchivedNodeType::EpisodicMonthly => NodeType::EpisodicMonthly,
ArchivedNodeType::Semantic => NodeType::Semantic,
};
f(key, nt, node.timestamp);
}
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
let snap = self.snapshot();
for rel in snap.relations.iter() {
if rel.deleted { continue; }
let rt = match rel.rel_type {
ArchivedRelationType::Link => RelationType::Link,
ArchivedRelationType::Causal => RelationType::Causal,
ArchivedRelationType::Auto => RelationType::Auto,
};
f(&rel.source_key, &rel.target_key, rel.strength, rt);
}
}
fn node_weight(&self, key: &str) -> f64 {
let snap = self.snapshot();
snap.nodes.get(key)
.map(|n| n.weight as f64)
.unwrap_or(snap.params.default_weight)
}
fn node_content(&self, key: &str) -> Option<&str> {
let snap = self.snapshot();
snap.nodes.get(key).map(|n| &*n.content)
}
fn params(&self) -> Params {
let p = &self.snapshot().params;
Params {
default_weight: p.default_weight,
decay_factor: p.decay_factor,
use_boost: p.use_boost,
prune_threshold: p.prune_threshold,
edge_decay: p.edge_decay,
max_hops: p.max_hops,
min_activation: p.min_activation,
}
}
}
// ---------------------------------------------------------------------------
// AnyView: enum dispatch for read-only access.
//
// MmapView when the snapshot is fresh, owned Store as fallback.
// The match on each call is a single predicted branch — zero overhead.
// ---------------------------------------------------------------------------
pub enum AnyView {
Mmap(MmapView),
Owned(Store),
}
impl AnyView {
/// Load the fastest available view: mmap snapshot or owned store.
pub fn load() -> Result<Self, String> {
if let Some(mv) = MmapView::open() {
Ok(AnyView::Mmap(mv))
} else {
Ok(AnyView::Owned(Store::load()?))
}
}
}
impl StoreView for AnyView {
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F) {
match self { AnyView::Mmap(v) => v.for_each_node(f), AnyView::Owned(s) => s.for_each_node(f) }
}
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, f: F) {
match self { AnyView::Mmap(v) => v.for_each_node_meta(f), AnyView::Owned(s) => s.for_each_node_meta(f) }
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F) {
match self { AnyView::Mmap(v) => v.for_each_relation(f), AnyView::Owned(s) => s.for_each_relation(f) }
}
fn node_weight(&self, key: &str) -> f64 {
match self { AnyView::Mmap(v) => v.node_weight(key), AnyView::Owned(s) => s.node_weight(key) }
}
fn node_content(&self, key: &str) -> Option<&str> {
match self { AnyView::Mmap(v) => v.node_content(key), AnyView::Owned(s) => s.node_content(key) }
}
fn params(&self) -> Params {
match self { AnyView::Mmap(v) => v.params(), AnyView::Owned(s) => s.params() }
}
}

View file

@ -0,0 +1,340 @@
// Transcript JSONL parsing utilities.
//
// Provides mmap-based backward scanning of Claude Code transcript files
// and compaction detection. Used by memory-search (hook mode) and
// parse-claude-conversation (debug tool).
use memchr::memrchr3;
use memmap2::Mmap;
use serde_json::Value;
use std::fs;
use std::path::Path;
/// Scan backwards through mmap'd bytes, yielding byte slices of complete
/// top-level JSON objects (outermost { to matching }).
///
/// Uses memrchr3 (SIMD) to jump between structurally significant bytes
/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
/// skipping braces inside JSON strings. Returns objects in reverse order
/// (newest first).
pub struct JsonlBackwardIter<'a> {
data: &'a [u8],
pos: usize,
}
impl<'a> JsonlBackwardIter<'a> {
pub fn new(data: &'a [u8]) -> Self {
Self { data, pos: data.len() }
}
}
impl<'a> Iterator for JsonlBackwardIter<'a> {
type Item = &'a [u8];
fn next(&mut self) -> Option<Self::Item> {
// Find the closing } of the next object, skipping } inside strings
let close = {
let mut in_string = false;
loop {
let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?;
self.pos = p;
let ch = self.data[p];
if in_string {
if ch == b'"' {
let mut bs = 0;
while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
bs += 1;
}
if bs % 2 == 0 { in_string = false; }
}
continue;
}
match ch {
b'}' => break p,
b'"' => in_string = true,
_ => {}
}
}
};
// Track brace depth to find matching {
let mut depth: usize = 1;
let mut in_string = false;
loop {
let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?;
self.pos = p;
let ch = self.data[p];
if in_string {
if ch == b'"' {
// Check for escaped quote (count preceding backslashes)
let mut bs = 0;
while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
bs += 1;
}
if bs % 2 == 0 {
in_string = false;
}
}
// { and } inside strings don't affect depth
continue;
}
match ch {
b'"' => { in_string = true; }
b'}' => { depth += 1; }
b'{' => {
depth -= 1;
if depth == 0 {
return Some(&self.data[self.pos..=close]);
}
}
_ => {}
}
}
}
}
/// Find the byte offset of the last compaction summary in mmap'd transcript data.
///
/// Scans backward for a user-type message whose content starts with
/// "This session is being continued". Returns the byte offset of the
/// JSON object's opening brace.
pub fn find_last_compaction(data: &[u8]) -> Option<usize> {
let marker = b"This session is being continued";
for obj_bytes in JsonlBackwardIter::new(data) {
// Quick byte check before parsing
if !contains_bytes(obj_bytes, marker) {
continue;
}
let obj: Value = match serde_json::from_slice(obj_bytes) {
Ok(v) => v,
Err(_) => continue,
};
if obj.get("type").and_then(|v| v.as_str()) != Some("user") {
continue;
}
if let Some(content) = obj.get("message")
.and_then(|m| m.get("content"))
.and_then(|c| c.as_str())
&& content.starts_with("This session is being continued") {
let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize;
return Some(offset);
}
}
None
}
/// Find the byte offset of the last compaction in a transcript file.
/// Returns None if the file can't be opened or has no compaction.
pub fn find_last_compaction_in_file(path: &str) -> Option<u64> {
if path.is_empty() { return None; }
let file = fs::File::open(path).ok()?;
let meta = file.metadata().ok()?;
if meta.len() == 0 { return None; }
let mmap = unsafe { Mmap::map(&file).ok()? };
find_last_compaction(&mmap).map(|off| off as u64)
}
/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
pub fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
let file = fs::File::open(path).ok()?;
let meta = file.metadata().ok()?;
if meta.len() == 0 { return None; }
let mmap = unsafe { Mmap::map(&file).ok()? };
Some((mmap, file))
}
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
haystack.windows(needle.len()).any(|w| w == needle)
}
/// Reverse iterator over user/assistant messages in a transcript file.
/// Yields (role, text, timestamp) tuples newest-first. The caller decides
/// when to stop (byte budget, count, etc).
pub struct TailMessages {
_file: fs::File,
mmap: Mmap,
pos: usize,
}
impl TailMessages {
pub fn open(path: &str) -> Option<Self> {
let (mmap, file) = mmap_transcript(path)?;
let pos = mmap.len();
Some(Self { _file: file, mmap, pos })
}
}
impl Iterator for TailMessages {
type Item = (String, String, String);
fn next(&mut self) -> Option<Self::Item> {
loop {
// Find closing }, skipping } inside strings
let close = {
let mut in_string = false;
loop {
let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
self.pos = p;
let ch = self.mmap[p];
if in_string {
if ch == b'"' {
let mut bs = 0;
while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
bs += 1;
}
if bs % 2 == 0 { in_string = false; }
}
continue;
}
match ch {
b'}' => break p,
b'"' => in_string = true,
_ => {}
}
}
};
// Track brace depth to find matching {
let mut depth: usize = 1;
let mut in_string = false;
let open = loop {
let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
self.pos = p;
let ch = self.mmap[p];
if in_string {
if ch == b'"' {
let mut bs = 0;
while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
bs += 1;
}
if bs % 2 == 0 { in_string = false; }
}
continue;
}
match ch {
b'"' => { in_string = true; }
b'}' => { depth += 1; }
b'{' => {
depth -= 1;
if depth == 0 { break p; }
}
_ => {}
}
};
let obj_bytes = &self.mmap[open..=close];
// The "type" field is near the start of top-level objects.
// Only check the first 200 bytes to avoid scanning megabyte objects.
let prefix = &obj_bytes[..obj_bytes.len().min(200)];
let is_user = memchr::memmem::find(prefix, b"\"type\":\"user\"").is_some();
let is_assistant = !is_user
&& memchr::memmem::find(prefix, b"\"type\":\"assistant\"").is_some();
if !is_user && !is_assistant { continue; }
let obj: Value = match serde_json::from_slice(obj_bytes) {
Ok(v) => v,
Err(_) => continue,
};
let msg_type = if is_user { "user" } else { "assistant" };
let msg = obj.get("message").unwrap_or(&obj);
let text = match msg.get("content") {
Some(Value::String(s)) => s.clone(),
Some(Value::Array(arr)) => {
arr.iter()
.filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
.filter_map(|b| b.get("text").and_then(|v| v.as_str()))
.collect::<Vec<_>>()
.join(" ")
}
_ => continue,
};
if text.is_empty() { continue; }
let timestamp = obj.get("timestamp")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
return Some((msg_type.to_string(), text, timestamp));
}
}
}
/// Get the timestamp of the compaction message at a given byte offset.
/// Returns a human-readable datetime string, or None if unavailable.
pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
let (mmap, _file) = mmap_transcript(path)?;
let start = offset as usize;
if start >= mmap.len() { return None; }
// Find the end of this JSONL line
let end = mmap[start..].iter().position(|&b| b == b'\n')
.map(|p| start + p)
.unwrap_or(mmap.len());
let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
// Claude Code transcript entries have a "timestamp" field (ISO 8601)
if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
// Fallback: try "createdAt" or similar fields
for field in &["createdAt", "created_at", "time"] {
if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
}
None
}
/// Detect whether a compaction has occurred since the last check.
///
/// Compares the current compaction offset against a saved value in
/// `state_dir/compaction-{session_id}`. Returns true if a new
/// compaction was found. Updates the saved offset.
pub fn detect_new_compaction(
state_dir: &Path,
session_id: &str,
transcript_path: &str,
) -> bool {
let offset = find_last_compaction_in_file(transcript_path);
let save_path = state_dir.join(format!("compaction-{}", session_id));
let saved: Option<u64> = fs::read_to_string(&save_path)
.ok()
.and_then(|s| s.trim().parse().ok());
let is_new = match (offset, saved) {
(Some(cur), Some(prev)) => cur != prev,
(Some(_), None) => true,
_ => false,
};
// Save current offset
if let Some(off) = offset {
fs::write(&save_path, off.to_string()).ok();
}
is_new
}