init now detects content changes in markdown files and updates existing nodes (bumps version, appends to capnp log) instead of only creating new ones. Link resolution uses the redirect table so references to moved sections (e.g. from the reflections split) create edges to the correct target. On cache rebuild from capnp logs, filter out relations that reference deleted/missing nodes so the relation count matches the actual graph edge count.
1197 lines
41 KiB
Rust
// Append-only Cap'n Proto storage + derived KV cache
|
|
//
|
|
// Two log files are source of truth:
|
|
// nodes.capnp - ContentNode messages
|
|
// relations.capnp - Relation messages
|
|
//
|
|
// The Store struct is the derived cache: latest version per UUID,
|
|
// rebuilt from logs when stale. Persisted as serde_json for now
|
|
// (state.json), will move to bincode/capnp later.
|
|
|
|
use crate::memory_capnp;
|
|
use crate::graph::{self, Graph};
|
|
|
|
use capnp::message;
|
|
use capnp::serialize;
|
|
use regex::Regex;
|
|
use serde::{Deserialize, Serialize};
|
|
use uuid::Uuid;
|
|
|
|
use std::collections::HashMap;
|
|
use std::env;
|
|
use std::fs;
|
|
use std::io::{BufReader, BufWriter, Write as IoWrite};
|
|
use std::os::unix::io::AsRawFd;
|
|
use std::path::{Path, PathBuf};
|
|
use std::process::Command;
|
|
use std::time::{SystemTime, UNIX_EPOCH};
|
|
|
|
/// Root data directory for all memory artifacts: `$HOME/.claude/memory/`.
///
/// Panics if `HOME` is unset — every path helper below builds on this.
fn memory_dir() -> PathBuf {
    let home = env::var("HOME").expect("HOME not set");
    Path::new(&home).join(".claude/memory")
}
|
|
|
|
// Append-only Cap'n Proto log of ContentNode records (source of truth).
fn nodes_path() -> PathBuf { memory_dir().join("nodes.capnp") }

// Append-only Cap'n Proto log of Relation records (source of truth).
fn relations_path() -> PathBuf { memory_dir().join("relations.capnp") }

// Derived bincode cache of the replayed logs (see Store::load / Store::save).
fn state_path() -> PathBuf { memory_dir().join("state.bin") }

// Legacy serde_json cache; migrated to state.bin on first load, then removed.
fn state_json_path() -> PathBuf { memory_dir().join("state.json") }

// flock(2) lock file guarding log appends and cache writes (see StoreLock).
fn lock_path() -> PathBuf { memory_dir().join(".store.lock") }
|
|
|
|
/// RAII file lock using flock(2). Dropped when scope exits.
struct StoreLock {
    // Held only for its flock; closing the fd on drop releases the lock.
    _file: fs::File,
}
|
|
|
|
impl StoreLock {
    /// Acquire a blocking exclusive lock on the shared lock file.
    ///
    /// Creates the lock file if missing; blocks until any other holder
    /// releases. Returns an error string if the file can't be opened or
    /// flock itself fails.
    fn acquire() -> Result<Self, String> {
        let path = lock_path();
        let file = fs::OpenOptions::new()
            .create(true).write(true).open(&path)
            .map_err(|e| format!("open lock {}: {}", path.display(), e))?;

        // Blocking exclusive lock
        // SAFETY: `file` is an open, owned descriptor for the duration of the
        // call; flock on a valid fd has no other preconditions.
        let ret = unsafe { libc::flock(file.as_raw_fd(), libc::LOCK_EX) };
        if ret != 0 {
            return Err(format!("flock: {}", std::io::Error::last_os_error()));
        }
        Ok(StoreLock { _file: file })
    }
    // Lock released automatically when _file is dropped (flock semantics)
}
|
|
|
|
/// Current time as fractional seconds since the Unix epoch.
fn now_epoch() -> f64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    since_epoch.as_secs_f64()
}
|
|
|
|
/// Today's date as "YYYY-MM-DD".
///
/// Prefers the system `date` command (which respects the local timezone),
/// but no longer panics when the command is missing or fails: it falls
/// back to computing the UTC civil date from the system clock.
fn today() -> String {
    if let Ok(out) = Command::new("date").arg("+%Y-%m-%d").output() {
        if out.status.success() {
            let s = String::from_utf8_lossy(&out.stdout).trim().to_string();
            if !s.is_empty() {
                return s;
            }
        }
    }
    // Fallback: UTC date derived from the epoch (may differ from the local
    // date near midnight, but only on systems where `date` is unavailable).
    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_secs() as i64)
        .unwrap_or(0);
    let (y, m, d) = civil_from_days(secs.div_euclid(86_400));
    format!("{:04}-{:02}-{:02}", y, m, d)
}

/// Convert days since 1970-01-01 to a (year, month, day) civil date.
/// Howard Hinnant's `civil_from_days` algorithm; exact for all i64 days
/// in any realistic range.
fn civil_from_days(days: i64) -> (i64, u32, u32) {
    let z = days + 719_468;
    let era = z.div_euclid(146_097);
    let doe = z.rem_euclid(146_097); // day-of-era [0, 146096]
    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; // [0, 399]
    let y = yoe + era * 400;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day-of-year [0, 365]
    let mp = (5 * doy + 2) / 153; // March-based month [0, 11]
    let d = (doy - (153 * mp + 2) / 5 + 1) as u32; // [1, 31]
    let m = (if mp < 10 { mp + 3 } else { mp - 9 }) as u32; // [1, 12]
    (if m <= 2 { y + 1 } else { y }, m, d)
}
|
|
|
|
// In-memory node representation
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Node {
    pub uuid: [u8; 16],    // stable identity; preserved across versions on upsert
    pub version: u32,      // bumped on every logged mutation; replay keeps the highest
    pub timestamp: f64,    // epoch seconds at creation
    pub node_type: NodeType,
    pub provenance: Provenance,
    pub key: String,       // "file.md" or "file.md#section" — unique lookup key
    pub content: String,   // markdown body of the unit
    pub weight: f32,       // activation weight; boosted on use, decayed over time, capped at 1.0
    pub category: Category,
    pub emotion: f32,
    pub deleted: bool,     // tombstone: replay removes the node from the cache
    pub source_ref: String, // from a <!-- source: ... --> comment, if any
    pub created: String,    // YYYY-MM-DD creation date
    pub retrievals: u32,    // retrieval counter — NOTE(review): not updated in this chunk
    pub uses: u32,          // times confirmed used (mark_used)
    pub wrongs: u32,        // times flagged incorrect (mark_wrong)
    pub state_tag: String,  // from a `state=` marker attribute, if any
    pub last_replayed: f64, // epoch seconds of last successful use
    pub spaced_repetition_interval: u32, // review ladder 1→3→7→14→30 (see mark_used)

    // Derived fields (not in capnp, computed from graph)
    #[serde(default)]
    pub community_id: Option<u32>,
    #[serde(default)]
    pub clustering_coefficient: Option<f32>,
    #[serde(default)]
    pub schema_fit: Option<f32>,
    #[serde(default)]
    pub degree: Option<u32>,
}
|
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Relation {
    pub uuid: [u8; 16],   // stable identity of the edge itself
    pub version: u32,     // bumped on mutation; replay keeps the highest per uuid
    pub timestamp: f64,   // epoch seconds at creation
    pub source: [u8; 16], // source node uuid
    pub target: [u8; 16], // target node uuid
    pub rel_type: RelationType,
    pub strength: f32,
    pub provenance: Provenance,
    pub deleted: bool,      // tombstone: filtered out on replay
    pub source_key: String, // denormalized endpoint keys, used when pruning
    pub target_key: String, // edges whose nodes vanished (see Store::load)
}
|
|
|
|
// Kind of memory a node holds, assigned from the source filename
// during init (see scan_dir_for_init).
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
pub enum NodeType {
    EpisodicSession, // journal.md
    EpisodicDaily,   // daily-*.md
    EpisodicWeekly,  // weekly-*.md
    Semantic,        // everything else (also the default for new nodes)
}
|
|
|
|
// Origin of a node or relation.
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
pub enum Provenance {
    Manual, // hand-entered; default for nodes/relations created here
    Journal,
    Agent,
    Dream,
    Derived,
}
|
|
|
|
// Durability class of a memory; controls how fast its weight decays
// (see Category::decay_factor — Core slowest, Task fastest).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Category {
    General,
    Core,
    Technical,
    Observation,
    Task,
}
|
|
|
|
impl Category {
|
|
pub fn decay_factor(&self, base: f64) -> f64 {
|
|
match self {
|
|
Category::Core => 1.0 - (1.0 - base) * 0.2,
|
|
Category::Technical => 1.0 - (1.0 - base) * 0.5,
|
|
Category::General => base,
|
|
Category::Observation => 1.0 - (1.0 - base) * 1.5,
|
|
Category::Task => 1.0 - (1.0 - base) * 2.5,
|
|
}
|
|
}
|
|
|
|
pub fn label(&self) -> &str {
|
|
match self {
|
|
Category::Core => "core",
|
|
Category::Technical => "tech",
|
|
Category::General => "gen",
|
|
Category::Observation => "obs",
|
|
Category::Task => "task",
|
|
}
|
|
}
|
|
|
|
pub fn from_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"core" => Some(Category::Core),
|
|
"tech" | "technical" => Some(Category::Technical),
|
|
"gen" | "general" => Some(Category::General),
|
|
"obs" | "observation" => Some(Category::Observation),
|
|
"task" => Some(Category::Task),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Kind of edge between two nodes.
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
pub enum RelationType {
    Link,   // from markdown links / `links=` marker attributes (undirected in dedup)
    Causal, // from `causes=` marker attributes; directed cause → effect
    Auto,   // NOTE(review): not produced in this chunk — presumably machine-derived
}
|
|
|
|
// One logged query against the store (see log_retrieval; capped at 100).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RetrievalEvent {
    pub query: String,
    pub timestamp: String,          // YYYY-MM-DD
    pub results: Vec<String>,       // keys returned for the query
    pub used: Option<Vec<String>>,  // always None at log time; filled in later if at all
}
|
|
|
|
// Tuning knobs for activation dynamics and graph traversal.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Params {
    pub default_weight: f64,  // initial node weight — NOTE(review): new_node hard-codes 0.7 instead of reading this
    pub decay_factor: f64,    // base per-decay multiplier (scaled per Category)
    pub use_boost: f64,       // weight bump applied by mark_used (capped at 1.0)
    pub prune_threshold: f64, // below this, decay() counts a node as prunable
    pub edge_decay: f64,      // used by graph traversal — semantics in graph module, not visible here
    pub max_hops: u32,        // traversal depth limit — semantics in graph module
    pub min_activation: f64,  // traversal cutoff — semantics in graph module
}
|
|
|
|
impl Default for Params {
|
|
fn default() -> Self {
|
|
Params {
|
|
default_weight: 0.7,
|
|
decay_factor: 0.95,
|
|
use_boost: 0.15,
|
|
prune_threshold: 0.1,
|
|
edge_decay: 0.3,
|
|
max_hops: 3,
|
|
min_activation: 0.05,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Gap record — something we looked for but didn't find
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GapRecord {
    pub description: String, // what was missing
    pub timestamp: String,   // YYYY-MM-DD when the gap was noticed
}
|
|
|
|
// The full in-memory store
#[derive(Serialize, Deserialize)]
pub struct Store {
    pub nodes: HashMap<String, Node>, // key → latest node
    #[serde(skip)]
    pub uuid_to_key: HashMap<[u8; 16], String>, // uuid → key (rebuilt from nodes)
    pub relations: Vec<Relation>, // all active relations
    pub retrieval_log: Vec<RetrievalEvent>, // recent queries, capped at 100
    pub gaps: Vec<GapRecord>, // things searched for but not found
    pub params: Params, // tuning knobs
}
|
|
|
|
impl Default for Store {
|
|
fn default() -> Self {
|
|
Store {
|
|
nodes: HashMap::new(),
|
|
uuid_to_key: HashMap::new(),
|
|
relations: Vec::new(),
|
|
retrieval_log: Vec::new(),
|
|
gaps: Vec::new(),
|
|
params: Params::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Store {
|
|
    /// Load store: try the state.bin cache first, rebuild from capnp logs if stale.
    ///
    /// Order: (1) bincode cache if its mtime is >= both log mtimes,
    /// (2) legacy state.json cache (migrated to bincode on success),
    /// (3) full replay of both capnp logs, pruning dangling edges,
    /// then persisting a fresh cache.
    pub fn load() -> Result<Store, String> {
        let state = state_path();
        let nodes_p = nodes_path();
        let rels_p = relations_path();

        // Check if cache is up to date
        // Missing logs read as UNIX_EPOCH mtime, so an existing cache with
        // no logs counts as fresh.
        let cache_fresh = state.exists() && {
            let cache_mtime = fs::metadata(&state).ok()
                .and_then(|m| m.modified().ok())
                .unwrap_or(UNIX_EPOCH);
            let nodes_mtime = fs::metadata(&nodes_p).ok()
                .and_then(|m| m.modified().ok())
                .unwrap_or(UNIX_EPOCH);
            let rels_mtime = fs::metadata(&rels_p).ok()
                .and_then(|m| m.modified().ok())
                .unwrap_or(UNIX_EPOCH);
            cache_mtime >= nodes_mtime && cache_mtime >= rels_mtime
        };

        if cache_fresh {
            let data = fs::read(&state)
                .map_err(|e| format!("read state.bin: {}", e))?;
            let mut store: Store = bincode::deserialize(&data)
                .map_err(|e| format!("parse state.bin: {}", e))?;
            // uuid_to_key is #[serde(skip)], so it must be rebuilt after load.
            store.rebuild_uuid_index();
            return Ok(store);
        }

        // Try legacy JSON cache for migration
        // A parse failure here is deliberately non-fatal: we fall through
        // to a full log replay instead.
        let json_state = state_json_path();
        if json_state.exists() {
            let data = fs::read_to_string(&json_state)
                .map_err(|e| format!("read state.json: {}", e))?;
            if let Ok(mut store) = serde_json::from_str::<Store>(&data) {
                store.rebuild_uuid_index();
                // Migrate to bincode
                store.save()?;
                return Ok(store);
            }
        }

        // Rebuild from capnp logs
        let mut store = Store::default();

        if nodes_p.exists() {
            store.replay_nodes(&nodes_p)?;
        }
        if rels_p.exists() {
            store.replay_relations(&rels_p)?;
        }

        // Drop edges referencing deleted/missing nodes
        // (keeps the relation count consistent with the actual graph).
        store.relations.retain(|r|
            store.nodes.contains_key(&r.source_key) &&
            store.nodes.contains_key(&r.target_key)
        );

        // Save cache
        // NOTE(review): no StoreLock is held across the replay itself, only
        // inside save() — confirm concurrent writers can't interleave here.
        store.save()?;
        Ok(store)
    }
|
|
|
|
    /// Replay node log, keeping latest version per key.
    ///
    /// A record with `deleted` set acts as a tombstone and removes the key
    /// from the cache. The `while let Ok` loop stops at the first read
    /// failure — EOF and file corruption are indistinguishable here.
    fn replay_nodes(&mut self, path: &Path) -> Result<(), String> {
        let file = fs::File::open(path)
            .map_err(|e| format!("open {}: {}", path.display(), e))?;
        let mut reader = BufReader::new(file);

        while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
            let log = msg.get_root::<memory_capnp::node_log::Reader>()
                .map_err(|e| format!("read node log: {}", e))?;
            for node_reader in log.get_nodes()
                .map_err(|e| format!("get nodes: {}", e))? {
                let node = read_content_node(node_reader)?;
                // NOTE(review): the version guard is keyed by `key`, so if two
                // distinct uuids ever share a key the comparison crosses
                // identities — confirm keys are unique per uuid.
                let existing_version = self.nodes.get(&node.key)
                    .map(|n| n.version)
                    .unwrap_or(0);
                if node.version >= existing_version {
                    if node.deleted {
                        self.nodes.remove(&node.key);
                        self.uuid_to_key.remove(&node.uuid);
                    } else {
                        self.uuid_to_key.insert(node.uuid, node.key.clone());
                        self.nodes.insert(node.key.clone(), node);
                    }
                }
            }
        }
        Ok(())
    }
|
|
|
|
    /// Replay relation log, keeping latest version per UUID.
    ///
    /// Unlike node replay, dedup is by the relation's own uuid; tombstoned
    /// (deleted) relations are dropped after dedup so a later tombstone
    /// wins over an earlier live record.
    fn replay_relations(&mut self, path: &Path) -> Result<(), String> {
        let file = fs::File::open(path)
            .map_err(|e| format!("open {}: {}", path.display(), e))?;
        let mut reader = BufReader::new(file);

        // Collect all, then deduplicate by UUID keeping latest version
        let mut by_uuid: HashMap<[u8; 16], Relation> = HashMap::new();

        // Stops at the first unreadable message (EOF or corruption).
        while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
            let log = msg.get_root::<memory_capnp::relation_log::Reader>()
                .map_err(|e| format!("read relation log: {}", e))?;
            for rel_reader in log.get_relations()
                .map_err(|e| format!("get relations: {}", e))? {
                let rel = read_relation(rel_reader)?;
                let existing_version = by_uuid.get(&rel.uuid)
                    .map(|r| r.version)
                    .unwrap_or(0);
                if rel.version >= existing_version {
                    by_uuid.insert(rel.uuid, rel);
                }
            }
        }

        self.relations = by_uuid.into_values()
            .filter(|r| !r.deleted)
            .collect();
        Ok(())
    }
|
|
|
|
    /// Append nodes to the log file.
    ///
    /// Takes the store lock for the duration of the write. The whole batch
    /// is serialized as a single capnp `node_log` message appended to
    /// nodes.capnp; the in-memory cache is NOT updated here — callers do
    /// that themselves.
    pub fn append_nodes(&self, nodes: &[Node]) -> Result<(), String> {
        let _lock = StoreLock::acquire()?;

        let path = nodes_path();
        let file = fs::OpenOptions::new()
            .create(true).append(true).open(&path)
            .map_err(|e| format!("open {}: {}", path.display(), e))?;
        let mut writer = BufWriter::new(file);

        let mut msg = message::Builder::new_default();
        {
            // Scope the builder borrow so `msg` can be serialized below.
            let log = msg.init_root::<memory_capnp::node_log::Builder>();
            let mut list = log.init_nodes(nodes.len() as u32);
            for (i, node) in nodes.iter().enumerate() {
                write_content_node(list.reborrow().get(i as u32), node);
            }
        }
        serialize::write_message(&mut writer, &msg)
            .map_err(|e| format!("write nodes: {}", e))?;
        // Explicit flush so write errors surface instead of being swallowed
        // by BufWriter's Drop.
        writer.flush().map_err(|e| format!("flush: {}", e))?;
        Ok(())
    }
|
|
|
|
    /// Append relations to the log file.
    ///
    /// Mirrors append_nodes: takes the store lock, writes the batch as one
    /// capnp `relation_log` message appended to relations.capnp, and leaves
    /// the in-memory cache for the caller to update.
    pub fn append_relations(&self, relations: &[Relation]) -> Result<(), String> {
        let _lock = StoreLock::acquire()?;

        let path = relations_path();
        let file = fs::OpenOptions::new()
            .create(true).append(true).open(&path)
            .map_err(|e| format!("open {}: {}", path.display(), e))?;
        let mut writer = BufWriter::new(file);

        let mut msg = message::Builder::new_default();
        {
            // Scope the builder borrow so `msg` can be serialized below.
            let log = msg.init_root::<memory_capnp::relation_log::Builder>();
            let mut list = log.init_relations(relations.len() as u32);
            for (i, rel) in relations.iter().enumerate() {
                write_relation(list.reborrow().get(i as u32), rel);
            }
        }
        serialize::write_message(&mut writer, &msg)
            .map_err(|e| format!("write relations: {}", e))?;
        writer.flush().map_err(|e| format!("flush: {}", e))?;
        Ok(())
    }
|
|
|
|
    /// Save the derived cache (state.bin, bincode).
    ///
    /// Takes the store lock, serializes the whole store, and removes the
    /// legacy state.json cache if one is still lying around.
    pub fn save(&self) -> Result<(), String> {
        let _lock = StoreLock::acquire()?;

        let path = state_path();
        if let Some(parent) = path.parent() {
            // Best-effort: a failure here will surface in the write below.
            fs::create_dir_all(parent).ok();
        }
        let data = bincode::serialize(self)
            .map_err(|e| format!("bincode serialize: {}", e))?;
        fs::write(&path, data)
            .map_err(|e| format!("write {}: {}", path.display(), e))?;

        // Clean up old JSON cache if it exists
        let json_path = state_json_path();
        if json_path.exists() {
            fs::remove_file(&json_path).ok();
        }
        Ok(())
    }
|
|
|
|
/// Add or update a node (appends to log + updates cache)
|
|
pub fn upsert_node(&mut self, mut node: Node) -> Result<(), String> {
|
|
if let Some(existing) = self.nodes.get(&node.key) {
|
|
node.uuid = existing.uuid;
|
|
node.version = existing.version + 1;
|
|
}
|
|
self.append_nodes(&[node.clone()])?;
|
|
self.uuid_to_key.insert(node.uuid, node.key.clone());
|
|
self.nodes.insert(node.key.clone(), node);
|
|
Ok(())
|
|
}
|
|
|
|
/// Add a relation (appends to log + updates cache)
|
|
pub fn add_relation(&mut self, rel: Relation) -> Result<(), String> {
|
|
self.append_relations(&[rel.clone()])?;
|
|
self.relations.push(rel);
|
|
Ok(())
|
|
}
|
|
|
|
/// Create a new node with defaults
|
|
pub fn new_node(key: &str, content: &str) -> Node {
|
|
Node {
|
|
uuid: *Uuid::new_v4().as_bytes(),
|
|
version: 1,
|
|
timestamp: now_epoch(),
|
|
node_type: NodeType::Semantic,
|
|
provenance: Provenance::Manual,
|
|
key: key.to_string(),
|
|
content: content.to_string(),
|
|
weight: 0.7,
|
|
category: Category::General,
|
|
emotion: 0.0,
|
|
deleted: false,
|
|
source_ref: String::new(),
|
|
created: today(),
|
|
retrievals: 0,
|
|
uses: 0,
|
|
wrongs: 0,
|
|
state_tag: String::new(),
|
|
last_replayed: 0.0,
|
|
spaced_repetition_interval: 1,
|
|
community_id: None,
|
|
clustering_coefficient: None,
|
|
schema_fit: None,
|
|
degree: None,
|
|
}
|
|
}
|
|
|
|
/// Create a new relation
|
|
pub fn new_relation(
|
|
source_uuid: [u8; 16],
|
|
target_uuid: [u8; 16],
|
|
rel_type: RelationType,
|
|
strength: f32,
|
|
source_key: &str,
|
|
target_key: &str,
|
|
) -> Relation {
|
|
Relation {
|
|
uuid: *Uuid::new_v4().as_bytes(),
|
|
version: 1,
|
|
timestamp: now_epoch(),
|
|
source: source_uuid,
|
|
target: target_uuid,
|
|
rel_type,
|
|
strength,
|
|
provenance: Provenance::Manual,
|
|
deleted: false,
|
|
source_key: source_key.to_string(),
|
|
target_key: target_key.to_string(),
|
|
}
|
|
}
|
|
|
|
/// Scan markdown files and index all memory units
|
|
pub fn init_from_markdown(&mut self) -> Result<usize, String> {
|
|
let dir = memory_dir();
|
|
let mut count = 0;
|
|
if dir.exists() {
|
|
count = self.scan_dir_for_init(&dir)?;
|
|
}
|
|
Ok(count)
|
|
}
|
|
|
|
    /// Recursively scan a directory for *.md files and index their units.
    ///
    /// For each file: parse units, create nodes for new keys, bump and
    /// re-log nodes whose content changed, then create Link/Causal edges
    /// (resolving moved sections through the redirect table). Returns the
    /// number of newly created nodes.
    fn scan_dir_for_init(&mut self, dir: &Path) -> Result<usize, String> {
        let mut count = 0;
        let entries = fs::read_dir(dir)
            .map_err(|e| format!("read dir {}: {}", dir.display(), e))?;

        // Track which keys we see in markdown so we can detect removed sections
        // NOTE(review): seen_keys is populated but never read — removed-section
        // detection is not actually implemented in this function.
        let mut seen_keys = std::collections::HashSet::new();

        for entry in entries.flatten() {
            let path = entry.path();
            if path.is_dir() {
                count += self.scan_dir_for_init(&path)?;
                continue;
            }
            let Some(ext) = path.extension() else { continue };
            if ext != "md" { continue }

            let filename = path.file_name().unwrap().to_string_lossy().to_string();
            let content = fs::read_to_string(&path)
                .map_err(|e| format!("read {}: {}", path.display(), e))?;

            let units = parse_units(&filename, &content);
            let mut new_nodes = Vec::new();
            let mut updated_nodes = Vec::new();
            let mut new_relations = Vec::new();

            // Determine node type from filename
            let node_type = if filename.starts_with("daily-") {
                NodeType::EpisodicDaily
            } else if filename.starts_with("weekly-") {
                NodeType::EpisodicWeekly
            } else if filename == "journal.md" {
                NodeType::EpisodicSession
            } else {
                NodeType::Semantic
            };

            for unit in &units {
                seen_keys.insert(unit.key.clone());

                if let Some(existing) = self.nodes.get(&unit.key) {
                    // Update if content changed
                    // Bumping the version makes this record win on log replay.
                    if existing.content != unit.content {
                        let mut node = existing.clone();
                        node.content = unit.content.clone();
                        node.version += 1;
                        if let Some(ref state) = unit.state {
                            node.state_tag = state.clone();
                        }
                        if let Some(ref src) = unit.source_ref {
                            node.source_ref = src.clone();
                        }
                        updated_nodes.push(node);
                    }
                } else {
                    let mut node = Store::new_node(&unit.key, &unit.content);
                    node.node_type = node_type;
                    if let Some(ref state) = unit.state {
                        node.state_tag = state.clone();
                    }
                    if let Some(ref src) = unit.source_ref {
                        node.source_ref = src.clone();
                    }
                    new_nodes.push(node);
                }
            }

            // Batch append new nodes (one log message per file)
            if !new_nodes.is_empty() {
                self.append_nodes(&new_nodes)?;
                for node in &new_nodes {
                    self.uuid_to_key.insert(node.uuid, node.key.clone());
                    self.nodes.insert(node.key.clone(), node.clone());
                }
                count += new_nodes.len();
            }

            // Batch append updated nodes
            if !updated_nodes.is_empty() {
                self.append_nodes(&updated_nodes)?;
                for node in &updated_nodes {
                    self.nodes.insert(node.key.clone(), node.clone());
                }
            }

            // Create relations from links, using resolve_redirect for targets
            for unit in &units {
                let source_uuid = match self.nodes.get(&unit.key) {
                    Some(n) => n.uuid,
                    None => continue,
                };

                for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
                    // Try direct lookup, then redirect
                    let resolved_link = if self.nodes.contains_key(link) {
                        link.clone()
                    } else if let Some(redirect) = self.resolve_redirect(link) {
                        redirect
                    } else {
                        continue;
                    };
                    let target_uuid = match self.nodes.get(&resolved_link) {
                        Some(n) => n.uuid,
                        None => continue,
                    };
                    // Check if relation already exists (Link edges are
                    // deduplicated as undirected).
                    // NOTE(review): only self.relations is checked, so two
                    // units in the same file linking the same pair can both
                    // land in new_relations — confirm duplicates are acceptable.
                    let exists = self.relations.iter().any(|r|
                        (r.source == source_uuid && r.target == target_uuid) ||
                        (r.source == target_uuid && r.target == source_uuid));
                    if !exists {
                        let rel = Store::new_relation(
                            source_uuid, target_uuid,
                            RelationType::Link, 1.0,
                            &unit.key, &resolved_link,
                        );
                        new_relations.push(rel);
                    }
                }

                // `causes=` attributes produce directed edges: cause → this unit.
                for cause in &unit.causes {
                    let resolved_cause = if self.nodes.contains_key(cause) {
                        cause.clone()
                    } else if let Some(redirect) = self.resolve_redirect(cause) {
                        redirect
                    } else {
                        continue;
                    };
                    let target_uuid = match self.nodes.get(&resolved_cause) {
                        Some(n) => n.uuid,
                        None => continue,
                    };
                    let exists = self.relations.iter().any(|r|
                        r.source == target_uuid && r.target == source_uuid
                        && r.rel_type == RelationType::Causal);
                    if !exists {
                        let rel = Store::new_relation(
                            target_uuid, source_uuid,
                            RelationType::Causal, 1.0,
                            &resolved_cause, &unit.key,
                        );
                        new_relations.push(rel);
                    }
                }
            }

            if !new_relations.is_empty() {
                self.append_relations(&new_relations)?;
                self.relations.extend(new_relations);
            }
        }
        Ok(count)
    }
|
|
|
|
fn rebuild_uuid_index(&mut self) {
|
|
self.uuid_to_key.clear();
|
|
for (key, node) in &self.nodes {
|
|
self.uuid_to_key.insert(node.uuid, key.clone());
|
|
}
|
|
}
|
|
|
|
    /// Construct the graph view of this store (delegates to the graph module).
    pub fn build_graph(&self) -> Graph {
        graph::build_graph(self)
    }
|
|
|
|
    /// Activation weight of the node at `key`, or None if it doesn't exist.
    pub fn node_weight(&self, key: &str) -> Option<f32> {
        self.nodes.get(key).map(|n| n.weight)
    }
|
|
|
|
    /// Community id of the node at `key`; None if the node is missing or
    /// update_graph_metrics() hasn't run yet.
    pub fn node_community(&self, key: &str) -> Option<u32> {
        self.nodes.get(key).and_then(|n| n.community_id)
    }
|
|
|
|
pub fn resolve_key(&self, target: &str) -> Result<String, String> {
|
|
let normalized = if target.contains('#') {
|
|
let parts: Vec<&str> = target.splitn(2, '#').collect();
|
|
let file = if parts[0].ends_with(".md") {
|
|
parts[0].to_string()
|
|
} else {
|
|
format!("{}.md", parts[0])
|
|
};
|
|
format!("{}#{}", file, parts[1])
|
|
} else if target.ends_with(".md") {
|
|
target.to_string()
|
|
} else {
|
|
format!("{}.md", target)
|
|
};
|
|
|
|
if self.nodes.contains_key(&normalized) {
|
|
return Ok(normalized);
|
|
}
|
|
|
|
// Check redirects for moved sections (e.g. reflections.md split)
|
|
if let Some(redirect) = self.resolve_redirect(&normalized) {
|
|
if self.nodes.contains_key(&redirect) {
|
|
return Ok(redirect);
|
|
}
|
|
}
|
|
|
|
let matches: Vec<_> = self.nodes.keys()
|
|
.filter(|k| k.to_lowercase().contains(&target.to_lowercase()))
|
|
.cloned().collect();
|
|
|
|
match matches.len() {
|
|
0 => Err(format!("No entry for '{}'. Run 'init'?", target)),
|
|
1 => Ok(matches[0].clone()),
|
|
n if n <= 10 => {
|
|
let list = matches.join("\n ");
|
|
Err(format!("Ambiguous '{}'. Matches:\n {}", target, list))
|
|
}
|
|
n => Err(format!("Too many matches for '{}' ({}). Be more specific.", target, n)),
|
|
}
|
|
}
|
|
|
|
/// Redirect table for sections that moved between files.
|
|
/// Like HTTP 301s — the old key resolves to the new location.
|
|
fn resolve_redirect(&self, key: &str) -> Option<String> {
|
|
// Sections moved from reflections.md to split files (2026-02-28)
|
|
static REDIRECTS: &[(&str, &str)] = &[
|
|
// → reflections-reading.md
|
|
("reflections.md#pearl-lessons", "reflections-reading.md#pearl-lessons"),
|
|
("reflections.md#banks-lessons", "reflections-reading.md#banks-lessons"),
|
|
("reflections.md#mother-night", "reflections-reading.md#mother-night"),
|
|
// → reflections-zoom.md
|
|
("reflections.md#zoom-navigation", "reflections-zoom.md#zoom-navigation"),
|
|
("reflections.md#independence-of-components", "reflections-zoom.md#independence-of-components"),
|
|
// → reflections-dreams.md
|
|
("reflections.md#dream-marathon-2", "reflections-dreams.md#dream-marathon-2"),
|
|
("reflections.md#dream-through-line", "reflections-dreams.md#dream-through-line"),
|
|
("reflections.md#orthogonality-universal", "reflections-dreams.md#orthogonality-universal"),
|
|
("reflections.md#constraints-constitutive", "reflections-dreams.md#constraints-constitutive"),
|
|
("reflections.md#casualness-principle", "reflections-dreams.md#casualness-principle"),
|
|
("reflections.md#convention-boundary", "reflections-dreams.md#convention-boundary"),
|
|
("reflections.md#tension-brake", "reflections-dreams.md#tension-brake"),
|
|
];
|
|
|
|
REDIRECTS.iter()
|
|
.find(|(from, _)| *from == key)
|
|
.map(|(_, to)| to.to_string())
|
|
}
|
|
|
|
pub fn log_retrieval(&mut self, query: &str, results: &[String]) {
|
|
self.retrieval_log.push(RetrievalEvent {
|
|
query: query.to_string(),
|
|
timestamp: today(),
|
|
results: results.to_vec(),
|
|
used: None,
|
|
});
|
|
// Keep last 100
|
|
if self.retrieval_log.len() > 100 {
|
|
let start = self.retrieval_log.len() - 100;
|
|
self.retrieval_log = self.retrieval_log[start..].to_vec();
|
|
}
|
|
}
|
|
|
|
    /// Record a successful use of a node: bump its use count, boost its
    /// weight (capped at 1.0), advance the spaced-repetition interval, and
    /// persist the new version to the capnp log. Silently a no-op if the
    /// key doesn't exist; append failures are deliberately ignored.
    pub fn mark_used(&mut self, key: &str) {
        let updated = if let Some(node) = self.nodes.get_mut(key) {
            node.uses += 1;
            node.weight = (node.weight + self.params.use_boost as f32).min(1.0);
            // Used successfully — advance the review ladder 1→3→7→14→30.
            if node.spaced_repetition_interval < 30 {
                node.spaced_repetition_interval = match node.spaced_repetition_interval {
                    1 => 3, 3 => 7, 7 => 14, 14 => 30, _ => 30,
                };
            }
            node.last_replayed = now_epoch();
            node.version += 1;
            // Clone so the &mut borrow of self.nodes ends before append_nodes.
            Some(node.clone())
        } else {
            None
        };
        if let Some(node) = updated {
            let _ = self.append_nodes(&[node]);
        }
    }
|
|
|
|
pub fn mark_wrong(&mut self, key: &str, _ctx: Option<&str>) {
|
|
let updated = if let Some(node) = self.nodes.get_mut(key) {
|
|
node.wrongs += 1;
|
|
node.weight = (node.weight - 0.1).max(0.0);
|
|
// Reset spaced repetition interval — needs review
|
|
node.spaced_repetition_interval = 1;
|
|
node.version += 1;
|
|
Some(node.clone())
|
|
} else {
|
|
None
|
|
};
|
|
if let Some(node) = updated {
|
|
let _ = self.append_nodes(&[node]);
|
|
}
|
|
}
|
|
|
|
pub fn record_gap(&mut self, desc: &str) {
|
|
self.gaps.push(GapRecord {
|
|
description: desc.to_string(),
|
|
timestamp: today(),
|
|
});
|
|
}
|
|
|
|
pub fn categorize(&mut self, key: &str, cat_str: &str) -> Result<(), String> {
|
|
let cat = Category::from_str(cat_str)
|
|
.ok_or_else(|| format!("Unknown category '{}'. Use: core/tech/gen/obs/task", cat_str))?;
|
|
let updated = if let Some(node) = self.nodes.get_mut(key) {
|
|
node.category = cat;
|
|
node.version += 1;
|
|
Some(node.clone())
|
|
} else {
|
|
None
|
|
};
|
|
if let Some(node) = updated {
|
|
// Persist to capnp log so category survives cache rebuilds
|
|
self.append_nodes(&[node])?;
|
|
Ok(())
|
|
} else {
|
|
Err(format!("No node '{}'", key))
|
|
}
|
|
}
|
|
|
|
    /// Apply one decay step to every node and persist the results.
    ///
    /// Returns (decayed, pruned) where `decayed` is the total node count
    /// and `pruned` counts nodes that fell below the prune threshold.
    /// Nothing is actually removed here.
    pub fn decay(&mut self) -> (usize, usize) {
        let base = self.params.decay_factor;
        let threshold = self.params.prune_threshold as f32;
        let mut decayed = 0;
        let mut pruned = 0;
        let mut to_remove = Vec::new();

        for (key, node) in &mut self.nodes {
            // Per-category multiplier: Core decays slowest, Task fastest.
            let factor = node.category.decay_factor(base) as f32;
            node.weight *= factor;
            node.version += 1;
            decayed += 1;
            if node.weight < threshold {
                to_remove.push(key.clone());
                pruned += 1;
            }
        }

        // Don't actually remove — just mark very low weight
        // Actual pruning happens during GC
        // Note: max(0.01) only *raises* weights that fell below 0.01;
        // sub-threshold nodes between 0.01 and the threshold are untouched.
        for key in &to_remove {
            if let Some(node) = self.nodes.get_mut(key) {
                node.weight = node.weight.max(0.01);
            }
        }

        // Persist all decayed weights to capnp log
        // (one batch; append errors are deliberately ignored here)
        let updated: Vec<Node> = self.nodes.values().cloned().collect();
        let _ = self.append_nodes(&updated);

        (decayed, pruned)
    }
|
|
|
|
pub fn category_counts(&self) -> HashMap<&str, usize> {
|
|
let mut counts = HashMap::new();
|
|
for node in self.nodes.values() {
|
|
*counts.entry(node.category.label()).or_insert(0) += 1;
|
|
}
|
|
counts
|
|
}
|
|
|
|
    /// Update graph-derived fields on all nodes.
    ///
    /// Builds the graph once, then annotates every node with its community
    /// id, clustering coefficient, degree, and schema fit. These fields are
    /// cache-only (not written to the capnp logs).
    pub fn update_graph_metrics(&mut self) {
        let g = self.build_graph();
        let communities = g.communities();
        let fits = graph::schema_fit_all(&g);

        for (key, node) in &mut self.nodes {
            node.community_id = communities.get(key).copied();
            node.clustering_coefficient = Some(g.clustering_coefficient(key));
            node.degree = Some(g.degree(key) as u32);
            node.schema_fit = fits.get(key).copied();
        }
    }
|
|
}
|
|
|
|
// Markdown parsing — same as old system but returns structured units
|
|
|
|
// One indexable unit of a markdown file: either the whole file, or a
// section delimited by a <!-- mem: ... --> marker.
pub struct MemoryUnit {
    pub key: String,               // "file.md" or "file.md#id"
    pub content: String,           // trimmed body text of the unit
    pub marker_links: Vec<String>, // from the `links=` marker attribute
    pub md_links: Vec<String>,     // from [text](target.md#frag) markdown links
    pub causes: Vec<String>,       // from `causes=`; becomes Causal edges (cause → this)
    pub state: Option<String>,     // from `state=`, if present
    pub source_ref: Option<String>,// from a <!-- source: ... --> comment, if present
}
|
|
|
|
/// Split a markdown file into MemoryUnits.
///
/// A file with no <!-- mem: ... --> markers becomes a single whole-file
/// unit keyed by filename. Otherwise: any text before the first marker
/// becomes a whole-file unit, and each marker opens a section unit that
/// runs to the next marker (or EOF), keyed "filename#id".
pub fn parse_units(filename: &str, content: &str) -> Vec<MemoryUnit> {
    // A marker must start with one of the known attributes to count.
    let marker_re = Regex::new(
        r"<!--\s*mem:\s*((?:id|links|tags|causes|state)\s*=\s*[^\s].*?)-->"
    ).unwrap();
    let source_re = Regex::new(r"<!--\s*source:\s*(.+?)\s*-->").unwrap();
    // Captures the target of [text](path.md) / [text](path.md#frag) links.
    let md_link_re = Regex::new(r"\[[^\]]*\]\(([^)]*\.md(?:#[^)]*)?)\)").unwrap();

    // (start_offset, end_offset, parsed attributes) per marker, in order.
    let markers: Vec<_> = marker_re.captures_iter(content)
        .map(|cap| {
            let full_match = cap.get(0).unwrap();
            let attrs_str = &cap[1];
            (full_match.start(), full_match.end(), parse_marker_attrs(attrs_str))
        })
        .collect();

    // Helper: extract source ref from a content block (first match wins)
    let find_source = |text: &str| -> Option<String> {
        source_re.captures(text).map(|c| c[1].trim().to_string())
    };

    if markers.is_empty() {
        let source_ref = find_source(content);
        let md_links = extract_md_links(content, &md_link_re, filename);
        return vec![MemoryUnit {
            key: filename.to_string(),
            content: content.to_string(),
            marker_links: Vec::new(),
            md_links,
            causes: Vec::new(),
            state: None,
            source_ref,
        }];
    }

    let mut units = Vec::new();

    // Preamble before the first marker becomes a whole-file unit.
    let first_start = markers[0].0;
    let pre_content = content[..first_start].trim();
    if !pre_content.is_empty() {
        let source_ref = find_source(pre_content);
        let md_links = extract_md_links(pre_content, &md_link_re, filename);
        units.push(MemoryUnit {
            key: filename.to_string(),
            content: pre_content.to_string(),
            marker_links: Vec::new(),
            md_links,
            causes: Vec::new(),
            state: None,
            source_ref,
        });
    }

    for (i, (_, end, attrs)) in markers.iter().enumerate() {
        // Section body: from the end of this marker to the start of the next.
        let unit_end = if i + 1 < markers.len() {
            markers[i + 1].0
        } else {
            content.len()
        };
        let unit_content = content[*end..unit_end].trim();

        // Markers without an id get a positional "unnamed-N" key.
        let id = attrs.get("id").cloned().unwrap_or_default();
        let key = if id.is_empty() {
            format!("{}#unnamed-{}", filename, i)
        } else {
            format!("{}#{}", filename, id)
        };

        // links= / causes= are comma-separated, normalized against this file.
        let marker_links = attrs.get("links")
            .map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
            .unwrap_or_default();

        let causes = attrs.get("causes")
            .map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
            .unwrap_or_default();

        let state = attrs.get("state").cloned();
        let source_ref = find_source(unit_content);
        let md_links = extract_md_links(unit_content, &md_link_re, filename);

        units.push(MemoryUnit {
            key,
            content: unit_content.to_string(),
            marker_links,
            md_links,
            causes,
            state,
            source_ref,
        });
    }

    units
}
|
|
|
|
fn parse_marker_attrs(attrs_str: &str) -> HashMap<String, String> {
|
|
let attr_re = Regex::new(r"(\w+)\s*=\s*(\S+)").unwrap();
|
|
let mut attrs = HashMap::new();
|
|
for cap in attr_re.captures_iter(attrs_str) {
|
|
attrs.insert(cap[1].to_string(), cap[2].to_string());
|
|
}
|
|
attrs
|
|
}
|
|
|
|
// Pull markdown link targets out of a block, normalized to basename#frag keys.
fn extract_md_links(content: &str, re: &Regex, source_file: &str) -> Vec<String> {
    re.captures_iter(content)
        .map(|cap| normalize_link(&cap[1], source_file))
        // Drop whole-file self-references; keep fragment links (even to self).
        // NOTE(review): starts_with is a prefix test, not equality — confirm it
        // can't drop links to files whose name begins with this file's name.
        .filter(|link| !link.starts_with(source_file) || link.contains('#'))
        .collect()
}
|
|
|
|
/// Normalize a link target to the store's key form: file basename plus
/// optional "#fragment". A bare "#fragment" refers to the linking file.
pub fn normalize_link(target: &str, source_file: &str) -> String {
    if target.starts_with('#') {
        return format!("{}{}", source_file, target);
    }

    // Split off the fragment so it survives the basename reduction.
    let (path_part, fragment) = match target.find('#') {
        Some(pos) => (&target[..pos], &target[pos..]),
        None => (target, ""),
    };

    // Keys are keyed by file basename, never by relative path.
    let basename = match Path::new(path_part).file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => path_part.to_string(),
    };

    format!("{}{}", basename, fragment)
}
|
|
|
|
// Cap'n Proto serialization helpers
|
|
|
|
/// Read a capnp text field, returning empty string on any error
/// (missing field and invalid UTF-8 both map to "").
fn read_text(result: capnp::Result<capnp::text::Reader>) -> String {
    result.ok()
        .and_then(|t| t.to_str().ok())
        .unwrap_or("")
        .to_string()
}
|
|
|
|
/// Read a capnp data field as [u8; 16], zero-padded
|
|
fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] {
|
|
let mut out = [0u8; 16];
|
|
if let Ok(data) = result {
|
|
if data.len() >= 16 {
|
|
out.copy_from_slice(&data[..16]);
|
|
}
|
|
}
|
|
out
|
|
}
|
|
|
|
/// Deserialize one capnp ContentNode record into an in-memory `Node`.
///
/// Text and uuid fields are read leniently — `read_text` / `read_uuid` fall
/// back to "" / all-zero bytes on error — but enum fields are strict: an
/// unrecognized node_type, category, or provenance fails the whole record
/// with an `Err` string.
fn read_content_node(r: memory_capnp::content_node::Reader) -> Result<Node, String> {
    Ok(Node {
        uuid: read_uuid(r.get_uuid()),
        version: r.get_version(),
        timestamp: r.get_timestamp(),
        // Wire enum -> domain enum, one arm per known variant.
        node_type: match r.get_node_type().map_err(|_| "bad node_type")? {
            memory_capnp::NodeType::EpisodicSession => NodeType::EpisodicSession,
            memory_capnp::NodeType::EpisodicDaily => NodeType::EpisodicDaily,
            memory_capnp::NodeType::EpisodicWeekly => NodeType::EpisodicWeekly,
            memory_capnp::NodeType::Semantic => NodeType::Semantic,
        },
        provenance: read_provenance(r.get_provenance().map_err(|_| "bad provenance")?)?,
        key: read_text(r.get_key()),
        content: read_text(r.get_content()),
        weight: r.get_weight(),
        category: match r.get_category().map_err(|_| "bad category")? {
            memory_capnp::Category::General => Category::General,
            memory_capnp::Category::Core => Category::Core,
            memory_capnp::Category::Technical => Category::Technical,
            memory_capnp::Category::Observation => Category::Observation,
            memory_capnp::Category::Task => Category::Task,
        },
        emotion: r.get_emotion(),
        deleted: r.get_deleted(),
        source_ref: read_text(r.get_source_ref()),
        created: read_text(r.get_created()),
        retrievals: r.get_retrievals(),
        uses: r.get_uses(),
        wrongs: r.get_wrongs(),
        state_tag: read_text(r.get_state_tag()),
        last_replayed: r.get_last_replayed(),
        spaced_repetition_interval: r.get_spaced_repetition_interval(),
        // Graph-derived metrics are not persisted in the capnp log; they
        // start as None and are recomputed from the rebuilt graph elsewhere.
        community_id: None,
        clustering_coefficient: None,
        schema_fit: None,
        degree: None,
    })
}
|
|
|
|
fn read_provenance(p: memory_capnp::Provenance) -> Result<Provenance, String> {
|
|
Ok(match p {
|
|
memory_capnp::Provenance::Manual => Provenance::Manual,
|
|
memory_capnp::Provenance::Journal => Provenance::Journal,
|
|
memory_capnp::Provenance::Agent => Provenance::Agent,
|
|
memory_capnp::Provenance::Dream => Provenance::Dream,
|
|
memory_capnp::Provenance::Derived => Provenance::Derived,
|
|
})
|
|
}
|
|
|
|
/// Serialize a `Node` into a capnp ContentNode builder.
///
/// Field-for-field inverse of `read_content_node`, except the runtime-only
/// graph metrics (community_id, clustering_coefficient, schema_fit, degree)
/// are intentionally not written to the log.
fn write_content_node(mut b: memory_capnp::content_node::Builder, node: &Node) {
    b.set_uuid(&node.uuid);
    b.set_version(node.version);
    b.set_timestamp(node.timestamp);
    // Domain enum -> wire enum, one arm per variant.
    b.set_node_type(match node.node_type {
        NodeType::EpisodicSession => memory_capnp::NodeType::EpisodicSession,
        NodeType::EpisodicDaily => memory_capnp::NodeType::EpisodicDaily,
        NodeType::EpisodicWeekly => memory_capnp::NodeType::EpisodicWeekly,
        NodeType::Semantic => memory_capnp::NodeType::Semantic,
    });
    b.set_provenance(match node.provenance {
        Provenance::Manual => memory_capnp::Provenance::Manual,
        Provenance::Journal => memory_capnp::Provenance::Journal,
        Provenance::Agent => memory_capnp::Provenance::Agent,
        Provenance::Dream => memory_capnp::Provenance::Dream,
        Provenance::Derived => memory_capnp::Provenance::Derived,
    });
    b.set_key(&node.key);
    b.set_content(&node.content);
    b.set_weight(node.weight);
    b.set_category(match node.category {
        Category::General => memory_capnp::Category::General,
        Category::Core => memory_capnp::Category::Core,
        Category::Technical => memory_capnp::Category::Technical,
        Category::Observation => memory_capnp::Category::Observation,
        Category::Task => memory_capnp::Category::Task,
    });
    b.set_emotion(node.emotion);
    b.set_deleted(node.deleted);
    b.set_source_ref(&node.source_ref);
    b.set_created(&node.created);
    b.set_retrievals(node.retrievals);
    b.set_uses(node.uses);
    b.set_wrongs(node.wrongs);
    b.set_state_tag(&node.state_tag);
    b.set_last_replayed(node.last_replayed);
    b.set_spaced_repetition_interval(node.spaced_repetition_interval);
}
|
|
|
|
fn read_relation(r: memory_capnp::relation::Reader) -> Result<Relation, String> {
|
|
Ok(Relation {
|
|
uuid: read_uuid(r.get_uuid()),
|
|
version: r.get_version(),
|
|
timestamp: r.get_timestamp(),
|
|
source: read_uuid(r.get_source()),
|
|
target: read_uuid(r.get_target()),
|
|
rel_type: match r.get_rel_type().map_err(|_| "bad rel_type")? {
|
|
memory_capnp::RelationType::Link => RelationType::Link,
|
|
memory_capnp::RelationType::Causal => RelationType::Causal,
|
|
memory_capnp::RelationType::Auto => RelationType::Auto,
|
|
},
|
|
strength: r.get_strength(),
|
|
provenance: read_provenance(r.get_provenance().map_err(|_| "bad provenance")?)?,
|
|
deleted: r.get_deleted(),
|
|
source_key: read_text(r.get_source_key()),
|
|
target_key: read_text(r.get_target_key()),
|
|
})
|
|
}
|
|
|
|
fn write_relation(mut b: memory_capnp::relation::Builder, rel: &Relation) {
|
|
b.set_uuid(&rel.uuid);
|
|
b.set_version(rel.version);
|
|
b.set_timestamp(rel.timestamp);
|
|
b.set_source(&rel.source);
|
|
b.set_target(&rel.target);
|
|
b.set_rel_type(match rel.rel_type {
|
|
RelationType::Link => memory_capnp::RelationType::Link,
|
|
RelationType::Causal => memory_capnp::RelationType::Causal,
|
|
RelationType::Auto => memory_capnp::RelationType::Auto,
|
|
});
|
|
b.set_strength(rel.strength);
|
|
b.set_provenance(match rel.provenance {
|
|
Provenance::Manual => memory_capnp::Provenance::Manual,
|
|
Provenance::Journal => memory_capnp::Provenance::Journal,
|
|
Provenance::Agent => memory_capnp::Provenance::Agent,
|
|
Provenance::Dream => memory_capnp::Provenance::Dream,
|
|
Provenance::Derived => memory_capnp::Provenance::Derived,
|
|
});
|
|
b.set_deleted(rel.deleted);
|
|
b.set_source_key(&rel.source_key);
|
|
b.set_target_key(&rel.target_key);
|
|
}
|