Rust core: - Cap'n Proto append-only storage (nodes + relations) - Graph algorithms: clustering coefficient, community detection, schema fit, small-world metrics, interference detection - BM25 text similarity with Porter stemming - Spaced repetition replay queue - Commands: search, init, health, status, graph, categorize, link-add, link-impact, decay, consolidate-session, etc. Python scripts: - Episodic digest pipeline: daily/weekly/monthly-digest.py - retroactive-digest.py for backfilling - consolidation-agents.py: 3 parallel Sonnet agents - apply-consolidation.py: structured action extraction + apply - digest-link-parser.py: extract ~400 explicit links from digests - content-promotion-agent.py: promote episodic obs to semantic files - bulk-categorize.py: categorize all nodes via single Sonnet call - consolidation-loop.py: multi-round automated consolidation Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
386 lines
12 KiB
Rust
386 lines
12 KiB
Rust
// Migration from old weights.json + markdown marker system
|
|
//
|
|
// Reads:
|
|
// ~/.claude/memory/weights.json (1,874 entries with metrics)
|
|
// ~/.claude/memory/*.md (content + mem markers + edges)
|
|
//
|
|
// Emits:
|
|
// ~/.claude/memory/nodes.capnp (all nodes with preserved metadata)
|
|
// ~/.claude/memory/relations.capnp (all edges from markers + md links)
|
|
// ~/.claude/memory/state.json (derived cache)
|
|
//
|
|
// Old files are preserved as backup. Run once.
|
|
|
|
use crate::capnp_store::{
|
|
self, Store, Node, Category, NodeType, Provenance, RelationType,
|
|
parse_units,
|
|
};
|
|
|
|
use serde::Deserialize;
|
|
use uuid::Uuid;
|
|
|
|
use std::collections::HashMap;
|
|
use std::env;
|
|
use std::fs;
|
|
use std::path::{Path, PathBuf};
|
|
use std::time::{SystemTime, UNIX_EPOCH};
|
|
|
|
/// Returns the user's home directory, read from `$HOME`.
///
/// Uses `var_os` rather than `var` so a non-UTF-8 `$HOME` still works
/// (paths need not be valid UTF-8 on Unix). A missing `$HOME` is fatal:
/// every path this migration touches lives under it.
fn home() -> PathBuf {
    PathBuf::from(env::var_os("HOME").expect("HOME not set"))
}
|
|
|
|
/// Current time as seconds since the Unix epoch, with sub-second precision.
///
/// # Panics
/// Panics only if the system clock is set before 1970-01-01, which we treat
/// as an unrecoverable environment error for a one-shot migration tool.
fn now_epoch() -> f64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system clock is set before the UNIX epoch")
        .as_secs_f64()
}
|
|
|
|
// Old system data structures (just enough for deserialization)
|
|
|
|
// Top-level shape of the legacy weights.json file. All three fields are
// `#[serde(default)]` so a partial or empty JSON object still deserializes.
#[derive(Deserialize)]
struct OldStore {
    // key -> per-memory metrics (weight, usage counters, category)
    #[serde(default)]
    entries: HashMap<String, OldEntry>,
    // history of retrieval queries; copied 1:1 into the new store
    #[serde(default)]
    retrieval_log: Vec<OldRetrievalEvent>,
    // tuning parameters; falls back to OldParams::default() when absent
    #[serde(default)]
    params: OldParams,
}
|
|
|
|
// One entry from the legacy weights.json `entries` map. Only the fields the
// migration needs are modelled; serde ignores any other keys in the JSON.
#[derive(Deserialize)]
struct OldEntry {
    // activation weight, carried into Node.weight (narrowed f64 -> f32)
    weight: f64,
    // creation timestamp, copied verbatim as a string (format not parsed here)
    created: String,
    // legacy usage timestamps; accepted for schema tolerance but not read by
    // the visible migration code
    #[serde(default)]
    last_retrieved: Option<String>,
    #[serde(default)]
    last_used: Option<String>,
    // lifetime counters, copied verbatim into the new Node
    #[serde(default)]
    retrievals: u32,
    #[serde(default)]
    uses: u32,
    #[serde(default)]
    wrongs: u32,
    // free-form category name, mapped to the Category enum via
    // parse_old_category()
    #[serde(default = "default_category")]
    category: String,
}
|
|
|
|
// serde fallback for OldEntry.category when the JSON omits the field.
fn default_category() -> String {
    String::from("General")
}
|
|
|
|
// One event from the legacy retrieval log; migrate() copies these 1:1 into
// capnp_store::RetrievalEvent.
#[derive(Deserialize)]
struct OldRetrievalEvent {
    // the search query text
    query: String,
    // event timestamp, kept in its original string form
    timestamp: String,
    // memory keys returned by the retrieval
    results: Vec<String>,
    // subset of results that were actually used, if recorded
    #[serde(default)]
    used: Option<Vec<String>>,
}
|
|
|
|
// Tuning parameters of the legacy decay model; migrate() copies every field
// verbatim into the new store's params. Each field's serde default matches
// the corresponding value in OldParams::default() below, so a
// partially-specified `params` object deserializes consistently.
#[derive(Deserialize)]
struct OldParams {
    // presumably the initial weight for new entries — semantics live in the
    // old system, not in this file
    #[serde(default = "default_0_7")]
    default_weight: f64,
    #[serde(default = "default_0_95")]
    decay_factor: f64,
    #[serde(default = "default_0_15")]
    use_boost: f64,
    #[serde(default = "default_0_1")]
    prune_threshold: f64,
    #[serde(default = "default_0_3")]
    edge_decay: f64,
    #[serde(default = "default_3")]
    max_hops: u32,
    #[serde(default = "default_0_05")]
    min_activation: f64,
}
|
|
|
|
impl Default for OldParams {
|
|
fn default() -> Self {
|
|
OldParams {
|
|
default_weight: 0.7,
|
|
decay_factor: 0.95,
|
|
use_boost: 0.15,
|
|
prune_threshold: 0.1,
|
|
edge_decay: 0.3,
|
|
max_hops: 3,
|
|
min_activation: 0.05,
|
|
}
|
|
}
|
|
}
|
|
|
|
// serde's `#[serde(default = "...")]` takes a function path, not a literal,
// so each fallback value needs a tiny named constructor. The values mirror
// OldParams::default().
fn default_0_7() -> f64 { 0.7 }
fn default_0_95() -> f64 { 0.95 }
fn default_0_15() -> f64 { 0.15 }
fn default_0_1() -> f64 { 0.1 }
fn default_0_3() -> f64 { 0.3 }
fn default_3() -> u32 { 3 }
fn default_0_05() -> f64 { 0.05 }
|
|
|
|
fn parse_old_category(s: &str) -> Category {
|
|
match s {
|
|
"Core" | "core" => Category::Core,
|
|
"Technical" | "technical" | "tech" => Category::Technical,
|
|
"Observation" | "observation" | "obs" => Category::Observation,
|
|
"Task" | "task" => Category::Task,
|
|
_ => Category::General,
|
|
}
|
|
}
|
|
|
|
pub fn migrate() -> Result<(), String> {
|
|
let weights_path = home().join(".claude/memory/weights.json");
|
|
let memory_dir = home().join(".claude/memory");
|
|
let nodes_path = memory_dir.join("nodes.capnp");
|
|
let rels_path = memory_dir.join("relations.capnp");
|
|
|
|
// Safety check
|
|
if nodes_path.exists() || rels_path.exists() {
|
|
return Err("nodes.capnp or relations.capnp already exist. \
|
|
Remove them first if you want to re-migrate.".into());
|
|
}
|
|
|
|
// Load old store
|
|
let old_store: OldStore = if weights_path.exists() {
|
|
let data = fs::read_to_string(&weights_path)
|
|
.map_err(|e| format!("read weights.json: {}", e))?;
|
|
serde_json::from_str(&data)
|
|
.map_err(|e| format!("parse weights.json: {}", e))?
|
|
} else {
|
|
eprintln!("Warning: no weights.json found, migrating markdown only");
|
|
OldStore {
|
|
entries: HashMap::new(),
|
|
retrieval_log: Vec::new(),
|
|
params: OldParams::default(),
|
|
}
|
|
};
|
|
|
|
eprintln!("Old store: {} entries, {} retrieval events",
|
|
old_store.entries.len(), old_store.retrieval_log.len());
|
|
|
|
// Scan markdown files to get content + edges
|
|
let mut units_by_key: HashMap<String, capnp_store::MemoryUnit> = HashMap::new();
|
|
scan_markdown_dir(&memory_dir, &mut units_by_key)?;
|
|
|
|
eprintln!("Scanned {} markdown units", units_by_key.len());
|
|
|
|
// Create new store
|
|
let mut store = Store::default();
|
|
|
|
// Migrate params
|
|
store.params.default_weight = old_store.params.default_weight;
|
|
store.params.decay_factor = old_store.params.decay_factor;
|
|
store.params.use_boost = old_store.params.use_boost;
|
|
store.params.prune_threshold = old_store.params.prune_threshold;
|
|
store.params.edge_decay = old_store.params.edge_decay;
|
|
store.params.max_hops = old_store.params.max_hops;
|
|
store.params.min_activation = old_store.params.min_activation;
|
|
|
|
// Migrate retrieval log
|
|
store.retrieval_log = old_store.retrieval_log.iter().map(|e| {
|
|
capnp_store::RetrievalEvent {
|
|
query: e.query.clone(),
|
|
timestamp: e.timestamp.clone(),
|
|
results: e.results.clone(),
|
|
used: e.used.clone(),
|
|
}
|
|
}).collect();
|
|
|
|
// Phase 1: Create nodes
|
|
// Merge old entries (weight metadata) with markdown units (content)
|
|
let mut all_nodes: Vec<Node> = Vec::new();
|
|
let mut key_to_uuid: HashMap<String, [u8; 16]> = HashMap::new();
|
|
|
|
// First, all entries from the old store
|
|
for (key, old_entry) in &old_store.entries {
|
|
let uuid = *Uuid::new_v4().as_bytes();
|
|
key_to_uuid.insert(key.clone(), uuid);
|
|
|
|
let content = units_by_key.get(key)
|
|
.map(|u| u.content.clone())
|
|
.unwrap_or_default();
|
|
|
|
let state_tag = units_by_key.get(key)
|
|
.and_then(|u| u.state.clone())
|
|
.unwrap_or_default();
|
|
|
|
let node = Node {
|
|
uuid,
|
|
version: 1,
|
|
timestamp: now_epoch(),
|
|
node_type: if key.contains("journal") {
|
|
NodeType::EpisodicSession
|
|
} else {
|
|
NodeType::Semantic
|
|
},
|
|
provenance: Provenance::Manual,
|
|
key: key.clone(),
|
|
content,
|
|
weight: old_entry.weight as f32,
|
|
category: parse_old_category(&old_entry.category),
|
|
emotion: 0.0,
|
|
deleted: false,
|
|
source_ref: String::new(),
|
|
created: old_entry.created.clone(),
|
|
retrievals: old_entry.retrievals,
|
|
uses: old_entry.uses,
|
|
wrongs: old_entry.wrongs,
|
|
state_tag,
|
|
last_replayed: 0.0,
|
|
spaced_repetition_interval: 1,
|
|
community_id: None,
|
|
clustering_coefficient: None,
|
|
schema_fit: None,
|
|
degree: None,
|
|
};
|
|
all_nodes.push(node);
|
|
}
|
|
|
|
// Then, any markdown units not in the old store
|
|
for (key, unit) in &units_by_key {
|
|
if key_to_uuid.contains_key(key) { continue; }
|
|
|
|
let uuid = *Uuid::new_v4().as_bytes();
|
|
key_to_uuid.insert(key.clone(), uuid);
|
|
|
|
let node = Node {
|
|
uuid,
|
|
version: 1,
|
|
timestamp: now_epoch(),
|
|
node_type: if key.contains("journal") {
|
|
NodeType::EpisodicSession
|
|
} else {
|
|
NodeType::Semantic
|
|
},
|
|
provenance: Provenance::Manual,
|
|
key: key.clone(),
|
|
content: unit.content.clone(),
|
|
weight: 0.7,
|
|
category: Category::General,
|
|
emotion: 0.0,
|
|
deleted: false,
|
|
source_ref: String::new(),
|
|
created: String::new(),
|
|
retrievals: 0,
|
|
uses: 0,
|
|
wrongs: 0,
|
|
state_tag: unit.state.clone().unwrap_or_default(),
|
|
last_replayed: 0.0,
|
|
spaced_repetition_interval: 1,
|
|
community_id: None,
|
|
clustering_coefficient: None,
|
|
schema_fit: None,
|
|
degree: None,
|
|
};
|
|
all_nodes.push(node);
|
|
}
|
|
|
|
// Write nodes to capnp log
|
|
store.append_nodes(&all_nodes)?;
|
|
for node in &all_nodes {
|
|
store.uuid_to_key.insert(node.uuid, node.key.clone());
|
|
store.nodes.insert(node.key.clone(), node.clone());
|
|
}
|
|
|
|
eprintln!("Migrated {} nodes", all_nodes.len());
|
|
|
|
// Phase 2: Create relations from markdown links + causal edges
|
|
let mut all_relations = Vec::new();
|
|
|
|
for (key, unit) in &units_by_key {
|
|
let source_uuid = match key_to_uuid.get(key) {
|
|
Some(u) => *u,
|
|
None => continue,
|
|
};
|
|
|
|
// Association links (bidirectional)
|
|
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
|
|
let target_uuid = match key_to_uuid.get(link) {
|
|
Some(u) => *u,
|
|
None => continue,
|
|
};
|
|
|
|
// Avoid duplicate relations
|
|
let exists = all_relations.iter().any(|r: &capnp_store::Relation|
|
|
(r.source == source_uuid && r.target == target_uuid) ||
|
|
(r.source == target_uuid && r.target == source_uuid));
|
|
if exists { continue; }
|
|
|
|
all_relations.push(Store::new_relation(
|
|
source_uuid, target_uuid,
|
|
RelationType::Link, 1.0,
|
|
key, link,
|
|
));
|
|
}
|
|
|
|
// Causal edges (directed)
|
|
for cause in &unit.causes {
|
|
let cause_uuid = match key_to_uuid.get(cause) {
|
|
Some(u) => *u,
|
|
None => continue,
|
|
};
|
|
|
|
all_relations.push(Store::new_relation(
|
|
cause_uuid, source_uuid,
|
|
RelationType::Causal, 1.0,
|
|
cause, key,
|
|
));
|
|
}
|
|
}
|
|
|
|
// Write relations to capnp log
|
|
store.append_relations(&all_relations)?;
|
|
store.relations = all_relations;
|
|
|
|
eprintln!("Migrated {} relations", store.relations.len());
|
|
|
|
// Phase 3: Compute graph metrics
|
|
store.update_graph_metrics();
|
|
|
|
// Save derived cache
|
|
store.save()?;
|
|
|
|
eprintln!("Migration complete. Files:");
|
|
eprintln!(" {}", nodes_path.display());
|
|
eprintln!(" {}", rels_path.display());
|
|
eprintln!(" {}", memory_dir.join("state.json").display());
|
|
|
|
// Verify
|
|
let g = store.build_graph();
|
|
eprintln!("\nVerification:");
|
|
eprintln!(" Nodes: {}", store.nodes.len());
|
|
eprintln!(" Relations: {}", store.relations.len());
|
|
eprintln!(" Graph edges: {}", g.edge_count());
|
|
eprintln!(" Communities: {}", g.community_count());
|
|
eprintln!(" Avg CC: {:.4}", g.avg_clustering_coefficient());
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn scan_markdown_dir(
|
|
dir: &Path,
|
|
units: &mut HashMap<String, capnp_store::MemoryUnit>,
|
|
) -> Result<(), String> {
|
|
let entries = fs::read_dir(dir)
|
|
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
|
|
|
|
for entry in entries.flatten() {
|
|
let path = entry.path();
|
|
if path.is_dir() {
|
|
scan_markdown_dir(&path, units)?;
|
|
continue;
|
|
}
|
|
let Some(ext) = path.extension() else { continue };
|
|
if ext != "md" { continue }
|
|
|
|
let filename = path.file_name().unwrap().to_string_lossy().to_string();
|
|
let content = match fs::read_to_string(&path) {
|
|
Ok(c) => c,
|
|
Err(_) => continue,
|
|
};
|
|
|
|
for unit in parse_units(&filename, &content) {
|
|
units.insert(unit.key.clone(), unit);
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|