store: remove dead code and move params to config

Remove:
- score_weight() - never called
- position field on Node - never read (was for export)
- Provenance enum - replaced by an inline helper used only for the capnp migration
- migrate_transcript_progress + CLI command
- init_from_markdown, import_file, ingest_units
- export command and export_to_markdown
- RetrievalEvent, GapRecord types
- classify_filename, new_transcript_segment

Move spreading activation params to Config:
- default_node_weight, edge_decay, max_hops, min_activation
- Remove Params struct and StoreView::params()

Simplify cmd_init to just seed identity via upsert().
Simplify cmd_import to use parse_units + upsert directly.

-576 lines

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-13 18:50:21 -04:00
parent 6104c63890
commit 7d49f29fde
10 changed files with 69 additions and 576 deletions

View file

@ -760,10 +760,10 @@ fn run_spread(
stage: &AlgoStage,
_debug: bool,
) -> Vec<(String, f64)> {
let store_params = store.params();
let max_hops = stage.param_u32("max_hops", store_params.max_hops);
let edge_decay = stage.param_f64("edge_decay", store_params.edge_decay);
let min_activation = stage.param_f64("min_activation", store_params.min_activation * 0.1);
let cfg = crate::config::get();
let max_hops = stage.param_u32("max_hops", cfg.max_hops);
let edge_decay = stage.param_f64("edge_decay", cfg.edge_decay);
let min_activation = stage.param_f64("min_activation", cfg.min_activation * 0.1);
spreading_activation(seeds, graph, store, max_hops, edge_decay, min_activation)
}

View file

@ -25,8 +25,7 @@ pub mod db;
pub use types::{
memory_dir, nodes_path,
now_epoch, epoch_to_local, format_date, format_datetime, format_datetime_space, compact_timestamp, today,
Node, Relation, NodeType, Provenance, RelationType,
RetrievalEvent, Params, GapRecord, Store,
Node, Relation, NodeType, RelationType, Store,
new_node, new_relation,
};
pub use parse::{MemoryUnit, parse_units};
@ -36,12 +35,7 @@ pub use ops::current_provenance;
use crate::graph::{self, Graph};
use anyhow::{bail, Context, Result};
use std::fs;
use std::io::Write as IoWrite;
use std::path::Path;
use parse::classify_filename;
use anyhow::{bail, Result};
/// Strip .md suffix from a key, handling both bare keys and section keys.
/// "identity.md" → "identity", "foo.md#section" → "foo#section", "identity" → "identity"
@ -81,264 +75,4 @@ impl Store {
n => bail!("Too many matches for '{}' ({}). Be more specific.", target, n),
}
}
/// Map a link target onto the `(key, uuid)` of the node it names.
///
/// The target is first normalised with `strip_md_suffix`; `None` is
/// returned when no node is stored under the normalised key.
fn resolve_node_uuid(&self, target: &str) -> Option<(String, [u8; 16])> {
    let key = strip_md_suffix(target);
    let uuid = self.nodes.get(&key).map(|node| node.uuid)?;
    Some((key, uuid))
}
/// Best-effort append of one retrieval summary line to `retrieval.log`
/// inside the memory directory; callable without a Store instance.
///
/// The line records today's date, the query text and the number of hits.
/// Failing to open or write the log is deliberately ignored.
pub fn log_retrieval_static(query: &str, results: &[String]) {
    let log_path = memory_dir().join("retrieval.log");
    let hits = results.len();
    let entry = format!("[{}] q=\"{}\" hits={}\n", today(), query, hits);

    let opened = fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&log_path);
    let Ok(mut file) = opened else { return };

    // One write_all call per entry, result intentionally discarded.
    let _ = file.write_all(entry.as_bytes());
}
/// Index every memory unit found in the markdown files under the memory
/// directory.
///
/// Returns the count reported by `scan_dir_for_init`, or 0 when the
/// memory directory does not exist.
pub fn init_from_markdown(&mut self) -> Result<usize> {
    let root = memory_dir();
    if !root.exists() {
        return Ok(0);
    }

    // Snapshot the existing edges once so ingestion can dedup in O(1).
    let mut known_edges = self.build_edge_set();
    self.scan_dir_for_init(&root, &mut known_edges)
}
/// Collect the UUID pairs of all current relations into a HashSet for
/// O(1) duplicate checks. Every relation contributes both orientations,
/// (source, target) and (target, source).
fn build_edge_set(&self) -> std::collections::HashSet<([u8; 16], [u8; 16])> {
    let mut pairs = std::collections::HashSet::with_capacity(self.relations.len() * 2);
    pairs.extend(
        self.relations
            .iter()
            .flat_map(|rel| [(rel.source, rel.target), (rel.target, rel.source)]),
    );
    pairs
}
/// Recursively walk `dir`, ingest every `.md` file found, and create
/// relations for the links and causes declared by its units.
///
/// `edge_set` holds the (source, target) UUID pairs that already exist, so
/// each candidate edge is checked and recorded without rescanning
/// `self.relations`. It is updated in place as edges are created.
///
/// Returns the number of newly created nodes; updated nodes are not counted.
///
/// Errors from reading a directory or file, from `ingest_units`, and from
/// `append_relations` are propagated to the caller.
fn scan_dir_for_init(
&mut self,
dir: &Path,
edge_set: &mut std::collections::HashSet<([u8; 16], [u8; 16])>,
) -> Result<usize> {
let mut count = 0;
let entries = fs::read_dir(dir)
.with_context(|| format!("read dir {}", dir.display()))?;
// `flatten()` skips directory entries that failed to be read.
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
// Descend into subdirectories, sharing the same edge set.
count += self.scan_dir_for_init(&path, edge_set)?;
continue;
}
// Only files with an `md` extension are ingested.
let Some(ext) = path.extension() else { continue };
if ext != "md" { continue }
// A path with an extension always has a file name, so this unwrap cannot fail.
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = fs::read_to_string(&path)
.with_context(|| format!("read {}", path.display()))?;
let units = parse_units(&filename, &content);
// Only the count of new nodes is kept; the updated count is discarded.
let (new_count, _) = self.ingest_units(&units, &filename)?;
count += new_count;
// Create relations from the links and causes declared by each unit.
let mut new_relations = Vec::new();
for unit in &units {
let source_uuid = match self.nodes.get(&unit.key) {
Some(n) => n.uuid,
None => continue,
};
// Link edges: targets that do not resolve to a node in the store are skipped.
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
let Some((key, uuid)) = self.resolve_node_uuid(link) else { continue };
if !edge_set.contains(&(source_uuid, uuid)) {
// Links are recorded in both orientations in the dedup set.
edge_set.insert((source_uuid, uuid));
edge_set.insert((uuid, source_uuid));
new_relations.push(new_relation(
source_uuid, uuid, RelationType::Link, 1.0,
&unit.key, &key,
));
}
}
// Causal edges point from the cause to this unit and are recorded in
// the dedup set in that single orientation only.
for cause in &unit.causes {
let Some((key, uuid)) = self.resolve_node_uuid(cause) else { continue };
if !edge_set.contains(&(uuid, source_uuid)) {
edge_set.insert((uuid, source_uuid));
new_relations.push(new_relation(
uuid, source_uuid, RelationType::Causal, 1.0,
&key, &unit.key,
));
}
}
}
// Persist first, then mirror the new relations into the in-memory list.
if !new_relations.is_empty() {
self.append_relations(&new_relations)?;
self.relations.extend(new_relations);
}
}
Ok(count)
}
/// Process parsed memory units: diff against existing nodes, persist changes.
/// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs.
///
/// `filename` determines the node type assigned to newly created nodes
/// (via `classify_filename`); the node type of an existing node is left
/// untouched when it is updated.
///
/// Returns `(new_count, updated_count)`.
fn ingest_units(&mut self, units: &[MemoryUnit], filename: &str) -> Result<(usize, usize)> {
// The lock guard lives until the end of the function.
let _lock = types::StoreLock::acquire()?;
self.refresh_nodes()?;
let node_type = classify_filename(filename);
let mut new_nodes = Vec::new();
let mut updated_nodes = Vec::new();
// `pos` is the unit's index within the file and is stored as the node position.
for (pos, unit) in units.iter().enumerate() {
if let Some(existing) = self.nodes.get(&unit.key) {
// An existing node is rewritten only when its content or position changed.
if existing.content != unit.content || existing.position != pos as u32 {
let mut node = existing.clone();
node.content = unit.content.clone();
node.position = pos as u32;
node.version += 1;
// state/source_ref are overwritten only when the unit supplies them.
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
updated_nodes.push(node);
}
} else {
let mut node = new_node(&unit.key, &unit.content);
node.node_type = node_type;
node.position = pos as u32;
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
new_nodes.push(node);
}
}
// Persist new nodes, then register them in both in-memory indexes.
if !new_nodes.is_empty() {
self.append_nodes_unlocked(&new_nodes)?;
for node in &new_nodes {
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node.clone());
}
}
// Updated nodes keep their UUID, so only the key index is refreshed.
if !updated_nodes.is_empty() {
self.append_nodes_unlocked(&updated_nodes)?;
for node in &updated_nodes {
self.nodes.insert(node.key.clone(), node.clone());
}
}
Ok((new_nodes.len(), updated_nodes.len()))
}
/// Import a markdown file into the store, parsing it into nodes.
pub fn import_file(&mut self, path: &Path) -> Result<(usize, usize)> {
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = fs::read_to_string(path)
.with_context(|| format!("read {}", path.display()))?;
let units = parse_units(&filename, &content);
self.ingest_units(&units, &filename)
}
/// Collect the node stored under `file_key` together with every node whose
/// key is `file_key#...`, ordered by ascending `position`.
///
/// Returns `None` when no node matches.
pub fn file_sections(&self, file_key: &str) -> Option<Vec<&Node>> {
    // A key belongs to the file when it is exactly `file_key` or continues
    // with a `#` right after it.
    let belongs_to_file = |key: &str| {
        key.strip_prefix(file_key)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('#'))
    };

    let mut matched: Vec<&Node> = self
        .nodes
        .values()
        .filter(|node| belongs_to_file(&node.key))
        .collect();
    if matched.is_empty() {
        return None;
    }

    // Stable sort, as in sort_by_key.
    matched.sort_by(|a, b| a.position.cmp(&b.position));
    Some(matched)
}
/// Concatenate the content of every section of `file_key` into plain text
/// (no mem markers).
///
/// Each section is newline-terminated if necessary and followed by a blank
/// line; trailing whitespace of the final result is trimmed. Returns `None`
/// when the file key has no sections.
pub fn render_file(&self, file_key: &str) -> Option<String> {
    let mut rendered = String::new();
    for section in self.file_sections(file_key)? {
        let body = section.content.as_str();
        rendered.push_str(body);
        if !body.ends_with('\n') {
            rendered.push('\n');
        }
        rendered.push('\n');
    }
    Some(rendered.trim_end().to_string())
}
/// Rebuild the markdown for `file_key`, re-emitting a `<!-- mem: ... -->`
/// marker in front of every section node (any node whose key contains `#`).
///
/// The marker carries the section id (text after the last `#`), the
/// targets of the node's non-deleted, non-causal outgoing relations as
/// `links=`, and the sources of its non-deleted causal incoming relations
/// as `causes=`. Returns `None` when the file key has no sections.
pub fn export_to_markdown(&self, file_key: &str) -> Option<String> {
    let mut markdown = String::new();
    for section in self.file_sections(file_key)? {
        // rsplit_once succeeds exactly when the key contains '#'.
        if let Some((_, section_id)) = section.key.rsplit_once('#') {
            let live = || self.relations.iter().filter(|rel| !rel.deleted);

            let links: Vec<String> = live()
                .filter(|rel| {
                    rel.rel_type != RelationType::Causal && rel.source_key == section.key
                })
                .map(|rel| rel.target_key.clone())
                .collect();
            let causes: Vec<String> = live()
                .filter(|rel| {
                    rel.rel_type == RelationType::Causal && rel.target_key == section.key
                })
                .map(|rel| rel.source_key.clone())
                .collect();

            let mut attrs = vec![format!("id={}", section_id)];
            if !links.is_empty() {
                attrs.push(format!("links={}", links.join(",")));
            }
            if !causes.is_empty() {
                attrs.push(format!("causes={}", causes.join(",")));
            }
            markdown.push_str(&format!("<!-- mem: {} -->\n", attrs.join(" ")));
        }

        let body = section.content.as_str();
        markdown.push_str(body);
        if !body.ends_with('\n') {
            markdown.push('\n');
        }
        markdown.push('\n');
    }
    Some(markdown.trim_end().to_string())
}
/// Find the episodic session node that best matches the given entry text.
///
/// Up to five whitespace-separated words longer than five bytes are taken
/// from `entry_text`; each `EpisodicSession` node is scored by how many of
/// those words occur in its content (case-insensitively). The key of the
/// highest-scoring node is returned.
///
/// Returns `None` when `entry_text` is empty, when it yields no candidate
/// words, or when no session node contains any of them. Ties are resolved
/// in favour of the lexicographically smallest key so the result does not
/// depend on HashMap iteration order.
pub fn find_journal_node(&self, entry_text: &str) -> Option<String> {
    if entry_text.is_empty() {
        return None;
    }

    // Lowercase the query words once instead of once per node.
    let words: Vec<String> = entry_text
        .split_whitespace()
        .filter(|w| w.len() > 5)
        .take(5)
        .map(str::to_lowercase)
        .collect();

    let mut best: Option<(usize, &String)> = None;
    for (key, node) in &self.nodes {
        if node.node_type != NodeType::EpisodicSession {
            continue;
        }
        let content_lower = node.content.to_lowercase();
        let score = words
            .iter()
            .filter(|w| content_lower.contains(w.as_str()))
            .count();
        // A node matching no word is never a candidate.
        if score == 0 {
            continue;
        }
        let is_better = match best {
            None => true,
            Some((best_score, best_key)) => {
                score > best_score || (score == best_score && key < best_key)
            }
        };
        if is_better {
            best = Some((score, key));
        }
    }

    // Only the winning key is cloned.
    best.map(|(_, key)| key.clone())
}
}

View file

@ -268,22 +268,6 @@ impl Store {
Ok((old, weight))
}
/// Update a node's weight with a new score and record the scoring
/// timestamp. Uses asymmetric smoothing: responds quickly to high
/// scores (alpha=0.5) but decays slowly on low scores (alpha=0.1).
/// This keeps memories surfaced even if they're only useful 1 in 4 times.
/// The resulting weight is clamped to `[0.01, 1.0]`.
/// Returns (old_weight, new_weight).
///
/// # Errors
///
/// Fails when no node exists under `key`, or when `score` is NaN. A NaN
/// score would otherwise pass through the clamp and be stored as the
/// node's weight.
pub fn score_weight(&mut self, key: &str, score: f64) -> Result<(f32, f32)> {
    let node = self.nodes.get_mut(key)
        .ok_or_else(|| anyhow!("node not found: {}", key))?;
    if score.is_nan() {
        return Err(anyhow!("invalid score for {}: NaN", key));
    }
    let old = node.weight;
    // Rise fast, decay slowly.
    let alpha = if score > old as f64 { 0.5 } else { 0.1 };
    let new = (alpha * score + (1.0 - alpha) * old as f64) as f32;
    node.weight = new.clamp(0.01, 1.0);
    node.last_scored = chrono::Utc::now().timestamp();
    Ok((old, node.weight))
}
/// Set the strength of a link between two nodes. Deduplicates if
/// multiple links exist. Returns the old strength, or error if no link.
pub fn set_link_strength(&mut self, source: &str, target: &str, strength: f32) -> Result<f32> {

View file

@ -5,8 +5,6 @@
// becomes the file-level unit. Links and causal edges are extracted from
// both marker attributes and inline markdown links.
use super::NodeType;
use regex::Regex;
use std::collections::HashMap;
@ -23,15 +21,6 @@ pub struct MemoryUnit {
pub source_ref: Option<String>,
}
/// Derive the node type from a memory file name.
///
/// A trailing `.md` is ignored. Names starting with `daily-`, `weekly-` or
/// `monthly-` map to the matching episodic type, the exact name `journal`
/// maps to `EpisodicSession`, and everything else is `Semantic`.
pub(super) fn classify_filename(filename: &str) -> NodeType {
    let stem = filename.strip_suffix(".md").unwrap_or(filename);
    match stem {
        s if s.starts_with("daily-") => NodeType::EpisodicDaily,
        s if s.starts_with("weekly-") => NodeType::EpisodicWeekly,
        s if s.starts_with("monthly-") => NodeType::EpisodicMonthly,
        "journal" => NodeType::EpisodicSession,
        _ => NodeType::Semantic,
    }
}
pub fn parse_units(raw_filename: &str, content: &str) -> Vec<MemoryUnit> {
let filename = raw_filename.strip_suffix(".md").unwrap_or(raw_filename);
static MARKER_RE: OnceLock<Regex> = OnceLock::new();

View file

@ -478,68 +478,6 @@ impl Store {
Ok(())
}
/// Migrate old stub-node transcript markers into the new progress log.
/// Reads _observed-transcripts-f-*, _mined-transcripts#f-*, and _facts-* keys,
/// extracts transcript_id and segment_index, writes to transcript-progress.capnp,
/// then deletes the stub nodes.
///
/// Returns the number of progress segments written. The stub nodes are
/// soft-deleted (their `deleted` flag is set) and the store is saved when
/// at least one key matched the deletion filter.
///
/// NOTE(review): the deletion filter below is broader than the migration
/// patterns — keys that fail to parse (no `.segment` suffix, non-numeric
/// segment, non-UUID `_facts-` key) are not migrated but are still
/// soft-deleted. Confirm this is intended.
pub fn migrate_transcript_progress(&mut self) -> Result<usize> {
let mut segments = Vec::new();
for key in self.nodes.keys() {
// _observed-transcripts-f-{UUID}.{segment}
if let Some(rest) = key.strip_prefix("_observed-transcripts-f-") {
// The segment index is whatever follows the last '.'; keys without a
// numeric segment are silently ignored.
if let Some((uuid, seg_str)) = rest.rsplit_once('.')
&& let Ok(seg) = seg_str.parse::<u32>() {
segments.push(new_transcript_segment(uuid, seg, "observation"));
}
}
// _mined-transcripts#f-{UUID}.{segment}
else if let Some(rest) = key.strip_prefix("_mined-transcripts#f-") {
if let Some((uuid, seg_str)) = rest.rsplit_once('.')
&& let Ok(seg) = seg_str.parse::<u32>() {
segments.push(new_transcript_segment(uuid, seg, "experience"));
}
}
// _mined-transcripts-f-{UUID}.{segment}
else if let Some(rest) = key.strip_prefix("_mined-transcripts-f-") {
if let Some((uuid, seg_str)) = rest.rsplit_once('.')
&& let Ok(seg) = seg_str.parse::<u32>() {
segments.push(new_transcript_segment(uuid, seg, "experience"));
}
}
// _facts-{UUID} (whole-file, segment 0)
else if let Some(uuid) = key.strip_prefix("_facts-") {
if !uuid.contains('-') || uuid.len() < 30 { continue; } // skip non-UUID
segments.push(new_transcript_segment(uuid, 0, "fact"));
}
}
let count = segments.len();
// Write the progress log before touching the stub nodes, so a failed
// append leaves the stubs in place.
if count > 0 {
self.append_transcript_progress(&segments)?;
}
// Soft-delete the old stub nodes
// (keys are collected first because the map is mutated below).
let keys_to_delete: Vec<String> = self.nodes.keys()
.filter(|k| k.starts_with("_observed-transcripts-")
|| k.starts_with("_mined-transcripts")
|| (k.starts_with("_facts-") && !k.contains("fact_mine")))
.cloned()
.collect();
for key in &keys_to_delete {
if let Some(node) = self.nodes.get_mut(key) {
node.deleted = true;
}
}
if !keys_to_delete.is_empty() {
self.save()?;
}
Ok(count)
}
/// Record visits for a batch of node keys from a successful agent run.
pub fn record_agent_visits(&mut self, node_keys: &[String], agent: &str) -> Result<()> {
let visits: Vec<AgentVisit> = node_keys.iter()

View file

@ -204,10 +204,6 @@ pub struct Node {
pub last_replayed: i64,
pub spaced_repetition_interval: u32,
// Position within file (section index, for export ordering)
#[serde(default)]
pub position: u32,
// Stable creation timestamp (unix epoch seconds). Set once at creation;
// never updated on rename or content update. Zero for legacy nodes.
#[serde(default)]
@ -250,70 +246,6 @@ pub enum NodeType {
EpisodicMonthly,
}
/// Origin of a node or relation, as a closed set of variants.
///
/// Each variant has a string label (see `Provenance::label`); the
/// `Agent*` variants use labels of the form `agent:<name>`.
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
pub enum Provenance {
Manual,
Journal,
Agent, // legacy catch-all, prefer specific variants below
Dream,
Derived,
AgentExperienceMine,
AgentKnowledgeObservation,
AgentKnowledgePattern,
AgentKnowledgeConnector,
AgentKnowledgeChallenger,
AgentConsolidate,
AgentDigest,
AgentFactMine,
AgentDecay,
}
impl Provenance {
    /// Every variant, in declaration order. `from_label` searches this
    /// list, so the label strings live only in `label`.
    const ALL: [Self; 14] = [
        Self::Manual,
        Self::Journal,
        Self::Agent,
        Self::Dream,
        Self::Derived,
        Self::AgentExperienceMine,
        Self::AgentKnowledgeObservation,
        Self::AgentKnowledgePattern,
        Self::AgentKnowledgeConnector,
        Self::AgentKnowledgeChallenger,
        Self::AgentConsolidate,
        Self::AgentDigest,
        Self::AgentFactMine,
        Self::AgentDecay,
    ];

    /// Parse from POC_PROVENANCE env var. Returns None if the variable is
    /// unset (or unreadable) or holds an unknown label.
    pub fn from_env() -> Option<Self> {
        let raw = std::env::var("POC_PROVENANCE").ok()?;
        Self::from_label(&raw)
    }

    /// Look up the variant whose label equals `s` exactly; `None` for any
    /// other string.
    pub fn from_label(s: &str) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|candidate| candidate.label() == s)
    }

    /// The string label of this variant.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Manual => "manual",
            Self::Journal => "journal",
            Self::Agent => "agent",
            Self::Dream => "dream",
            Self::Derived => "derived",
            Self::AgentExperienceMine => "agent:experience-mine",
            Self::AgentKnowledgeObservation => "agent:knowledge-observation",
            Self::AgentKnowledgePattern => "agent:knowledge-pattern",
            Self::AgentKnowledgeConnector => "agent:knowledge-connector",
            Self::AgentKnowledgeChallenger => "agent:knowledge-challenger",
            Self::AgentConsolidate => "agent:consolidate",
            Self::AgentDigest => "agent:digest",
            Self::AgentFactMine => "agent:fact-mine",
            Self::AgentDecay => "agent:decay",
        }
    }
}
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
pub enum RelationType {
Link,
@ -324,13 +256,6 @@ pub enum RelationType {
capnp_enum!(NodeType, memory_capnp::NodeType,
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic, EpisodicMonthly]);
capnp_enum!(Provenance, memory_capnp::Provenance,
[Manual, Journal, Agent, Dream, Derived,
AgentExperienceMine, AgentKnowledgeObservation, AgentKnowledgePattern,
AgentKnowledgeConnector, AgentKnowledgeChallenger, AgentConsolidate,
AgentDigest, AgentFactMine, AgentDecay]);
capnp_enum!(RelationType, memory_capnp::RelationType,
[Link, Causal, Auto]);
@ -341,11 +266,32 @@ capnp_message!(Node,
uuid: [uuid],
prim: [version, timestamp, weight, emotion, deleted,
retrievals, uses, wrongs, last_replayed,
spaced_repetition_interval, position, created_at, last_scored],
spaced_repetition_interval, created_at, last_scored],
enm: [node_type: NodeType],
skip: [community_id, clustering_coefficient, degree],
);
/// Convert legacy capnp provenance enum to string label.
fn legacy_provenance_label(p: memory_capnp::Provenance) -> &'static str {
use memory_capnp::Provenance::*;
match p {
Manual => "manual",
Journal => "journal",
Agent => "agent",
Dream => "dream",
Derived => "derived",
AgentExperienceMine => "agent:experience-mine",
AgentKnowledgeObservation => "agent:knowledge-observation",
AgentKnowledgePattern => "agent:knowledge-pattern",
AgentKnowledgeConnector => "agent:knowledge-connector",
AgentKnowledgeChallenger => "agent:knowledge-challenger",
AgentConsolidate => "agent:consolidate",
AgentDigest => "agent:digest",
AgentFactMine => "agent:fact-mine",
AgentDecay => "agent:decay",
}
}
impl Node {
/// Read from capnp with migration: if the new provenance text field
/// is empty (old record), fall back to the deprecated provenanceOld enum.
@ -353,7 +299,7 @@ impl Node {
let mut node = Self::from_capnp(r)?;
if node.provenance.is_empty()
&& let Ok(old) = r.get_provenance_old() {
node.provenance = Provenance::from_capnp(old).label().to_string();
node.provenance = legacy_provenance_label(old).to_string();
}
// Sanitize timestamps: old capnp records have raw offsets instead
// of unix epoch. Anything past year 2100 (~4102444800) is bogus.
@ -383,52 +329,12 @@ impl Relation {
let mut rel = Self::from_capnp(r)?;
if rel.provenance.is_empty()
&& let Ok(old) = r.get_provenance_old() {
rel.provenance = Provenance::from_capnp(old).label().to_string();
rel.provenance = legacy_provenance_label(old).to_string();
}
Ok(rel)
}
}
/// One recorded retrieval: the query, when it happened, and what came back.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RetrievalEvent {
// The query text.
pub query: String,
// Timestamp as a string; the format is not fixed by this type.
pub timestamp: String,
// The results returned for the query (presumably node keys — verify against callers).
pub results: Vec<String>,
// NOTE(review): presumably the subset of results that was actually used,
// None when unknown — confirm against callers.
pub used: Option<Vec<String>>,
}
/// Tunable store parameters. The defaults are given by the `Default` impl below.
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct Params {
// Default: 0.7.
pub default_weight: f64,
// Default: 0.95.
pub decay_factor: f64,
// Default: 0.15.
pub use_boost: f64,
// Default: 0.1.
pub prune_threshold: f64,
// Default: 0.3.
pub edge_decay: f64,
// Default: 3.
pub max_hops: u32,
// Default: 0.05.
pub min_activation: f64,
}
impl Default for Params {
/// Built-in parameter values used for a freshly created store.
fn default() -> Self {
Params {
default_weight: 0.7,
decay_factor: 0.95,
use_boost: 0.15,
prune_threshold: 0.1,
edge_decay: 0.3,
max_hops: 3,
min_activation: 0.05,
}
}
}
/// Gap record — something we looked for but didn't find.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GapRecord {
// Free-text description of what was looked for.
pub description: String,
// Timestamp as a string; the format is not fixed by this type.
pub timestamp: String,
}
/// Per-node agent visit index: node_key → (agent_type → last_visit_timestamp)
pub(super) type VisitIndex = HashMap<String, HashMap<String, i64>>;
@ -437,9 +343,6 @@ pub struct Store {
pub nodes: HashMap<String, Node>, // key → latest node
pub uuid_to_key: HashMap<[u8; 16], String>, // uuid → key (rebuilt from nodes)
pub relations: Vec<Relation>, // all active relations
pub retrieval_log: Vec<RetrievalEvent>,
pub gaps: Vec<GapRecord>,
pub params: Params,
/// Agent visit tracking: node_key → (agent_type → last_visit_epoch)
pub visits: VisitIndex,
/// Transcript mining progress: (transcript_id, segment_index) → set of agents that processed it
@ -457,9 +360,6 @@ impl Default for Store {
nodes: HashMap::new(),
uuid_to_key: HashMap::new(),
relations: Vec::new(),
retrieval_log: Vec::new(),
gaps: Vec::new(),
params: Params::default(),
visits: HashMap::new(),
transcript_progress: HashMap::new(),
loaded_nodes_size: 0,
@ -510,7 +410,6 @@ pub fn new_node(key: &str, content: &str) -> Node {
state_tag: String::new(),
last_replayed: 0,
spaced_repetition_interval: 1,
position: 0,
created_at: now_epoch(),
last_scored: 0,
community_id: None,
@ -570,15 +469,6 @@ capnp_message!(TranscriptSegment,
skip: [],
);
/// Build a `TranscriptSegment` for the given transcript, segment index and
/// agent, stamped with the current epoch time.
pub(super) fn new_transcript_segment(transcript_id: &str, segment_index: u32, agent: &str) -> TranscriptSegment {
    let transcript_id = transcript_id.to_owned();
    let agent = agent.to_owned();
    let timestamp = now_epoch();
    TranscriptSegment {
        transcript_id,
        segment_index,
        agent,
        timestamp,
    }
}
pub(crate) fn transcript_progress_path() -> PathBuf { memory_dir().join("transcript-progress.capnp") }
/// Create a new relation.

View file

@ -21,9 +21,6 @@ pub trait StoreView {
/// Node content by key.
fn node_content(&self, key: &str) -> Option<&str>;
/// Search/graph parameters.
fn params(&self) -> Params;
}
impl StoreView for Store {
@ -47,14 +44,11 @@ impl StoreView for Store {
}
fn node_weight(&self, key: &str) -> f64 {
self.nodes.get(key).map(|n| n.weight as f64).unwrap_or(self.params.default_weight)
let cfg = crate::config::get();
self.nodes.get(key).map(|n| n.weight as f64).unwrap_or(cfg.default_node_weight)
}
/// Borrow the content of the node stored under `key`, if there is one.
fn node_content(&self, key: &str) -> Option<&str> {
    let node = self.nodes.get(key)?;
    Some(node.content.as_str())
}
/// Return a copy of the store's parameters (`Params` is `Copy`).
fn params(&self) -> Params {
self.params
}
}