migrate agent output to capnp store, add provenance tracking

All agent output now goes to the store as nodes instead of
markdown/JSON files. Each node carries a Provenance enum identifying
which agent created it (AgentDigest, AgentConsolidate, AgentFactMine,
AgentKnowledgeObservation, etc — 14 variants total).

Store changes:
- upsert_provenance() method for agent-created nodes
- Provenance enum expanded from 5 to 14 variants

Agent changes:
- digest: writes to store nodes (daily-YYYY-MM-DD.md etc)
- consolidate: reports/actions/logs stored as _consolidation-* nodes
- knowledge: depth DB and agent output stored as _knowledge-* nodes
- enrich: experience-mine results go directly to store
- llm: --no-session-persistence prevents transcript accumulation

Deleted: 14 Python/shell scripts replaced by Rust implementations.
This commit is contained in:
ProofOfConcept 2026-03-05 15:30:57 -05:00
parent e37f819dd2
commit 552d255dc3
23 changed files with 1381 additions and 4095 deletions

View file

@ -15,59 +15,40 @@ use crate::llm::{call_sonnet, parse_json_response};
use crate::neuro;
use crate::store::{self, Store, new_relation};
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use crate::util::memory_subdir;
/// Simple append-only log writer for consolidate-full.
struct LogWriter {
path: PathBuf,
}
impl LogWriter {
fn new(path: &Path) -> Result<Self, String> {
fs::write(path, "").map_err(|e| format!("create log: {}", e))?;
Ok(LogWriter { path: path.to_path_buf() })
}
fn write(&mut self, line: &str) -> Result<(), String> {
let mut f = fs::OpenOptions::new()
.append(true)
.open(&self.path)
.map_err(|e| format!("open log: {}", e))?;
writeln!(f, "{}", line)
.map_err(|e| format!("write log: {}", e))
}
/// Push `line` onto the in-memory log buffer, terminated by a newline.
fn log_line(buf: &mut String, line: &str) {
    *buf += line;
    *buf += "\n";
}
/// Run the full autonomous consolidation pipeline with logging.
pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
let start = std::time::Instant::now();
let log_path = memory_subdir("agent-results")?.join("consolidate-full.log");
let mut log = LogWriter::new(&log_path)?;
let log_key = format!("_consolidate-log-{}",
store::format_datetime(store::now_epoch()).replace([':', '-', 'T'], ""));
let mut log_buf = String::new();
log.write("=== CONSOLIDATE FULL ===")?;
log.write(&format!("Started: {}", store::format_datetime(store::now_epoch())))?;
log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?;
log.write("")?;
log_line(&mut log_buf, "=== CONSOLIDATE FULL ===");
log_line(&mut log_buf, &format!("Started: {}", store::format_datetime(store::now_epoch())));
log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
log_line(&mut log_buf, "");
// --- Step 1: Plan ---
log.write("--- Step 1: Plan ---")?;
log_line(&mut log_buf, "--- Step 1: Plan ---");
let plan = neuro::consolidation_plan(store);
let plan_text = neuro::format_plan(&plan);
log.write(&plan_text)?;
log_line(&mut log_buf, &plan_text);
println!("{}", plan_text);
let total_agents = plan.replay_count + plan.linker_count
+ plan.separator_count + plan.transfer_count
+ if plan.run_health { 1 } else { 0 };
log.write(&format!("Total agents to run: {}", total_agents))?;
log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents));
// --- Step 2: Execute agents ---
log.write("\n--- Step 2: Execute agents ---")?;
let mut reports: Vec<PathBuf> = Vec::new();
log_line(&mut log_buf, "\n--- Step 2: Execute agents ---");
let mut reports: Vec<String> = Vec::new();
let mut agent_num = 0usize;
let mut agent_errors = 0usize;
@ -121,7 +102,7 @@ pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
format!("[{}/{}] {}", agent_num, runs.len(), agent_type)
};
log.write(&format!("\n{}", label))?;
log_line(&mut log_buf, &format!("\n{}", label));
println!("{}", label);
// Reload store to pick up changes from previous agents
@ -133,191 +114,173 @@ pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
Ok(p) => p,
Err(e) => {
let msg = format!(" ERROR building prompt: {}", e);
log.write(&msg)?;
log_line(&mut log_buf, &msg);
eprintln!("{}", msg);
agent_errors += 1;
continue;
}
};
log.write(&format!(" Prompt: {} chars (~{} tokens)",
prompt.len(), prompt.len() / 4))?;
log_line(&mut log_buf, &format!(" Prompt: {} chars (~{} tokens)",
prompt.len(), prompt.len() / 4));
let response = match call_sonnet(&prompt, 300) {
Ok(r) => r,
Err(e) => {
let msg = format!(" ERROR from Sonnet: {}", e);
log.write(&msg)?;
log_line(&mut log_buf, &msg);
eprintln!("{}", msg);
agent_errors += 1;
continue;
}
};
// Save report
// Store report as a node
let ts = store::format_datetime(store::now_epoch())
.replace([':', '-', 'T'], "");
let report_name = format!("consolidation-{}-{}.md", agent_type, ts);
let report_path = memory_subdir("agent-results")?.join(&report_name);
fs::write(&report_path, &response)
.map_err(|e| format!("write report: {}", e))?;
reports.push(report_path.clone());
let report_key = format!("_consolidation-{}-{}", agent_type, ts);
store.upsert_provenance(&report_key, &response,
store::Provenance::AgentConsolidate).ok();
reports.push(report_key.clone());
let msg = format!(" Done: {} lines → {}", response.lines().count(), report_name);
log.write(&msg)?;
let msg = format!(" Done: {} lines → {}", response.lines().count(), report_key);
log_line(&mut log_buf, &msg);
println!("{}", msg);
}
log.write(&format!("\nAgents complete: {} run, {} errors",
agent_num - agent_errors, agent_errors))?;
log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors",
agent_num - agent_errors, agent_errors));
// --- Step 3: Apply consolidation actions ---
log.write("\n--- Step 3: Apply consolidation actions ---")?;
log_line(&mut log_buf, "\n--- Step 3: Apply consolidation actions ---");
println!("\n--- Applying consolidation actions ---");
*store = Store::load()?;
if reports.is_empty() {
log.write(" No reports to apply.")?;
log_line(&mut log_buf, " No reports to apply.");
} else {
match apply_consolidation(store, true, None) {
Ok(()) => log.write(" Applied.")?,
Ok(()) => log_line(&mut log_buf, " Applied."),
Err(e) => {
let msg = format!(" ERROR applying consolidation: {}", e);
log.write(&msg)?;
log_line(&mut log_buf, &msg);
eprintln!("{}", msg);
}
}
}
// --- Step 3b: Link orphans ---
log.write("\n--- Step 3b: Link orphans ---")?;
log_line(&mut log_buf, "\n--- Step 3b: Link orphans ---");
println!("\n--- Linking orphan nodes ---");
*store = Store::load()?;
let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15);
log.write(&format!(" {} orphans, {} links added", lo_orphans, lo_added))?;
log_line(&mut log_buf, &format!(" {} orphans, {} links added", lo_orphans, lo_added));
// --- Step 3c: Cap degree ---
log.write("\n--- Step 3c: Cap degree ---")?;
log_line(&mut log_buf, "\n--- Step 3c: Cap degree ---");
println!("\n--- Capping node degree ---");
*store = Store::load()?;
match store.cap_degree(50) {
Ok((hubs, pruned)) => {
store.save()?;
log.write(&format!(" {} hubs capped, {} edges pruned", hubs, pruned))?;
log_line(&mut log_buf, &format!(" {} hubs capped, {} edges pruned", hubs, pruned));
}
Err(e) => log.write(&format!(" ERROR: {}", e))?,
Err(e) => log_line(&mut log_buf, &format!(" ERROR: {}", e)),
}
// --- Step 4: Digest auto ---
log.write("\n--- Step 4: Digest auto ---")?;
log_line(&mut log_buf, "\n--- Step 4: Digest auto ---");
println!("\n--- Generating missing digests ---");
*store = Store::load()?;
match digest::digest_auto(store) {
Ok(()) => log.write(" Digests done.")?,
Ok(()) => log_line(&mut log_buf, " Digests done."),
Err(e) => {
let msg = format!(" ERROR in digest auto: {}", e);
log.write(&msg)?;
log_line(&mut log_buf, &msg);
eprintln!("{}", msg);
}
}
// --- Step 5: Apply digest links ---
log.write("\n--- Step 5: Apply digest links ---")?;
log_line(&mut log_buf, "\n--- Step 5: Apply digest links ---");
println!("\n--- Applying digest links ---");
*store = Store::load()?;
let links = digest::parse_all_digest_links()?;
let links = digest::parse_all_digest_links(store);
let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links);
store.save()?;
log.write(&format!(" {} links applied, {} skipped, {} fallbacks",
applied, skipped, fallbacks))?;
log_line(&mut log_buf, &format!(" {} links applied, {} skipped, {} fallbacks",
applied, skipped, fallbacks));
// --- Step 6: Summary ---
let elapsed = start.elapsed();
log.write("\n--- Summary ---")?;
log.write(&format!("Finished: {}", store::format_datetime(store::now_epoch())))?;
log.write(&format!("Duration: {:.0}s", elapsed.as_secs_f64()))?;
log_line(&mut log_buf, "\n--- Summary ---");
log_line(&mut log_buf, &format!("Finished: {}", store::format_datetime(store::now_epoch())));
log_line(&mut log_buf, &format!("Duration: {:.0}s", elapsed.as_secs_f64()));
*store = Store::load()?;
log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?;
log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
let summary = format!(
"\n=== CONSOLIDATE FULL COMPLETE ===\n\
Duration: {:.0}s\n\
Agents: {} run, {} errors\n\
Nodes: {} Relations: {}\n\
Log: {}\n",
Nodes: {} Relations: {}\n",
elapsed.as_secs_f64(),
agent_num - agent_errors, agent_errors,
store.nodes.len(), store.relations.len(),
log_path.display(),
);
log.write(&summary)?;
log_line(&mut log_buf, &summary);
println!("{}", summary);
// Store the log as a node
store.upsert_provenance(&log_key, &log_buf,
store::Provenance::AgentConsolidate).ok();
store.save()?;
Ok(())
}
/// Find the most recent set of consolidation reports.
fn find_consolidation_reports() -> Result<Vec<PathBuf>, String> {
let dir = memory_subdir("agent-results")?;
let mut reports: Vec<PathBuf> = fs::read_dir(&dir)
.map(|entries| {
entries.filter_map(|e| e.ok())
.map(|e| e.path())
.filter(|p| {
p.file_name()
.and_then(|n| n.to_str())
.map(|n| n.starts_with("consolidation-") && n.ends_with(".md"))
.unwrap_or(false)
})
.collect()
})
.unwrap_or_default();
reports.sort();
reports.reverse();
/// Find the most recent set of consolidation report keys from the store.
fn find_consolidation_reports(store: &Store) -> Vec<String> {
let mut keys: Vec<&String> = store.nodes.keys()
.filter(|k| k.starts_with("_consolidation-"))
.collect();
keys.sort();
keys.reverse();
if reports.is_empty() { return Ok(reports); }
if keys.is_empty() { return Vec::new(); }
// Group by timestamp (last segment of stem before .md)
let latest_ts = reports[0].file_stem()
.and_then(|s| s.to_str())
.unwrap_or("")
.rsplit('-').next().unwrap_or("")
.to_string();
// Group by timestamp (last segment after last '-')
let latest_ts = keys[0].rsplit('-').next().unwrap_or("").to_string();
reports.retain(|r| {
r.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("")
.ends_with(latest_ts.as_str())
});
Ok(reports)
keys.into_iter()
.filter(|k| k.ends_with(&latest_ts))
.cloned()
.collect()
}
fn build_consolidation_prompt(reports: &[PathBuf]) -> Result<String, String> {
fn build_consolidation_prompt(store: &Store, report_keys: &[String]) -> Result<String, String> {
let mut report_text = String::new();
for r in reports {
let content = fs::read_to_string(r)
.map_err(|e| format!("read {}: {}", r.display(), e))?;
for key in report_keys {
let content = store.nodes.get(key)
.map(|n| n.content.as_str())
.unwrap_or("");
report_text.push_str(&format!("\n{}\n## Report: {}\n\n{}\n",
"=".repeat(60),
r.file_stem().and_then(|s| s.to_str()).unwrap_or(""),
content));
"=".repeat(60), key, content));
}
neuro::load_prompt("consolidation", &[("{{REPORTS}}", &report_text)])
}
/// Run the full apply-consolidation pipeline.
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Option<&str>) -> Result<(), String> {
let reports = if let Some(path) = report_file {
vec![PathBuf::from(path)]
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_key: Option<&str>) -> Result<(), String> {
let reports = if let Some(key) = report_key {
vec![key.to_string()]
} else {
find_consolidation_reports()?
find_consolidation_reports(store)
};
if reports.is_empty() {
@ -328,11 +291,11 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
println!("Found {} reports:", reports.len());
for r in &reports {
println!(" {}", r.file_name().and_then(|s| s.to_str()).unwrap_or("?"));
println!(" {}", r);
}
println!("\nExtracting actions from reports...");
let prompt = build_consolidation_prompt(&reports)?;
let prompt = build_consolidation_prompt(store, &reports)?;
println!(" Prompt: {} chars", prompt.len());
let response = call_sonnet(&prompt, 300)?;
@ -343,14 +306,14 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
println!(" {} actions extracted", actions.len());
// Save actions
// Store actions in the store
let timestamp = store::format_datetime(store::now_epoch())
.replace([':', '-'], "");
let actions_path = memory_subdir("agent-results")?
.join(format!("consolidation-actions-{}.json", timestamp));
fs::write(&actions_path, serde_json::to_string_pretty(&actions_value).unwrap())
.map_err(|e| format!("write {}: {}", actions_path.display(), e))?;
println!(" Saved: {}", actions_path.display());
let actions_key = format!("_consolidation-actions-{}", timestamp);
let actions_json = serde_json::to_string_pretty(&actions_value).unwrap();
store.upsert_provenance(&actions_key, &actions_json,
store::Provenance::AgentConsolidate).ok();
println!(" Stored: {}", actions_key);
let link_actions: Vec<_> = actions.iter()
.filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("link"))

View file

@ -8,13 +8,10 @@
use crate::llm::{call_sonnet, semantic_keys};
use crate::store::{self, Store, new_relation};
use crate::neuro;
use crate::util::memory_subdir;
use chrono::{Datelike, Duration, Local, NaiveDate};
use regex::Regex;
use std::collections::BTreeSet;
use std::fs;
use std::path::{Path, PathBuf};
// --- Digest level descriptors ---
@ -113,19 +110,24 @@ const MONTHLY: DigestLevel = DigestLevel {
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
/// Build the store key under which a digest node lives, e.g.
/// "daily-2026-03-04.md" or "weekly-2026-W09.md".
/// The `<level>-<label>.md` shape matches the key format from the old
/// import_file() path.
fn digest_node_key(level_name: &str, label: &str) -> String {
    [level_name, "-", label, ".md"].concat()
}
// --- Input gathering ---
/// Load child digest files from the episodic directory.
fn load_child_digests(prefix: &str, labels: &[String]) -> Result<Vec<(String, String)>, String> {
let dir = memory_subdir("episodic")?;
/// Load child digest content from the store.
fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> Vec<(String, String)> {
let mut digests = Vec::new();
for label in labels {
let path = dir.join(format!("{}-{}.md", prefix, label));
if let Ok(content) = fs::read_to_string(&path) {
digests.push((label.clone(), content));
let key = digest_node_key(prefix, label);
if let Some(node) = store.nodes.get(&key) {
digests.push((label.clone(), node.content.clone()));
}
}
Ok(digests)
digests
}
/// Unified: gather inputs for any digest level.
@ -142,7 +144,7 @@ fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<(String, Vec<
.collect::<BTreeSet<_>>()
.into_iter()
.collect();
load_child_digests(child_name, &child_labels)?
load_child_digests(store, child_name, &child_labels)
} else {
// Leaf level: scan store for journal entries matching label
let date_re = Regex::new(&format!(
@ -227,14 +229,10 @@ fn generate_digest(
println!(" Calling Sonnet...");
let digest = call_sonnet(&prompt, level.timeout)?;
let output_path = memory_subdir("episodic")?
.join(format!("{}-{}.md", level.name, label));
fs::write(&output_path, &digest)
.map_err(|e| format!("write {}: {}", output_path.display(), e))?;
println!(" Written: {}", output_path.display());
store.import_file(&output_path)?;
let key = digest_node_key(level.name, label);
store.upsert_provenance(&key, &digest, store::Provenance::AgentDigest)?;
store.save()?;
println!(" Stored: {}", key);
println!(" Done: {} lines", digest.lines().count());
Ok(())
@ -254,7 +252,6 @@ pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), St
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
let today = Local::now().format("%Y-%m-%d").to_string();
let epi = memory_subdir("episodic")?;
// Collect all dates with journal entries
let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}").unwrap();
@ -277,7 +274,8 @@ pub fn digest_auto(store: &mut Store) -> Result<(), String> {
for arg in &candidates {
let (label, inputs) = gather(level, store, arg)?;
if epi.join(format!("{}-{}.md", level.name, label)).exists() {
let key = digest_node_key(level.name, &label);
if store.nodes.contains_key(&key) {
skipped += 1;
continue;
}
@ -357,21 +355,8 @@ fn normalize_link_key(raw: &str) -> String {
key
}
/// Parse the Links section from a single digest file.
fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
let content = match fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return Vec::new(),
};
let digest_name = path.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("");
let digest_key = format!("{}.md", digest_name);
let filename = path.file_name()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
/// Parse the Links section from a digest node's content.
fn parse_digest_node_links(key: &str, content: &str) -> Vec<DigestLink> {
let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
let header_re = Regex::new(r"^##\s+Links").unwrap();
@ -399,8 +384,8 @@ fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
let mut target = normalize_link_key(raw_target);
// Replace self-references with digest key
if source.is_empty() { source = digest_key.clone(); }
if target.is_empty() { target = digest_key.clone(); }
if source.is_empty() { source = key.to_string(); }
if target.is_empty() { target = key.to_string(); }
// Handle "this daily/weekly/monthly" in raw text
let raw_s_lower = raw_source.to_lowercase();
@ -408,49 +393,39 @@ fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
|| raw_s_lower.contains("this monthly")
{
source = digest_key.clone();
source = key.to_string();
}
if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
|| raw_t_lower.contains("this monthly")
{
target = digest_key.clone();
target = key.to_string();
}
// Skip NEW: and self-links
if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
if source == target { continue; }
links.push(DigestLink { source, target, reason, file: filename.clone() });
links.push(DigestLink { source, target, reason, file: key.to_string() });
}
}
links
}
/// Parse links from all digest files in the episodic dir.
pub fn parse_all_digest_links() -> Result<Vec<DigestLink>, String> {
let dir = memory_subdir("episodic")?;
/// Parse links from all digest nodes in the store.
pub fn parse_all_digest_links(store: &Store) -> Vec<DigestLink> {
let mut all_links = Vec::new();
for pattern in &["daily-*.md", "weekly-*.md", "monthly-*.md"] {
if let Ok(entries) = fs::read_dir(&dir) {
let mut files: Vec<PathBuf> = entries
.filter_map(|e| e.ok())
.map(|e| e.path())
.filter(|p| {
p.file_name()
.and_then(|n| n.to_str())
.map(|n| {
let prefix = pattern.split('*').next().unwrap_or("");
n.starts_with(prefix) && n.ends_with(".md")
})
.unwrap_or(false)
})
.collect();
files.sort();
for path in files {
all_links.extend(parse_digest_file_links(&path));
}
let mut digest_keys: Vec<&String> = store.nodes.keys()
.filter(|k| k.starts_with("daily-")
|| k.starts_with("weekly-")
|| k.starts_with("monthly-"))
.collect();
digest_keys.sort();
for key in digest_keys {
if let Some(node) = store.nodes.get(key) {
all_links.extend(parse_digest_node_links(key, &node.content));
}
}
@ -458,7 +433,7 @@ pub fn parse_all_digest_links() -> Result<Vec<DigestLink>, String> {
let mut seen = std::collections::HashSet::new();
all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
Ok(all_links)
all_links
}
/// Apply parsed digest links to the store.

View file

@ -13,10 +13,60 @@ use crate::store::{self, Store, new_node, new_relation};
use regex::Regex;
use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::fs;
use std::hash::{Hash, Hasher};
use crate::util::memory_subdir;
use crate::store::StoreView;
/// Content-hash dedup key for a transcript file.
///
/// Reads the whole file at `path`, hashes its bytes with `DefaultHasher`, and
/// returns `_mined-transcripts.md#h-<16 hex digits>`. This is the same key
/// experience_mine uses to mark a transcript as mined.
///
/// # Errors
/// Returns `read <path>: <io error>` when the file cannot be read.
///
/// NOTE(review): the standard library documents `DefaultHasher`'s algorithm as
/// unspecified across releases — confirm that persisted keys are expected to
/// survive a toolchain upgrade.
pub fn transcript_dedup_key(path: &str) -> Result<String, String> {
    let contents = match fs::read(path) {
        Ok(bytes) => bytes,
        Err(e) => return Err(format!("read {}: {}", path, e)),
    };
    let digest = {
        let mut state = DefaultHasher::new();
        // Hash the Vec<u8> itself (not a raw write) so the key stays identical.
        contents.hash(&mut state);
        state.finish()
    };
    Ok(format!("_mined-transcripts.md#h-{:016x}", digest))
}
/// True when the content-hash dedup key for `path` is present in `store`.
/// A transcript that cannot be read is reported as not mined.
pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
    transcript_dedup_key(path)
        .map_or(false, |dedup_key| store.node_content(&dedup_key).is_some())
}
/// Filename-based dedup key for a transcript (the file stem, a UUID).
/// Used by the daemon reconcile loop — no file reads needed.
///
/// When `path` has no file stem, the whole `path` string is used instead.
pub fn transcript_filename_key(path: &str) -> String {
    let stem = match std::path::Path::new(path).file_stem() {
        Some(os_stem) => os_stem.to_string_lossy().into_owned(),
        None => path.to_owned(),
    };
    format!("_mined-transcripts.md#f-{}", stem)
}
/// Collect every mined-transcript marker key (both content-hash and filename
/// variants) from the store. Load once per daemon tick, check many.
///
/// Returns an empty set when the store view cannot be loaded.
pub fn mined_transcript_keys() -> HashSet<String> {
    use crate::store::AnyView;

    let mut mined = HashSet::new();
    let view = match AnyView::load() {
        Ok(v) => v,
        Err(_) => return mined,
    };
    view.for_each_node(|key, _, _| {
        if key.starts_with("_mined-transcripts.md#") {
            mined.insert(key.to_string());
        }
    });
    mined
}
/// Membership test against a pre-loaded set of mined keys.
///
/// Only the filename-based key is checked (no file read). Sessions mined before
/// the filename key was added will pass through and short-circuit in
/// experience_mine via the content hash check — a one-time cost on first
/// restart after this change.
pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> bool {
    let fname_key = transcript_filename_key(path);
    mined.contains(&fname_key)
}
/// Extract user/assistant messages with line numbers from a JSONL transcript.
/// (line_number, role, text, timestamp)
@ -187,21 +237,6 @@ pub fn journal_enrich(
}
}
// Save result to agent-results
let timestamp = store::format_datetime(store::now_epoch())
.replace([':', '-'], "");
let result_file = memory_subdir("agent-results")?
.join(format!("{}.json", timestamp));
let output = serde_json::json!({
"timestamp": timestamp,
"jsonl_path": jsonl_path,
"entry_text": &entry_text[..entry_text.len().min(500)],
"agent_result": result,
});
fs::write(&result_file, serde_json::to_string_pretty(&output).unwrap())
.map_err(|e| format!("write {}: {}", result_file.display(), e))?;
println!(" Results saved: {}", result_file.display());
store.save()?;
Ok(())
}
@ -320,6 +355,7 @@ pub fn experience_mine(
let mut node = new_node(&key, &full_content);
node.node_type = store::NodeType::EpisodicSession;
node.category = store::Category::Observation;
node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(node);
count += 1;
@ -328,11 +364,19 @@ pub fn experience_mine(
}
// Record this transcript as mined (even if count == 0, to prevent re-runs)
// Two keys: content hash (exact dedup) and filename (fast daemon reconcile)
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
let mut dedup_node = new_node(&dedup_key, &dedup_content);
dedup_node.category = store::Category::Task;
dedup_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(dedup_node);
let fname_key = transcript_filename_key(jsonl_path);
let mut fname_node = new_node(&fname_key, &dedup_content);
fname_node.category = store::Category::Task;
fname_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(fname_node);
if count > 0 {
println!(" Saved {} new journal entries.", count);
}

976
src/knowledge.rs Normal file
View file

@ -0,0 +1,976 @@
// knowledge.rs — knowledge production agents and convergence loop
//
// Rust port of knowledge_agents.py + knowledge_loop.py.
// Four agents mine the memory graph for new knowledge:
// 1. Observation — extract facts from raw conversations
// 2. Extractor — find patterns in node clusters
// 3. Connector — find cross-domain structural connections
// 4. Challenger — stress-test existing knowledge nodes
//
// The loop runs agents in sequence, applies results, measures
// convergence via graph-structural metrics (sigma, CC, communities).
use crate::graph::Graph;
use crate::llm;
use crate::spectral;
use crate::store::{self, Store, new_relation, RelationType};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
/// Memory directory path; delegates directly to `store::memory_dir()`.
fn memory_dir() -> PathBuf {
store::memory_dir()
}
fn prompts_dir() -> PathBuf {
let manifest = env!("CARGO_MANIFEST_DIR");
PathBuf::from(manifest).join("prompts")
}
/// `$HOME/.claude/projects`, falling back to `./.claude/projects` when the
/// `HOME` environment variable cannot be read.
fn projects_dir() -> PathBuf {
    let base = match std::env::var("HOME") {
        Ok(home) => home,
        Err(_) => String::from("."),
    };
    let mut dir = PathBuf::from(base);
    dir.push(".claude/projects");
    dir
}
// ---------------------------------------------------------------------------
// Action types
// ---------------------------------------------------------------------------
/// One proposed graph mutation parsed from an agent's text output, plus the
/// bookkeeping attached to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Action {
/// What the action does (write a node, add a link, refine a node).
pub kind: ActionKind,
/// Confidence level assigned by the parser (from the agent text or a fixed default).
pub confidence: Confidence,
/// Numeric weight; the parsers in this file set it from the confidence or to a fixed value.
pub weight: f64,
/// Inference depth; the parsers initialise it to 0 (nodes/refines) or -1 (links).
pub depth: i32,
/// Parsers leave this `None`. Presumably set once the action has been applied or skipped — TODO confirm against the caller.
pub applied: Option<bool>,
/// Parsers leave this `None`. Presumably the reason an action was not applied — TODO confirm against the caller.
pub rejected_reason: Option<String>,
}
/// The three kinds of action the parsers in this file recognise.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ActionKind {
/// Parsed from a `WRITE_NODE <key> ... END_NODE` block.
WriteNode {
/// Key of the node to write.
key: String,
/// Node body with the CONFIDENCE/COVERS directives removed.
content: String,
/// Keys listed on the `COVERS:` line; used when computing inference depth.
covers: Vec<String>,
},
/// Parsed from a `LINK <source> <target>` line.
Link {
/// Key of the first endpoint.
source: String,
/// Key of the second endpoint.
target: String,
},
/// Parsed from a `REFINE <key> ... END_REFINE` block.
Refine {
/// Key of the node being refined.
key: String,
/// Replacement content for that node.
content: String,
},
}
/// Three-level confidence rating attached to an action.
/// Serialized in lowercase ("high" / "medium" / "low").
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Confidence {
    High,
    Medium,
    Low,
}

impl Confidence {
    /// Weight given to an action carrying this confidence.
    fn weight(self) -> f64 {
        match self {
            Confidence::Low => 0.3,
            Confidence::Medium => 0.6,
            Confidence::High => 1.0,
        }
    }

    /// Numeric value of this confidence level.
    fn value(self) -> f64 {
        match self {
            Confidence::Low => 0.3,
            Confidence::Medium => 0.6,
            Confidence::High => 0.9,
        }
    }

    /// Case-insensitive parse; anything other than "high" or "low"
    /// (including "medium" and unknown text) maps to `Medium`.
    fn parse(s: &str) -> Self {
        let lowered = s.to_lowercase();
        if lowered == "high" {
            Confidence::High
        } else if lowered == "low" {
            Confidence::Low
        } else {
            Confidence::Medium
        }
    }
}
// ---------------------------------------------------------------------------
// Action parsing
// ---------------------------------------------------------------------------
/// Parse every `WRITE_NODE <key> ... END_NODE` block in `text` into a
/// `WriteNode` action.
///
/// Inside each block:
/// - the first `CONFIDENCE: high|medium|low` (case-insensitive) sets the
///   confidence, defaulting to `Medium`; the directive is removed from the body;
/// - the first `COVERS: a, b, ...` line lists covered keys; the directive is
///   removed from the body.
///
/// Empty entries in the COVERS list (e.g. from a trailing comma) are dropped,
/// so a malformed line cannot yield a non-empty list of blank keys.
///
/// Returned actions have `depth` 0 and `weight` taken from the confidence.
pub fn parse_write_nodes(text: &str) -> Vec<Action> {
    let re = Regex::new(r"(?s)WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE").unwrap();
    let conf_re = Regex::new(r"(?i)CONFIDENCE:\s*(high|medium|low)").unwrap();
    let covers_re = Regex::new(r"COVERS:\s*(.+)").unwrap();

    re.captures_iter(text)
        .map(|cap| {
            let key = cap[1].to_string();
            let body = cap[2].trim();

            let confidence = conf_re
                .captures(body)
                .map(|c| Confidence::parse(&c[1]))
                .unwrap_or(Confidence::Medium);
            // `replace` removes only the first occurrence of the directive.
            let body = conf_re.replace(body, "").trim().to_string();

            let covers: Vec<String> = covers_re
                .captures(&body)
                .map(|c| {
                    c[1].split(',')
                        .map(str::trim)
                        // Skip blanks so "COVERS: a," does not produce a "" key.
                        .filter(|k| !k.is_empty())
                        .map(str::to_string)
                        .collect()
                })
                .unwrap_or_default();
            let content = covers_re.replace(&body, "").trim().to_string();

            Action {
                weight: confidence.weight(),
                kind: ActionKind::WriteNode { key, content, covers },
                confidence,
                depth: 0,
                applied: None,
                rejected_reason: None,
            }
        })
        .collect()
}
/// Parse every line of the form `LINK <source> <target>` (anchored at the
/// start of a line) into a `Link` action.
///
/// Link actions always carry `Low` confidence, weight 0.3 and depth -1.
pub fn parse_links(text: &str) -> Vec<Action> {
    let link_re = Regex::new(r"(?m)^LINK\s+(\S+)\s+(\S+)").unwrap();

    let mut actions = Vec::new();
    for caps in link_re.captures_iter(text) {
        let source = caps[1].to_string();
        let target = caps[2].to_string();
        actions.push(Action {
            kind: ActionKind::Link { source, target },
            confidence: Confidence::Low,
            weight: 0.3,
            depth: -1,
            applied: None,
            rejected_reason: None,
        });
    }
    actions
}
/// Parse every `REFINE <key> ... END_REFINE` block in `text` into a `Refine`
/// action.
///
/// Surrounding `*` characters (markdown emphasis) and whitespace are stripped
/// from the key; the block body is trimmed. Refine actions carry `Medium`
/// confidence, weight 0.7 and depth 0.
pub fn parse_refines(text: &str) -> Vec<Action> {
    let refine_re = Regex::new(r"(?s)REFINE\s+(\S+)\s*\n(.*?)END_REFINE").unwrap();

    let mut actions = Vec::new();
    for caps in refine_re.captures_iter(text) {
        let key = caps[1].trim_matches('*').trim().to_string();
        let content = caps[2].trim().to_string();
        actions.push(Action {
            kind: ActionKind::Refine { key, content },
            confidence: Confidence::Medium,
            weight: 0.7,
            depth: 0,
            applied: None,
            rejected_reason: None,
        });
    }
    actions
}
/// Run all three parsers over `text` and concatenate their results in the
/// order: write-node actions, then links, then refines.
pub fn parse_all_actions(text: &str) -> Vec<Action> {
    parse_write_nodes(text)
        .into_iter()
        .chain(parse_links(text))
        .chain(parse_refines(text))
        .collect()
}
pub fn count_no_ops(text: &str) -> usize {
let no_conn = Regex::new(r"\bNO_CONNECTION\b").unwrap().find_iter(text).count();
let affirm = Regex::new(r"\bAFFIRM\b").unwrap().find_iter(text).count();
let no_extract = Regex::new(r"\bNO_EXTRACTION\b").unwrap().find_iter(text).count();
no_conn + affirm + no_extract
}
// ---------------------------------------------------------------------------
// Inference depth tracking
// ---------------------------------------------------------------------------
const DEPTH_DB_KEY: &str = "_knowledge-depths";
/// Table of inference depths keyed by node key, persisted as a JSON object in
/// the store node named by `DEPTH_DB_KEY`.
#[derive(Default)]
pub struct DepthDb {
    depths: HashMap<String, i32>,
}

impl DepthDb {
    /// Load the table from the store.
    ///
    /// A missing node yields an empty table. A node whose content is not valid
    /// JSON also yields an empty table, but a warning is printed to stderr so
    /// the loss of recorded depths is visible.
    pub fn load(store: &Store) -> Self {
        let Some(node) = store.nodes.get(DEPTH_DB_KEY) else {
            return Self::default();
        };
        match serde_json::from_str(&node.content) {
            Ok(depths) => Self { depths },
            Err(e) => {
                eprintln!("warning: ignoring unparsable {}: {}", DEPTH_DB_KEY, e);
                Self::default()
            }
        }
    }

    /// Serialize the table to JSON and upsert it into the store.
    ///
    /// Still best-effort (never fails the caller), but serialization or store
    /// errors are reported on stderr instead of being silently discarded.
    pub fn save(&self, store: &mut Store) {
        match serde_json::to_string(&self.depths) {
            Ok(json) => {
                if let Err(e) = store.upsert_provenance(
                    DEPTH_DB_KEY,
                    &json,
                    store::Provenance::AgentKnowledgeObservation,
                ) {
                    eprintln!("warning: failed to store {}: {}", DEPTH_DB_KEY, e);
                }
            }
            Err(e) => eprintln!("warning: failed to serialize {}: {}", DEPTH_DB_KEY, e),
        }
    }

    /// Recorded depth for `key`, or 0 when the key is unknown.
    pub fn get(&self, key: &str) -> i32 {
        self.depths.get(key).copied().unwrap_or(0)
    }

    /// Record `depth` for `key`, replacing any previous value.
    pub fn set(&mut self, key: String, depth: i32) {
        self.depths.insert(key, depth);
    }
}
/// Base inference depth per agent: observation=1, extractor=2, connector=3.
/// The challenger has no base depth (`None`); any other name defaults to 2.
fn agent_base_depth(agent: &str) -> Option<i32> {
    if agent == "challenger" {
        return None;
    }
    let depth = match agent {
        "observation" => 1,
        "connector" => 3,
        // "extractor" and every unrecognised agent share the same depth.
        _ => 2,
    };
    Some(depth)
}
/// Inference depth an action would have if applied.
///
/// - `Link`: always -1.
/// - `Refine`: the depth already recorded for the refined key.
/// - `WriteNode` with a COVERS list: one more than the deepest covered key.
/// - `WriteNode` without one: the agent's base depth, or 2 when the agent has
///   no base depth.
pub fn compute_action_depth(db: &DepthDb, action: &Action, agent: &str) -> i32 {
    match &action.kind {
        ActionKind::Refine { key, .. } => db.get(key),
        ActionKind::Link { .. } => -1,
        ActionKind::WriteNode { covers, .. } if covers.is_empty() => {
            agent_base_depth(agent).unwrap_or(2)
        }
        ActionKind::WriteNode { covers, .. } => {
            let deepest = covers.iter().map(|covered| db.get(covered)).max().unwrap_or(0);
            deepest + 1
        }
    }
}
/// Minimum confidence required at a given inference depth.
///
/// For `depth > 0` this is `1 - (1 - base)^depth`; for `depth <= 0` nothing is
/// required and the result is 0.0.
pub fn required_confidence(depth: i32, base: f64) -> f64 {
    if depth > 0 {
        let slack = (1.0 - base).powi(depth);
        1.0 - slack
    } else {
        0.0
    }
}
/// Confidence bonus earned through real-world use.
///
/// Zero uses give 0.0; otherwise the bonus is `1 - 1 / (1 + 0.15 * use_count)`.
pub fn use_bonus(use_count: u32) -> f64 {
    match use_count {
        0 => 0.0,
        uses => 1.0 - 1.0 / (1.0 + 0.15 * f64::from(uses)),
    }
}
// ---------------------------------------------------------------------------
// Action application
// ---------------------------------------------------------------------------
/// Prefix `content` with a one-line HTML comment recording the authoring
/// agent, the creation timestamp and the inference depth.
fn stamp_content(content: &str, agent: &str, timestamp: &str, depth: i32) -> String {
    let header = format!(
        "<!-- author: {} | created: {} | depth: {} -->",
        agent, timestamp, depth
    );
    let mut stamped = String::with_capacity(header.len() + 1 + content.len());
    stamped.push_str(&header);
    stamped.push('\n');
    stamped.push_str(content);
    stamped
}
/// True when a non-deleted relation already connects the two keys, in either
/// direction.
fn has_edge(store: &Store, source: &str, target: &str) -> bool {
    for rel in store.relations.iter() {
        if rel.deleted {
            continue;
        }
        let forward = rel.source_key == source && rel.target_key == target;
        let backward = rel.source_key == target && rel.target_key == source;
        if forward || backward {
            return true;
        }
    }
    false
}
/// Apply a single parsed action to the store.
///
/// - `WriteNode` / `Refine`: the content is stamped with author, timestamp and
///   depth, then upserted under the action's key with the agent's provenance.
/// - `Link`: a `RelationType::Link` relation of strength 0.3 is added between
///   the two nodes, tagged with the agent's provenance.
///
/// Returns `true` when the store was modified. A link is rejected (`false`)
/// when it points from a node to itself, when the two keys are already
/// connected in either direction, or when either endpoint is missing from the
/// store. Store errors also yield `false`.
pub fn apply_action(
    store: &mut Store,
    action: &Action,
    agent: &str,
    timestamp: &str,
    depth: i32,
) -> bool {
    let provenance = agent_provenance(agent);

    match &action.kind {
        // Writing a new node and refining an existing one are the same
        // store operation: stamp the content and upsert it.
        ActionKind::WriteNode { key, content, .. } | ActionKind::Refine { key, content } => {
            let stamped = stamp_content(content, agent, timestamp, depth);
            store.upsert_provenance(key, &stamped, provenance).is_ok()
        }
        ActionKind::Link { source, target } => {
            // A self-link carries no information; skip it rather than
            // adding a self-loop to the graph.
            if source == target || has_edge(store, source, target) {
                return false;
            }
            let (Some(source_node), Some(target_node)) = (
                store.nodes.get(source.as_str()),
                store.nodes.get(target.as_str()),
            ) else {
                return false;
            };
            let (source_uuid, target_uuid) = (source_node.uuid, target_node.uuid);

            let mut rel = new_relation(
                source_uuid, target_uuid,
                RelationType::Link,
                0.3,
                source, target,
            );
            rel.provenance = provenance;
            store.add_relation(rel).is_ok()
        }
    }
}
fn agent_provenance(agent: &str) -> store::Provenance {
match agent {
"observation" => store::Provenance::AgentKnowledgeObservation,
"extractor" | "pattern" => store::Provenance::AgentKnowledgePattern,
"connector" => store::Provenance::AgentKnowledgeConnector,
"challenger" => store::Provenance::AgentKnowledgeChallenger,
_ => store::Provenance::Agent,
}
}
// ---------------------------------------------------------------------------
// Agent runners
// ---------------------------------------------------------------------------
/// Read the prompt template `<name>.md` from the prompts directory.
///
/// Errors are reported as `load prompt <name>: <io error>`.
fn load_prompt(name: &str) -> Result<String, String> {
    let file_name = format!("{}.md", name);
    let path = prompts_dir().join(file_name);
    match fs::read_to_string(&path) {
        Ok(text) => Ok(text),
        Err(e) => Err(format!("load prompt {}: {}", name, e)),
    }
}
/// One-line summary of the store's node count and the graph's edge count,
/// substituted for `{{TOPOLOGY}}` in agent prompts.
fn get_graph_topology(store: &Store, graph: &Graph) -> String {
    let node_count = store.nodes.len();
    let relation_count = graph.edge_count();
    format!("Nodes: {} Relations: {}\n", node_count, relation_count)
}
/// Strip <system-reminder> blocks from text
fn strip_system_tags(text: &str) -> String {
let re = Regex::new(r"(?s)<system-reminder>.*?</system-reminder>").unwrap();
re.replace_all(text, "").trim().to_string()
}
/// Build a readable dialogue from a conversation JSONL file.
///
/// External user messages become `**Kent:**` paragraphs and assistant
/// messages become `**PoC:**` paragraphs, joined by blank lines. Very short
/// messages and "[Request interrupted" markers from the user are dropped.
/// Reading stops once the accumulated message bytes exceed `max_chars`.
/// An unreadable file yields an empty string.
fn extract_conversation_text(path: &Path, max_chars: usize) -> String {
    let content = match fs::read_to_string(path) {
        Ok(c) => c,
        Err(_) => return String::new(),
    };
    let mut fragments: Vec<String> = Vec::new();
    let mut total = 0;
    for line in content.lines() {
        let obj: serde_json::Value = match serde_json::from_str(line) {
            Ok(v) => v,
            Err(_) => continue,
        };
        let kind = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
        let from_user = kind == "user"
            && obj.get("userType").and_then(|v| v.as_str()) == Some("external");
        let from_assistant = kind == "assistant";
        if !from_user && !from_assistant {
            continue;
        }
        let Some(raw) = extract_text_content(&obj) else { continue };
        let text = strip_system_tags(&raw);
        let fragment = if from_user {
            if text.starts_with("[Request interrupted") || text.len() <= 5 {
                continue;
            }
            format!("**Kent:** {}", text)
        } else {
            if text.len() <= 10 {
                continue;
            }
            format!("**PoC:** {}", text)
        };
        total += text.len();
        fragments.push(fragment);
        // Only a kept message can push the running total over the budget.
        if total > max_chars {
            break;
        }
    }
    fragments.join("\n\n")
}
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
let msg = obj.get("message")?;
let content = msg.get("content")?;
if let Some(s) = content.as_str() {
return Some(s.to_string());
}
if let Some(arr) = content.as_array() {
let texts: Vec<&str> = arr.iter()
.filter_map(|b| {
if b.get("type")?.as_str()? == "text" {
b.get("text")?.as_str()
} else {
None
}
})
.collect();
if !texts.is_empty() {
return Some(texts.join("\n"));
}
}
None
}
/// Count short user messages (dialogue turns) in a JSONL
fn count_dialogue_turns(path: &Path) -> usize {
let Ok(content) = fs::read_to_string(path) else { return 0 };
content.lines()
.filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
.filter(|obj| {
obj.get("type").and_then(|v| v.as_str()) == Some("user")
&& obj.get("userType").and_then(|v| v.as_str()) == Some("external")
})
.filter(|obj| {
let text = extract_text_content(obj).unwrap_or_default();
text.len() > 5 && text.len() < 500
&& !text.starts_with("[Request interrupted")
&& !text.starts_with("Implement the following")
})
.count()
}
/// Pick up to `n` conversation excerpts for the observation extractor.
///
/// Scans each sub-directory of the projects directory for `.jsonl` files
/// larger than 50 000 bytes, keeps those with at least 10 dialogue turns,
/// ranks them by turn count (most first) and extracts text from at most
/// `2 * n` of them. Excerpts of 500 bytes or less are discarded. Each result
/// is `(session id taken from the file stem, excerpt text)`.
fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
    let projects = projects_dir();
    if !projects.exists() {
        return Vec::new();
    }

    // Gather candidate transcripts one directory level down.
    let mut jsonl_files: Vec<PathBuf> = Vec::new();
    let project_dirs = fs::read_dir(&projects).into_iter().flatten().filter_map(|e| e.ok());
    for dir in project_dirs {
        let dir_path = dir.path();
        if !dir_path.is_dir() {
            continue;
        }
        let Ok(files) = fs::read_dir(&dir_path) else { continue };
        for entry in files.filter_map(|e| e.ok()) {
            let p = entry.path();
            let is_jsonl = p.extension().map(|x| x == "jsonl").unwrap_or(false);
            if !is_jsonl {
                continue;
            }
            let big_enough = p.metadata().map(|m| m.len() > 50_000).unwrap_or(false);
            if big_enough {
                jsonl_files.push(p);
            }
        }
    }

    // Rank by number of dialogue turns, busiest conversations first.
    let mut scored: Vec<(usize, PathBuf)> = Vec::new();
    for f in jsonl_files {
        let turns = count_dialogue_turns(&f);
        if turns >= 10 {
            scored.push((turns, f));
        }
    }
    scored.sort_by(|(turns_a, _), (turns_b, _)| turns_b.cmp(turns_a));

    scored
        .iter()
        .take(n * 2)
        .filter_map(|(_, f)| {
            let text = extract_conversation_text(f, 8000);
            if text.len() <= 500 {
                return None;
            }
            let session_id = f
                .file_stem()
                .map(|s| s.to_string_lossy().to_string())
                .unwrap_or_else(|| "unknown".into());
            Some((session_id, text))
        })
        .take(n)
        .collect()
}
/// Run the observation-extractor agent over up to `batch_size` conversation excerpts.
///
/// Each excerpt is substituted into the `observation-extractor` prompt
/// together with the graph topology summary and sent to the model. The
/// per-session responses are returned as one markdown document separated by
/// `---` rules.
///
/// # Errors
/// Fails if the prompt template cannot be loaded or any model call fails.
pub fn run_observation_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
    let template = load_prompt("observation-extractor")?;
    let topology = get_graph_topology(store, graph);
    let fragments = select_conversation_fragments(batch_size);
    let mut results = Vec::with_capacity(fragments.len());
    for (i, (session_id, text)) in fragments.iter().enumerate() {
        // Shorten by characters, not bytes: the id comes from a lossily decoded
        // file stem, and byte-slicing at a fixed offset panics when the cut
        // lands inside a multi-byte UTF-8 character.
        let short_id: String = session_id.chars().take(12).collect();
        eprintln!(" Observation extractor {}/{}: session {}... ({} chars)",
            i + 1, fragments.len(), short_id, text.len());
        let prompt = template
            .replace("{{TOPOLOGY}}", &topology)
            .replace("{{CONVERSATIONS}}", &format!("### Session {}\n\n{}", session_id, text));
        let response = llm::call_sonnet(&prompt, 600)?;
        results.push(format!("## Session: {}\n\n{}", session_id, response));
    }
    Ok(results.join("\n\n---\n\n"))
}
/// Load the saved spectral embedding as a key → coordinates map.
///
/// Falls back to an empty map when no embedding can be loaded.
fn load_spectral_embedding() -> HashMap<String, Vec<f64>> {
    let coords = spectral::load_embedding().map(|emb| emb.coords);
    coords.unwrap_or_default()
}
/// Cosine distance (`1 - cosine similarity`) between the embeddings of `a` and `b`.
///
/// Returns `f64::INFINITY` when either key has no embedding or either vector
/// has zero length.
fn spectral_distance(embedding: &HashMap<String, Vec<f64>>, a: &str, b: &str) -> f64 {
    /// Euclidean norm of a coordinate vector.
    fn norm(v: &[f64]) -> f64 {
        v.iter().map(|x| x * x).sum::<f64>().sqrt()
    }

    let va = match embedding.get(a) {
        Some(v) => v,
        None => return f64::INFINITY,
    };
    let vb = match embedding.get(b) {
        Some(v) => v,
        None => return f64::INFINITY,
    };
    let (norm_a, norm_b) = (norm(va), norm(vb));
    if norm_a == 0.0 || norm_b == 0.0 {
        return f64::INFINITY;
    }
    let dot: f64 = va.iter().zip(vb.iter()).map(|(x, y)| x * y).sum();
    1.0 - dot / (norm_a * norm_b)
}
/// Build up to `n` clusters of five spectrally close node keys for the extractor agent.
///
/// Journal entries and a few bookkeeping files are excluded. Each round takes
/// the first still-unused key as seed and adds its four nearest unused
/// neighbours by `spectral_distance`; keys are never reused across clusters.
/// Selection stops early when fewer than five unused keys remain.
fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
    const CLUSTER_SIZE: usize = 5;
    const SKIP: [&str; 4] = ["journal.md", "MEMORY.md", "where-am-i.md", "work-queue.md"];

    let embedding = load_spectral_embedding();
    let semantic_keys: Vec<&String> = embedding
        .keys()
        .filter(|k| !k.starts_with("journal.md#") && !SKIP.contains(&k.as_str()))
        .collect();

    let mut used: HashSet<String> = HashSet::new();
    let mut clusters = Vec::new();
    for _ in 0..n {
        let available: Vec<&String> = semantic_keys
            .iter()
            .copied()
            .filter(|k| !used.contains(*k))
            .collect();
        if available.len() < CLUSTER_SIZE {
            break;
        }
        let seed: &String = available[0];

        // Rank every other available key by distance to the seed.
        let mut neighbours: Vec<(f64, &String)> = Vec::new();
        for &candidate in &available {
            if candidate == seed {
                continue;
            }
            let d = spectral_distance(&embedding, seed, candidate);
            if d.is_finite() {
                neighbours.push((d, candidate));
            }
        }
        // Only finite distances were kept, so partial_cmp cannot fail here.
        neighbours.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());

        let mut cluster: Vec<String> = Vec::with_capacity(CLUSTER_SIZE);
        cluster.push(seed.clone());
        cluster.extend(neighbours.iter().take(CLUSTER_SIZE - 1).map(|(_, k)| (*k).clone()));
        used.extend(cluster.iter().cloned());
        clusters.push(cluster);
    }
    clusters
}
/// Run the extractor agent over up to `batch_size` clusters of related nodes.
///
/// For each cluster the content of its nodes is substituted into the
/// `extractor` prompt and sent to the model; clusters whose keys are all
/// missing from the store are skipped. Responses are joined into one markdown
/// document separated by `---` rules.
///
/// # Errors
/// Fails if the prompt template cannot be loaded or any model call fails.
pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
    let template = load_prompt("extractor")?;
    let topology = get_graph_topology(store, graph);
    let clusters = select_extractor_clusters(store, batch_size);
    let total = clusters.len();
    let mut results = Vec::new();
    for (idx, cluster) in clusters.iter().enumerate() {
        let ordinal = idx + 1;
        eprintln!(" Extractor cluster {}/{}: {} nodes", ordinal, total, cluster.len());

        let mut node_texts: Vec<String> = Vec::with_capacity(cluster.len());
        for key in cluster {
            if let Some(node) = store.nodes.get(key) {
                node_texts.push(format!("### {}\n{}", key, node.content.as_str()));
            }
        }
        if node_texts.is_empty() {
            continue;
        }

        let nodes_section = node_texts.join("\n\n");
        let prompt = template
            .replace("{{TOPOLOGY}}", &topology)
            .replace("{{NODES}}", &nodes_section);
        let response = llm::call_sonnet(&prompt, 600)?;
        // The heading names at most the first three keys of the cluster.
        let preview = cluster.iter().take(3).cloned().collect::<Vec<_>>().join(", ");
        results.push(format!("## Cluster {}: {}...\n\n{}", ordinal, preview, response));
    }
    Ok(results.join("\n\n---\n\n"))
}
fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<String>, Vec<String>)> {
let embedding = load_spectral_embedding();
let skip_prefixes = ["journal.md#", "daily-", "weekly-", "monthly-", "all-sessions"];
let skip_exact: HashSet<&str> = ["journal.md", "MEMORY.md", "where-am-i.md",
"work-queue.md", "work-state"].iter().copied().collect();
let semantic_keys: Vec<&String> = embedding.keys()
.filter(|k| {
!skip_exact.contains(k.as_str())
&& !skip_prefixes.iter().any(|p| k.starts_with(p))
})
.collect();
let mut pairs = Vec::new();
let mut used = HashSet::new();
for seed in semantic_keys.iter().take(n * 10) {
if used.contains(*seed) { continue; }
let mut near: Vec<(f64, &String)> = semantic_keys.iter()
.filter(|k| ***k != **seed && !used.contains(**k))
.map(|k| (spectral_distance(&embedding, seed, k), *k))
.filter(|(d, _)| *d < 0.5 && d.is_finite())
.collect();
near.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
for (_, target) in near.iter().take(5) {
if !has_edge(store, seed, target) {
let _ = graph; // graph available for future use
used.insert((*seed).clone());
used.insert((*target).clone());
pairs.push((vec![(*seed).clone()], vec![(*target).clone()]));
break;
}
}
if pairs.len() >= n { break; }
}
pairs
}
/// Run the connector agent over up to `batch_size` pairs of unlinked, related node groups.
///
/// For each pair the content of both groups is substituted into the
/// `connector` prompt and sent to the model. Responses are joined into one
/// markdown document separated by `---` rules.
///
/// # Errors
/// Fails if the prompt template cannot be loaded or any model call fails.
pub fn run_connector(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
    // Render a group of keys as "### key\ncontent" sections; keys that are not
    // in the store are skipped.
    fn render_group(store: &Store, keys: &[String]) -> String {
        keys.iter()
            .filter_map(|k| {
                let c = store.nodes.get(k)?.content.as_str();
                Some(format!("### {}\n{}", k, c))
            })
            .collect::<Vec<_>>()
            .join("\n\n")
    }

    let template = load_prompt("connector")?;
    let topology = get_graph_topology(store, graph);
    let pairs = select_connector_pairs(store, graph, batch_size);
    let mut results = Vec::with_capacity(pairs.len());
    for (i, (group_a, group_b)) in pairs.iter().enumerate() {
        eprintln!(" Connector pair {}/{}", i + 1, pairs.len());
        let prompt = template
            .replace("{{TOPOLOGY}}", &topology)
            .replace("{{NODES_A}}", &render_group(store, group_a))
            .replace("{{NODES_B}}", &render_group(store, group_b));
        let response = llm::call_sonnet(&prompt, 600)?;
        // Keep the two groups visibly apart in the heading; without a separator
        // the last key of group A runs straight into the first key of group B.
        results.push(format!("## Pair {}: {} ↔ {}\n\n{}",
            i + 1, group_a.join(", "), group_b.join(", "), response));
    }
    Ok(results.join("\n\n---\n\n"))
}
/// Run the challenger agent against the `batch_size` highest-degree nodes.
///
/// Journal entries and a few bookkeeping files are excluded. Each selected
/// node's key and content are substituted into the `challenger` prompt and
/// sent to the model. Responses are joined into one markdown document
/// separated by `---` rules.
///
/// # Errors
/// Fails if the prompt template cannot be loaded or any model call fails.
pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
    const SKIP_EXACT: [&str; 3] = ["journal.md", "MEMORY.md", "where-am-i.md"];

    let template = load_prompt("challenger")?;
    let topology = get_graph_topology(store, graph);

    let mut candidates: Vec<(&String, usize)> = Vec::new();
    for key in store.nodes.keys() {
        if key.starts_with("journal.md#") || SKIP_EXACT.contains(&key.as_str()) {
            continue;
        }
        candidates.push((key, graph.degree(key)));
    }
    // Most connected nodes first.
    candidates.sort_by(|a, b| b.1.cmp(&a.1));

    let shown_total = batch_size.min(candidates.len());
    let mut results = Vec::new();
    for (i, (key, _)) in candidates.iter().take(batch_size).enumerate() {
        eprintln!(" Challenger {}/{}: {}", i + 1, shown_total, key);
        let Some(node) = store.nodes.get(key.as_str()) else { continue };
        let prompt = template
            .replace("{{TOPOLOGY}}", &topology)
            .replace("{{NODE_KEY}}", key)
            .replace("{{NODE_CONTENT}}", &node.content);
        let response = llm::call_sonnet(&prompt, 600)?;
        results.push(format!("## Challenge: {}\n\n{}", key, response));
    }
    Ok(results.join("\n\n---\n\n"))
}
// ---------------------------------------------------------------------------
// Convergence metrics
// ---------------------------------------------------------------------------
/// Summary of one knowledge-loop cycle, as returned by `run_cycle`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CycleResult {
/// Cycle number; `run_knowledge_loop` counts cycles from 1.
pub cycle: usize,
/// Local time at the start of the cycle, formatted `%Y%m%dT%H%M%S`.
pub timestamp: String,
/// Number of actions parsed from all agent outputs in this cycle.
pub total_actions: usize,
/// Number of those actions that were successfully applied to the store.
pub total_applied: usize,
/// Number of no-ops counted in the agent outputs.
pub total_no_ops: usize,
/// Number of write actions rejected by the confidence threshold or the depth limit.
pub depth_rejected: usize,
/// Sum of the `weight` of every applied action.
pub weighted_delta: f64,
/// Graph metrics measured before any agent ran.
pub graph_metrics_before: GraphMetrics,
/// Graph metrics measured after all agents ran.
pub graph_metrics_after: GraphMetrics,
}
/// Snapshot of graph-level statistics used to judge convergence.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphMetrics {
/// Number of nodes in the store.
pub nodes: usize,
/// Number of edges in the graph.
pub edges: usize,
/// Average clustering coefficient of the graph.
pub cc: f64,
/// Small-world sigma of the graph.
pub sigma: f64,
/// Number of communities in the graph.
pub communities: usize,
}
impl GraphMetrics {
    /// Measure the current store and graph.
    ///
    /// Node count comes from the store; edge count, average clustering
    /// coefficient, small-world sigma and community count come from the graph.
    pub fn from_graph(store: &Store, graph: &Graph) -> Self {
        let nodes = store.nodes.len();
        let edges = graph.edge_count();
        let cc = graph.avg_clustering_coefficient() as f64;
        let sigma = graph.small_world_sigma() as f64;
        let communities = graph.community_count();
        Self { nodes, edges, cc, sigma, communities }
    }
}
fn metric_stability(history: &[CycleResult], key: &str, window: usize) -> f64 {
if history.len() < window { return f64::INFINITY; }
let values: Vec<f64> = history[history.len() - window..].iter()
.map(|h| match key {
"sigma" => h.graph_metrics_after.sigma,
"cc" => h.graph_metrics_after.cc,
"communities" => h.graph_metrics_after.communities as f64,
_ => 0.0,
})
.collect();
if values.len() < 2 { return f64::INFINITY; }
let mean = values.iter().sum::<f64>() / values.len() as f64;
if mean == 0.0 { return 0.0; }
let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / values.len() as f64;
variance.sqrt() / mean.abs()
}
/// Decide whether the loop has converged over the last `window` cycles.
///
/// Convergence requires structural stability (sigma and clustering
/// coefficient CV below 0.05, community-count CV below 0.10) and behavioural
/// quiet (average weighted delta below 1.0). The individual figures are
/// printed to stderr. Always `false` while fewer than `window` cycles exist.
pub fn check_convergence(history: &[CycleResult], window: usize) -> bool {
    if history.len() < window {
        return false;
    }

    let sigma_cv = metric_stability(history, "sigma", window);
    let cc_cv = metric_stability(history, "cc", window);
    let comm_cv = metric_stability(history, "communities", window);

    let recent = &history[history.len() - window..];
    let delta_sum: f64 = recent.iter().map(|r| r.weighted_delta).sum::<f64>();
    let avg_delta = delta_sum / recent.len() as f64;

    eprintln!("\n Convergence check (last {} cycles):", window);
    eprintln!(" sigma CV: {:.4} (< 0.05?)", sigma_cv);
    eprintln!(" CC CV: {:.4} (< 0.05?)", cc_cv);
    eprintln!(" community CV: {:.4} (< 0.10?)", comm_cv);
    eprintln!(" avg delta: {:.2} (< 1.00?)", avg_delta);

    let structural = sigma_cv < 0.05 && cc_cv < 0.05 && comm_cv < 0.10;
    let behavioral = avg_delta < 1.0;
    let converged = structural && behavioral;
    if converged {
        eprintln!(" → CONVERGED");
    }
    converged
}
// ---------------------------------------------------------------------------
// The knowledge loop
// ---------------------------------------------------------------------------
/// Tunables for `run_knowledge_loop`.
pub struct KnowledgeLoopConfig {
/// Maximum number of cycles to run before stopping.
pub max_cycles: usize,
/// Batch size handed to each agent in every cycle.
pub batch_size: usize,
/// Number of most recent cycles examined by the convergence check.
pub window: usize,
/// Write actions deeper than this inference depth are rejected.
pub max_depth: i32,
/// `base` passed to `required_confidence` when gating write actions.
pub confidence_base: f64,
}
impl Default for KnowledgeLoopConfig {
fn default() -> Self {
Self {
max_cycles: 20,
batch_size: 5,
window: 5,
max_depth: 4,
confidence_base: 0.3,
}
}
}
/// Run knowledge cycles until the graph converges or `config.max_cycles` is reached.
///
/// Each cycle is executed by `run_cycle`; after every cycle the history is
/// tested with `check_convergence`. When at least one cycle ran, the full
/// history is stored as a `_knowledge-loop-<timestamp>` node (timestamp of the
/// first cycle), the depth database is written to the store, and the store is
/// saved.
///
/// Failing to serialize or store the summary node is reported on stderr
/// instead of being dropped silently; it does not fail the loop.
///
/// # Errors
/// Fails if the store cannot be loaded or saved, or if a cycle fails.
pub fn run_knowledge_loop(config: &KnowledgeLoopConfig) -> Result<Vec<CycleResult>, String> {
    // The initial store is only needed to seed the depth database; every
    // cycle loads its own fresh copy.
    let mut depth_db = {
        let store = Store::load()?;
        DepthDb::load(&store)
    };
    let mut history = Vec::new();
    eprintln!("Knowledge Loop — fixed-point iteration");
    eprintln!(" max_cycles={} batch_size={}", config.max_cycles, config.batch_size);
    eprintln!(" window={} max_depth={}", config.window, config.max_depth);
    for cycle in 1..=config.max_cycles {
        let result = run_cycle(cycle, config, &mut depth_db)?;
        history.push(result);
        if check_convergence(&history, config.window) {
            eprintln!("\n CONVERGED after {} cycles", cycle);
            break;
        }
    }
    // Save loop summary as a store node
    if let Some(first) = history.first() {
        let key = format!("_knowledge-loop-{}", first.timestamp);
        match serde_json::to_string_pretty(&history) {
            Ok(json) => {
                // Reload so the summary lands on top of whatever the cycles wrote.
                let mut store = Store::load()?;
                if let Err(e) = store.upsert_provenance(&key, &json,
                    store::Provenance::AgentKnowledgeObservation) {
                    eprintln!(" WARNING: could not store loop summary {}: {}", key, e);
                }
                depth_db.save(&mut store);
                store.save()?;
            }
            Err(e) => {
                eprintln!(" WARNING: could not serialize loop summary {}: {}", key, e);
            }
        }
    }
    Ok(history)
}
/// Run one cycle of the knowledge loop.
///
/// Loads a fresh store, then runs the observation, extractor, connector and
/// challenger agents in that order. For each agent the raw output is stored
/// as a `_knowledge-<agent>-<timestamp>` node, its actions are parsed, and
/// each action is applied unless it is a write that fails the depth-scaled
/// confidence threshold or exceeds `config.max_depth`. An agent that fails is
/// logged and skipped. Afterwards the depth database is saved into the store,
/// the spectral embedding is recomputed if anything was applied, and
/// before/after graph metrics are returned in the `CycleResult`.
///
/// # Errors
/// Fails only if the store cannot be loaded.
fn run_cycle(
    cycle_num: usize,
    config: &KnowledgeLoopConfig,
    depth_db: &mut DepthDb,
) -> Result<CycleResult, String> {
    let timestamp = chrono::Local::now().format("%Y%m%dT%H%M%S").to_string();
    eprintln!("\n{}", "=".repeat(60));
    // Separate the cycle number from the timestamp; printed back to back the
    // two numbers are indistinguishable.
    eprintln!("CYCLE {} — {}", cycle_num, timestamp);
    eprintln!("{}", "=".repeat(60));

    let mut store = Store::load()?;
    let graph = store.build_graph();
    let metrics_before = GraphMetrics::from_graph(&store, &graph);
    eprintln!(" Before: nodes={} edges={} cc={:.3} sigma={:.3}",
        metrics_before.nodes, metrics_before.edges, metrics_before.cc, metrics_before.sigma);

    let mut all_actions = Vec::new();
    let mut all_no_ops = 0;
    let mut depth_rejected = 0;
    let mut total_applied = 0;

    // Run each agent, rebuilding graph after mutations
    let agent_names = ["observation", "extractor", "connector", "challenger"];
    for agent_name in &agent_names {
        eprintln!("\n --- {} (n={}) ---", agent_name, config.batch_size);
        // Rebuild graph to reflect any mutations from previous agents
        let graph = store.build_graph();
        let output = match *agent_name {
            "observation" => run_observation_extractor(&store, &graph, config.batch_size),
            "extractor" => run_extractor(&store, &graph, config.batch_size),
            "connector" => run_connector(&store, &graph, config.batch_size),
            "challenger" => run_challenger(&store, &graph, config.batch_size),
            _ => unreachable!(),
        };
        let output = match output {
            Ok(o) => o,
            Err(e) => {
                // One failing agent must not abort the whole cycle.
                eprintln!(" ERROR: {}", e);
                continue;
            }
        };

        // Store raw output as a node (for debugging/audit); best-effort.
        let raw_key = format!("_knowledge-{}-{}", agent_name, timestamp);
        let raw_content = format!("# {} Agent Results — {}\n\n{}", agent_name, timestamp, output);
        store.upsert_provenance(&raw_key, &raw_content,
            agent_provenance(agent_name)).ok();

        let mut actions = parse_all_actions(&output);
        let no_ops = count_no_ops(&output);
        all_no_ops += no_ops;
        eprintln!(" Actions: {} No-ops: {}", actions.len(), no_ops);

        let mut applied = 0;
        for action in &mut actions {
            let depth = compute_action_depth(depth_db, action, agent_name);
            action.depth = depth;
            match &action.kind {
                ActionKind::WriteNode { key, covers, .. } => {
                    let conf_val = action.confidence.value();
                    let req = required_confidence(depth, config.confidence_base);
                    // Use counts of the covered source nodes that exist in the store.
                    let source_uses: Vec<u32> = covers.iter()
                        .filter_map(|k| store.nodes.get(k).map(|n| n.uses))
                        .collect();
                    let avg_uses = if source_uses.is_empty() { 0 }
                        else { source_uses.iter().sum::<u32>() / source_uses.len() as u32 };
                    // Stated confidence plus the use bonus, capped at 1.0.
                    let eff_conf = (conf_val + use_bonus(avg_uses)).min(1.0);
                    if eff_conf < req {
                        action.applied = Some(false);
                        action.rejected_reason = Some("depth_threshold".into());
                        depth_rejected += 1;
                        continue;
                    }
                    if depth > config.max_depth {
                        action.applied = Some(false);
                        action.rejected_reason = Some("max_depth".into());
                        depth_rejected += 1;
                        continue;
                    }
                    eprintln!(" WRITE {} depth={} conf={:.2} eff={:.2} req={:.2}",
                        key, depth, conf_val, eff_conf, req);
                }
                ActionKind::Link { source, target } => {
                    // Show the two endpoints as distinct keys rather than one fused string.
                    eprintln!(" LINK {} → {}", source, target);
                }
                ActionKind::Refine { key, .. } => {
                    eprintln!(" REFINE {} depth={}", key, depth);
                }
            }
            if apply_action(&mut store, action, agent_name, &timestamp, depth) {
                applied += 1;
                action.applied = Some(true);
                // Remember the depth of every node this cycle wrote or refined.
                if let ActionKind::WriteNode { key, .. } | ActionKind::Refine { key, .. } = &action.kind {
                    depth_db.set(key.clone(), depth);
                }
            } else {
                action.applied = Some(false);
            }
        }
        eprintln!(" Applied: {}/{}", applied, actions.len());
        total_applied += applied;
        all_actions.extend(actions);
    }

    depth_db.save(&mut store);

    // Recompute spectral if anything changed
    if total_applied > 0 {
        eprintln!("\n Recomputing spectral embedding...");
        let graph = store.build_graph();
        let result = spectral::decompose(&graph, 8);
        let emb = spectral::to_embedding(&result);
        // Best-effort: a failed save is ignored.
        spectral::save_embedding(&emb).ok();
    }

    let graph = store.build_graph();
    let metrics_after = GraphMetrics::from_graph(&store, &graph);
    let weighted_delta: f64 = all_actions.iter()
        .filter(|a| a.applied == Some(true))
        .map(|a| a.weight)
        .sum();

    eprintln!("\n CYCLE {} SUMMARY", cycle_num);
    eprintln!(" Applied: {}/{} depth-rejected: {} no-ops: {}",
        total_applied, all_actions.len(), depth_rejected, all_no_ops);
    eprintln!(" Weighted delta: {:.2}", weighted_delta);

    Ok(CycleResult {
        cycle: cycle_num,
        timestamp,
        total_actions: all_actions.len(),
        total_applied,
        total_no_ops: all_no_ops,
        depth_rejected,
        weighted_delta,
        graph_metrics_before: metrics_before,
        graph_metrics_after: metrics_after,
    })
}

View file

@ -1,6 +1,6 @@
// LLM utilities: Sonnet invocation and response parsing
// LLM utilities: model invocation and response parsing
//
// Shared by digest, audit, enrich, and consolidate modules.
// Shared by digest, audit, enrich, consolidate, knowledge, and fact_mine.
use crate::store::Store;
@ -8,8 +8,8 @@ use regex::Regex;
use std::fs;
use std::process::Command;
/// Call Sonnet via claude CLI. Returns the response text.
pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, String> {
/// Call a model via claude CLI. Returns the response text.
fn call_model(model: &str, prompt: &str) -> Result<String, String> {
// Write prompt to temp file (claude CLI needs file input for large prompts)
// Use thread ID + PID to avoid collisions under parallel rayon calls
let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
@ -18,7 +18,7 @@ pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, St
.map_err(|e| format!("write temp prompt: {}", e))?;
let result = Command::new("claude")
.args(["-p", "--model", "sonnet", "--tools", ""])
.args(["-p", "--model", model, "--tools", "", "--no-session-persistence"])
.stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
.env_remove("CLAUDECODE")
.output();
@ -38,7 +38,17 @@ pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, St
}
}
/// Parse a JSON response from Sonnet, handling markdown fences.
/// Call Sonnet via claude CLI.
///
/// Thin wrapper that forwards `prompt` to `call_model` with the model name
/// `"sonnet"`. `_timeout_secs` is accepted but not used by this function.
pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, String> {
call_model("sonnet", prompt)
}
/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction).
///
/// Thin wrapper that forwards `prompt` to `call_model` with the model name
/// `"haiku"`.
pub(crate) fn call_haiku(prompt: &str) -> Result<String, String> {
call_model("haiku", prompt)
}
/// Parse a JSON response, handling markdown fences.
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
let cleaned = response.trim();
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);

View file

@ -29,6 +29,8 @@ mod query;
mod spectral;
mod lookups;
mod daemon;
mod fact_mine;
mod knowledge;
pub mod memory_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
@ -132,6 +134,9 @@ fn main() {
"lookup-bump" => cmd_lookup_bump(&args[2..]),
"lookups" => cmd_lookups(&args[2..]),
"daemon" => cmd_daemon(&args[2..]),
"knowledge-loop" => cmd_knowledge_loop(&args[2..]),
"fact-mine" => cmd_fact_mine(&args[2..]),
"fact-mine-store" => cmd_fact_mine_store(&args[2..]),
_ => {
eprintln!("Unknown command: {}", args[1]);
usage();
@ -216,7 +221,14 @@ Commands:
lookups [DATE] Show daily lookup counts (default: today)
daemon Start background job daemon
daemon status Show daemon status
daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)");
daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)
knowledge-loop [OPTIONS] Run knowledge agents to convergence
--max-cycles N (default 20)
--batch-size N (default 5)
--window N (default 5)
--max-depth N (default 4)
fact-mine JSONL [OPTIONS] Extract atomic facts from conversation transcripts
fact-mine --batch DIR Mine all .jsonl files in directory");
}
fn cmd_search(args: &[String]) -> Result<(), String> {
@ -850,8 +862,10 @@ fn cmd_digest(args: &[String]) -> Result<(), String> {
fn cmd_digest_links(args: &[String]) -> Result<(), String> {
let do_apply = args.iter().any(|a| a == "--apply");
let links = digest::parse_all_digest_links()?;
println!("Found {} unique links from digest files", links.len());
let store = store::Store::load()?;
let links = digest::parse_all_digest_links(&store);
drop(store);
println!("Found {} unique links from digest nodes", links.len());
if !do_apply {
for (i, link) in links.iter().enumerate() {
@ -1821,10 +1835,19 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
match args[0].as_str() {
"status" => daemon::show_status(),
"log" => {
let job = args.get(1).map(|s| s.as_str());
let lines = args.get(2)
.and_then(|s| s.parse().ok())
.unwrap_or(50);
// daemon log [N] — last N lines (default 20)
// daemon log JOB [N] — last N lines for job
let (job, lines) = match args.get(1) {
None => (None, 20),
Some(s) => {
if let Ok(n) = s.parse::<usize>() {
(None, n)
} else {
let n = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(20);
(Some(s.as_str()), n)
}
}
};
daemon::show_log(job, lines)
}
_ => {
@ -1833,3 +1856,125 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
}
}
}
/// `knowledge-loop` command: parse options and run the knowledge loop.
///
/// Recognised options are `--max-cycles`, `--batch-size`, `--window` and
/// `--max-depth`, each followed by a number; `--help`/`-h` prints usage.
///
/// # Errors
/// Fails on an unknown argument, on an option whose value is missing or not
/// a valid number, or when the loop itself fails.
fn cmd_knowledge_loop(args: &[String]) -> Result<(), String> {
    // Parse the value that follows `flag`. A missing or malformed value is an
    // error: silently keeping the default would make a typo such as
    // `--max-cycles 1O` run the full default number of cycles.
    fn option_value<T: std::str::FromStr>(flag: &str, raw: Option<&String>) -> Result<T, String> {
        let raw = raw.ok_or_else(|| format!("Missing value for {}. Use --help for usage.", flag))?;
        raw.parse()
            .map_err(|_| format!("Invalid value for {}: {}. Use --help for usage.", flag, raw))
    }

    if args.iter().any(|a| a == "--help" || a == "-h") {
        eprintln!("Usage: poc-memory knowledge-loop [OPTIONS]
Run knowledge agents (observation, extractor, connector, challenger) in
a convergence loop. Each cycle runs all agents, applies actions to the
graph, and checks structural stability metrics.
Options:
--max-cycles N Maximum cycles before stopping (default: 20)
--batch-size N Items per agent per cycle (default: 5)
--window N Cycles to check for convergence (default: 5)
--max-depth N Maximum inference depth (default: 4)");
        return Ok(());
    }

    let mut config = knowledge::KnowledgeLoopConfig::default();
    let mut rest = args.iter();
    while let Some(flag) = rest.next() {
        match flag.as_str() {
            "--max-cycles" => config.max_cycles = option_value(flag, rest.next())?,
            "--batch-size" => config.batch_size = option_value(flag, rest.next())?,
            "--window" => config.window = option_value(flag, rest.next())?,
            "--max-depth" => config.max_depth = option_value(flag, rest.next())?,
            other => return Err(format!("Unknown arg: {}. Use --help for usage.", other)),
        }
    }

    let results = knowledge::run_knowledge_loop(&config)?;
    eprintln!("\nCompleted {} cycles, {} total actions applied",
        results.len(),
        results.iter().map(|r| r.total_applied).sum::<usize>());
    Ok(())
}
/// `fact-mine` command: extract facts from one transcript or a directory of transcripts.
///
/// With `--batch` the path must be a directory and every `.jsonl` file in it
/// is mined (in sorted order); otherwise the path is a single transcript.
/// Unless `--dry-run` is given, the facts are written as pretty JSON to
/// `--output FILE` or to stdout.
///
/// # Errors
/// Fails on an unknown option, a missing path, an option whose value is
/// missing or invalid, a `--batch` path that is not a directory, or when
/// mining, serialization or writing the output fails.
fn cmd_fact_mine(args: &[String]) -> Result<(), String> {
    if args.is_empty() || args.iter().any(|a| a == "--help" || a == "-h") {
        eprintln!("Usage: poc-memory fact-mine <JSONL> [OPTIONS]
poc-memory fact-mine --batch <DIR> [OPTIONS]
Extract atomic factual claims from conversation transcripts using Haiku.
Options:
--batch Process all .jsonl files in directory
--dry-run Show chunks without calling model
--output FILE Write JSON to file (default: stdout)
--min-messages N Skip transcripts with fewer messages (default: 10)");
        return Ok(());
    }

    let mut batch = false;
    let mut dry_run = false;
    let mut output_file: Option<String> = None;
    let mut min_messages = 10usize;
    let mut path: Option<String> = None;

    let mut rest = args.iter();
    while let Some(arg) = rest.next() {
        match arg.as_str() {
            "--batch" => batch = true,
            "--dry-run" => dry_run = true,
            "--output" | "-o" => {
                // A dangling --output used to fall back to stdout without a word.
                let file = rest.next()
                    .ok_or_else(|| format!("Missing value for {}", arg))?;
                output_file = Some(file.clone());
            }
            "--min-messages" => {
                // Reject bad numbers instead of silently keeping the default.
                let raw = rest.next().ok_or("Missing value for --min-messages")?;
                min_messages = raw.parse()
                    .map_err(|_| format!("Invalid value for --min-messages: {}", raw))?;
            }
            s if !s.starts_with('-') => path = Some(s.to_string()),
            other => return Err(format!("Unknown arg: {}", other)),
        }
    }

    let path = path.ok_or("Missing path argument")?;
    let p = std::path::Path::new(&path);
    let paths: Vec<std::path::PathBuf> = if batch {
        if !p.is_dir() {
            return Err(format!("Not a directory: {}", path));
        }
        let mut files: Vec<_> = std::fs::read_dir(p)
            .map_err(|e| format!("read dir: {}", e))?
            .filter_map(|e| e.ok())
            .map(|e| e.path())
            .filter(|p| p.extension().map(|x| x == "jsonl").unwrap_or(false))
            .collect();
        files.sort();
        eprintln!("Found {} transcripts", files.len());
        files
    } else {
        vec![p.to_path_buf()]
    };

    let path_refs: Vec<&std::path::Path> = paths.iter().map(|p| p.as_path()).collect();
    let facts = fact_mine::mine_batch(&path_refs, min_messages, dry_run)?;

    if !dry_run {
        let json = serde_json::to_string_pretty(&facts)
            .map_err(|e| format!("serialize: {}", e))?;
        if let Some(out) = &output_file {
            std::fs::write(out, &json).map_err(|e| format!("write: {}", e))?;
            eprintln!("\nWrote {} facts to {}", facts.len(), out);
        } else {
            println!("{}", json);
        }
    }
    eprintln!("\nTotal: {} facts from {} transcripts", facts.len(), paths.len());
    Ok(())
}
/// `fact-mine-store` command: mine one transcript and store the facts.
///
/// Takes exactly one argument, the path of an existing JSONL transcript, and
/// reports the number of stored facts on stderr.
fn cmd_fact_mine_store(args: &[String]) -> Result<(), String> {
    let [file] = args else {
        return Err("Usage: poc-memory fact-mine-store <JSONL>".into());
    };
    let path = std::path::Path::new(file);
    if !path.exists() {
        return Err(format!("File not found: {}", file));
    }
    let count = fact_mine::mine_and_store(path)?;
    eprintln!("Stored {} facts", count);
    Ok(())
}

View file

@ -202,11 +202,20 @@ fn node_type_label(nt: NodeType) -> &'static str {
/// Human-readable label for a provenance value.
///
/// Agent-specific variants are rendered as `agent:<name>`. Each variant is
/// listed exactly once so the match stays exhaustive without unreachable arms.
fn provenance_label(p: Provenance) -> &'static str {
    match p {
        Provenance::Manual => "manual",
        Provenance::Journal => "journal",
        Provenance::Agent => "agent",
        Provenance::Dream => "dream",
        Provenance::Derived => "derived",
        Provenance::AgentExperienceMine => "agent:experience-mine",
        Provenance::AgentKnowledgeObservation => "agent:knowledge-observation",
        Provenance::AgentKnowledgePattern => "agent:knowledge-pattern",
        Provenance::AgentKnowledgeConnector => "agent:knowledge-connector",
        Provenance::AgentKnowledgeChallenger => "agent:knowledge-challenger",
        Provenance::AgentConsolidate => "agent:consolidate",
        Provenance::AgentDigest => "agent:digest",
        Provenance::AgentFactMine => "agent:fact-mine",
        Provenance::AgentDecay => "agent:decay",
    }
}

View file

@ -30,18 +30,25 @@ impl Store {
/// Upsert a node: update if exists (and content changed), create if not.
/// Returns: "created", "updated", or "unchanged".
///
/// Delegates to `upsert_provenance` with `Provenance::Manual`.
pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> {
self.upsert_provenance(key, content, Provenance::Manual)
}
/// Upsert with explicit provenance (for agent-created nodes).
pub fn upsert_provenance(&mut self, key: &str, content: &str, provenance: Provenance) -> Result<&'static str, String> {
if let Some(existing) = self.nodes.get(key) {
if existing.content == content {
return Ok("unchanged");
}
let mut node = existing.clone();
node.content = content.to_string();
node.provenance = provenance;
node.version += 1;
self.append_nodes(std::slice::from_ref(&node))?;
self.nodes.insert(key.to_string(), node);
Ok("updated")
} else {
let node = new_node(key, content);
let mut node = new_node(key, content);
node.provenance = provenance;
self.append_nodes(std::slice::from_ref(&node))?;
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(key.to_string(), node);

View file

@ -236,9 +236,18 @@ pub enum NodeType {
pub enum Provenance {
    Manual,
    Journal,
    Agent, // legacy catch-all, prefer specific variants below
    Dream,
    Derived,
    // Agent-specific variants: one per agent that writes to the store.
    AgentExperienceMine,
    AgentKnowledgeObservation,
    AgentKnowledgePattern,
    AgentKnowledgeConnector,
    AgentKnowledgeChallenger,
    AgentConsolidate,
    AgentDigest,
    AgentFactMine,
    AgentDecay,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
@ -296,7 +305,10 @@ capnp_enum!(NodeType, memory_capnp::NodeType,
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic]);
capnp_enum!(Provenance, memory_capnp::Provenance,
[Manual, Journal, Agent, Dream, Derived]);
[Manual, Journal, Agent, Dream, Derived,
AgentExperienceMine, AgentKnowledgeObservation, AgentKnowledgePattern,
AgentKnowledgeConnector, AgentKnowledgeChallenger, AgentConsolidate,
AgentDigest, AgentFactMine, AgentDecay]);
capnp_enum!(Category, memory_capnp::Category,
[General, Core, Technical, Observation, Task]);