migrate agent output to capnp store, add provenance tracking

All agent output now goes to the store as nodes instead of
markdown/JSON files. Each node carries a Provenance enum identifying
which agent created it (AgentDigest, AgentConsolidate, AgentFactMine,
AgentKnowledgeObservation, etc — 14 variants total).

Store changes:
- upsert_provenance() method for agent-created nodes
- Provenance enum expanded from 5 to 14 variants

Agent changes:
- digest: writes to store nodes (daily-YYYY-MM-DD.md etc)
- consolidate: reports/actions/logs stored as _consolidation-* nodes
- knowledge: depth DB and agent output stored as _knowledge-* nodes
- enrich: experience-mine results go directly to store
- llm: --no-session-persistence prevents transcript accumulation

Deleted: 14 Python/shell scripts replaced by Rust implementations.
This commit is contained in:
ProofOfConcept 2026-03-05 15:30:57 -05:00
parent e37f819dd2
commit 552d255dc3
23 changed files with 1381 additions and 4095 deletions

View file

@ -29,6 +29,8 @@ mod query;
mod spectral;
mod lookups;
mod daemon;
mod fact_mine;
mod knowledge;
pub mod memory_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
@ -132,6 +134,9 @@ fn main() {
"lookup-bump" => cmd_lookup_bump(&args[2..]),
"lookups" => cmd_lookups(&args[2..]),
"daemon" => cmd_daemon(&args[2..]),
"knowledge-loop" => cmd_knowledge_loop(&args[2..]),
"fact-mine" => cmd_fact_mine(&args[2..]),
"fact-mine-store" => cmd_fact_mine_store(&args[2..]),
_ => {
eprintln!("Unknown command: {}", args[1]);
usage();
@ -216,7 +221,14 @@ Commands:
lookups [DATE] Show daily lookup counts (default: today)
daemon Start background job daemon
daemon status Show daemon status
daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)");
daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)
knowledge-loop [OPTIONS] Run knowledge agents to convergence
--max-cycles N (default 20)
--batch-size N (default 5)
--window N (default 5)
--max-depth N (default 4)
fact-mine JSONL [OPTIONS] Extract atomic facts from conversation transcripts
fact-mine --batch DIR Mine all .jsonl files in directory");
}
fn cmd_search(args: &[String]) -> Result<(), String> {
@ -850,8 +862,10 @@ fn cmd_digest(args: &[String]) -> Result<(), String> {
fn cmd_digest_links(args: &[String]) -> Result<(), String> {
let do_apply = args.iter().any(|a| a == "--apply");
let links = digest::parse_all_digest_links()?;
println!("Found {} unique links from digest files", links.len());
let store = store::Store::load()?;
let links = digest::parse_all_digest_links(&store);
drop(store);
println!("Found {} unique links from digest nodes", links.len());
if !do_apply {
for (i, link) in links.iter().enumerate() {
@ -1821,10 +1835,19 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
match args[0].as_str() {
"status" => daemon::show_status(),
"log" => {
let job = args.get(1).map(|s| s.as_str());
let lines = args.get(2)
.and_then(|s| s.parse().ok())
.unwrap_or(50);
// daemon log [N] — last N lines (default 20)
// daemon log JOB [N] — last N lines for job
let (job, lines) = match args.get(1) {
None => (None, 20),
Some(s) => {
if let Ok(n) = s.parse::<usize>() {
(None, n)
} else {
let n = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(20);
(Some(s.as_str()), n)
}
}
};
daemon::show_log(job, lines)
}
_ => {
@ -1833,3 +1856,125 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
}
}
}
/// CLI entry point for `knowledge-loop`: parse tuning flags, then run the
/// knowledge agents (observation, extractor, connector, challenger) in a
/// convergence loop via `knowledge::run_knowledge_loop`.
///
/// Returns Err on an unknown flag or a missing/malformed flag value.
fn cmd_knowledge_loop(args: &[String]) -> Result<(), String> {
    if args.iter().any(|a| a == "--help" || a == "-h") {
        eprintln!("Usage: poc-memory knowledge-loop [OPTIONS]
Run knowledge agents (observation, extractor, connector, challenger) in
a convergence loop. Each cycle runs all agents, applies actions to the
graph, and checks structural stability metrics.
Options:
  --max-cycles N    Maximum cycles before stopping (default: 20)
  --batch-size N    Items per agent per cycle (default: 5)
  --window N        Cycles to check for convergence (default: 5)
  --max-depth N     Maximum inference depth (default: 4)");
        return Ok(());
    }
    let mut config = knowledge::KnowledgeLoopConfig::default();
    let mut i = 0;
    while i < args.len() {
        match args[i].as_str() {
            // Each flag consumes the following argument as its value.
            // Previously a missing/unparsable value silently kept the default;
            // now it is reported as an error.
            "--max-cycles" => { i += 1; config.max_cycles = parse_flag_value(args, i, "--max-cycles")?; }
            "--batch-size" => { i += 1; config.batch_size = parse_flag_value(args, i, "--batch-size")?; }
            "--window" => { i += 1; config.window = parse_flag_value(args, i, "--window")?; }
            "--max-depth" => { i += 1; config.max_depth = parse_flag_value(args, i, "--max-depth")?; }
            other => return Err(format!("Unknown arg: {}. Use --help for usage.", other)),
        }
        i += 1;
    }
    let results = knowledge::run_knowledge_loop(&config)?;
    eprintln!("\nCompleted {} cycles, {} total actions applied",
        results.len(),
        results.iter().map(|r| r.total_applied).sum::<usize>());
    Ok(())
}

/// Parse the value expected at `args[i]` for `flag`, failing loudly when the
/// value is missing or malformed instead of silently falling back to a default.
fn parse_flag_value<T: std::str::FromStr>(args: &[String], i: usize, flag: &str) -> Result<T, String> {
    let raw = args
        .get(i)
        .ok_or_else(|| format!("Missing value for {}", flag))?;
    raw.parse()
        .map_err(|_| format!("Invalid value for {}: {}", flag, raw))
}
/// CLI entry point for `fact-mine`: extract atomic factual claims from one
/// JSONL transcript, or from every `.jsonl` file in a directory with
/// `--batch`, and emit the results as pretty-printed JSON to stdout or to
/// `--output FILE`. `--dry-run` previews chunks without calling the model
/// and produces no JSON output.
///
/// Returns Err on unknown flags, a missing path, a non-directory `--batch`
/// target, or a missing/malformed flag value.
fn cmd_fact_mine(args: &[String]) -> Result<(), String> {
    if args.is_empty() || args.iter().any(|a| a == "--help" || a == "-h") {
        eprintln!("Usage: poc-memory fact-mine <JSONL> [OPTIONS]
       poc-memory fact-mine --batch <DIR> [OPTIONS]
Extract atomic factual claims from conversation transcripts using Haiku.
Options:
  --batch           Process all .jsonl files in directory
  --dry-run         Show chunks without calling model
  --output FILE     Write JSON to file (default: stdout)
  --min-messages N  Skip transcripts with fewer messages (default: 10)");
        return Ok(());
    }
    let mut batch = false;
    let mut dry_run = false;
    let mut output_file: Option<String> = None;
    let mut min_messages = 10usize;
    let mut path: Option<String> = None;
    let mut i = 0;
    while i < args.len() {
        match args[i].as_str() {
            "--batch" => batch = true,
            "--dry-run" => dry_run = true,
            "--output" | "-o" => {
                i += 1;
                // Previously a missing FILE value silently left output on
                // stdout; fail loudly instead.
                output_file = Some(
                    args.get(i)
                        .cloned()
                        .ok_or("Missing value for --output")?,
                );
            }
            "--min-messages" => {
                i += 1;
                // Previously a missing/unparsable N silently kept the
                // default; report it as an error instead.
                let raw = args
                    .get(i)
                    .ok_or("Missing value for --min-messages")?;
                min_messages = raw
                    .parse()
                    .map_err(|_| format!("Invalid value for --min-messages: {}", raw))?;
            }
            // First non-flag argument is the transcript (or directory) path.
            s if !s.starts_with('-') => path = Some(s.to_string()),
            other => return Err(format!("Unknown arg: {}", other)),
        }
        i += 1;
    }
    let path = path.ok_or("Missing path argument")?;
    let p = std::path::Path::new(&path);
    // Batch mode: mine every .jsonl in the directory, sorted for a
    // deterministic processing order. Single mode: just the given file.
    let paths: Vec<std::path::PathBuf> = if batch {
        if !p.is_dir() {
            return Err(format!("Not a directory: {}", path));
        }
        let mut files: Vec<_> = std::fs::read_dir(p)
            .map_err(|e| format!("read dir: {}", e))?
            .filter_map(|e| e.ok())
            .map(|e| e.path())
            .filter(|p| p.extension().map(|x| x == "jsonl").unwrap_or(false))
            .collect();
        files.sort();
        eprintln!("Found {} transcripts", files.len());
        files
    } else {
        vec![p.to_path_buf()]
    };
    let path_refs: Vec<&std::path::Path> = paths.iter().map(|p| p.as_path()).collect();
    let facts = fact_mine::mine_batch(&path_refs, min_messages, dry_run)?;
    // Dry runs only preview chunks; skip serialization entirely.
    if !dry_run {
        let json = serde_json::to_string_pretty(&facts)
            .map_err(|e| format!("serialize: {}", e))?;
        if let Some(out) = &output_file {
            std::fs::write(out, &json).map_err(|e| format!("write: {}", e))?;
            eprintln!("\nWrote {} facts to {}", facts.len(), out);
        } else {
            println!("{}", json);
        }
    }
    eprintln!("\nTotal: {} facts from {} transcripts", facts.len(), paths.len());
    Ok(())
}
/// CLI entry point for `fact-mine-store`: mine facts from a single JSONL
/// transcript and write them directly into the store, reporting the count.
fn cmd_fact_mine_store(args: &[String]) -> Result<(), String> {
    // Exactly one positional argument is accepted: the transcript path.
    let raw_path = match args {
        [only] => only,
        _ => return Err("Usage: poc-memory fact-mine-store <JSONL>".into()),
    };
    let transcript = std::path::Path::new(raw_path);
    if !transcript.exists() {
        return Err(format!("File not found: {}", raw_path));
    }
    let stored = fact_mine::mine_and_store(transcript)?;
    eprintln!("Stored {} facts", stored);
    Ok(())
}