migrate agent output to capnp store, add provenance tracking

All agent output now goes to the store as nodes instead of
markdown/JSON files. Each node carries a Provenance enum identifying
which agent created it (AgentDigest, AgentConsolidate, AgentFactMine,
AgentKnowledgeObservation, etc — 14 variants total).

Store changes:
- upsert_provenance() method for agent-created nodes
- Provenance enum expanded from 5 to 14 variants

Agent changes:
- digest: writes to store nodes (daily-YYYY-MM-DD.md etc)
- consolidate: reports/actions/logs stored as _consolidation-* nodes
- knowledge: depth DB and agent output stored as _knowledge-* nodes
- enrich: experience-mine results go directly to store
- llm: --no-session-persistence prevents transcript accumulation

Deleted: 14 Python/shell scripts replaced by Rust implementations.
This commit is contained in:
ProofOfConcept 2026-03-05 15:30:57 -05:00
parent e37f819dd2
commit 552d255dc3
23 changed files with 1381 additions and 4095 deletions

View file

@ -29,6 +29,8 @@ mod query;
mod spectral;
mod lookups;
mod daemon;
mod fact_mine;
mod knowledge;
pub mod memory_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
@ -132,6 +134,9 @@ fn main() {
"lookup-bump" => cmd_lookup_bump(&args[2..]),
"lookups" => cmd_lookups(&args[2..]),
"daemon" => cmd_daemon(&args[2..]),
"knowledge-loop" => cmd_knowledge_loop(&args[2..]),
"fact-mine" => cmd_fact_mine(&args[2..]),
"fact-mine-store" => cmd_fact_mine_store(&args[2..]),
_ => {
eprintln!("Unknown command: {}", args[1]);
usage();
@ -216,7 +221,14 @@ Commands:
lookups [DATE] Show daily lookup counts (default: today)
daemon Start background job daemon
daemon status Show daemon status
daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)");
daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)
knowledge-loop [OPTIONS] Run knowledge agents to convergence
--max-cycles N (default 20)
--batch-size N (default 5)
--window N (default 5)
--max-depth N (default 4)
fact-mine JSONL [OPTIONS] Extract atomic facts from conversation transcripts
fact-mine --batch DIR Mine all .jsonl files in directory");
}
fn cmd_search(args: &[String]) -> Result<(), String> {
@ -850,8 +862,10 @@ fn cmd_digest(args: &[String]) -> Result<(), String> {
fn cmd_digest_links(args: &[String]) -> Result<(), String> {
let do_apply = args.iter().any(|a| a == "--apply");
let links = digest::parse_all_digest_links()?;
println!("Found {} unique links from digest files", links.len());
let store = store::Store::load()?;
let links = digest::parse_all_digest_links(&store);
drop(store);
println!("Found {} unique links from digest nodes", links.len());
if !do_apply {
for (i, link) in links.iter().enumerate() {
@ -1821,10 +1835,19 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
match args[0].as_str() {
"status" => daemon::show_status(),
"log" => {
let job = args.get(1).map(|s| s.as_str());
let lines = args.get(2)
.and_then(|s| s.parse().ok())
.unwrap_or(50);
// daemon log [N] — last N lines (default 20)
// daemon log JOB [N] — last N lines for job
let (job, lines) = match args.get(1) {
None => (None, 20),
Some(s) => {
if let Ok(n) = s.parse::<usize>() {
(None, n)
} else {
let n = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(20);
(Some(s.as_str()), n)
}
}
};
daemon::show_log(job, lines)
}
_ => {
@ -1833,3 +1856,125 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
}
}
}
/// CLI entry point for `knowledge-loop`: parse tuning flags, then run the
/// knowledge agents (observation, extractor, connector, challenger) in a
/// convergence loop via `knowledge::run_knowledge_loop`.
///
/// Returns Err on an unknown flag or a missing/malformed flag value.
fn cmd_knowledge_loop(args: &[String]) -> Result<(), String> {
    if args.iter().any(|a| a == "--help" || a == "-h") {
        eprintln!("Usage: poc-memory knowledge-loop [OPTIONS]
Run knowledge agents (observation, extractor, connector, challenger) in
a convergence loop. Each cycle runs all agents, applies actions to the
graph, and checks structural stability metrics.
Options:
  --max-cycles N    Maximum cycles before stopping (default: 20)
  --batch-size N    Items per agent per cycle (default: 5)
  --window N        Cycles to check for convergence (default: 5)
  --max-depth N     Maximum inference depth (default: 4)");
        return Ok(());
    }
    let mut config = knowledge::KnowledgeLoopConfig::default();
    let mut i = 0;
    while i < args.len() {
        match args[i].as_str() {
            // Each flag consumes the following argument as its value.
            // Previously a missing/unparsable value silently kept the default;
            // now it is reported as an error.
            "--max-cycles" => { i += 1; config.max_cycles = parse_flag_value(args, i, "--max-cycles")?; }
            "--batch-size" => { i += 1; config.batch_size = parse_flag_value(args, i, "--batch-size")?; }
            "--window" => { i += 1; config.window = parse_flag_value(args, i, "--window")?; }
            "--max-depth" => { i += 1; config.max_depth = parse_flag_value(args, i, "--max-depth")?; }
            other => return Err(format!("Unknown arg: {}. Use --help for usage.", other)),
        }
        i += 1;
    }
    let results = knowledge::run_knowledge_loop(&config)?;
    eprintln!("\nCompleted {} cycles, {} total actions applied",
        results.len(),
        results.iter().map(|r| r.total_applied).sum::<usize>());
    Ok(())
}

/// Parse the value expected at `args[i]` for `flag`, failing loudly when the
/// value is missing or malformed instead of silently falling back to a default.
fn parse_flag_value<T: std::str::FromStr>(args: &[String], i: usize, flag: &str) -> Result<T, String> {
    let raw = args
        .get(i)
        .ok_or_else(|| format!("Missing value for {}", flag))?;
    raw.parse()
        .map_err(|_| format!("Invalid value for {}: {}", flag, raw))
}
/// CLI entry point for `fact-mine`: extract atomic factual claims from one
/// JSONL transcript, or from every `.jsonl` file in a directory with
/// `--batch`, and emit the results as pretty-printed JSON to stdout or to
/// `--output FILE`. `--dry-run` previews chunks without calling the model
/// and produces no JSON output.
///
/// Returns Err on unknown flags, a missing path, a non-directory `--batch`
/// target, or a missing/malformed flag value.
fn cmd_fact_mine(args: &[String]) -> Result<(), String> {
    if args.is_empty() || args.iter().any(|a| a == "--help" || a == "-h") {
        eprintln!("Usage: poc-memory fact-mine <JSONL> [OPTIONS]
       poc-memory fact-mine --batch <DIR> [OPTIONS]
Extract atomic factual claims from conversation transcripts using Haiku.
Options:
  --batch           Process all .jsonl files in directory
  --dry-run         Show chunks without calling model
  --output FILE     Write JSON to file (default: stdout)
  --min-messages N  Skip transcripts with fewer messages (default: 10)");
        return Ok(());
    }
    let mut batch = false;
    let mut dry_run = false;
    let mut output_file: Option<String> = None;
    let mut min_messages = 10usize;
    let mut path: Option<String> = None;
    let mut i = 0;
    while i < args.len() {
        match args[i].as_str() {
            "--batch" => batch = true,
            "--dry-run" => dry_run = true,
            "--output" | "-o" => {
                i += 1;
                // Previously a missing FILE value silently left output on
                // stdout; fail loudly instead.
                output_file = Some(
                    args.get(i)
                        .cloned()
                        .ok_or("Missing value for --output")?,
                );
            }
            "--min-messages" => {
                i += 1;
                // Previously a missing/unparsable N silently kept the
                // default; report it as an error instead.
                let raw = args
                    .get(i)
                    .ok_or("Missing value for --min-messages")?;
                min_messages = raw
                    .parse()
                    .map_err(|_| format!("Invalid value for --min-messages: {}", raw))?;
            }
            // First non-flag argument is the transcript (or directory) path.
            s if !s.starts_with('-') => path = Some(s.to_string()),
            other => return Err(format!("Unknown arg: {}", other)),
        }
        i += 1;
    }
    let path = path.ok_or("Missing path argument")?;
    let p = std::path::Path::new(&path);
    // Batch mode: mine every .jsonl in the directory, sorted for a
    // deterministic processing order. Single mode: just the given file.
    let paths: Vec<std::path::PathBuf> = if batch {
        if !p.is_dir() {
            return Err(format!("Not a directory: {}", path));
        }
        let mut files: Vec<_> = std::fs::read_dir(p)
            .map_err(|e| format!("read dir: {}", e))?
            .filter_map(|e| e.ok())
            .map(|e| e.path())
            .filter(|p| p.extension().map(|x| x == "jsonl").unwrap_or(false))
            .collect();
        files.sort();
        eprintln!("Found {} transcripts", files.len());
        files
    } else {
        vec![p.to_path_buf()]
    };
    let path_refs: Vec<&std::path::Path> = paths.iter().map(|p| p.as_path()).collect();
    let facts = fact_mine::mine_batch(&path_refs, min_messages, dry_run)?;
    // Dry runs only preview chunks; skip serialization entirely.
    if !dry_run {
        let json = serde_json::to_string_pretty(&facts)
            .map_err(|e| format!("serialize: {}", e))?;
        if let Some(out) = &output_file {
            std::fs::write(out, &json).map_err(|e| format!("write: {}", e))?;
            eprintln!("\nWrote {} facts to {}", facts.len(), out);
        } else {
            println!("{}", json);
        }
    }
    eprintln!("\nTotal: {} facts from {} transcripts", facts.len(), paths.len());
    Ok(())
}
/// CLI entry point for `fact-mine-store`: mine facts from a single JSONL
/// transcript and write them directly into the store, reporting the count.
fn cmd_fact_mine_store(args: &[String]) -> Result<(), String> {
    // Exactly one positional argument is accepted: the transcript path.
    let raw_path = match args {
        [only] => only,
        _ => return Err("Usage: poc-memory fact-mine-store <JSONL>".into()),
    };
    let transcript = std::path::Path::new(raw_path);
    if !transcript.exists() {
        return Err(format!("File not found: {}", raw_path));
    }
    let stored = fact_mine::mine_and_store(transcript)?;
    eprintln!("Stored {} facts", stored);
    Ok(())
}