migrate agent output to capnp store, add provenance tracking

All agent output now goes to the store as nodes instead of
markdown/JSON files. Each node carries a Provenance enum identifying
which agent created it (AgentDigest, AgentConsolidate, AgentFactMine,
AgentKnowledgeObservation, etc — 14 variants total).

Store changes:
- upsert_provenance() method for agent-created nodes
- Provenance enum expanded from 5 to 14 variants

Agent changes:
- digest: writes to store nodes (daily-YYYY-MM-DD.md, etc.)
- consolidate: reports/actions/logs stored as _consolidation-* nodes
- knowledge: depth DB and agent output stored as _knowledge-* nodes
- enrich: experience-mine results go directly to store
- llm: --no-session-persistence prevents transcript accumulation

Deleted: 14 Python/shell scripts replaced by Rust implementations.
This commit is contained in:
ProofOfConcept 2026-03-05 15:30:57 -05:00
parent e37f819dd2
commit 552d255dc3
23 changed files with 1381 additions and 4095 deletions

View file

@ -13,10 +13,60 @@ use crate::store::{self, Store, new_node, new_relation};
use regex::Regex;
use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::fs;
use std::hash::{Hash, Hasher};
use crate::util::memory_subdir;
use crate::store::StoreView;
/// Compute the content-hash dedup key for a transcript file.
///
/// Must stay in sync with the key `experience_mine` writes when it marks
/// a transcript as mined, or dedup checks will silently miss.
///
/// # Errors
/// Returns a formatted message when the file cannot be read.
///
/// NOTE(review): `DefaultHasher`'s algorithm is not guaranteed stable
/// across Rust releases, so keys persisted by one toolchain may not match
/// keys computed by another — confirm this trade-off is acceptable for a
/// persisted dedup key.
pub fn transcript_dedup_key(path: &str) -> Result<String, String> {
    let contents = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?;
    let digest = {
        let mut h = DefaultHasher::new();
        contents.hash(&mut h);
        h.finish()
    };
    Ok(format!("_mined-transcripts.md#h-{:016x}", digest))
}
/// Check whether a transcript's content-hash dedup key already exists in
/// the store (i.e. the transcript has been mined before).
///
/// A file that cannot be read is treated as not yet mined.
pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
    transcript_dedup_key(path)
        .map(|key| store.node_content(&key).is_some())
        .unwrap_or(false)
}
/// Filename-based dedup key for a transcript (the file stem, typically a
/// session UUID).
///
/// Cheap to compute — no file I/O — so the daemon reconcile loop can
/// check many paths per tick. Falls back to the raw path string when the
/// path has no file stem (e.g. an empty string).
pub fn transcript_filename_key(path: &str) -> String {
    let stem = match std::path::Path::new(path).file_stem() {
        Some(s) => s.to_string_lossy().into_owned(),
        None => path.to_string(),
    };
    format!("_mined-transcripts.md#f-{}", stem)
}
/// Collect every mined-transcript key currently in the store — both the
/// `#h-` content-hash and `#f-` filename variants share the
/// `_mined-transcripts.md#` prefix matched here.
///
/// Intended to be loaded once per daemon tick and then consulted many
/// times (see `is_transcript_mined_with_keys`). A store that fails to
/// load yields an empty set, i.e. everything looks unmined.
pub fn mined_transcript_keys() -> HashSet<String> {
    use crate::store::AnyView;
    let mut mined = HashSet::new();
    if let Ok(view) = AnyView::load() {
        view.for_each_node(|key, _, _| {
            if key.starts_with("_mined-transcripts.md#") {
                mined.insert(key.to_string());
            }
        });
    }
    mined
}
/// Fast mined-check against a pre-loaded key set (see
/// `mined_transcript_keys`). Only the filename-based key is consulted, so
/// no file read is needed. Transcripts mined before filename keys existed
/// fall through here and are short-circuited inside `experience_mine` by
/// its content-hash check — a one-time cost on the first restart after
/// that change.
pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> bool {
    let key = transcript_filename_key(path);
    mined.contains(&key)
}
/// Extract user/assistant messages with line numbers from a JSONL transcript.
/// (line_number, role, text, timestamp)
@ -187,21 +237,6 @@ pub fn journal_enrich(
}
}
// Save result to agent-results
let timestamp = store::format_datetime(store::now_epoch())
.replace([':', '-'], "");
let result_file = memory_subdir("agent-results")?
.join(format!("{}.json", timestamp));
let output = serde_json::json!({
"timestamp": timestamp,
"jsonl_path": jsonl_path,
"entry_text": &entry_text[..entry_text.len().min(500)],
"agent_result": result,
});
fs::write(&result_file, serde_json::to_string_pretty(&output).unwrap())
.map_err(|e| format!("write {}: {}", result_file.display(), e))?;
println!(" Results saved: {}", result_file.display());
store.save()?;
Ok(())
}
@ -320,6 +355,7 @@ pub fn experience_mine(
let mut node = new_node(&key, &full_content);
node.node_type = store::NodeType::EpisodicSession;
node.category = store::Category::Observation;
node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(node);
count += 1;
@ -328,11 +364,19 @@ pub fn experience_mine(
}
// Record this transcript as mined (even if count == 0, to prevent re-runs)
// Two keys: content hash (exact dedup) and filename (fast daemon reconcile)
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
let mut dedup_node = new_node(&dedup_key, &dedup_content);
dedup_node.category = store::Category::Task;
dedup_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(dedup_node);
let fname_key = transcript_filename_key(jsonl_path);
let mut fname_node = new_node(&fname_key, &dedup_content);
fname_node.category = store::Category::Task;
fname_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(fname_node);
if count > 0 {
println!(" Saved {} new journal entries.", count);
}