Remove experience_mine, journal_enrich, and old mining helpers

experience_mine and journal_enrich are replaced by the observation
agent. enrich.rs reduced from 465 to 40 lines — only extract_conversation
and split_on_compaction remain (used by observation fragment selection).

-455 lines.
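For orientation, here is a minimal sketch of how the observation agent's fragment selection can drive the two surviving helpers. pick_latest_segment and the "newest segment" policy are illustrative assumptions, not code in this commit; the message tuples are (line_number, role, text, timestamp) as documented in enrich.rs below.

// Hypothetical caller, for illustration only — not part of this commit.
use crate::enrich::{extract_conversation, split_on_compaction};

/// Pick the most recent compaction segment of a transcript as the
/// observation fragment, if the transcript has any messages at all.
fn pick_latest_segment(
    jsonl_path: &str,
) -> Result<Option<Vec<(usize, String, String, String)>>, String> {
    let messages = extract_conversation(jsonl_path)?; // parse JSONL to messages
    let segments = split_on_compaction(messages);     // split at compaction boundaries
    Ok(segments.into_iter().last())                   // newest segment, if any
}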
ProofOfConcept 2026-03-17 00:54:12 -04:00
parent 7a24d84ce3
commit 2b25fee520
4 changed files with 12 additions and 455 deletions


@@ -684,9 +684,9 @@ pub fn run_daemon() -> Result<(), String> {
     let stale = find_stale_sessions();
     // Load mined transcript keys once for this tick
-    let mined = super::enrich::mined_transcript_keys();
-    const MAX_NEW_PER_TICK: usize = 10;
+    let mined = std::collections::HashSet::<String>::new(); // mining removed
+    // MAX_NEW_PER_TICK removed — mining handled by observation agent
     // Load fact-mined keys too
     let fact_keys: HashSet<String> = {
@@ -771,7 +771,7 @@ pub fn run_daemon() -> Result<(), String> {
                 continue;
             }
-            let fname_key = super::enrich::transcript_filename_key(&path_str);
+            let fname_key = format!("_experience-{}", filename.trim_end_matches(".jsonl"));
             let has_whole_file_key = mined.contains(&fname_key);
             // Check per-segment keys, find unmined segments
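Note the whole-file dedup key changes shape here: the removed transcript_filename_key produced "_mined-transcripts#f-{stem}" (see its definition in the enrich.rs deletions below), while the daemon now inlines a key under the "_experience-" prefix, presumably the namespace the observation agent writes. A tiny sketch of the mapping, with a hypothetical helper name used only to make the format concrete:

// Hypothetical helper, for illustration — the daemon inlines this one-liner.
// "abc-123.jsonl" maps to "_experience-abc-123"; the old helper produced
// "_mined-transcripts#f-abc-123" for the same file.
fn whole_file_key(filename: &str) -> String {
    format!("_experience-{}", filename.trim_end_matches(".jsonl"))
}

#[test]
fn whole_file_key_drops_extension() {
    assert_eq!(whole_file_key("abc-123.jsonl"), "_experience-abc-123");
}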


@@ -1,122 +1,10 @@
-// Journal enrichment and experience mining
+// Conversation extraction from JSONL transcripts
 //
-// Two modes of processing conversation transcripts:
-// journal_enrich — enrich a specific journal entry with source location and links
-// experience_mine — retroactively find experiential moments not yet journaled
-//
-// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
-// and apply results to the store.
+// extract_conversation — parse JSONL transcript to messages
+// split_on_compaction — split messages at compaction boundaries
 
-use super::llm::{call_sonnet, parse_json_response, semantic_keys};
-use crate::neuro;
-use crate::store::{self, Store, new_node, new_relation};
-use std::collections::hash_map::DefaultHasher;
-use std::collections::HashSet;
-use std::fs;
-use std::hash::{Hash, Hasher};
-use crate::store::StoreView;
-use crate::util::parse_timestamp_to_epoch;
-
-/// Compute the store dedup key for a transcript file.
-/// This is the same key experience_mine uses to mark a transcript as mined.
-fn transcript_dedup_key(path: &str) -> Result<String, String> {
-    let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?;
-    let mut hasher = DefaultHasher::new();
-    bytes.hash(&mut hasher);
-    Ok(format!("_mined-transcripts-h-{:016x}", hasher.finish()))
-}
-
-/// Check if a transcript has already been mined (dedup key exists in store).
-pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
-    match transcript_dedup_key(path) {
-        Ok(key) => store.node_content(&key).is_some(),
-        Err(_) => false,
-    }
-}
-
-/// Dedup key for a transcript based on its filename (UUID).
-/// Used by the daemon reconcile loop — no file reads needed.
-pub fn transcript_filename_key(path: &str) -> String {
-    transcript_key("_mined-transcripts", path)
-}
-
-/// Build a namespaced transcript key from a prefix and path.
-pub fn transcript_key(prefix: &str, path: &str) -> String {
-    let filename = std::path::Path::new(path)
-        .file_stem()
-        .map(|s| s.to_string_lossy().to_string())
-        .unwrap_or_else(|| path.to_string());
-    format!("{}#f-{}", prefix, filename)
-}
-
-/// Per-segment key: `{base_key}.{segment_index}`
-pub fn segment_key(base: &str, segment: usize) -> String {
-    format!("{}.{}", base, segment)
-}
-
-/// Load all keys with a given prefix from the store.
-pub fn keys_with_prefix(prefix: &str) -> HashSet<String> {
-    use crate::store::AnyView;
-    let Ok(view) = AnyView::load() else { return HashSet::new() };
-    let mut keys = HashSet::new();
-    view.for_each_node(|key, _, _| {
-        if key.starts_with(prefix) {
-            keys.insert(key.to_string());
-        }
-    });
-    keys
-}
-
-/// Find unmined segments for a transcript file against a set of known keys.
-/// Returns segment indices that haven't been processed yet.
-pub fn unmined_segments(
-    path: &std::path::Path,
-    prefix: &str,
-    known: &HashSet<String>,
-) -> Vec<(usize, Vec<(usize, String, String, String)>)> {
-    let path_str = path.to_string_lossy();
-    let base = transcript_key(prefix, &path_str);
-    let messages = match extract_conversation(&path_str) {
-        Ok(m) => m,
-        Err(_) => return Vec::new(),
-    };
-    let segments = split_on_compaction(messages);
-    segments.into_iter()
-        .enumerate()
-        .filter(|(i, _)| !known.contains(&segment_key(&base, *i)))
-        .collect()
-}
-
-/// Mark a segment as processed in the store.
-pub fn mark_segment(
-    store: &mut Store,
-    path: &str,
-    prefix: &str,
-    segment: usize,
-    provenance: &str,
-    content: &str,
-) {
-    let base = transcript_key(prefix, path);
-    let key = segment_key(&base, segment);
-    let mut node = new_node(&key, content);
-    node.provenance = provenance.to_string();
-    let _ = store.upsert_node(node);
-}
-
-/// Get the set of all mined transcript keys (both content-hash and filename)
-/// from the store. Load once per daemon tick, check many.
-pub fn mined_transcript_keys() -> HashSet<String> {
-    keys_with_prefix("_mined-transcripts-")
-}
-
-/// Extract user/assistant messages with line numbers from a JSONL transcript.
-/// (line_number, role, text, timestamp)
+/// Extract conversation messages from a JSONL transcript file.
+/// Returns (line_number, role, text, timestamp) tuples.
 pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
     let path = std::path::Path::new(jsonl_path);
     let messages = super::transcript::parse_transcript(path)?;
@@ -139,7 +27,6 @@ pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Ve
                 segments.push(current);
                 current = Vec::new();
             }
-            // The continuation message itself is part of the new segment
             current.push(msg);
         } else {
             current.push(msg);
@@ -151,315 +38,3 @@ pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Ve
     segments
 }
-
-/// Format conversation messages for the prompt (truncating long messages).
-fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
-    messages.iter()
-        .map(|(line, role, text, ts)| {
-            let text = crate::util::truncate(text, 1800, "...[truncated]");
-            if ts.is_empty() {
-                format!("L{} [{}]: {}", line, role, text)
-            } else {
-                format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text)
-            }
-        })
-        .collect::<Vec<_>>()
-        .join("\n\n")
-}
-
-fn build_journal_prompt(
-    entry_text: &str,
-    conversation: &str,
-    keys: &[String],
-    grep_line: usize,
-) -> Result<String, String> {
-    let keys_text: String = keys.iter()
-        .map(|k| format!(" - {}", k))
-        .collect::<Vec<_>>()
-        .join("\n");
-    super::prompts::load_prompt("journal-enrich", &[
-        ("{{GREP_LINE}}", &grep_line.to_string()),
-        ("{{ENTRY_TEXT}}", entry_text),
-        ("{{KEYS}}", &keys_text),
-        ("{{CONVERSATION}}", conversation),
-    ])
-}
-
-/// Enrich a journal entry with conversation context and link proposals.
-pub fn journal_enrich(
-    store: &mut Store,
-    jsonl_path: &str,
-    entry_text: &str,
-    grep_line: usize,
-) -> Result<(), String> {
-    println!("Extracting conversation from {}...", jsonl_path);
-    let messages = extract_conversation(jsonl_path)?;
-    let conversation = format_conversation(&messages);
-    println!(" {} messages, {} chars", messages.len(), conversation.len());
-    let keys = semantic_keys(store);
-    println!(" {} semantic keys", keys.len());
-    let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
-    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
-    println!(" Calling Sonnet...");
-    let response = call_sonnet("enrich", &prompt)?;
-    let result = parse_json_response(&response)?;
-    // Report results
-    let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
-    let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
-    let links = result.get("links").and_then(|v| v.as_array());
-    let insights = result.get("missed_insights").and_then(|v| v.as_array());
-    println!(" Source: L{}-L{}", source_start, source_end);
-    println!(" Links: {}", links.map_or(0, |l| l.len()));
-    println!(" Missed insights: {}", insights.map_or(0, |l| l.len()));
-    // Apply links
-    if let Some(links) = links {
-        for link in links {
-            let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
-            let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
-            if target.is_empty() || target.starts_with("NOTE:") {
-                if let Some(note) = target.strip_prefix("NOTE:") {
-                    println!(" NOTE: {}{}", note, reason);
-                }
-                continue;
-            }
-            // Resolve target and find journal node
-            let resolved = match store.resolve_key(target) {
-                Ok(r) => r,
-                Err(_) => { println!(" SKIP {} (not in graph)", target); continue; }
-            };
-            let source_key = match store.find_journal_node(entry_text) {
-                Some(k) => k,
-                None => { println!(" SKIP {} (no matching journal node)", target); continue; }
-            };
-            // Refine target to best-matching section
-            let source_content = store.nodes.get(&source_key)
-                .map(|n| n.content.as_str()).unwrap_or("");
-            let resolved = neuro::refine_target(store, source_content, &resolved);
-            let source_uuid = match store.nodes.get(&source_key) {
-                Some(n) => n.uuid,
-                None => continue,
-            };
-            let target_uuid = match store.nodes.get(&resolved) {
-                Some(n) => n.uuid,
-                None => continue,
-            };
-            let rel = new_relation(
-                source_uuid, target_uuid,
-                store::RelationType::Link,
-                0.5,
-                &source_key, &resolved,
-            );
-            if store.add_relation(rel).is_ok() {
-                println!(" LINK {}{} ({})", source_key, resolved, reason);
-            }
-        }
-    }
-    store.save()?;
-    Ok(())
-}
-
-/// Mine a conversation transcript for experiential moments not yet journaled.
-/// If `segment` is Some, only process that compaction segment of the file.
-pub fn experience_mine(
-    store: &mut Store,
-    jsonl_path: &str,
-    segment: Option<usize>,
-) -> Result<usize, String> {
-    println!("Experience mining: {}", jsonl_path);
-    // Transcript-level dedup: hash the file content and check if already mined
-    let transcript_bytes = fs::read(jsonl_path)
-        .map_err(|e| format!("reading transcript: {}", e))?;
-    let mut hasher = DefaultHasher::new();
-    transcript_bytes.hash(&mut hasher);
-    let hash = hasher.finish();
-    let dedup_key = format!("_mined-transcripts-h-{:016x}", hash);
-    if store.nodes.contains_key(&dedup_key) {
-        // Backfill per-segment key if called with a specific segment
-        if let Some(idx) = segment {
-            let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
-            if !store.nodes.contains_key(&seg_key) {
-                let mut node = new_node(&seg_key, &format!("Backfilled from {}", dedup_key));
-                node.provenance = "experience-mine:write".to_string();
-                let _ = store.upsert_node(node);
-                store.save()?;
-            }
-        }
-        println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]);
-        return Ok(0);
-    }
-    let all_messages = extract_conversation(jsonl_path)?;
-    // If segment is specified, extract just that segment; otherwise process all messages
-    let messages = match segment {
-        Some(idx) => {
-            let segments = split_on_compaction(all_messages);
-            segments.into_iter().nth(idx)
-                .ok_or_else(|| format!("segment {} out of range", idx))?
-        }
-        None => all_messages,
-    };
-    let conversation = format_conversation(&messages);
-    println!(" {} messages, {} chars", messages.len(), conversation.len());
-    // Load core identity nodes for context
-    let cfg = crate::config::get();
-    let identity: String = cfg.core_nodes.iter()
-        .filter_map(|k| store.nodes.get(k).map(|n| n.content.as_str()))
-        .collect::<Vec<_>>()
-        .join("\n\n");
-    // Get recent episodic entries to avoid duplication
-    let mut journal: Vec<_> = store.nodes.values()
-        .filter(|node| matches!(node.node_type, store::NodeType::EpisodicSession))
-        .collect();
-    journal.sort_by_key(|n| n.timestamp);
-    let recent: String = journal.iter().rev().take(10)
-        .map(|n| format!("---\n{}\n", n.content))
-        .collect();
-    let keys = semantic_keys(store);
-    let keys_text: String = keys.iter()
-        .map(|k| format!(" - {}", k))
-        .collect::<Vec<_>>()
-        .join("\n");
-    let prompt = super::prompts::load_prompt("experience", &[
-        ("{{IDENTITY}}", &identity),
-        ("{{RECENT_JOURNAL}}", &recent),
-        ("{{KEYS}}", &keys_text),
-        ("{{CONVERSATION}}", &conversation),
-    ])?;
-    let est_tokens = prompt.len() / 4;
-    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), est_tokens);
-    if est_tokens > 150_000 {
-        println!(" Skipping: prompt too large ({} tokens > 150k limit)", est_tokens);
-        return Ok(0);
-    }
-    println!(" Calling Sonnet...");
-    let response = call_sonnet("experience-mine", &prompt)?;
-    let entries = parse_json_response(&response)?;
-    let entries = match entries.as_array() {
-        Some(arr) => arr.clone(),
-        None => return Err("expected JSON array".to_string()),
-    };
-    if entries.is_empty() {
-        println!(" No missed experiences found.");
-    } else {
-        println!(" Found {} experiential moments:", entries.len());
-    }
-    let mut count = 0;
-    for entry in &entries {
-        let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
-        let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
-        if content.is_empty() { continue; }
-        // Format with timestamp header
-        let full_content = if ts.is_empty() {
-            content.to_string()
-        } else {
-            format!("## {}\n\n{}", ts, content)
-        };
-        // Generate key from timestamp
-        let key_slug: String = content.chars()
-            .filter(|c| c.is_alphanumeric() || *c == ' ')
-            .take(50)
-            .collect::<String>()
-            .trim()
-            .to_lowercase()
-            .replace(' ', "-");
-        let key = if ts.is_empty() {
-            format!("journal-j-mined-{}", key_slug)
-        } else {
-            format!("journal-j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
-        };
-        // Check for duplicate
-        if store.nodes.contains_key(&key) {
-            println!(" SKIP {} (duplicate)", key);
-            continue;
-        }
-        // Write to store — use event timestamp, not mining time
-        let mut node = new_node(&key, &full_content);
-        node.node_type = store::NodeType::EpisodicSession;
-        node.provenance = "experience-mine:write".to_string();
-        if !ts.is_empty() {
-            if let Some(epoch) = parse_timestamp_to_epoch(ts) {
-                node.created_at = epoch;
-            }
-        }
-        let _ = store.upsert_node(node);
-        count += 1;
-        // Apply links from LLM output
-        if let Some(links) = entry.get("links").and_then(|v| v.as_array()) {
-            for link_val in links {
-                if let Some(target) = link_val.as_str() {
-                    let target = target.to_string();
-                    if let Some(target_node) = store.nodes.get(&target) {
-                        let source_uuid = store.nodes.get(&key).map(|n| n.uuid).unwrap_or_default();
-                        let target_uuid = target_node.uuid;
-                        let rel = store::new_relation(
-                            source_uuid, target_uuid,
-                            store::RelationType::Link, 0.3,
-                            &key, &target,
-                        );
-                        let _ = store.add_relation(rel);
-                    }
-                }
-            }
-        }
-        let preview = crate::util::truncate(content, 77, "...");
-        println!(" + [{}] {}", ts, preview);
-    }
-    // Record this transcript/segment as mined (even if count == 0, to prevent re-runs)
-    let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
-    match segment {
-        Some(idx) => {
-            // Per-segment key: the daemon writes the whole-file key when all segments are done
-            let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
-            let mut node = new_node(&seg_key, &dedup_content);
-            node.provenance = "experience-mine:write".to_string();
-            let _ = store.upsert_node(node);
-        }
-        None => {
-            // Unsegmented: only write content-hash key (not the filename key, since the
-            // file may grow with new compaction segments later — the daemon handles
-            // writing the whole-file filename key after verifying all segments are done)
-            let mut node = new_node(&dedup_key, &dedup_content);
-            node.provenance = "experience-mine:write".to_string();
-            let _ = store.upsert_node(node);
-        }
-    }
-    if count > 0 {
-        println!(" Saved {} new journal entries.", count);
-    }
-    store.save()?;
-    println!("Done: {} new entries mined.", count);
-    Ok(count)
-}
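What survives in enrich.rs is the seam between transcript parsing and the observation agent. The compaction-detection rule is internal to split_on_compaction and only partially visible in this diff, so the following is a hedged behavioral sketch of the invariant the visible code implies (both branches push every message into some segment, so splitting loses nothing and preserves order), not a test against real data. It assumes plain "hello"/"hi" messages do not match the compaction marker.

#[test]
fn split_on_compaction_preserves_messages() {
    // Two ordinary messages with no compaction boundary between them should
    // come back grouped, in order, with nothing dropped.
    let messages: Vec<(usize, String, String, String)> = vec![
        (1, "user".to_string(), "hello".to_string(), String::new()),
        (2, "assistant".to_string(), "hi".to_string(), String::new()),
    ];
    let segments = crate::enrich::split_on_compaction(messages.clone());
    let flattened: Vec<_> = segments.into_iter().flatten().collect();
    assert_eq!(flattened, messages);
}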


@@ -86,13 +86,8 @@ pub fn cmd_digest_links(do_apply: bool) -> Result<(), String> {
     Ok(())
 }
 
-pub fn cmd_journal_enrich(jsonl_path: &str, entry_text: &str, grep_line: usize) -> Result<(), String> {
-    if !std::path::Path::new(jsonl_path).is_file() {
-        return Err(format!("JSONL not found: {}", jsonl_path));
-    }
-    let mut store = store::Store::load()?;
-    crate::enrich::journal_enrich(&mut store, jsonl_path, entry_text, grep_line)
+pub fn cmd_journal_enrich(_jsonl_path: &str, _entry_text: &str, _grep_line: usize) -> Result<(), String> {
+    Err("journal-enrich has been removed — use the observation agent instead.".into())
 }
 
 pub fn cmd_apply_consolidation(_do_apply: bool, _report_file: Option<&str>) -> Result<(), String> {


@@ -1017,21 +1017,8 @@ fn cmd_digest(level: DigestLevel) -> Result<(), String> {
     }
 }
 
-fn cmd_experience_mine(jsonl_path: Option<String>) -> Result<(), String> {
-    let jsonl_path = match jsonl_path {
-        Some(p) => p,
-        None => cli::journal::find_current_transcript()
-            .ok_or("no JSONL transcripts found")?,
-    };
-    if !std::path::Path::new(jsonl_path.as_str()).is_file() {
-        return Err(format!("JSONL not found: {}", jsonl_path));
-    }
-    let mut store = store::Store::load()?;
-    let count = crate::enrich::experience_mine(&mut store, &jsonl_path, None)?;
-    println!("Done: {} new entries mined.", count);
-    Ok(())
+fn cmd_experience_mine(_jsonl_path: Option<String>) -> Result<(), String> {
+    Err("experience-mine has been removed — use the observation agent instead.".into())
 }
 
 fn cmd_daemon(sub: DaemonCmd) -> Result<(), String> {