Remove experience_mine, journal_enrich, and old mining helpers

experience_mine and journal_enrich are replaced by the observation agent. enrich.rs reduced from 465 to 40 lines — only extract_conversation and split_on_compaction remain (used by observation fragment selection). -455 lines.
2026-03-17 00:54:12 -04:00 · 2026-03-17 00:54:12 -04:00 · 2b25fee520
commit 2b25fee520
parent 7a24d84ce3
4 changed files with 12 additions and 455 deletions
--- a/poc-memory/src/agents/daemon.rs
+++ b/poc-memory/src/agents/daemon.rs
@ -684,9 +684,9 @@ pub fn run_daemon() -> Result<(), String> {
            let stale = find_stale_sessions();

            // Load mined transcript keys once for this tick
-            let mined = super::enrich::mined_transcript_keys();
+            let mined = std::collections::HashSet::<String>::new(); // mining removed

-            const MAX_NEW_PER_TICK: usize = 10;
+            // MAX_NEW_PER_TICK removed — mining handled by observation agent

            // Load fact-mined keys too
            let fact_keys: HashSet<String> = {
@ -771,7 +771,7 @@ pub fn run_daemon() -> Result<(), String> {
                    continue;
                }

-                let fname_key = super::enrich::transcript_filename_key(&path_str);
+                let fname_key = format!("_experience-{}", filename.trim_end_matches(".jsonl"));
                let has_whole_file_key = mined.contains(&fname_key);

                // Check per-segment keys, find unmined segments
--- a/poc-memory/src/agents/enrich.rs
+++ b/poc-memory/src/agents/enrich.rs
@ -1,122 +1,10 @@
-// Journal enrichment and experience mining
+// Conversation extraction from JSONL transcripts
 //
-// Two modes of processing conversation transcripts:
-//   journal_enrich  — enrich a specific journal entry with source location and links
-//   experience_mine — retroactively find experiential moments not yet journaled
-//
-// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
-// and apply results to the store.
+// extract_conversation  — parse JSONL transcript to messages
+// split_on_compaction   — split messages at compaction boundaries

-use super::llm::{call_sonnet, parse_json_response, semantic_keys};
-use crate::neuro;
-use crate::store::{self, Store, new_node, new_relation};
-
-use std::collections::hash_map::DefaultHasher;
-use std::collections::HashSet;
-use std::fs;
-use std::hash::{Hash, Hasher};
-
-use crate::store::StoreView;
-
-use crate::util::parse_timestamp_to_epoch;
-
-/// Compute the store dedup key for a transcript file.
-/// This is the same key experience_mine uses to mark a transcript as mined.
-fn transcript_dedup_key(path: &str) -> Result<String, String> {
-    let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?;
-    let mut hasher = DefaultHasher::new();
-    bytes.hash(&mut hasher);
-    Ok(format!("_mined-transcripts-h-{:016x}", hasher.finish()))
-}
-
-/// Check if a transcript has already been mined (dedup key exists in store).
-pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
-    match transcript_dedup_key(path) {
-        Ok(key) => store.node_content(&key).is_some(),
-        Err(_) => false,
-    }
-}
-
-/// Dedup key for a transcript based on its filename (UUID).
-/// Used by the daemon reconcile loop — no file reads needed.
-pub fn transcript_filename_key(path: &str) -> String {
-    transcript_key("_mined-transcripts", path)
-}
-
-/// Build a namespaced transcript key from a prefix and path.
-pub fn transcript_key(prefix: &str, path: &str) -> String {
-    let filename = std::path::Path::new(path)
-        .file_stem()
-        .map(|s| s.to_string_lossy().to_string())
-        .unwrap_or_else(|| path.to_string());
-    format!("{}#f-{}", prefix, filename)
-}
-
-/// Per-segment key: `{base_key}.{segment_index}`
-pub fn segment_key(base: &str, segment: usize) -> String {
-    format!("{}.{}", base, segment)
-}
-
-/// Load all keys with a given prefix from the store.
-pub fn keys_with_prefix(prefix: &str) -> HashSet<String> {
-    use crate::store::AnyView;
-    let Ok(view) = AnyView::load() else { return HashSet::new() };
-    let mut keys = HashSet::new();
-    view.for_each_node(|key, _, _| {
-        if key.starts_with(prefix) {
-            keys.insert(key.to_string());
-        }
-    });
-    keys
-}
-
-/// Find unmined segments for a transcript file against a set of known keys.
-/// Returns segment indices that haven't been processed yet.
-pub fn unmined_segments(
-    path: &std::path::Path,
-    prefix: &str,
-    known: &HashSet<String>,
-) -> Vec<(usize, Vec<(usize, String, String, String)>)> {
-    let path_str = path.to_string_lossy();
-    let base = transcript_key(prefix, &path_str);
-
-    let messages = match extract_conversation(&path_str) {
-        Ok(m) => m,
-        Err(_) => return Vec::new(),
-    };
-    let segments = split_on_compaction(messages);
-
-    segments.into_iter()
-        .enumerate()
-        .filter(|(i, _)| !known.contains(&segment_key(&base, *i)))
-        .collect()
-}
-
-/// Mark a segment as processed in the store.
-pub fn mark_segment(
-    store: &mut Store,
-    path: &str,
-    prefix: &str,
-    segment: usize,
-    provenance: &str,
-    content: &str,
-) {
-    let base = transcript_key(prefix, path);
-    let key = segment_key(&base, segment);
-    let mut node = new_node(&key, content);
-    node.provenance = provenance.to_string();
-    let _ = store.upsert_node(node);
-}
-
-/// Get the set of all mined transcript keys (both content-hash and filename)
-/// from the store. Load once per daemon tick, check many.
-pub fn mined_transcript_keys() -> HashSet<String> {
-    keys_with_prefix("_mined-transcripts-")
-}
-
-
-/// Extract user/assistant messages with line numbers from a JSONL transcript.
-/// (line_number, role, text, timestamp)
+/// Extract conversation messages from a JSONL transcript file.
+/// Returns (line_number, role, text, timestamp) tuples.
 pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
    let path = std::path::Path::new(jsonl_path);
    let messages = super::transcript::parse_transcript(path)?;
@ -139,7 +27,6 @@ pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Ve
                segments.push(current);
                current = Vec::new();
            }
-            // The continuation message itself is part of the new segment
            current.push(msg);
        } else {
            current.push(msg);
@ -151,315 +38,3 @@ pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Ve

    segments
 }
-
-/// Format conversation messages for the prompt (truncating long messages).
-fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
-    messages.iter()
-        .map(|(line, role, text, ts)| {
-            let text = crate::util::truncate(text, 1800, "...[truncated]");
-            if ts.is_empty() {
-                format!("L{} [{}]: {}", line, role, text)
-            } else {
-                format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text)
-            }
-        })
-        .collect::<Vec<_>>()
-        .join("\n\n")
-}
-
-fn build_journal_prompt(
-    entry_text: &str,
-    conversation: &str,
-    keys: &[String],
-    grep_line: usize,
-) -> Result<String, String> {
-    let keys_text: String = keys.iter()
-        .map(|k| format!("  - {}", k))
-        .collect::<Vec<_>>()
-        .join("\n");
-
-    super::prompts::load_prompt("journal-enrich", &[
-        ("{{GREP_LINE}}", &grep_line.to_string()),
-        ("{{ENTRY_TEXT}}", entry_text),
-        ("{{KEYS}}", &keys_text),
-        ("{{CONVERSATION}}", conversation),
-    ])
-}
-
-/// Enrich a journal entry with conversation context and link proposals.
-pub fn journal_enrich(
-    store: &mut Store,
-    jsonl_path: &str,
-    entry_text: &str,
-    grep_line: usize,
-) -> Result<(), String> {
-    println!("Extracting conversation from {}...", jsonl_path);
-    let messages = extract_conversation(jsonl_path)?;
-    let conversation = format_conversation(&messages);
-    println!("  {} messages, {} chars", messages.len(), conversation.len());
-
-    let keys = semantic_keys(store);
-    println!("  {} semantic keys", keys.len());
-
-    let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
-    println!("  Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
-
-    println!("  Calling Sonnet...");
-    let response = call_sonnet("enrich", &prompt)?;
-
-    let result = parse_json_response(&response)?;
-
-    // Report results
-    let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
-    let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
-    let links = result.get("links").and_then(|v| v.as_array());
-    let insights = result.get("missed_insights").and_then(|v| v.as_array());
-
-    println!("  Source: L{}-L{}", source_start, source_end);
-    println!("  Links: {}", links.map_or(0, |l| l.len()));
-    println!("  Missed insights: {}", insights.map_or(0, |l| l.len()));
-
-    // Apply links
-    if let Some(links) = links {
-        for link in links {
-            let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
-            let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
-            if target.is_empty() || target.starts_with("NOTE:") {
-                if let Some(note) = target.strip_prefix("NOTE:") {
-                    println!("  NOTE: {} — {}", note, reason);
-                }
-                continue;
-            }
-
-            // Resolve target and find journal node
-            let resolved = match store.resolve_key(target) {
-                Ok(r) => r,
-                Err(_) => { println!("  SKIP {} (not in graph)", target); continue; }
-            };
-            let source_key = match store.find_journal_node(entry_text) {
-                Some(k) => k,
-                None => { println!("  SKIP {} (no matching journal node)", target); continue; }
-            };
-
-            // Refine target to best-matching section
-            let source_content = store.nodes.get(&source_key)
-                .map(|n| n.content.as_str()).unwrap_or("");
-            let resolved = neuro::refine_target(store, source_content, &resolved);
-
-            let source_uuid = match store.nodes.get(&source_key) {
-                Some(n) => n.uuid,
-                None => continue,
-            };
-            let target_uuid = match store.nodes.get(&resolved) {
-                Some(n) => n.uuid,
-                None => continue,
-            };
-
-            let rel = new_relation(
-                source_uuid, target_uuid,
-                store::RelationType::Link,
-                0.5,
-                &source_key, &resolved,
-            );
-            if store.add_relation(rel).is_ok() {
-                println!("  LINK {} → {} ({})", source_key, resolved, reason);
-            }
-        }
-    }
-
-    store.save()?;
-    Ok(())
-}
-
-/// Mine a conversation transcript for experiential moments not yet journaled.
-/// If `segment` is Some, only process that compaction segment of the file.
-pub fn experience_mine(
-    store: &mut Store,
-    jsonl_path: &str,
-    segment: Option<usize>,
-) -> Result<usize, String> {
-    println!("Experience mining: {}", jsonl_path);
-
-    // Transcript-level dedup: hash the file content and check if already mined
-    let transcript_bytes = fs::read(jsonl_path)
-        .map_err(|e| format!("reading transcript: {}", e))?;
-    let mut hasher = DefaultHasher::new();
-    transcript_bytes.hash(&mut hasher);
-    let hash = hasher.finish();
-    let dedup_key = format!("_mined-transcripts-h-{:016x}", hash);
-
-    if store.nodes.contains_key(&dedup_key) {
-        // Backfill per-segment key if called with a specific segment
-        if let Some(idx) = segment {
-            let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
-            if !store.nodes.contains_key(&seg_key) {
-                let mut node = new_node(&seg_key, &format!("Backfilled from {}", dedup_key));
-                node.provenance = "experience-mine:write".to_string();
-                let _ = store.upsert_node(node);
-                store.save()?;
-            }
-        }
-        println!("  Already mined this transcript ({}), skipping.", &dedup_key[24..]);
-        return Ok(0);
-    }
-
-    let all_messages = extract_conversation(jsonl_path)?;
-
-    // If segment is specified, extract just that segment; otherwise process all messages
-    let messages = match segment {
-        Some(idx) => {
-            let segments = split_on_compaction(all_messages);
-            segments.into_iter().nth(idx)
-                .ok_or_else(|| format!("segment {} out of range", idx))?
-        }
-        None => all_messages,
-    };
-
-    let conversation = format_conversation(&messages);
-    println!("  {} messages, {} chars", messages.len(), conversation.len());
-
-    // Load core identity nodes for context
-    let cfg = crate::config::get();
-    let identity: String = cfg.core_nodes.iter()
-        .filter_map(|k| store.nodes.get(k).map(|n| n.content.as_str()))
-        .collect::<Vec<_>>()
-        .join("\n\n");
-
-    // Get recent episodic entries to avoid duplication
-    let mut journal: Vec<_> = store.nodes.values()
-        .filter(|node| matches!(node.node_type, store::NodeType::EpisodicSession))
-        .collect();
-    journal.sort_by_key(|n| n.timestamp);
-    let recent: String = journal.iter().rev().take(10)
-        .map(|n| format!("---\n{}\n", n.content))
-        .collect();
-
-    let keys = semantic_keys(store);
-    let keys_text: String = keys.iter()
-        .map(|k| format!("  - {}", k))
-        .collect::<Vec<_>>()
-        .join("\n");
-
-    let prompt = super::prompts::load_prompt("experience", &[
-        ("{{IDENTITY}}", &identity),
-        ("{{RECENT_JOURNAL}}", &recent),
-        ("{{KEYS}}", &keys_text),
-        ("{{CONVERSATION}}", &conversation),
-    ])?;
-    let est_tokens = prompt.len() / 4;
-    println!("  Prompt: {} chars (~{} tokens)", prompt.len(), est_tokens);
-
-    if est_tokens > 150_000 {
-        println!("  Skipping: prompt too large ({} tokens > 150k limit)", est_tokens);
-        return Ok(0);
-    }
-
-    println!("  Calling Sonnet...");
-    let response = call_sonnet("experience-mine", &prompt)?;
-
-    let entries = parse_json_response(&response)?;
-    let entries = match entries.as_array() {
-        Some(arr) => arr.clone(),
-        None => return Err("expected JSON array".to_string()),
-    };
-
-    if entries.is_empty() {
-        println!("  No missed experiences found.");
-    } else {
-        println!("  Found {} experiential moments:", entries.len());
-    }
-    let mut count = 0;
-    for entry in &entries {
-        let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
-        let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
-        if content.is_empty() { continue; }
-
-        // Format with timestamp header
-        let full_content = if ts.is_empty() {
-            content.to_string()
-        } else {
-            format!("## {}\n\n{}", ts, content)
-        };
-
-        // Generate key from timestamp
-        let key_slug: String = content.chars()
-            .filter(|c| c.is_alphanumeric() || *c == ' ')
-            .take(50)
-            .collect::<String>()
-            .trim()
-            .to_lowercase()
-            .replace(' ', "-");
-        let key = if ts.is_empty() {
-            format!("journal-j-mined-{}", key_slug)
-        } else {
-            format!("journal-j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
-        };
-
-        // Check for duplicate
-        if store.nodes.contains_key(&key) {
-            println!("  SKIP {} (duplicate)", key);
-            continue;
-        }
-
-        // Write to store — use event timestamp, not mining time
-        let mut node = new_node(&key, &full_content);
-        node.node_type = store::NodeType::EpisodicSession;
-        node.provenance = "experience-mine:write".to_string();
-        if !ts.is_empty() {
-            if let Some(epoch) = parse_timestamp_to_epoch(ts) {
-                node.created_at = epoch;
-            }
-        }
-        let _ = store.upsert_node(node);
-        count += 1;
-
-        // Apply links from LLM output
-        if let Some(links) = entry.get("links").and_then(|v| v.as_array()) {
-            for link_val in links {
-                if let Some(target) = link_val.as_str() {
-                    let target = target.to_string();
-                    if let Some(target_node) = store.nodes.get(&target) {
-                        let source_uuid = store.nodes.get(&key).map(|n| n.uuid).unwrap_or_default();
-                        let target_uuid = target_node.uuid;
-                        let rel = store::new_relation(
-                            source_uuid, target_uuid,
-                            store::RelationType::Link, 0.3,
-                            &key, &target,
-                        );
-                        let _ = store.add_relation(rel);
-                    }
-                }
-            }
-        }
-
-        let preview = crate::util::truncate(content, 77, "...");
-        println!("  + [{}] {}", ts, preview);
-    }
-
-    // Record this transcript/segment as mined (even if count == 0, to prevent re-runs)
-    let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
-    match segment {
-        Some(idx) => {
-            // Per-segment key: the daemon writes the whole-file key when all segments are done
-            let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
-            let mut node = new_node(&seg_key, &dedup_content);
-            node.provenance = "experience-mine:write".to_string();
-            let _ = store.upsert_node(node);
-        }
-        None => {
-            // Unsegmented: only write content-hash key (not the filename key, since the
-            // file may grow with new compaction segments later — the daemon handles
-            // writing the whole-file filename key after verifying all segments are done)
-            let mut node = new_node(&dedup_key, &dedup_content);
-            node.provenance = "experience-mine:write".to_string();
-            let _ = store.upsert_node(node);
-        }
-    }
-
-    if count > 0 {
-        println!("  Saved {} new journal entries.", count);
-    }
-    store.save()?;
-    println!("Done: {} new entries mined.", count);
-    Ok(count)
-}