From 2b25fee52013bdf87a7db3cfeb0e5d9cdfee6bd5 Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Tue, 17 Mar 2026 00:54:12 -0400 Subject: [PATCH] Remove experience_mine, journal_enrich, and old mining helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit experience_mine and journal_enrich are replaced by the observation agent. enrich.rs reduced from 465 to 40 lines — only extract_conversation and split_on_compaction remain (used by observation fragment selection). -455 lines. --- poc-memory/src/agents/daemon.rs | 6 +- poc-memory/src/agents/enrich.rs | 435 +------------------------------- poc-memory/src/cli/agent.rs | 9 +- poc-memory/src/main.rs | 17 +- 4 files changed, 12 insertions(+), 455 deletions(-) diff --git a/poc-memory/src/agents/daemon.rs b/poc-memory/src/agents/daemon.rs index 3b96193..e097c47 100644 --- a/poc-memory/src/agents/daemon.rs +++ b/poc-memory/src/agents/daemon.rs @@ -684,9 +684,9 @@ pub fn run_daemon() -> Result<(), String> { let stale = find_stale_sessions(); // Load mined transcript keys once for this tick - let mined = super::enrich::mined_transcript_keys(); + let mined = std::collections::HashSet::<String>::new(); // mining removed - const MAX_NEW_PER_TICK: usize = 10; + // MAX_NEW_PER_TICK removed — mining handled by observation agent // Load fact-mined keys too let fact_keys: HashSet<String> = { @@ -771,7 +771,7 @@ pub fn run_daemon() -> Result<(), String> { continue; } - let fname_key = super::enrich::transcript_filename_key(&path_str); + let fname_key = format!("_experience-{}", filename.trim_end_matches(".jsonl")); let has_whole_file_key = mined.contains(&fname_key); // Check per-segment keys, find unmined segments diff --git a/poc-memory/src/agents/enrich.rs b/poc-memory/src/agents/enrich.rs index f4ebfe2..b44a71a 100644 --- a/poc-memory/src/agents/enrich.rs +++ b/poc-memory/src/agents/enrich.rs @@ -1,122 +1,10 @@ -// Journal enrichment and experience mining +// Conversation extraction from JSONL 
transcripts // -// Two modes of processing conversation transcripts: -// journal_enrich — enrich a specific journal entry with source location and links -// experience_mine — retroactively find experiential moments not yet journaled -// -// Both extract conversation from JSONL transcripts, build prompts, call Sonnet, -// and apply results to the store. +// extract_conversation — parse JSONL transcript to messages +// split_on_compaction — split messages at compaction boundaries -use super::llm::{call_sonnet, parse_json_response, semantic_keys}; -use crate::neuro; -use crate::store::{self, Store, new_node, new_relation}; - -use std::collections::hash_map::DefaultHasher; -use std::collections::HashSet; -use std::fs; -use std::hash::{Hash, Hasher}; - -use crate::store::StoreView; - -use crate::util::parse_timestamp_to_epoch; - -/// Compute the store dedup key for a transcript file. -/// This is the same key experience_mine uses to mark a transcript as mined. -fn transcript_dedup_key(path: &str) -> Result { - let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?; - let mut hasher = DefaultHasher::new(); - bytes.hash(&mut hasher); - Ok(format!("_mined-transcripts-h-{:016x}", hasher.finish())) -} - -/// Check if a transcript has already been mined (dedup key exists in store). -pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool { - match transcript_dedup_key(path) { - Ok(key) => store.node_content(&key).is_some(), - Err(_) => false, - } -} - -/// Dedup key for a transcript based on its filename (UUID). -/// Used by the daemon reconcile loop — no file reads needed. -pub fn transcript_filename_key(path: &str) -> String { - transcript_key("_mined-transcripts", path) -} - -/// Build a namespaced transcript key from a prefix and path. 
-pub fn transcript_key(prefix: &str, path: &str) -> String { - let filename = std::path::Path::new(path) - .file_stem() - .map(|s| s.to_string_lossy().to_string()) - .unwrap_or_else(|| path.to_string()); - format!("{}#f-{}", prefix, filename) -} - -/// Per-segment key: `{base_key}.{segment_index}` -pub fn segment_key(base: &str, segment: usize) -> String { - format!("{}.{}", base, segment) -} - -/// Load all keys with a given prefix from the store. -pub fn keys_with_prefix(prefix: &str) -> HashSet { - use crate::store::AnyView; - let Ok(view) = AnyView::load() else { return HashSet::new() }; - let mut keys = HashSet::new(); - view.for_each_node(|key, _, _| { - if key.starts_with(prefix) { - keys.insert(key.to_string()); - } - }); - keys -} - -/// Find unmined segments for a transcript file against a set of known keys. -/// Returns segment indices that haven't been processed yet. -pub fn unmined_segments( - path: &std::path::Path, - prefix: &str, - known: &HashSet, -) -> Vec<(usize, Vec<(usize, String, String, String)>)> { - let path_str = path.to_string_lossy(); - let base = transcript_key(prefix, &path_str); - - let messages = match extract_conversation(&path_str) { - Ok(m) => m, - Err(_) => return Vec::new(), - }; - let segments = split_on_compaction(messages); - - segments.into_iter() - .enumerate() - .filter(|(i, _)| !known.contains(&segment_key(&base, *i))) - .collect() -} - -/// Mark a segment as processed in the store. -pub fn mark_segment( - store: &mut Store, - path: &str, - prefix: &str, - segment: usize, - provenance: &str, - content: &str, -) { - let base = transcript_key(prefix, path); - let key = segment_key(&base, segment); - let mut node = new_node(&key, content); - node.provenance = provenance.to_string(); - let _ = store.upsert_node(node); -} - -/// Get the set of all mined transcript keys (both content-hash and filename) -/// from the store. Load once per daemon tick, check many. 
-pub fn mined_transcript_keys() -> HashSet { - keys_with_prefix("_mined-transcripts-") -} - - -/// Extract user/assistant messages with line numbers from a JSONL transcript. -/// (line_number, role, text, timestamp) +/// Extract conversation messages from a JSONL transcript file. +/// Returns (line_number, role, text, timestamp) tuples. pub fn extract_conversation(jsonl_path: &str) -> Result, String> { let path = std::path::Path::new(jsonl_path); let messages = super::transcript::parse_transcript(path)?; @@ -139,7 +27,6 @@ pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Ve segments.push(current); current = Vec::new(); } - // The continuation message itself is part of the new segment current.push(msg); } else { current.push(msg); @@ -151,315 +38,3 @@ pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Ve segments } - -/// Format conversation messages for the prompt (truncating long messages). -fn format_conversation(messages: &[(usize, String, String, String)]) -> String { - messages.iter() - .map(|(line, role, text, ts)| { - let text = crate::util::truncate(text, 1800, "...[truncated]"); - if ts.is_empty() { - format!("L{} [{}]: {}", line, role, text) - } else { - format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text) - } - }) - .collect::>() - .join("\n\n") -} - -fn build_journal_prompt( - entry_text: &str, - conversation: &str, - keys: &[String], - grep_line: usize, -) -> Result { - let keys_text: String = keys.iter() - .map(|k| format!(" - {}", k)) - .collect::>() - .join("\n"); - - super::prompts::load_prompt("journal-enrich", &[ - ("{{GREP_LINE}}", &grep_line.to_string()), - ("{{ENTRY_TEXT}}", entry_text), - ("{{KEYS}}", &keys_text), - ("{{CONVERSATION}}", conversation), - ]) -} - -/// Enrich a journal entry with conversation context and link proposals. 
-pub fn journal_enrich( - store: &mut Store, - jsonl_path: &str, - entry_text: &str, - grep_line: usize, -) -> Result<(), String> { - println!("Extracting conversation from {}...", jsonl_path); - let messages = extract_conversation(jsonl_path)?; - let conversation = format_conversation(&messages); - println!(" {} messages, {} chars", messages.len(), conversation.len()); - - let keys = semantic_keys(store); - println!(" {} semantic keys", keys.len()); - - let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?; - println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4); - - println!(" Calling Sonnet..."); - let response = call_sonnet("enrich", &prompt)?; - - let result = parse_json_response(&response)?; - - // Report results - let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0); - let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0); - let links = result.get("links").and_then(|v| v.as_array()); - let insights = result.get("missed_insights").and_then(|v| v.as_array()); - - println!(" Source: L{}-L{}", source_start, source_end); - println!(" Links: {}", links.map_or(0, |l| l.len())); - println!(" Missed insights: {}", insights.map_or(0, |l| l.len())); - - // Apply links - if let Some(links) = links { - for link in links { - let target = link.get("target").and_then(|v| v.as_str()).unwrap_or(""); - let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or(""); - if target.is_empty() || target.starts_with("NOTE:") { - if let Some(note) = target.strip_prefix("NOTE:") { - println!(" NOTE: {} — {}", note, reason); - } - continue; - } - - // Resolve target and find journal node - let resolved = match store.resolve_key(target) { - Ok(r) => r, - Err(_) => { println!(" SKIP {} (not in graph)", target); continue; } - }; - let source_key = match store.find_journal_node(entry_text) { - Some(k) => k, - None => { println!(" SKIP {} (no matching journal node)", target); 
continue; } - }; - - // Refine target to best-matching section - let source_content = store.nodes.get(&source_key) - .map(|n| n.content.as_str()).unwrap_or(""); - let resolved = neuro::refine_target(store, source_content, &resolved); - - let source_uuid = match store.nodes.get(&source_key) { - Some(n) => n.uuid, - None => continue, - }; - let target_uuid = match store.nodes.get(&resolved) { - Some(n) => n.uuid, - None => continue, - }; - - let rel = new_relation( - source_uuid, target_uuid, - store::RelationType::Link, - 0.5, - &source_key, &resolved, - ); - if store.add_relation(rel).is_ok() { - println!(" LINK {} → {} ({})", source_key, resolved, reason); - } - } - } - - store.save()?; - Ok(()) -} - -/// Mine a conversation transcript for experiential moments not yet journaled. -/// If `segment` is Some, only process that compaction segment of the file. -pub fn experience_mine( - store: &mut Store, - jsonl_path: &str, - segment: Option, -) -> Result { - println!("Experience mining: {}", jsonl_path); - - // Transcript-level dedup: hash the file content and check if already mined - let transcript_bytes = fs::read(jsonl_path) - .map_err(|e| format!("reading transcript: {}", e))?; - let mut hasher = DefaultHasher::new(); - transcript_bytes.hash(&mut hasher); - let hash = hasher.finish(); - let dedup_key = format!("_mined-transcripts-h-{:016x}", hash); - - if store.nodes.contains_key(&dedup_key) { - // Backfill per-segment key if called with a specific segment - if let Some(idx) = segment { - let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx); - if !store.nodes.contains_key(&seg_key) { - let mut node = new_node(&seg_key, &format!("Backfilled from {}", dedup_key)); - node.provenance = "experience-mine:write".to_string(); - let _ = store.upsert_node(node); - store.save()?; - } - } - println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]); - return Ok(0); - } - - let all_messages = extract_conversation(jsonl_path)?; - - // If 
segment is specified, extract just that segment; otherwise process all messages - let messages = match segment { - Some(idx) => { - let segments = split_on_compaction(all_messages); - segments.into_iter().nth(idx) - .ok_or_else(|| format!("segment {} out of range", idx))? - } - None => all_messages, - }; - - let conversation = format_conversation(&messages); - println!(" {} messages, {} chars", messages.len(), conversation.len()); - - // Load core identity nodes for context - let cfg = crate::config::get(); - let identity: String = cfg.core_nodes.iter() - .filter_map(|k| store.nodes.get(k).map(|n| n.content.as_str())) - .collect::>() - .join("\n\n"); - - // Get recent episodic entries to avoid duplication - let mut journal: Vec<_> = store.nodes.values() - .filter(|node| matches!(node.node_type, store::NodeType::EpisodicSession)) - .collect(); - journal.sort_by_key(|n| n.timestamp); - let recent: String = journal.iter().rev().take(10) - .map(|n| format!("---\n{}\n", n.content)) - .collect(); - - let keys = semantic_keys(store); - let keys_text: String = keys.iter() - .map(|k| format!(" - {}", k)) - .collect::>() - .join("\n"); - - let prompt = super::prompts::load_prompt("experience", &[ - ("{{IDENTITY}}", &identity), - ("{{RECENT_JOURNAL}}", &recent), - ("{{KEYS}}", &keys_text), - ("{{CONVERSATION}}", &conversation), - ])?; - let est_tokens = prompt.len() / 4; - println!(" Prompt: {} chars (~{} tokens)", prompt.len(), est_tokens); - - if est_tokens > 150_000 { - println!(" Skipping: prompt too large ({} tokens > 150k limit)", est_tokens); - return Ok(0); - } - - println!(" Calling Sonnet..."); - let response = call_sonnet("experience-mine", &prompt)?; - - let entries = parse_json_response(&response)?; - let entries = match entries.as_array() { - Some(arr) => arr.clone(), - None => return Err("expected JSON array".to_string()), - }; - - if entries.is_empty() { - println!(" No missed experiences found."); - } else { - println!(" Found {} experiential moments:", 
entries.len()); - } - let mut count = 0; - for entry in &entries { - let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or(""); - let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or(""); - if content.is_empty() { continue; } - - // Format with timestamp header - let full_content = if ts.is_empty() { - content.to_string() - } else { - format!("## {}\n\n{}", ts, content) - }; - - // Generate key from timestamp - let key_slug: String = content.chars() - .filter(|c| c.is_alphanumeric() || *c == ' ') - .take(50) - .collect::() - .trim() - .to_lowercase() - .replace(' ', "-"); - let key = if ts.is_empty() { - format!("journal-j-mined-{}", key_slug) - } else { - format!("journal-j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug) - }; - - // Check for duplicate - if store.nodes.contains_key(&key) { - println!(" SKIP {} (duplicate)", key); - continue; - } - - // Write to store — use event timestamp, not mining time - let mut node = new_node(&key, &full_content); - node.node_type = store::NodeType::EpisodicSession; - node.provenance = "experience-mine:write".to_string(); - if !ts.is_empty() { - if let Some(epoch) = parse_timestamp_to_epoch(ts) { - node.created_at = epoch; - } - } - let _ = store.upsert_node(node); - count += 1; - - // Apply links from LLM output - if let Some(links) = entry.get("links").and_then(|v| v.as_array()) { - for link_val in links { - if let Some(target) = link_val.as_str() { - let target = target.to_string(); - if let Some(target_node) = store.nodes.get(&target) { - let source_uuid = store.nodes.get(&key).map(|n| n.uuid).unwrap_or_default(); - let target_uuid = target_node.uuid; - let rel = store::new_relation( - source_uuid, target_uuid, - store::RelationType::Link, 0.3, - &key, &target, - ); - let _ = store.add_relation(rel); - } - } - } - } - - let preview = crate::util::truncate(content, 77, "..."); - println!(" + [{}] {}", ts, preview); - } - - // Record this transcript/segment as mined (even if count == 0, 
to prevent re-runs) - let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count); - match segment { - Some(idx) => { - // Per-segment key: the daemon writes the whole-file key when all segments are done - let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx); - let mut node = new_node(&seg_key, &dedup_content); - node.provenance = "experience-mine:write".to_string(); - let _ = store.upsert_node(node); - } - None => { - // Unsegmented: only write content-hash key (not the filename key, since the - // file may grow with new compaction segments later — the daemon handles - // writing the whole-file filename key after verifying all segments are done) - let mut node = new_node(&dedup_key, &dedup_content); - node.provenance = "experience-mine:write".to_string(); - let _ = store.upsert_node(node); - } - } - - if count > 0 { - println!(" Saved {} new journal entries.", count); - } - store.save()?; - println!("Done: {} new entries mined.", count); - Ok(count) -} diff --git a/poc-memory/src/cli/agent.rs b/poc-memory/src/cli/agent.rs index c7d7c49..169a2cf 100644 --- a/poc-memory/src/cli/agent.rs +++ b/poc-memory/src/cli/agent.rs @@ -86,13 +86,8 @@ pub fn cmd_digest_links(do_apply: bool) -> Result<(), String> { Ok(()) } -pub fn cmd_journal_enrich(jsonl_path: &str, entry_text: &str, grep_line: usize) -> Result<(), String> { - if !std::path::Path::new(jsonl_path).is_file() { - return Err(format!("JSONL not found: {}", jsonl_path)); - } - - let mut store = store::Store::load()?; - crate::enrich::journal_enrich(&mut store, jsonl_path, entry_text, grep_line) +pub fn cmd_journal_enrich(_jsonl_path: &str, _entry_text: &str, _grep_line: usize) -> Result<(), String> { + Err("journal-enrich has been removed — use the observation agent instead.".into()) } pub fn cmd_apply_consolidation(_do_apply: bool, _report_file: Option<&str>) -> Result<(), String> { diff --git a/poc-memory/src/main.rs b/poc-memory/src/main.rs index db33033..aee9c18 100644 --- 
a/poc-memory/src/main.rs +++ b/poc-memory/src/main.rs @@ -1017,21 +1017,8 @@ fn cmd_digest(level: DigestLevel) -> Result<(), String> { } } -fn cmd_experience_mine(jsonl_path: Option<String>) -> Result<(), String> { - let jsonl_path = match jsonl_path { - Some(p) => p, - None => cli::journal::find_current_transcript() - .ok_or("no JSONL transcripts found")?, - }; - - if !std::path::Path::new(jsonl_path.as_str()).is_file() { - return Err(format!("JSONL not found: {}", jsonl_path)); - } - - let mut store = store::Store::load()?; - let count = crate::enrich::experience_mine(&mut store, &jsonl_path, None)?; - println!("Done: {} new entries mined.", count); - Ok(()) +fn cmd_experience_mine(_jsonl_path: Option<String>) -> Result<(), String> { + Err("experience-mine has been removed — use the observation agent instead.".into()) } fn cmd_daemon(sub: DaemonCmd) -> Result<(), String> {