// Journal enrichment and experience mining // // Two modes of processing conversation transcripts: // journal_enrich — enrich a specific journal entry with source location and links // experience_mine — retroactively find experiential moments not yet journaled // // Both extract conversation from JSONL transcripts, build prompts, call Sonnet, // and apply results to the store. use crate::llm::{call_sonnet, parse_json_response, semantic_keys}; use crate::neuro; use crate::store::{self, Store, new_node, new_relation}; use regex::Regex; use std::collections::hash_map::DefaultHasher; use std::collections::HashSet; use std::fs; use std::hash::{Hash, Hasher}; use crate::store::StoreView; /// Parse a timestamp string like "2026-03-05T19:56" to unix epoch seconds. fn parse_timestamp_to_epoch(ts: &str) -> Option { use chrono::{Local, NaiveDateTime, TimeZone}; // Try common formats let formats = ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M"]; for fmt in &formats { if let Ok(ndt) = NaiveDateTime::parse_from_str(ts, fmt) { if let Some(dt) = Local.from_local_datetime(&ndt).earliest() { return Some(dt.timestamp()); } } } None } /// Compute the store dedup key for a transcript file. /// This is the same key experience_mine uses to mark a transcript as mined. pub fn transcript_dedup_key(path: &str) -> Result { let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?; let mut hasher = DefaultHasher::new(); bytes.hash(&mut hasher); Ok(format!("_mined-transcripts.md#h-{:016x}", hasher.finish())) } /// Check if a transcript has already been mined (dedup key exists in store). pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool { match transcript_dedup_key(path) { Ok(key) => store.node_content(&key).is_some(), Err(_) => false, } } /// Dedup key for a transcript based on its filename (UUID). /// Used by the daemon reconcile loop — no file reads needed. 
pub fn transcript_filename_key(path: &str) -> String {
    // Fall back to the raw path when there is no file stem (e.g. empty path).
    let filename = std::path::Path::new(path)
        .file_stem()
        .map(|s| s.to_string_lossy().to_string())
        .unwrap_or_else(|| path.to_string());
    format!("_mined-transcripts.md#f-{}", filename)
}

/// Get the set of all mined transcript keys (both content-hash and filename)
/// from the store. Load once per daemon tick, check many.
///
/// Returns an empty set when the store view cannot be loaded (best-effort).
pub fn mined_transcript_keys() -> HashSet<String> {
    use crate::store::AnyView;
    let Ok(view) = AnyView::load() else { return HashSet::new() };
    let mut keys = HashSet::new();
    view.for_each_node(|key, _, _| {
        if key.starts_with("_mined-transcripts.md#") {
            keys.insert(key.to_string());
        }
    });
    keys
}

/// Check if a transcript has been mined, given a pre-loaded set of mined keys.
/// Checks filename-based key only (no file read). Sessions mined before the
/// filename key was added will pass through and short-circuit in experience_mine
/// via the content hash check — a one-time cost on first restart after this change.
pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> bool {
    mined.contains(&transcript_filename_key(path))
}

/// Extract user/assistant messages with line numbers from a JSONL transcript.
/// Returns tuples of (line_number, role, text, timestamp).
///
/// Line numbers are 1-based positions in the JSONL file. Records that are not
/// valid JSON, are neither "user" nor "assistant", or yield no text after
/// trimming are silently skipped.
fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
    let content = fs::read_to_string(jsonl_path)
        .map_err(|e| format!("read {}: {}", jsonl_path, e))?;
    let mut messages = Vec::new();
    for (i, line) in content.lines().enumerate() {
        // Skip malformed lines rather than failing the whole transcript.
        let obj: serde_json::Value = match serde_json::from_str(line) {
            Ok(v) => v,
            Err(_) => continue,
        };
        let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
        if msg_type != "user" && msg_type != "assistant" {
            continue;
        }
        let timestamp = obj.get("timestamp")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        // Payload may be nested under "message"; fall back to the record itself.
        let msg = obj.get("message").unwrap_or(&obj);
        let content = msg.get("content");
        let text = match content {
            // Content is either a plain string or an array of content blocks.
            Some(serde_json::Value::String(s)) => s.clone(),
            Some(serde_json::Value::Array(arr)) => {
                arr.iter()
                    .filter_map(|c| {
                        // Only extract text blocks; skip tool_use, tool_result, thinking, etc.
                        let is_text = c.get("type").and_then(|v| v.as_str()) == Some("text");
                        if is_text {
                            c.get("text").and_then(|v| v.as_str()).map(|s| s.to_string())
                        } else {
                            // Bare string entries in the array are kept as-is;
                            // object blocks of other types yield None here.
                            c.as_str().map(|s| s.to_string())
                        }
                    })
                    .collect::<Vec<String>>()
                    .join("\n")
            }
            _ => continue,
        };
        let text = text.trim().to_string();
        if text.is_empty() {
            continue;
        }
        messages.push((i + 1, msg_type.to_string(), text, timestamp));
    }
    Ok(messages)
}

/// Format conversation messages for the prompt (truncating long messages).
fn format_conversation(messages: &[(usize, String, String, String)]) -> String { messages.iter() .map(|(line, role, text, ts)| { let text = if text.len() > 2000 { format!("{}...[truncated]", &text[..text.floor_char_boundary(1800)]) } else { text.clone() }; if ts.is_empty() { format!("L{} [{}]: {}", line, role, text) } else { format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text) } }) .collect::>() .join("\n\n") } fn build_journal_prompt( entry_text: &str, conversation: &str, keys: &[String], grep_line: usize, ) -> Result { let keys_text: String = keys.iter() .map(|k| format!(" - {}", k)) .collect::>() .join("\n"); neuro::load_prompt("journal-enrich", &[ ("{{GREP_LINE}}", &grep_line.to_string()), ("{{ENTRY_TEXT}}", entry_text), ("{{KEYS}}", &keys_text), ("{{CONVERSATION}}", conversation), ]) } /// Enrich a journal entry with conversation context and link proposals. pub fn journal_enrich( store: &mut Store, jsonl_path: &str, entry_text: &str, grep_line: usize, ) -> Result<(), String> { println!("Extracting conversation from {}...", jsonl_path); let messages = extract_conversation(jsonl_path)?; let conversation = format_conversation(&messages); println!(" {} messages, {} chars", messages.len(), conversation.len()); let keys = semantic_keys(store); println!(" {} semantic keys", keys.len()); let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?; println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4); println!(" Calling Sonnet..."); let response = call_sonnet("enrich", &prompt)?; let result = parse_json_response(&response)?; // Report results let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0); let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0); let links = result.get("links").and_then(|v| v.as_array()); let insights = result.get("missed_insights").and_then(|v| v.as_array()); println!(" Source: L{}-L{}", source_start, source_end); println!(" Links: 
{}", links.map_or(0, |l| l.len())); println!(" Missed insights: {}", insights.map_or(0, |l| l.len())); // Apply links if let Some(links) = links { for link in links { let target = link.get("target").and_then(|v| v.as_str()).unwrap_or(""); let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or(""); if target.is_empty() || target.starts_with("NOTE:") { if let Some(note) = target.strip_prefix("NOTE:") { println!(" NOTE: {} — {}", note, reason); } continue; } // Resolve target and find journal node let resolved = match store.resolve_key(target) { Ok(r) => r, Err(_) => { println!(" SKIP {} (not in graph)", target); continue; } }; let source_key = match store.find_journal_node(entry_text) { Some(k) => k, None => { println!(" SKIP {} (no matching journal node)", target); continue; } }; // Refine target to best-matching section let source_content = store.nodes.get(&source_key) .map(|n| n.content.as_str()).unwrap_or(""); let resolved = neuro::refine_target(store, source_content, &resolved); let source_uuid = match store.nodes.get(&source_key) { Some(n) => n.uuid, None => continue, }; let target_uuid = match store.nodes.get(&resolved) { Some(n) => n.uuid, None => continue, }; let rel = new_relation( source_uuid, target_uuid, store::RelationType::Link, 0.5, &source_key, &resolved, ); if store.add_relation(rel).is_ok() { println!(" LINK {} → {} ({})", source_key, resolved, reason); } } } store.save()?; Ok(()) } /// Mine a conversation transcript for experiential moments not yet journaled. 
pub fn experience_mine( store: &mut Store, jsonl_path: &str, ) -> Result { println!("Experience mining: {}", jsonl_path); // Transcript-level dedup: hash the file content and check if already mined let transcript_bytes = fs::read(jsonl_path) .map_err(|e| format!("reading transcript: {}", e))?; let mut hasher = DefaultHasher::new(); transcript_bytes.hash(&mut hasher); let hash = hasher.finish(); let dedup_key = format!("_mined-transcripts.md#h-{:016x}", hash); if store.nodes.contains_key(&dedup_key) { println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]); return Ok(0); } let messages = extract_conversation(jsonl_path)?; let conversation = format_conversation(&messages); println!(" {} messages, {} chars", messages.len(), conversation.len()); // Load identity let identity = store.nodes.get("identity.md") .map(|n| n.content.clone()) .unwrap_or_default(); // Get recent journal entries to avoid duplication let key_date_re = Regex::new(r"^journal\.md#j-(\d{4}-\d{2}-\d{2}[t-]\d{2}-\d{2})").unwrap(); let date_re = Regex::new(r"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2})").unwrap(); let mut journal: Vec<_> = store.nodes.values() .filter(|node| node.key.starts_with("journal.md#j-")) .collect(); journal.sort_by(|a, b| { let ak = key_date_re.captures(&a.key).map(|c| c[1].to_string()) .or_else(|| date_re.captures(&a.content).map(|c| c[1].to_string())) .unwrap_or_default(); let bk = key_date_re.captures(&b.key).map(|c| c[1].to_string()) .or_else(|| date_re.captures(&b.content).map(|c| c[1].to_string())) .unwrap_or_default(); ak.cmp(&bk) }); let recent: String = journal.iter().rev().take(10) .map(|n| format!("---\n{}\n", n.content)) .collect(); let keys = semantic_keys(store); let keys_text: String = keys.iter() .map(|k| format!(" - {}", k)) .collect::>() .join("\n"); let prompt = neuro::load_prompt("experience", &[ ("{{IDENTITY}}", &identity), ("{{RECENT_JOURNAL}}", &recent), ("{{KEYS}}", &keys_text), ("{{CONVERSATION}}", &conversation), ])?; println!(" Prompt: {} 
chars (~{} tokens)", prompt.len(), prompt.len() / 4); println!(" Calling Sonnet..."); let response = call_sonnet("experience-mine", &prompt)?; let entries = parse_json_response(&response)?; let entries = match entries.as_array() { Some(arr) => arr.clone(), None => return Err("expected JSON array".to_string()), }; if entries.is_empty() { println!(" No missed experiences found."); return Ok(0); } println!(" Found {} experiential moments:", entries.len()); let mut count = 0; for entry in &entries { let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or(""); let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or(""); if content.is_empty() { continue; } // Format with timestamp header let full_content = if ts.is_empty() { content.to_string() } else { format!("## {}\n\n{}", ts, content) }; // Generate key from timestamp let key_slug: String = content.chars() .filter(|c| c.is_alphanumeric() || *c == ' ') .take(50) .collect::() .trim() .to_lowercase() .replace(' ', "-"); let key = if ts.is_empty() { format!("journal.md#j-mined-{}", key_slug) } else { format!("journal.md#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug) }; // Check for duplicate if store.nodes.contains_key(&key) { println!(" SKIP {} (duplicate)", key); continue; } // Write to store — use event timestamp, not mining time let mut node = new_node(&key, &full_content); node.node_type = store::NodeType::EpisodicSession; node.category = store::Category::Observation; node.provenance = store::Provenance::AgentExperienceMine; if !ts.is_empty() { if let Some(epoch) = parse_timestamp_to_epoch(ts) { node.created_at = epoch; } } let _ = store.upsert_node(node); count += 1; let preview = if content.len() > 80 { &content[..77] } else { content }; println!(" + [{}] {}...", ts, preview); } // Record this transcript as mined (even if count == 0, to prevent re-runs) // Two keys: content hash (exact dedup) and filename (fast daemon reconcile) let dedup_content = format!("Mined {} ({} 
entries)", jsonl_path, count); let mut dedup_node = new_node(&dedup_key, &dedup_content); dedup_node.category = store::Category::Task; dedup_node.provenance = store::Provenance::AgentExperienceMine; let _ = store.upsert_node(dedup_node); let fname_key = transcript_filename_key(jsonl_path); let mut fname_node = new_node(&fname_key, &dedup_content); fname_node.category = store::Category::Task; fname_node.provenance = store::Provenance::AgentExperienceMine; let _ = store.upsert_node(fname_node); if count > 0 { println!(" Saved {} new journal entries.", count); } store.save()?; println!("Done: {} new entries mined.", count); Ok(count) }