digest: split into focused modules, externalize prompts

digest.rs was 2328 lines containing 6 distinct subsystems. Split into:
- llm.rs: shared LLM utilities (call_sonnet, parse_json_response, semantic_keys)
- audit.rs: link quality audit with parallel Sonnet batching
- enrich.rs: journal enrichment + experience mining
- consolidate.rs: consolidation pipeline + apply

Externalized all inline prompts to prompts/*.md templates using neuro::load_prompt with {{PLACEHOLDER}} syntax:
- daily-digest.md, weekly-digest.md, monthly-digest.md
- experience.md, journal-enrich.md, consolidation.md

digest.rs retains temporal digest generation (daily/weekly/monthly/auto) and date helpers. ~940 lines, down from 2328.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-03-03 17:18:18 -05:00 · 2026-03-03 17:18:18 -05:00 · 50da0b7b26
commit 50da0b7b26
parent 3f644609e1
13 changed files with 1642 additions and 1582 deletions
--- a/src/enrich.rs
+++ b/src/enrich.rs
@ -0,0 +1,346 @@
+// Journal enrichment and experience mining
+//
+// Two modes of processing conversation transcripts:
+//   journal_enrich  — enrich a specific journal entry with source location and links
+//   experience_mine — retroactively find experiential moments not yet journaled
+//
+// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
+// and apply results to the store.
+
+use crate::llm::{call_sonnet, parse_json_response, semantic_keys};
+use crate::neuro;
+use crate::store::{self, Store, new_node, new_relation};
+
+use regex::Regex;
+use std::collections::hash_map::DefaultHasher;
+use std::fs;
+use std::hash::{Hash, Hasher};
+
+/// Path of the `agent-results` directory inside the memory directory.
+///
+/// The directory is created on a best-effort basis: a creation failure is
+/// ignored here and will surface later when a caller tries to write into it.
+fn agent_results_dir() -> std::path::PathBuf {
+    let results_path = store::memory_dir().join("agent-results");
+    let _ = fs::create_dir_all(&results_path);
+    results_path
+}
+
+/// Pull the user/assistant messages out of a JSONL transcript.
+///
+/// Returns one `(line_number, role, text, timestamp)` tuple per usable
+/// message, where `line_number` is the 1-based line of the transcript file.
+/// Lines that are not valid JSON, have a `type` other than `user` or
+/// `assistant`, or carry no non-blank text are skipped silently.
+///
+/// Fails only when the transcript file itself cannot be read.
+fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
+    let raw = fs::read_to_string(jsonl_path)
+        .map_err(|e| format!("read {}: {}", jsonl_path, e))?;
+
+    let mut collected = Vec::new();
+    for (idx, raw_line) in raw.lines().enumerate() {
+        let Ok(record) = serde_json::from_str::<serde_json::Value>(raw_line) else {
+            continue;
+        };
+
+        let role = record.get("type").and_then(|v| v.as_str()).unwrap_or("");
+        if !matches!(role, "user" | "assistant") {
+            continue;
+        }
+
+        // The payload lives under "message" when present, otherwise on the
+        // record itself.
+        let payload = record.get("message").unwrap_or(&record);
+        let body = match payload.get("content") {
+            Some(serde_json::Value::String(s)) => s.clone(),
+            Some(serde_json::Value::Array(parts)) => {
+                // Each part is either an object with a "text" field or a bare
+                // string; anything else contributes nothing.
+                let pieces: Vec<String> = parts.iter()
+                    .filter_map(|part| {
+                        part.get("text")
+                            .and_then(|v| v.as_str())
+                            .or_else(|| part.as_str())
+                            .map(str::to_string)
+                    })
+                    .collect();
+                pieces.join("\n")
+            }
+            _ => continue,
+        };
+
+        let body = body.trim();
+        if body.is_empty() {
+            continue;
+        }
+
+        let stamp = record.get("timestamp")
+            .and_then(|v| v.as_str())
+            .unwrap_or("")
+            .to_string();
+
+        collected.push((idx + 1, role.to_string(), body.to_string(), stamp));
+    }
+
+    Ok(collected)
+}
+
+/// Format conversation messages for the prompt (truncating long messages).
+///
+/// Each message is rendered as `L<line> [<role>]: <text>`, or as
+/// `L<line> [<role>] <timestamp>: <text>` when it has a timestamp; messages
+/// are separated by a blank line.
+///
+/// Text longer than 2000 bytes is cut to at most 1800 bytes and suffixed with
+/// `...[truncated]`. Timestamps are cut to at most 19 bytes (presumably the
+/// `YYYY-MM-DDTHH:MM:SS` part of an ISO-8601 stamp — confirm against the
+/// transcript format). Both cuts are moved back to a UTF-8 character boundary,
+/// so slicing cannot panic on multi-byte input.
+fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
+    const MAX_TEXT_BYTES: usize = 2000;
+    const KEPT_TEXT_BYTES: usize = 1800;
+    const TIMESTAMP_BYTES: usize = 19;
+
+    messages.iter()
+        .map(|(line, role, text, ts)| {
+            let text = if text.len() > MAX_TEXT_BYTES {
+                format!("{}...[truncated]", &text[..text.floor_char_boundary(KEPT_TEXT_BYTES)])
+            } else {
+                text.clone()
+            };
+            if ts.is_empty() {
+                format!("L{} [{}]: {}", line, role, text)
+            } else {
+                // floor_char_boundary clamps to the string length and never
+                // lands inside a multi-byte character, unlike a raw byte index.
+                let ts = &ts[..ts.floor_char_boundary(TIMESTAMP_BYTES)];
+                format!("L{} [{}] {}: {}", line, role, ts, text)
+            }
+        })
+        .collect::<Vec<_>>()
+        .join("\n\n")
+}
+
+/// Fill the `journal-enrich` prompt template.
+///
+/// `keys` is rendered as an indented bullet list, one key per line, and
+/// `grep_line` is substituted in decimal form. Any error from
+/// `neuro::load_prompt` is returned unchanged.
+fn build_journal_prompt(
+    entry_text: &str,
+    conversation: &str,
+    keys: &[String],
+    grep_line: usize,
+) -> Result<String, String> {
+    let bullet_lines: Vec<String> = keys.iter()
+        .map(|key| format!("  - {}", key))
+        .collect();
+    let key_list = bullet_lines.join("\n");
+    let grep_line_text = grep_line.to_string();
+
+    neuro::load_prompt("journal-enrich", &[
+        ("{{GREP_LINE}}", &grep_line_text),
+        ("{{ENTRY_TEXT}}", entry_text),
+        ("{{KEYS}}", &key_list),
+        ("{{CONVERSATION}}", conversation),
+    ])
+}
+
+/// Enrich a journal entry with conversation context and link proposals.
+///
+/// Extracts the conversation from the transcript at `jsonl_path`, fills the
+/// `journal-enrich` prompt with `entry_text`, `grep_line` and the store's
+/// semantic keys, calls Sonnet, and parses the JSON reply. Every proposed
+/// link whose target resolves is added to the store as a `Link` relation
+/// from the journal node matching `entry_text`. The raw agent result is then
+/// written to a timestamped JSON file in the agent-results directory and the
+/// store is saved.
+///
+/// # Errors
+///
+/// Returns `Err` when the transcript cannot be read, the prompt cannot be
+/// built, the Sonnet call or JSON parsing fails, the result file cannot be
+/// serialized or written, or the store cannot be saved. Links that cannot be
+/// applied are skipped (most with a console message) rather than treated as
+/// errors.
+pub fn journal_enrich(
+    store: &mut Store,
+    jsonl_path: &str,
+    entry_text: &str,
+    grep_line: usize,
+) -> Result<(), String> {
+    println!("Extracting conversation from {}...", jsonl_path);
+    let messages = extract_conversation(jsonl_path)?;
+    let conversation = format_conversation(&messages);
+    println!("  {} messages, {} chars", messages.len(), conversation.len());
+
+    let keys = semantic_keys(store);
+    println!("  {} semantic keys", keys.len());
+
+    let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
+    println!("  Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
+
+    println!("  Calling Sonnet...");
+    let response = call_sonnet(&prompt, 300)?;
+
+    let result = parse_json_response(&response)?;
+
+    // Report results
+    let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
+    let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
+    let links = result.get("links").and_then(|v| v.as_array());
+    let insights = result.get("missed_insights").and_then(|v| v.as_array());
+
+    println!("  Source: L{}-L{}", source_start, source_end);
+    println!("  Links: {}", links.map_or(0, |l| l.len()));
+    println!("  Missed insights: {}", insights.map_or(0, |l| l.len()));
+
+    // Apply links
+    if let Some(links) = links {
+        for link in links {
+            let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
+            let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
+
+            // "NOTE:" targets are free-form remarks from the agent, not graph
+            // keys: print them and move on. Empty targets are dropped silently.
+            if target.is_empty() || target.starts_with("NOTE:") {
+                if let Some(note) = target.strip_prefix("NOTE:") {
+                    println!("  NOTE: {} — {}", note, reason);
+                }
+                continue;
+            }
+
+            // Resolve target and find journal node
+            let Ok(resolved) = store.resolve_key(target) else {
+                println!("  SKIP {} (not in graph)", target);
+                continue;
+            };
+            let Some(source_key) = store.find_journal_node(entry_text) else {
+                println!("  SKIP {} (no matching journal node)", target);
+                continue;
+            };
+
+            // Refine target to best-matching section
+            let source_content = store.nodes.get(&source_key)
+                .map(|n| n.content.as_str()).unwrap_or("");
+            let resolved = neuro::refine_target(store, source_content, &resolved);
+
+            let Some(source_uuid) = store.nodes.get(&source_key).map(|n| n.uuid) else {
+                continue;
+            };
+            let Some(target_uuid) = store.nodes.get(&resolved).map(|n| n.uuid) else {
+                continue;
+            };
+
+            let rel = new_relation(
+                source_uuid, target_uuid,
+                store::RelationType::Link,
+                0.5,
+                &source_key, &resolved,
+            );
+            // A relation the store rejects is skipped without a message.
+            if store.add_relation(rel).is_ok() {
+                println!("  LINK {} → {} ({})", source_key, resolved, reason);
+            }
+        }
+    }
+
+    // Save result to agent-results
+    let timestamp = store::format_datetime(store::now_epoch())
+        .replace([':', '-'], "");
+    let result_file = agent_results_dir()
+        .join(format!("{}.json", timestamp));
+    // Keep only a short excerpt of the entry. The cut is moved back to a
+    // UTF-8 character boundary: a raw `[..500]` byte slice panics when byte
+    // 500 falls inside a multi-byte character.
+    let entry_excerpt = &entry_text[..entry_text.floor_char_boundary(500)];
+    let output = serde_json::json!({
+        "timestamp": timestamp,
+        "jsonl_path": jsonl_path,
+        "entry_text": entry_excerpt,
+        "agent_result": result,
+    });
+    let serialized = serde_json::to_string_pretty(&output)
+        .map_err(|e| format!("serialize agent result: {}", e))?;
+    fs::write(&result_file, serialized)
+        .map_err(|e| format!("write {}: {}", result_file.display(), e))?;
+    println!("  Results saved: {}", result_file.display());
+
+    store.save()?;
+    Ok(())
+}
+
+/// Build the store key for a mined journal entry.
+///
+/// The key is `journal.md#j-<timestamp>-<slug>`, or `journal.md#j-mined-<slug>`
+/// when `ts` is empty. The timestamp is lowercased with `:` replaced by `-`.
+/// The slug is made from the first 50 alphanumeric-or-space characters of
+/// `content`, trimmed, lowercased, with spaces turned into `-`.
+fn mined_journal_key(ts: &str, content: &str) -> String {
+    let slug = content.chars()
+        .filter(|c| c.is_alphanumeric() || *c == ' ')
+        .take(50)
+        .collect::<String>()
+        .trim()
+        .to_lowercase()
+        .replace(' ', "-");
+    if ts.is_empty() {
+        format!("journal.md#j-mined-{}", slug)
+    } else {
+        format!("journal.md#j-{}-{}", ts.to_lowercase().replace(':', "-"), slug)
+    }
+}
+
+/// Mine a conversation transcript for experiential moments not yet journaled.
+///
+/// The transcript at `jsonl_path` is hashed first; if a
+/// `_mined-transcripts.md#h-<hash>` node already exists the transcript is
+/// skipped. Otherwise the `experience` prompt is filled with the
+/// `identity.md` node, the ten most recent journal entries, the semantic keys
+/// and the conversation, and sent to Sonnet. Each returned entry with
+/// non-empty `content` becomes a new journal node unless its key already
+/// exists. Finally a marker node for the transcript hash is recorded and the
+/// store is saved.
+///
+/// Returns the number of new journal entries written (0 when the transcript
+/// was already mined or nothing new was found).
+///
+/// # Errors
+///
+/// Returns `Err` when the transcript cannot be read, the prompt cannot be
+/// loaded, the Sonnet call fails, the reply is not a JSON array, or the store
+/// cannot be saved.
+pub fn experience_mine(
+    store: &mut Store,
+    jsonl_path: &str,
+) -> Result<usize, String> {
+    println!("Experience mining: {}", jsonl_path);
+
+    // Transcript-level dedup: hash the file content and check if already mined.
+    // NOTE(review): std documents DefaultHasher's algorithm as unspecified and
+    // subject to change between Rust releases, so these persisted keys may stop
+    // matching after a toolchain upgrade (transcripts would be mined again).
+    // Switching hashes would invalidate existing markers, so it is left as is.
+    let transcript_bytes = fs::read(jsonl_path)
+        .map_err(|e| format!("reading transcript: {}", e))?;
+    let mut hasher = DefaultHasher::new();
+    transcript_bytes.hash(&mut hasher);
+    let hash_hex = format!("{:016x}", hasher.finish());
+    let dedup_key = format!("_mined-transcripts.md#h-{}", hash_hex);
+
+    if store.nodes.contains_key(&dedup_key) {
+        println!("  Already mined this transcript ({}), skipping.", hash_hex);
+        return Ok(0);
+    }
+
+    let messages = extract_conversation(jsonl_path)?;
+    let conversation = format_conversation(&messages);
+    println!("  {} messages, {} chars", messages.len(), conversation.len());
+
+    // Load identity
+    let identity = store.nodes.get("identity.md")
+        .map(|n| n.content.clone())
+        .unwrap_or_default();
+
+    // Get recent journal entries to avoid duplication. Entries are ordered by
+    // the date embedded in their key, falling back to the first date found in
+    // their content (and to the empty string when neither is present).
+    let key_date_re = Regex::new(r"^journal\.md#j-(\d{4}-\d{2}-\d{2}[t-]\d{2}-\d{2})")
+        .expect("key date pattern is a valid regex");
+    let date_re = Regex::new(r"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2})")
+        .expect("content date pattern is a valid regex");
+    let mut journal: Vec<_> = store.nodes.values()
+        .filter(|node| node.key.starts_with("journal.md#j-"))
+        .collect();
+    // Cached keys: the regexes run once per node instead of once per
+    // comparison. The sort is stable, like the `sort_by` it replaces.
+    journal.sort_by_cached_key(|node| {
+        key_date_re.captures(&node.key).map(|c| c[1].to_string())
+            .or_else(|| date_re.captures(&node.content).map(|c| c[1].to_string()))
+            .unwrap_or_default()
+    });
+    let recent: String = journal.iter().rev().take(10)
+        .map(|n| format!("---\n{}\n", n.content))
+        .collect();
+
+    let keys = semantic_keys(store);
+    let keys_text: String = keys.iter()
+        .map(|k| format!("  - {}", k))
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    let prompt = neuro::load_prompt("experience", &[
+        ("{{IDENTITY}}", &identity),
+        ("{{RECENT_JOURNAL}}", &recent),
+        ("{{KEYS}}", &keys_text),
+        ("{{CONVERSATION}}", &conversation),
+    ])?;
+    println!("  Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
+
+    println!("  Calling Sonnet...");
+    let response = call_sonnet(&prompt, 2000)?;
+
+    // Take ownership of the array instead of cloning it out of the value.
+    let entries = match parse_json_response(&response)? {
+        serde_json::Value::Array(arr) => arr,
+        _ => return Err("expected JSON array".to_string()),
+    };
+
+    // An empty result must still fall through to the dedup marker below;
+    // returning early here would cause the same transcript to be sent to the
+    // model again on every run.
+    if entries.is_empty() {
+        println!("  No missed experiences found.");
+    } else {
+        println!("  Found {} experiential moments:", entries.len());
+    }
+
+    let mut count = 0;
+    for entry in &entries {
+        let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
+        let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
+        if content.is_empty() { continue; }
+
+        // Format with timestamp header
+        let full_content = if ts.is_empty() {
+            content.to_string()
+        } else {
+            format!("## {}\n\n{}", ts, content)
+        };
+
+        // Generate key from timestamp plus a slug of the content
+        let key = mined_journal_key(ts, content);
+
+        // Check for duplicate
+        if store.nodes.contains_key(&key) {
+            println!("  SKIP {} (duplicate)", key);
+            continue;
+        }
+
+        // Write to store
+        let mut node = new_node(&key, &full_content);
+        node.node_type = store::NodeType::EpisodicSession;
+        node.category = store::Category::Observation;
+        // NOTE(review): the result of upsert_node is discarded, so `count`
+        // also includes entries whose write failed — confirm whether failures
+        // should be surfaced instead.
+        let _ = store.upsert_node(node);
+        count += 1;
+
+        // Cut the preview on a UTF-8 character boundary; a raw `[..77]` byte
+        // slice panics when byte 77 falls inside a multi-byte character.
+        let preview = if content.len() > 80 {
+            &content[..content.floor_char_boundary(77)]
+        } else {
+            content
+        };
+        println!("  + [{}] {}...", ts, preview);
+    }
+
+    // Record this transcript as mined (even if count == 0, to prevent re-runs)
+    let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
+    let mut dedup_node = new_node(&dedup_key, &dedup_content);
+    dedup_node.category = store::Category::Task;
+    let _ = store.upsert_node(dedup_node);
+
+    if count > 0 {
+        println!("  Saved {} new journal entries.", count);
+    }
+    store.save()?;
+    println!("Done: {} new entries mined.", count);
+    Ok(count)
+}