digest: split into focused modules, externalize prompts
digest.rs was 2328 lines containing 6 distinct subsystems. Split into:
- llm.rs: shared LLM utilities (call_sonnet, parse_json_response, semantic_keys)
- audit.rs: link quality audit with parallel Sonnet batching
- enrich.rs: journal enrichment + experience mining
- consolidate.rs: consolidation pipeline + apply
Externalized all inline prompts to prompts/*.md templates using
neuro::load_prompt with {{PLACEHOLDER}} syntax:
- daily-digest.md, weekly-digest.md, monthly-digest.md
- experience.md, journal-enrich.md, consolidation.md
digest.rs retains temporal digest generation (daily/weekly/monthly/auto)
and date helpers. ~940 lines, down from 2328.
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
3f644609e1
commit
50da0b7b26
13 changed files with 1642 additions and 1582 deletions
346
src/enrich.rs
Normal file
346
src/enrich.rs
Normal file
|
|
@ -0,0 +1,346 @@
|
|||
// Journal enrichment and experience mining
|
||||
//
|
||||
// Two modes of processing conversation transcripts:
|
||||
// journal_enrich — enrich a specific journal entry with source location and links
|
||||
// experience_mine — retroactively find experiential moments not yet journaled
|
||||
//
|
||||
// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
|
||||
// and apply results to the store.
|
||||
|
||||
use crate::llm::{call_sonnet, parse_json_response, semantic_keys};
|
||||
use crate::neuro;
|
||||
use crate::store::{self, Store, new_node, new_relation};
|
||||
|
||||
use regex::Regex;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::fs;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
fn agent_results_dir() -> std::path::PathBuf {
|
||||
let dir = store::memory_dir().join("agent-results");
|
||||
fs::create_dir_all(&dir).ok();
|
||||
dir
|
||||
}
|
||||
|
||||
/// Extract user/assistant messages with line numbers from a JSONL transcript.
|
||||
/// (line_number, role, text, timestamp)
|
||||
fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
|
||||
let content = fs::read_to_string(jsonl_path)
|
||||
.map_err(|e| format!("read {}: {}", jsonl_path, e))?;
|
||||
|
||||
let mut messages = Vec::new();
|
||||
for (i, line) in content.lines().enumerate() {
|
||||
let obj: serde_json::Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if msg_type != "user" && msg_type != "assistant" { continue; }
|
||||
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let msg = obj.get("message").unwrap_or(&obj);
|
||||
let content = msg.get("content");
|
||||
|
||||
let text = match content {
|
||||
Some(serde_json::Value::String(s)) => s.clone(),
|
||||
Some(serde_json::Value::Array(arr)) => {
|
||||
arr.iter()
|
||||
.filter_map(|c| {
|
||||
if let Some(t) = c.get("text").and_then(|v| v.as_str()) {
|
||||
Some(t.to_string())
|
||||
} else {
|
||||
c.as_str().map(|s| s.to_string())
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let text = text.trim().to_string();
|
||||
if text.is_empty() { continue; }
|
||||
|
||||
messages.push((i + 1, msg_type.to_string(), text, timestamp));
|
||||
}
|
||||
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
/// Format conversation messages for the prompt (truncating long messages).
|
||||
fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
|
||||
messages.iter()
|
||||
.map(|(line, role, text, ts)| {
|
||||
let text = if text.len() > 2000 {
|
||||
format!("{}...[truncated]", &text[..text.floor_char_boundary(1800)])
|
||||
} else {
|
||||
text.clone()
|
||||
};
|
||||
if ts.is_empty() {
|
||||
format!("L{} [{}]: {}", line, role, text)
|
||||
} else {
|
||||
format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n")
|
||||
}
|
||||
|
||||
fn build_journal_prompt(
|
||||
entry_text: &str,
|
||||
conversation: &str,
|
||||
keys: &[String],
|
||||
grep_line: usize,
|
||||
) -> Result<String, String> {
|
||||
let keys_text: String = keys.iter()
|
||||
.map(|k| format!(" - {}", k))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
neuro::load_prompt("journal-enrich", &[
|
||||
("{{GREP_LINE}}", &grep_line.to_string()),
|
||||
("{{ENTRY_TEXT}}", entry_text),
|
||||
("{{KEYS}}", &keys_text),
|
||||
("{{CONVERSATION}}", conversation),
|
||||
])
|
||||
}
|
||||
|
||||
/// Enrich a journal entry with conversation context and link proposals.
|
||||
pub fn journal_enrich(
|
||||
store: &mut Store,
|
||||
jsonl_path: &str,
|
||||
entry_text: &str,
|
||||
grep_line: usize,
|
||||
) -> Result<(), String> {
|
||||
println!("Extracting conversation from {}...", jsonl_path);
|
||||
let messages = extract_conversation(jsonl_path)?;
|
||||
let conversation = format_conversation(&messages);
|
||||
println!(" {} messages, {} chars", messages.len(), conversation.len());
|
||||
|
||||
let keys = semantic_keys(store);
|
||||
println!(" {} semantic keys", keys.len());
|
||||
|
||||
let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
|
||||
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
|
||||
|
||||
println!(" Calling Sonnet...");
|
||||
let response = call_sonnet(&prompt, 300)?;
|
||||
|
||||
let result = parse_json_response(&response)?;
|
||||
|
||||
// Report results
|
||||
let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
let links = result.get("links").and_then(|v| v.as_array());
|
||||
let insights = result.get("missed_insights").and_then(|v| v.as_array());
|
||||
|
||||
println!(" Source: L{}-L{}", source_start, source_end);
|
||||
println!(" Links: {}", links.map_or(0, |l| l.len()));
|
||||
println!(" Missed insights: {}", insights.map_or(0, |l| l.len()));
|
||||
|
||||
// Apply links
|
||||
if let Some(links) = links {
|
||||
for link in links {
|
||||
let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if target.is_empty() || target.starts_with("NOTE:") {
|
||||
if let Some(note) = target.strip_prefix("NOTE:") {
|
||||
println!(" NOTE: {} — {}", note, reason);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Resolve target and find journal node
|
||||
let resolved = match store.resolve_key(target) {
|
||||
Ok(r) => r,
|
||||
Err(_) => { println!(" SKIP {} (not in graph)", target); continue; }
|
||||
};
|
||||
let source_key = match store.find_journal_node(entry_text) {
|
||||
Some(k) => k,
|
||||
None => { println!(" SKIP {} (no matching journal node)", target); continue; }
|
||||
};
|
||||
|
||||
// Refine target to best-matching section
|
||||
let source_content = store.nodes.get(&source_key)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let resolved = neuro::refine_target(store, source_content, &resolved);
|
||||
|
||||
let source_uuid = match store.nodes.get(&source_key) {
|
||||
Some(n) => n.uuid,
|
||||
None => continue,
|
||||
};
|
||||
let target_uuid = match store.nodes.get(&resolved) {
|
||||
Some(n) => n.uuid,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Link,
|
||||
0.5,
|
||||
&source_key, &resolved,
|
||||
);
|
||||
if store.add_relation(rel).is_ok() {
|
||||
println!(" LINK {} → {} ({})", source_key, resolved, reason);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save result to agent-results
|
||||
let timestamp = store::format_datetime(store::now_epoch())
|
||||
.replace([':', '-'], "");
|
||||
let result_file = agent_results_dir()
|
||||
.join(format!("{}.json", timestamp));
|
||||
let output = serde_json::json!({
|
||||
"timestamp": timestamp,
|
||||
"jsonl_path": jsonl_path,
|
||||
"entry_text": &entry_text[..entry_text.len().min(500)],
|
||||
"agent_result": result,
|
||||
});
|
||||
fs::write(&result_file, serde_json::to_string_pretty(&output).unwrap())
|
||||
.map_err(|e| format!("write {}: {}", result_file.display(), e))?;
|
||||
println!(" Results saved: {}", result_file.display());
|
||||
|
||||
store.save()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Mine a conversation transcript for experiential moments not yet journaled.
|
||||
pub fn experience_mine(
|
||||
store: &mut Store,
|
||||
jsonl_path: &str,
|
||||
) -> Result<usize, String> {
|
||||
println!("Experience mining: {}", jsonl_path);
|
||||
|
||||
// Transcript-level dedup: hash the file content and check if already mined
|
||||
let transcript_bytes = fs::read(jsonl_path)
|
||||
.map_err(|e| format!("reading transcript: {}", e))?;
|
||||
let mut hasher = DefaultHasher::new();
|
||||
transcript_bytes.hash(&mut hasher);
|
||||
let hash = hasher.finish();
|
||||
let dedup_key = format!("_mined-transcripts.md#h-{:016x}", hash);
|
||||
|
||||
if store.nodes.contains_key(&dedup_key) {
|
||||
println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]);
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let messages = extract_conversation(jsonl_path)?;
|
||||
let conversation = format_conversation(&messages);
|
||||
println!(" {} messages, {} chars", messages.len(), conversation.len());
|
||||
|
||||
// Load identity
|
||||
let identity = store.nodes.get("identity.md")
|
||||
.map(|n| n.content.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Get recent journal entries to avoid duplication
|
||||
let key_date_re = Regex::new(r"^journal\.md#j-(\d{4}-\d{2}-\d{2}[t-]\d{2}-\d{2})").unwrap();
|
||||
let date_re = Regex::new(r"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2})").unwrap();
|
||||
let mut journal: Vec<_> = store.nodes.values()
|
||||
.filter(|node| node.key.starts_with("journal.md#j-"))
|
||||
.collect();
|
||||
journal.sort_by(|a, b| {
|
||||
let ak = key_date_re.captures(&a.key).map(|c| c[1].to_string())
|
||||
.or_else(|| date_re.captures(&a.content).map(|c| c[1].to_string()))
|
||||
.unwrap_or_default();
|
||||
let bk = key_date_re.captures(&b.key).map(|c| c[1].to_string())
|
||||
.or_else(|| date_re.captures(&b.content).map(|c| c[1].to_string()))
|
||||
.unwrap_or_default();
|
||||
ak.cmp(&bk)
|
||||
});
|
||||
let recent: String = journal.iter().rev().take(10)
|
||||
.map(|n| format!("---\n{}\n", n.content))
|
||||
.collect();
|
||||
|
||||
let keys = semantic_keys(store);
|
||||
let keys_text: String = keys.iter()
|
||||
.map(|k| format!(" - {}", k))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let prompt = neuro::load_prompt("experience", &[
|
||||
("{{IDENTITY}}", &identity),
|
||||
("{{RECENT_JOURNAL}}", &recent),
|
||||
("{{KEYS}}", &keys_text),
|
||||
("{{CONVERSATION}}", &conversation),
|
||||
])?;
|
||||
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
|
||||
|
||||
println!(" Calling Sonnet...");
|
||||
let response = call_sonnet(&prompt, 2000)?;
|
||||
|
||||
let entries = parse_json_response(&response)?;
|
||||
let entries = match entries.as_array() {
|
||||
Some(arr) => arr.clone(),
|
||||
None => return Err("expected JSON array".to_string()),
|
||||
};
|
||||
|
||||
if entries.is_empty() {
|
||||
println!(" No missed experiences found.");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
println!(" Found {} experiential moments:", entries.len());
|
||||
let mut count = 0;
|
||||
for entry in &entries {
|
||||
let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if content.is_empty() { continue; }
|
||||
|
||||
// Format with timestamp header
|
||||
let full_content = if ts.is_empty() {
|
||||
content.to_string()
|
||||
} else {
|
||||
format!("## {}\n\n{}", ts, content)
|
||||
};
|
||||
|
||||
// Generate key from timestamp
|
||||
let key_slug: String = content.chars()
|
||||
.filter(|c| c.is_alphanumeric() || *c == ' ')
|
||||
.take(50)
|
||||
.collect::<String>()
|
||||
.trim()
|
||||
.to_lowercase()
|
||||
.replace(' ', "-");
|
||||
let key = if ts.is_empty() {
|
||||
format!("journal.md#j-mined-{}", key_slug)
|
||||
} else {
|
||||
format!("journal.md#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
|
||||
};
|
||||
|
||||
// Check for duplicate
|
||||
if store.nodes.contains_key(&key) {
|
||||
println!(" SKIP {} (duplicate)", key);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Write to store
|
||||
let mut node = new_node(&key, &full_content);
|
||||
node.node_type = store::NodeType::EpisodicSession;
|
||||
node.category = store::Category::Observation;
|
||||
let _ = store.upsert_node(node);
|
||||
count += 1;
|
||||
|
||||
let preview = if content.len() > 80 { &content[..77] } else { content };
|
||||
println!(" + [{}] {}...", ts, preview);
|
||||
}
|
||||
|
||||
// Record this transcript as mined (even if count == 0, to prevent re-runs)
|
||||
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
|
||||
let mut dedup_node = new_node(&dedup_key, &dedup_content);
|
||||
dedup_node.category = store::Category::Task;
|
||||
let _ = store.upsert_node(dedup_node);
|
||||
|
||||
if count > 0 {
|
||||
println!(" Saved {} new journal entries.", count);
|
||||
}
|
||||
store.save()?;
|
||||
println!("Done: {} new entries mined.", count);
|
||||
Ok(count)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue