digest: split into focused modules, externalize prompts

digest.rs was 2328 lines containing 6 distinct subsystems. Split into:
- llm.rs: shared LLM utilities (call_sonnet, parse_json_response, semantic_keys)
- audit.rs: link quality audit with parallel Sonnet batching
- enrich.rs: journal enrichment + experience mining
- consolidate.rs: consolidation pipeline + apply

Externalized all inline prompts to prompts/*.md templates using
neuro::load_prompt with {{PLACEHOLDER}} syntax:
- daily-digest.md, weekly-digest.md, monthly-digest.md
- experience.md, journal-enrich.md, consolidation.md

digest.rs retains temporal digest generation (daily/weekly/monthly/auto)
and date helpers. ~940 lines, down from 2328.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2026-03-03 17:18:18 -05:00
parent 3f644609e1
commit 50da0b7b26
13 changed files with 1642 additions and 1582 deletions

333
src/audit.rs Normal file
View file

@ -0,0 +1,333 @@
// Link audit: walk every link in the graph, batch to Sonnet for quality review.
//
// Each batch of links gets reviewed by Sonnet, which returns per-link actions:
// KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon.
use crate::llm::call_sonnet;
use crate::store::{self, Store, new_relation};
use std::collections::HashSet;
/// One candidate link plus the context Sonnet needs to judge it.
struct LinkInfo {
    rel_idx: usize,               // index into store.relations
    source_key: String,
    target_key: String,
    source_content: String,
    target_content: String,
    strength: f32,
    target_sections: Vec<String>, // section children of a file-level target
}

/// Counts of audit outcomes accumulated across all batches.
pub struct AuditStats {
    pub kept: usize,
    pub deleted: usize,
    pub retargeted: usize,
    pub weakened: usize,
    pub strengthened: usize,
    pub errors: usize,
}

/// Render one batch of links as a review prompt for Sonnet.
///
/// The prompt lists the allowed actions, then each link with its source and
/// target content and (for file-level targets) the candidate sections a
/// RETARGET could point at.
fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String {
    let mut prompt = format!(
        "You are auditing memory graph links for quality (batch {}/{}).\n\n\
         For each numbered link, decide what to do:\n\n\
         KEEP N link is meaningful, leave it\n\
         DELETE N link is noise, accidental, or too generic to be useful\n\
         RETARGET N new_key link points to the right topic area but wrong node;\n\
         \x20 retarget to a more specific section (listed under each link)\n\
         WEAKEN N strength link is marginal; reduce strength (0.1-0.3)\n\
         STRENGTHEN N strength link is important but underweighted; increase (0.8-1.0)\n\n\
         Output exactly one action per link number, nothing else.\n\n\
         Links to review:\n\n",
        batch_num, total_batches);
    for (i, link) in batch.iter().enumerate() {
        let entry = format!(
            "--- Link {} ---\n\
             {} {} (strength={:.2})\n\n\
             Source content:\n{}\n\n\
             Target content:\n{}\n",
            i + 1, link.source_key, link.target_key, link.strength,
            &link.source_content, &link.target_content);
        prompt.push_str(&entry);
        if !link.target_sections.is_empty() {
            prompt.push_str(
                "\nTarget has sections (consider RETARGET to a more specific one):\n");
            for section in &link.target_sections {
                prompt.push_str(&format!(" - {}\n", section));
            }
        }
        prompt.push('\n');
    }
    prompt
}
/// Parse Sonnet's audit response into (zero-based link index, action) pairs.
///
/// Expected line shape is `ACTION N [arg]`. Lines that don't parse, whose N
/// falls outside `1..=batch_size`, or whose required argument is missing or
/// malformed are silently skipped.
fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> {
    response.lines()
        .filter_map(|raw| {
            let line = raw.trim();
            let mut parts = line.splitn(3, ' ');
            let verb = parts.next()?.to_uppercase();
            let n: usize = parts.next()?.parse().ok()?;
            if !(1..=batch_size).contains(&n) {
                return None;
            }
            let arg = parts.next().map(str::trim);
            let action = match verb.as_str() {
                "KEEP" => AuditAction::Keep,
                "DELETE" => AuditAction::Delete,
                "RETARGET" => AuditAction::Retarget(arg?.to_string()),
                "WEAKEN" => AuditAction::Weaken(arg?.parse().ok()?),
                "STRENGTHEN" => AuditAction::Strengthen(arg?.parse().ok()?),
                _ => return None,
            };
            Some((n - 1, action))
        })
        .collect()
}

/// What to do with a single audited link.
enum AuditAction {
    Keep,
    Delete,
    Retarget(String),
    Weaken(f32),
    Strengthen(f32),
}
/// Run a full link audit: walk every link, batch to Sonnet, apply results.
///
/// Collects every non-deleted relation with its source/target content, packs
/// them into ~100K-character prompt batches, reviews the batches in parallel
/// via rayon, then tallies KEEP/DELETE/RETARGET/WEAKEN/STRENGTHEN actions.
/// Links Sonnet does not mention are counted as kept. Changes are written to
/// the store (and saved) only when `apply` is true; otherwise this is a dry
/// run that only prints what would happen.
pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String> {
    // Collect all non-deleted relations with their info
    let mut links: Vec<LinkInfo> = Vec::new();
    for (idx, rel) in store.relations.iter().enumerate() {
        if rel.deleted { continue; }
        let source_content = store.nodes.get(&rel.source_key)
            .map(|n| n.content.clone()).unwrap_or_default();
        let target_content = store.nodes.get(&rel.target_key)
            .map(|n| n.content.clone()).unwrap_or_default();
        // Find section children of target if it's file-level
        // (a key without '#' addresses a whole file; its sections share the
        // "<file>#" prefix and are offered as RETARGET candidates).
        let target_sections = if !rel.target_key.contains('#') {
            let prefix = format!("{}#", rel.target_key);
            store.nodes.keys()
                .filter(|k| k.starts_with(&prefix))
                .cloned()
                .collect()
        } else {
            Vec::new()
        };
        links.push(LinkInfo {
            rel_idx: idx,
            source_key: rel.source_key.clone(),
            target_key: rel.target_key.clone(),
            source_content,
            target_content,
            strength: rel.strength,
            target_sections,
        });
    }
    let total = links.len();
    println!("Link audit: {} links to review", total);
    if !apply {
        println!("DRY RUN — use --apply to make changes");
    }
    // Batch by char budget (~100K chars per prompt)
    let char_budget = 100_000usize;
    let mut batches: Vec<Vec<usize>> = Vec::new();
    let mut current_batch: Vec<usize> = Vec::new();
    let mut current_chars = 0usize;
    for (i, link) in links.iter().enumerate() {
        // +200 approximates the per-link prompt framing around the content.
        let link_chars = link.source_content.len() + link.target_content.len() + 200;
        if !current_batch.is_empty() && current_chars + link_chars > char_budget {
            batches.push(std::mem::take(&mut current_batch));
            current_chars = 0;
        }
        current_batch.push(i);
        current_chars += link_chars;
    }
    if !current_batch.is_empty() {
        batches.push(current_batch);
    }
    let total_batches = batches.len();
    println!("{} batches (avg {} links/batch)\n", total_batches,
        if total_batches > 0 { total / total_batches } else { 0 });
    use rayon::prelude::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    // Build all batch prompts up front
    let batch_data: Vec<(usize, Vec<LinkInfo>, String)> = batches.iter().enumerate()
        .map(|(batch_idx, batch_indices)| {
            // Field-by-field copy — LinkInfo does not derive Clone.
            let batch_infos: Vec<LinkInfo> = batch_indices.iter().map(|&i| {
                let l = &links[i];
                LinkInfo {
                    rel_idx: l.rel_idx,
                    source_key: l.source_key.clone(),
                    target_key: l.target_key.clone(),
                    source_content: l.source_content.clone(),
                    target_content: l.target_content.clone(),
                    strength: l.strength,
                    target_sections: l.target_sections.clone(),
                }
            }).collect();
            let prompt = build_audit_prompt(&batch_infos, batch_idx + 1, total_batches);
            (batch_idx, batch_infos, prompt)
        })
        .collect();
    // Progress counter (shared across rayon workers)
    let done = AtomicUsize::new(0);
    // Run batches in parallel via rayon
    let batch_results: Vec<_> = batch_data.par_iter()
        .map(|(batch_idx, batch_infos, prompt)| {
            let response = call_sonnet(prompt, 300);
            let completed = done.fetch_add(1, Ordering::Relaxed) + 1;
            eprint!("\r Batches: {}/{} done", completed, total_batches);
            (*batch_idx, batch_infos, response)
        })
        .collect();
    eprintln!(); // newline after progress
    // Process results sequentially; mutations are deferred into the vecs
    // below so the dry run prints without touching the store.
    let mut stats = AuditStats {
        kept: 0, deleted: 0, retargeted: 0, weakened: 0, strengthened: 0, errors: 0,
    };
    let mut deletions: Vec<usize> = Vec::new();
    let mut retargets: Vec<(usize, String)> = Vec::new();
    let mut strength_changes: Vec<(usize, f32)> = Vec::new();
    for (batch_idx, batch_infos, response) in &batch_results {
        let response = match response {
            Ok(r) => r,
            Err(e) => {
                // A failed batch counts every one of its links as an error.
                eprintln!(" Batch {}: error: {}", batch_idx + 1, e);
                stats.errors += batch_infos.len();
                continue;
            }
        };
        let actions = parse_audit_response(response, batch_infos.len());
        let mut responded: HashSet<usize> = HashSet::new();
        for (idx, action) in &actions {
            responded.insert(*idx);
            let link = &batch_infos[*idx];
            match action {
                AuditAction::Keep => {
                    stats.kept += 1;
                }
                AuditAction::Delete => {
                    println!(" DELETE {}{}", link.source_key, link.target_key);
                    deletions.push(link.rel_idx);
                    stats.deleted += 1;
                }
                AuditAction::Retarget(new_target) => {
                    println!(" RETARGET {}{} (was {})",
                        link.source_key, new_target, link.target_key);
                    retargets.push((link.rel_idx, new_target.clone()));
                    stats.retargeted += 1;
                }
                AuditAction::Weaken(s) => {
                    println!(" WEAKEN {}{} (str {:.2}{:.2})",
                        link.source_key, link.target_key, link.strength, s);
                    strength_changes.push((link.rel_idx, *s));
                    stats.weakened += 1;
                }
                AuditAction::Strengthen(s) => {
                    println!(" STRENGTHEN {}{} (str {:.2}{:.2})",
                        link.source_key, link.target_key, link.strength, s);
                    strength_changes.push((link.rel_idx, *s));
                    stats.strengthened += 1;
                }
            }
        }
        // Links Sonnet didn't mention default to KEEP.
        for i in 0..batch_infos.len() {
            if !responded.contains(&i) {
                stats.kept += 1;
            }
        }
        // NOTE: counts printed here are cumulative across batches, not
        // per-batch deltas.
        println!(" Batch {}/{}: +{}kept +{}del +{}retarget +{}weak +{}strong",
            batch_idx + 1, total_batches,
            stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened);
    }
    // Apply changes
    if apply && (stats.deleted > 0 || stats.retargeted > 0
        || stats.weakened > 0 || stats.strengthened > 0) {
        println!("\nApplying changes...");
        // Deletions: soft-delete
        for rel_idx in &deletions {
            store.relations[*rel_idx].deleted = true;
        }
        // Strength changes
        for (rel_idx, new_strength) in &strength_changes {
            store.relations[*rel_idx].strength = *new_strength;
        }
        // Retargets: soft-delete old, create new
        for (rel_idx, new_target) in &retargets {
            let source_key = store.relations[*rel_idx].source_key.clone();
            let old_strength = store.relations[*rel_idx].strength;
            // Zero UUID is the "node not found" sentinel checked below.
            let source_uuid = store.nodes.get(&source_key)
                .map(|n| n.uuid).unwrap_or([0u8; 16]);
            let target_uuid = store.nodes.get(new_target)
                .map(|n| n.uuid).unwrap_or([0u8; 16]);
            // Soft-delete old
            store.relations[*rel_idx].deleted = true;
            // Create new (only if Sonnet's suggested target actually exists)
            if target_uuid != [0u8; 16] {
                let new_rel = new_relation(
                    source_uuid, target_uuid,
                    store::RelationType::Auto,
                    old_strength,
                    &source_key, new_target,
                );
                store.add_relation(new_rel).ok();
            }
        }
        store.save()?;
        println!("Saved.");
    }
    Ok(stats)
}

491
src/consolidate.rs Normal file
View file

@ -0,0 +1,491 @@
// Consolidation pipeline: plan → agents → apply → digests → links
//
// consolidate_full() runs the full autonomous consolidation:
// 1. Plan: analyze metrics, allocate agents
// 2. Execute: run each agent (Sonnet calls), save reports
// 3. Apply: extract and apply actions from reports
// 4. Digest: generate missing daily/weekly/monthly digests
// 5. Links: apply links extracted from digests
// 6. Summary: final metrics comparison
//
// apply_consolidation() processes consolidation reports independently.
use crate::digest;
use crate::llm::{call_sonnet, parse_json_response};
use crate::neuro;
use crate::store::{self, Store, new_relation};
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
/// Directory where agent reports and logs live; created on demand.
fn agent_results_dir() -> PathBuf {
    let results = store::memory_dir().join("agent-results");
    // Best-effort create: a failure surfaces later when a write is attempted.
    let _ = fs::create_dir_all(&results);
    results
}
/// Simple append-only log writer for consolidate-full.
///
/// The log file is created (truncating any previous run's log) once in
/// `new`, and the handle is kept open so each `write` is a single `writeln!`
/// rather than a re-open of the file per line.
struct LogWriter {
    file: fs::File,
}
impl LogWriter {
    /// Create (or truncate) the log file at `path`.
    fn new(path: &Path) -> Result<Self, String> {
        let file = fs::File::create(path)
            .map_err(|e| format!("create log: {}", e))?;
        Ok(LogWriter { file })
    }
    /// Append one line (a trailing newline is added) to the log.
    fn write(&mut self, line: &str) -> Result<(), String> {
        writeln!(self.file, "{}", line)
            .map_err(|e| format!("write log: {}", e))
    }
}
/// Run the full autonomous consolidation pipeline with logging.
///
/// Stages: plan → execute agents → apply report actions → link orphans →
/// cap node degree → generate missing digests → apply digest links →
/// summary. The store is reloaded from disk between stages so each stage
/// observes the previous stage's saved changes. Progress is mirrored to
/// stdout and to agent-results/consolidate-full.log.
pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
    // Expand `count` agent executions into runs of at most 5 per batch.
    fn push_batched<'a>(runs: &mut Vec<(&'a str, usize)>, agent_type: &'a str, count: usize) {
        let mut remaining = count;
        while remaining > 0 {
            let this_batch = remaining.min(5);
            runs.push((agent_type, this_batch));
            remaining -= this_batch;
        }
    }
    let start = std::time::Instant::now();
    let log_path = agent_results_dir().join("consolidate-full.log");
    let mut log = LogWriter::new(&log_path)?;
    log.write("=== CONSOLIDATE FULL ===")?;
    log.write(&format!("Started: {}", store::format_datetime(store::now_epoch())))?;
    log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?;
    log.write("")?;
    // --- Step 1: Plan ---
    log.write("--- Step 1: Plan ---")?;
    let plan = neuro::consolidation_plan(store);
    let plan_text = neuro::format_plan(&plan);
    log.write(&plan_text)?;
    println!("{}", plan_text);
    let total_agents = plan.replay_count + plan.linker_count
        + plan.separator_count + plan.transfer_count
        + if plan.run_health { 1 } else { 0 };
    log.write(&format!("Total agents to run: {}", total_agents))?;
    // --- Step 2: Execute agents ---
    log.write("\n--- Step 2: Execute agents ---")?;
    let mut reports: Vec<PathBuf> = Vec::new();
    let mut agent_num = 0usize;
    let mut agent_errors = 0usize;
    // Build the list of (agent_type, batch_size) runs. Health runs once with
    // batch 0; the others are split into batches of up to 5.
    let mut runs: Vec<(&str, usize)> = Vec::new();
    if plan.run_health {
        runs.push(("health", 0));
    }
    push_batched(&mut runs, "replay", plan.replay_count);
    push_batched(&mut runs, "linker", plan.linker_count);
    push_batched(&mut runs, "separator", plan.separator_count);
    push_batched(&mut runs, "transfer", plan.transfer_count);
    for (agent_type, count) in &runs {
        agent_num += 1;
        let label = if *count > 0 {
            format!("[{}/{}] {} (batch={})", agent_num, runs.len(), agent_type, count)
        } else {
            format!("[{}/{}] {}", agent_num, runs.len(), agent_type)
        };
        log.write(&format!("\n{}", label))?;
        println!("{}", label);
        // Reload store to pick up changes from previous agents
        if agent_num > 1 {
            *store = Store::load()?;
        }
        let prompt = match neuro::agent_prompt(store, agent_type, *count) {
            Ok(p) => p,
            Err(e) => {
                let msg = format!(" ERROR building prompt: {}", e);
                log.write(&msg)?;
                eprintln!("{}", msg);
                agent_errors += 1;
                continue;
            }
        };
        log.write(&format!(" Prompt: {} chars (~{} tokens)",
            prompt.len(), prompt.len() / 4))?;
        let response = match call_sonnet(&prompt, 300) {
            Ok(r) => r,
            Err(e) => {
                let msg = format!(" ERROR from Sonnet: {}", e);
                log.write(&msg)?;
                eprintln!("{}", msg);
                agent_errors += 1;
                continue;
            }
        };
        // Save report, keyed by agent type and a filesystem-safe timestamp.
        let ts = store::format_datetime(store::now_epoch())
            .replace([':', '-', 'T'], "");
        let report_name = format!("consolidation-{}-{}.md", agent_type, ts);
        let report_path = agent_results_dir().join(&report_name);
        fs::write(&report_path, &response)
            .map_err(|e| format!("write report: {}", e))?;
        reports.push(report_path.clone());
        let msg = format!(" Done: {} lines → {}", response.lines().count(), report_name);
        log.write(&msg)?;
        println!("{}", msg);
    }
    log.write(&format!("\nAgents complete: {} run, {} errors",
        agent_num - agent_errors, agent_errors))?;
    // --- Step 3: Apply consolidation actions ---
    log.write("\n--- Step 3: Apply consolidation actions ---")?;
    println!("\n--- Applying consolidation actions ---");
    *store = Store::load()?;
    if reports.is_empty() {
        log.write(" No reports to apply.")?;
    } else {
        match apply_consolidation(store, true, None) {
            Ok(()) => log.write(" Applied.")?,
            Err(e) => {
                let msg = format!(" ERROR applying consolidation: {}", e);
                log.write(&msg)?;
                eprintln!("{}", msg);
            }
        }
    }
    // --- Step 3b: Link orphans ---
    log.write("\n--- Step 3b: Link orphans ---")?;
    println!("\n--- Linking orphan nodes ---");
    *store = Store::load()?;
    let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15);
    log.write(&format!(" {} orphans, {} links added", lo_orphans, lo_added))?;
    // --- Step 3c: Cap degree ---
    log.write("\n--- Step 3c: Cap degree ---")?;
    println!("\n--- Capping node degree ---");
    *store = Store::load()?;
    match store.cap_degree(50) {
        Ok((hubs, pruned)) => {
            store.save()?;
            log.write(&format!(" {} hubs capped, {} edges pruned", hubs, pruned))?;
        }
        Err(e) => log.write(&format!(" ERROR: {}", e))?,
    }
    // --- Step 4: Digest auto ---
    log.write("\n--- Step 4: Digest auto ---")?;
    println!("\n--- Generating missing digests ---");
    *store = Store::load()?;
    match digest::digest_auto(store) {
        Ok(()) => log.write(" Digests done.")?,
        Err(e) => {
            let msg = format!(" ERROR in digest auto: {}", e);
            log.write(&msg)?;
            eprintln!("{}", msg);
        }
    }
    // --- Step 5: Apply digest links ---
    log.write("\n--- Step 5: Apply digest links ---")?;
    println!("\n--- Applying digest links ---");
    *store = Store::load()?;
    let links = digest::parse_all_digest_links();
    let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links);
    store.save()?;
    log.write(&format!(" {} links applied, {} skipped, {} fallbacks",
        applied, skipped, fallbacks))?;
    // --- Step 6: Summary ---
    let elapsed = start.elapsed();
    log.write("\n--- Summary ---")?;
    log.write(&format!("Finished: {}", store::format_datetime(store::now_epoch())))?;
    log.write(&format!("Duration: {:.0}s", elapsed.as_secs_f64()))?;
    *store = Store::load()?;
    log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?;
    let summary = format!(
        "\n=== CONSOLIDATE FULL COMPLETE ===\n\
         Duration: {:.0}s\n\
         Agents: {} run, {} errors\n\
         Nodes: {} Relations: {}\n\
         Log: {}\n",
        elapsed.as_secs_f64(),
        agent_num - agent_errors, agent_errors,
        store.nodes.len(), store.relations.len(),
        log_path.display(),
    );
    log.write(&summary)?;
    println!("{}", summary);
    Ok(())
}
/// Find the most recent set of consolidation reports.
///
/// Reports are named `consolidation-<agent>-<timestamp>.md`; every report
/// sharing the newest timestamp (the last '-'-separated stem segment) forms
/// one set.
fn find_consolidation_reports() -> Vec<PathBuf> {
    let mut found: Vec<PathBuf> = Vec::new();
    if let Ok(entries) = fs::read_dir(agent_results_dir()) {
        for entry in entries.flatten() {
            let path = entry.path();
            let is_report = path.file_name()
                .and_then(|n| n.to_str())
                .map_or(false, |n| n.starts_with("consolidation-") && n.ends_with(".md"));
            if is_report {
                found.push(path);
            }
        }
    }
    // Newest first — timestamps sort lexicographically.
    found.sort_by(|a, b| b.cmp(a));
    if found.is_empty() {
        return found;
    }
    // Timestamp is the last '-'-separated segment of the stem.
    let latest_ts = found[0].file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("")
        .rsplit('-').next().unwrap_or("")
        .to_string();
    found.retain(|p| {
        p.file_stem()
            .and_then(|s| s.to_str())
            .unwrap_or("")
            .ends_with(latest_ts.as_str())
    });
    found
}
/// Concatenate all report files and splice them into the consolidation
/// prompt template. Fails if any report cannot be read.
fn build_consolidation_prompt(reports: &[PathBuf]) -> Result<String, String> {
    let divider = "=".repeat(60);
    let mut combined = String::new();
    for report in reports {
        let body = fs::read_to_string(report)
            .map_err(|e| format!("read {}: {}", report.display(), e))?;
        let stem = report.file_stem().and_then(|s| s.to_str()).unwrap_or("");
        combined.push('\n');
        combined.push_str(&divider);
        combined.push_str(&format!("\n## Report: {}\n\n{}\n", stem, body));
    }
    neuro::load_prompt("consolidation", &[("{{REPORTS}}", &combined)])
}
/// Run the full apply-consolidation pipeline.
///
/// Loads either the given `report_file` or the most recent set of
/// consolidation reports, has Sonnet extract a JSON array of actions
/// (`link`, `categorize`, `manual`), saves the raw actions to
/// agent-results, then either prints a dry-run summary (`do_apply` false)
/// or applies links/categorizations to the store and saves it. `manual`
/// actions are only ever printed for a human to act on.
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Option<&str>) -> Result<(), String> {
    let reports = if let Some(path) = report_file {
        vec![PathBuf::from(path)]
    } else {
        find_consolidation_reports()
    };
    if reports.is_empty() {
        println!("No consolidation reports found.");
        println!("Run consolidation-agents first.");
        return Ok(());
    }
    println!("Found {} reports:", reports.len());
    for r in &reports {
        println!(" {}", r.file_name().and_then(|s| s.to_str()).unwrap_or("?"));
    }
    println!("\nExtracting actions from reports...");
    let prompt = build_consolidation_prompt(&reports)?;
    println!(" Prompt: {} chars", prompt.len());
    let response = call_sonnet(&prompt, 300)?;
    let actions_value = parse_json_response(&response)?;
    let actions = actions_value.as_array()
        .ok_or("expected JSON array of actions")?;
    println!(" {} actions extracted", actions.len());
    // Save actions (audit trail of what Sonnet proposed, applied or not)
    let timestamp = store::format_datetime(store::now_epoch())
        .replace([':', '-'], "");
    let actions_path = agent_results_dir()
        .join(format!("consolidation-actions-{}.json", timestamp));
    fs::write(&actions_path, serde_json::to_string_pretty(&actions_value).unwrap())
        .map_err(|e| format!("write {}: {}", actions_path.display(), e))?;
    println!(" Saved: {}", actions_path.display());
    // Partition by action kind; unknown kinds are silently ignored.
    let link_actions: Vec<_> = actions.iter()
        .filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("link"))
        .collect();
    let cat_actions: Vec<_> = actions.iter()
        .filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("categorize"))
        .collect();
    let manual_actions: Vec<_> = actions.iter()
        .filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("manual"))
        .collect();
    if !do_apply {
        // Dry run: print every proposed action, change nothing.
        println!("\n{}", "=".repeat(60));
        println!("DRY RUN — {} actions proposed", actions.len());
        println!("{}\n", "=".repeat(60));
        if !link_actions.is_empty() {
            println!("## Links to add ({})\n", link_actions.len());
            for (i, a) in link_actions.iter().enumerate() {
                let src = a.get("source").and_then(|v| v.as_str()).unwrap_or("?");
                let tgt = a.get("target").and_then(|v| v.as_str()).unwrap_or("?");
                let reason = a.get("reason").and_then(|v| v.as_str()).unwrap_or("");
                println!(" {:2}. {}{} ({})", i + 1, src, tgt, reason);
            }
        }
        if !cat_actions.is_empty() {
            println!("\n## Categories to set ({})\n", cat_actions.len());
            for a in &cat_actions {
                let key = a.get("key").and_then(|v| v.as_str()).unwrap_or("?");
                let cat = a.get("category").and_then(|v| v.as_str()).unwrap_or("?");
                let reason = a.get("reason").and_then(|v| v.as_str()).unwrap_or("");
                println!(" {}{} ({})", key, cat, reason);
            }
        }
        if !manual_actions.is_empty() {
            println!("\n## Manual actions needed ({})\n", manual_actions.len());
            for a in &manual_actions {
                let prio = a.get("priority").and_then(|v| v.as_str()).unwrap_or("?");
                let desc = a.get("description").and_then(|v| v.as_str()).unwrap_or("?");
                println!(" [{}] {}", prio, desc);
            }
        }
        println!("\n{}", "=".repeat(60));
        println!("To apply: poc-memory apply-consolidation --apply");
        println!("{}", "=".repeat(60));
        return Ok(());
    }
    // Apply
    let mut applied = 0usize;
    let mut skipped = 0usize;
    if !link_actions.is_empty() {
        println!("\nApplying {} links...", link_actions.len());
        for a in &link_actions {
            let src = a.get("source").and_then(|v| v.as_str()).unwrap_or("");
            let tgt = a.get("target").and_then(|v| v.as_str()).unwrap_or("");
            if src.is_empty() || tgt.is_empty() { skipped += 1; continue; }
            let source = match store.resolve_key(src) {
                Ok(s) => s,
                Err(e) => { println!(" ? {}{}: {}", src, tgt, e); skipped += 1; continue; }
            };
            let target = match store.resolve_key(tgt) {
                Ok(t) => t,
                Err(e) => { println!(" ? {}{}: {}", src, tgt, e); skipped += 1; continue; }
            };
            // Refine target to best-matching section
            let source_content = store.nodes.get(&source)
                .map(|n| n.content.as_str()).unwrap_or("");
            let target = neuro::refine_target(store, source_content, &target);
            // Skip links that already exist (non-deleted) in either store.
            let exists = store.relations.iter().any(|r|
                r.source_key == source && r.target_key == target && !r.deleted
            );
            if exists { skipped += 1; continue; }
            let source_uuid = match store.nodes.get(&source) { Some(n) => n.uuid, None => { skipped += 1; continue; } };
            let target_uuid = match store.nodes.get(&target) { Some(n) => n.uuid, None => { skipped += 1; continue; } };
            let rel = new_relation(
                source_uuid, target_uuid,
                store::RelationType::Auto,
                0.5,
                &source, &target,
            );
            if store.add_relation(rel).is_ok() {
                println!(" + {}{}", source, target);
                applied += 1;
            }
        }
    }
    if !cat_actions.is_empty() {
        println!("\nApplying {} categorizations...", cat_actions.len());
        for a in &cat_actions {
            let key = a.get("key").and_then(|v| v.as_str()).unwrap_or("");
            let cat = a.get("category").and_then(|v| v.as_str()).unwrap_or("");
            if key.is_empty() || cat.is_empty() { continue; }
            let resolved = match store.resolve_key(key) {
                Ok(r) => r,
                Err(_) => { println!(" ? {}{}: not found", key, cat); skipped += 1; continue; }
            };
            if store.categorize(&resolved, cat).is_ok() {
                println!(" + {}{}", resolved, cat);
                applied += 1;
            } else {
                skipped += 1;
            }
        }
    }
    if !manual_actions.is_empty() {
        println!("\n## Manual actions (not auto-applied):\n");
        for a in &manual_actions {
            let prio = a.get("priority").and_then(|v| v.as_str()).unwrap_or("?");
            let desc = a.get("description").and_then(|v| v.as_str()).unwrap_or("?");
            println!(" [{}] {}", prio, desc);
        }
    }
    if applied > 0 {
        store.save()?;
    }
    println!("\n{}", "=".repeat(60));
    println!("Applied: {} Skipped: {} Manual: {}", applied, skipped, manual_actions.len());
    println!("{}", "=".repeat(60));
    Ok(())
}

File diff suppressed because it is too large Load diff

346
src/enrich.rs Normal file
View file

@ -0,0 +1,346 @@
// Journal enrichment and experience mining
//
// Two modes of processing conversation transcripts:
// journal_enrich — enrich a specific journal entry with source location and links
// experience_mine — retroactively find experiential moments not yet journaled
//
// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
// and apply results to the store.
use crate::llm::{call_sonnet, parse_json_response, semantic_keys};
use crate::neuro;
use crate::store::{self, Store, new_node, new_relation};
use regex::Regex;
use std::collections::hash_map::DefaultHasher;
use std::fs;
use std::hash::{Hash, Hasher};
/// Directory for agent results under the memory dir; created if missing.
fn agent_results_dir() -> std::path::PathBuf {
    let path = store::memory_dir().join("agent-results");
    // Best-effort create; later writes will report the real error.
    let _ = fs::create_dir_all(&path);
    path
}
/// Extract user/assistant messages with line numbers from a JSONL transcript.
/// Returns `(line_number, role, text, timestamp)` tuples; line numbers are
/// 1-indexed. Unparseable lines, non-user/assistant records, and empty
/// messages are skipped.
fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
    let raw = fs::read_to_string(jsonl_path)
        .map_err(|e| format!("read {}: {}", jsonl_path, e))?;
    let mut messages = Vec::new();
    for (line_no, line) in raw.lines().enumerate() {
        let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else {
            continue;
        };
        let role = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
        if role != "user" && role != "assistant" { continue; }
        let timestamp = obj.get("timestamp")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        // Message body lives under "message" when present; some records put
        // "content" at the top level, so fall back to the record itself.
        let msg = obj.get("message").unwrap_or(&obj);
        let text = match msg.get("content") {
            Some(serde_json::Value::String(s)) => s.clone(),
            Some(serde_json::Value::Array(blocks)) => blocks.iter()
                .filter_map(|block| {
                    block.get("text")
                        .and_then(|v| v.as_str())
                        .map(|t| t.to_string())
                        .or_else(|| block.as_str().map(|s| s.to_string()))
                })
                .collect::<Vec<_>>()
                .join("\n"),
            _ => continue,
        };
        let trimmed = text.trim();
        if trimmed.is_empty() { continue; }
        messages.push((line_no + 1, role.to_string(), trimmed.to_string(), timestamp));
    }
    Ok(messages)
}
/// Format conversation messages for the prompt.
///
/// Messages longer than 2000 bytes are truncated to at most 1800 bytes —
/// snapped back to a UTF-8 character boundary — and marked `[truncated]`.
/// Timestamps are trimmed to their first 19 chars (`YYYY-MM-DDTHH:MM:SS`).
fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
    messages.iter()
        .map(|(line, role, text, ts)| {
            let text = if text.len() > 2000 {
                // Walk back to a char boundary so we never split a multi-byte
                // UTF-8 sequence (stable replacement for the nightly-only
                // `str::floor_char_boundary`).
                let mut cut = 1800;
                while !text.is_char_boundary(cut) {
                    cut -= 1;
                }
                format!("{}...[truncated]", &text[..cut])
            } else {
                text.clone()
            };
            if ts.is_empty() {
                format!("L{} [{}]: {}", line, role, text)
            } else {
                format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text)
            }
        })
        .collect::<Vec<_>>()
        .join("\n\n")
}
/// Build the journal-enrichment prompt from its template, splicing in the
/// entry text, the candidate link keys (one " - key" bullet per line), the
/// conversation transcript, and the grep line number.
fn build_journal_prompt(
    entry_text: &str,
    conversation: &str,
    keys: &[String],
    grep_line: usize,
) -> Result<String, String> {
    let mut keys_text = String::new();
    for (i, key) in keys.iter().enumerate() {
        if i > 0 {
            keys_text.push('\n');
        }
        keys_text.push_str(&format!(" - {}", key));
    }
    neuro::load_prompt("journal-enrich", &[
        ("{{GREP_LINE}}", &grep_line.to_string()),
        ("{{ENTRY_TEXT}}", entry_text),
        ("{{KEYS}}", &keys_text),
        ("{{CONVERSATION}}", conversation),
    ])
}
/// Enrich a journal entry with conversation context and link proposals.
///
/// Extracts the conversation from `jsonl_path`, asks Sonnet where in the
/// transcript the entry originated (`source_start`/`source_end`) and which
/// graph nodes it should link to, applies the proposed links to the store,
/// and saves both the raw agent result (to agent-results/) and the store.
/// `grep_line` is the transcript line where the entry text was found and is
/// passed through to the prompt as an anchor.
pub fn journal_enrich(
    store: &mut Store,
    jsonl_path: &str,
    entry_text: &str,
    grep_line: usize,
) -> Result<(), String> {
    println!("Extracting conversation from {}...", jsonl_path);
    let messages = extract_conversation(jsonl_path)?;
    let conversation = format_conversation(&messages);
    println!(" {} messages, {} chars", messages.len(), conversation.len());
    let keys = semantic_keys(store);
    println!(" {} semantic keys", keys.len());
    let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
    println!(" Calling Sonnet...");
    let response = call_sonnet(&prompt, 300)?;
    let result = parse_json_response(&response)?;
    // Report results
    let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
    let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
    let links = result.get("links").and_then(|v| v.as_array());
    let insights = result.get("missed_insights").and_then(|v| v.as_array());
    println!(" Source: L{}-L{}", source_start, source_end);
    println!(" Links: {}", links.map_or(0, |l| l.len()));
    println!(" Missed insights: {}", insights.map_or(0, |l| l.len()));
    // Apply links
    if let Some(links) = links {
        for link in links {
            let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
            let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
            // "NOTE:..." targets are commentary from the model, not keys:
            // print them and move on.
            if target.is_empty() || target.starts_with("NOTE:") {
                if let Some(note) = target.strip_prefix("NOTE:") {
                    println!(" NOTE: {}{}", note, reason);
                }
                continue;
            }
            // Resolve target and find journal node
            let resolved = match store.resolve_key(target) {
                Ok(r) => r,
                Err(_) => { println!(" SKIP {} (not in graph)", target); continue; }
            };
            let source_key = match store.find_journal_node(entry_text) {
                Some(k) => k,
                None => { println!(" SKIP {} (no matching journal node)", target); continue; }
            };
            // Refine target to best-matching section
            let source_content = store.nodes.get(&source_key)
                .map(|n| n.content.as_str()).unwrap_or("");
            let resolved = neuro::refine_target(store, source_content, &resolved);
            let source_uuid = match store.nodes.get(&source_key) {
                Some(n) => n.uuid,
                None => continue,
            };
            let target_uuid = match store.nodes.get(&resolved) {
                Some(n) => n.uuid,
                None => continue,
            };
            let rel = new_relation(
                source_uuid, target_uuid,
                store::RelationType::Link,
                0.5,
                &source_key, &resolved,
            );
            if store.add_relation(rel).is_ok() {
                println!(" LINK {}{} ({})", source_key, resolved, reason);
            }
        }
    }
    // Save result to agent-results
    let timestamp = store::format_datetime(store::now_epoch())
        .replace([':', '-'], "");
    let result_file = agent_results_dir()
        .join(format!("{}.json", timestamp));
    let output = serde_json::json!({
        "timestamp": timestamp,
        "jsonl_path": jsonl_path,
        // NOTE(review): byte-indexed truncation — panics if byte 500 splits a
        // multi-byte UTF-8 char; confirm entry_text is ASCII-safe here.
        "entry_text": &entry_text[..entry_text.len().min(500)],
        "agent_result": result,
    });
    fs::write(&result_file, serde_json::to_string_pretty(&output).unwrap())
        .map_err(|e| format!("write {}: {}", result_file.display(), e))?;
    println!(" Results saved: {}", result_file.display());
    store.save()?;
    Ok(())
}
/// Mine a conversation transcript for experiential moments not yet journaled.
///
/// Dedup is transcript-level: the file bytes are hashed and a marker node
/// (`_mined-transcripts.md#h-<hash>`) is written even when nothing new is
/// found, so re-running on the same transcript is a no-op. Sonnet is prompted
/// with identity plus the 10 most recent journal entries to avoid duplicating
/// what is already journaled.
///
/// Returns the number of new journal entries created.
pub fn experience_mine(
    store: &mut Store,
    jsonl_path: &str,
) -> Result<usize, String> {
    println!("Experience mining: {}", jsonl_path);
    // Transcript-level dedup: hash the file content and check if already mined
    let transcript_bytes = fs::read(jsonl_path)
        .map_err(|e| format!("reading transcript: {}", e))?;
    let mut hasher = DefaultHasher::new();
    transcript_bytes.hash(&mut hasher);
    let hash = hasher.finish();
    let dedup_key = format!("_mined-transcripts.md#h-{:016x}", hash);
    if store.nodes.contains_key(&dedup_key) {
        // `[24..]` skips the constant ASCII prefix "_mined-transcripts.md#h-"
        // (24 bytes), leaving just the hex hash for display.
        println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]);
        return Ok(0);
    }
    let messages = extract_conversation(jsonl_path)?;
    let conversation = format_conversation(&messages);
    println!(" {} messages, {} chars", messages.len(), conversation.len());
    // Load identity
    let identity = store.nodes.get("identity.md")
        .map(|n| n.content.clone())
        .unwrap_or_default();
    // Get recent journal entries to avoid duplication. Sort key: prefer the
    // date embedded in the node key, fall back to a date found in the content.
    let key_date_re = Regex::new(r"^journal\.md#j-(\d{4}-\d{2}-\d{2}[t-]\d{2}-\d{2})").unwrap();
    let date_re = Regex::new(r"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2})").unwrap();
    let mut journal: Vec<_> = store.nodes.values()
        .filter(|node| node.key.starts_with("journal.md#j-"))
        .collect();
    // sort_by_cached_key computes each regex-derived key once, instead of
    // re-running both regexes on every comparison as sort_by did.
    journal.sort_by_cached_key(|n| {
        key_date_re.captures(&n.key).map(|c| c[1].to_string())
            .or_else(|| date_re.captures(&n.content).map(|c| c[1].to_string()))
            .unwrap_or_default()
    });
    let recent: String = journal.iter().rev().take(10)
        .map(|n| format!("---\n{}\n", n.content))
        .collect();
    let keys = semantic_keys(store);
    let keys_text: String = keys.iter()
        .map(|k| format!(" - {}", k))
        .collect::<Vec<_>>()
        .join("\n");
    let prompt = neuro::load_prompt("experience", &[
        ("{{IDENTITY}}", &identity),
        ("{{RECENT_JOURNAL}}", &recent),
        ("{{KEYS}}", &keys_text),
        ("{{CONVERSATION}}", &conversation),
    ])?;
    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
    println!(" Calling Sonnet...");
    let response = call_sonnet(&prompt, 2000)?;
    let entries = parse_json_response(&response)?;
    let entries = match entries.as_array() {
        Some(arr) => arr.clone(),
        None => return Err("expected JSON array".to_string()),
    };
    if entries.is_empty() {
        println!(" No missed experiences found.");
        return Ok(0);
    }
    println!(" Found {} experiential moments:", entries.len());
    let mut count = 0;
    for entry in &entries {
        let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
        let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
        if content.is_empty() { continue; }
        // Format with timestamp header
        let full_content = if ts.is_empty() {
            content.to_string()
        } else {
            format!("## {}\n\n{}", ts, content)
        };
        // Generate key slug from the first ~50 chars of content
        let key_slug: String = content.chars()
            .filter(|c| c.is_alphanumeric() || *c == ' ')
            .take(50)
            .collect::<String>()
            .trim()
            .to_lowercase()
            .replace(' ', "-");
        let key = if ts.is_empty() {
            format!("journal.md#j-mined-{}", key_slug)
        } else {
            format!("journal.md#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
        };
        // Check for duplicate
        if store.nodes.contains_key(&key) {
            println!(" SKIP {} (duplicate)", key);
            continue;
        }
        // Write to store as an episodic observation
        let mut node = new_node(&key, &full_content);
        node.node_type = store::NodeType::EpisodicSession;
        node.category = store::Category::Observation;
        let _ = store.upsert_node(node);
        count += 1;
        // Char-boundary-safe preview: the previous `&content[..77]` byte slice
        // panics when byte 77 falls inside a multi-byte UTF-8 character.
        let preview: String = content.chars().take(77).collect();
        println!(" + [{}] {}...", ts, preview);
    }
    // Record this transcript as mined (even if count == 0, to prevent re-runs)
    let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
    let mut dedup_node = new_node(&dedup_key, &dedup_content);
    dedup_node.category = store::Category::Task;
    let _ = store.upsert_node(dedup_node);
    if count > 0 {
        println!(" Saved {} new journal entries.", count);
    }
    store.save()?;
    // NOTE(review): the cmd_experience_mine caller in main.rs prints the same
    // "Done: ... mined." line after this returns — confirm whether one of the
    // two prints should be dropped.
    println!("Done: {} new entries mined.", count);
    Ok(count)
}

87
src/llm.rs Normal file
View file

@ -0,0 +1,87 @@
// LLM utilities: Sonnet invocation and response parsing
//
// Shared by digest, audit, enrich, and consolidate modules.
use crate::store::Store;
use regex::Regex;
use std::fs;
use std::process::Command;
/// Call Sonnet via the `claude` CLI. Returns the trimmed stdout on success.
///
/// The prompt is written to a temp file and piped to the CLI's stdin (large
/// prompts don't fit in argv). `_timeout_secs` is currently unused; the call
/// blocks until the CLI exits.
pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, String> {
    // Thread ID + PID in the file name avoids collisions when rayon runs
    // multiple Sonnet batches in parallel.
    let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
        std::process::id(), std::thread::current().id()));
    fs::write(&tmp, prompt)
        .map_err(|e| format!("write temp prompt: {}", e))?;
    // Build the result before removing the temp file so cleanup runs on every
    // path. The original leaked the temp file when File::open() failed, because
    // its `?` returned before remove_file.
    let result = fs::File::open(&tmp)
        .map_err(|e| format!("open temp: {}", e))
        .and_then(|stdin| {
            Command::new("claude")
                .args(["-p", "--model", "sonnet", "--tools", ""])
                .stdin(stdin)
                .env_remove("CLAUDECODE")
                .output()
                .map_err(|e| format!("spawn claude: {}", e))
        });
    fs::remove_file(&tmp).ok();
    let output = result?;
    if output.status.success() {
        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
    } else {
        let stderr = String::from_utf8_lossy(&output.stderr);
        Err(format!("claude exited {}: {}", output.status, stderr.trim()))
    }
}
/// Parse a JSON response from Sonnet, tolerating markdown code fences.
///
/// Strategy: strip ```json / ``` fences, try a direct parse, then fall back
/// to the first brace-delimited object or bracket-delimited array span the
/// regexes find anywhere in the text.
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
    let cleaned = response.trim();
    let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
    let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
    let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
    let cleaned = cleaned.trim();
    if let Ok(v) = serde_json::from_str(cleaned) {
        return Ok(v);
    }
    // Fall back: greedily grab the outermost {...} or [...] span.
    let re_obj = Regex::new(r"\{[\s\S]*\}").unwrap();
    let re_arr = Regex::new(r"\[[\s\S]*\]").unwrap();
    if let Some(m) = re_obj.find(cleaned) {
        if let Ok(v) = serde_json::from_str(m.as_str()) {
            return Ok(v);
        }
    }
    if let Some(m) = re_arr.find(cleaned) {
        if let Ok(v) = serde_json::from_str(m.as_str()) {
            return Ok(v);
        }
    }
    // Char-boundary-safe truncation for the error preview: the previous
    // `&cleaned[..cleaned.len().min(200)]` byte slice panics when byte 200
    // falls inside a multi-byte UTF-8 character in the LLM output.
    let preview: String = cleaned.chars().take(200).collect();
    Err(format!("no valid JSON in response: {}...", preview))
}
/// Semantic node keys for prompt context: everything except journal entries
/// and the reserved system/bookkeeping files, sorted, capped at 200 entries.
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
    // Exact-match keys that are infrastructure, not semantic memory.
    const SYSTEM_KEYS: [&str; 5] = [
        "journal.md",
        "MEMORY.md",
        "where-am-i.md",
        "work-queue.md",
        "work-state",
    ];
    let mut keys = Vec::new();
    for key in store.nodes.keys() {
        let is_journal_entry = key.starts_with("journal.md#");
        if is_journal_entry || SYSTEM_KEYS.contains(&key.as_str()) {
            continue;
        }
        keys.push(key.clone());
    }
    keys.sort();
    keys.truncate(200);
    keys
}

View file

@ -14,7 +14,11 @@
// interference detection, schema assimilation, reconsolidation.
mod store;
mod llm;
mod digest;
mod audit;
mod enrich;
mod consolidate;
mod graph;
mod search;
mod similarity;
@ -516,7 +520,7 @@ fn cmd_consolidate_session() -> Result<(), String> {
fn cmd_consolidate_full() -> Result<(), String> {
let mut store = store::Store::load()?;
digest::consolidate_full(&mut store)
consolidate::consolidate_full(&mut store)
}
fn cmd_triangle_close(args: &[String]) -> Result<(), String> {
@ -824,7 +828,7 @@ fn cmd_journal_enrich(args: &[String]) -> Result<(), String> {
}
let mut store = store::Store::load()?;
digest::journal_enrich(&mut store, jsonl_path, entry_text, grep_line)
enrich::journal_enrich(&mut store, jsonl_path, entry_text, grep_line)
}
fn cmd_experience_mine(args: &[String]) -> Result<(), String> {
@ -840,7 +844,7 @@ fn cmd_experience_mine(args: &[String]) -> Result<(), String> {
}
let mut store = store::Store::load()?;
let count = digest::experience_mine(&mut store, &jsonl_path)?;
let count = enrich::experience_mine(&mut store, &jsonl_path)?;
println!("Done: {} new entries mined.", count);
Ok(())
}
@ -852,7 +856,7 @@ fn cmd_apply_consolidation(args: &[String]) -> Result<(), String> {
.map(|w| w[1].as_str());
let mut store = store::Store::load()?;
digest::apply_consolidation(&mut store, do_apply, report_file)
consolidate::apply_consolidation(&mut store, do_apply, report_file)
}
fn cmd_differentiate(args: &[String]) -> Result<(), String> {
@ -919,7 +923,7 @@ fn cmd_differentiate(args: &[String]) -> Result<(), String> {
fn cmd_link_audit(args: &[String]) -> Result<(), String> {
let apply = args.iter().any(|a| a == "--apply");
let mut store = store::Store::load()?;
let stats = digest::link_audit(&mut store, apply)?;
let stats = audit::link_audit(&mut store, apply)?;
println!("\n{}", "=".repeat(60));
println!("Link audit complete:");
println!(" Kept: {} Deleted: {} Retargeted: {} Weakened: {} Strengthened: {} Errors: {}",

View file

@ -189,14 +189,14 @@ pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
}
/// Prompt template directory
fn prompts_dir() -> std::path::PathBuf {
pub(crate) fn prompts_dir() -> std::path::PathBuf {
// Check for prompts relative to binary, then fall back to ~/poc/memory/prompts/
let home = std::env::var("HOME").unwrap_or_default();
std::path::PathBuf::from(home).join("poc/memory/prompts")
}
/// Load a prompt template, replacing {{PLACEHOLDER}} with data
fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
pub(crate) fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
let path = prompts_dir().join(format!("{}.md", name));
let mut content = std::fs::read_to_string(&path)
.map_err(|e| format!("load prompt {}: {}", path.display(), e))?;