digest: split into focused modules, externalize prompts
digest.rs was 2328 lines containing 6 distinct subsystems. Split into:
- llm.rs: shared LLM utilities (call_sonnet, parse_json_response, semantic_keys)
- audit.rs: link quality audit with parallel Sonnet batching
- enrich.rs: journal enrichment + experience mining
- consolidate.rs: consolidation pipeline + apply
Externalized all inline prompts to prompts/*.md templates using
neuro::load_prompt with {{PLACEHOLDER}} syntax:
- daily-digest.md, weekly-digest.md, monthly-digest.md
- experience.md, journal-enrich.md, consolidation.md
digest.rs retains temporal digest generation (daily/weekly/monthly/auto)
and date helpers. ~940 lines, down from 2328.
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
3f644609e1
commit
50da0b7b26
13 changed files with 1642 additions and 1582 deletions
333
src/audit.rs
Normal file
333
src/audit.rs
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
// Link audit: walk every link in the graph, batch to Sonnet for quality review.
|
||||
//
|
||||
// Each batch of links gets reviewed by Sonnet, which returns per-link actions:
|
||||
// KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon.
|
||||
|
||||
use crate::llm::call_sonnet;
|
||||
use crate::store::{self, Store, new_relation};
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
struct LinkInfo {
|
||||
rel_idx: usize,
|
||||
source_key: String,
|
||||
target_key: String,
|
||||
source_content: String,
|
||||
target_content: String,
|
||||
strength: f32,
|
||||
target_sections: Vec<String>,
|
||||
}
|
||||
|
||||
pub struct AuditStats {
|
||||
pub kept: usize,
|
||||
pub deleted: usize,
|
||||
pub retargeted: usize,
|
||||
pub weakened: usize,
|
||||
pub strengthened: usize,
|
||||
pub errors: usize,
|
||||
}
|
||||
|
||||
fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String {
|
||||
let mut prompt = format!(
|
||||
"You are auditing memory graph links for quality (batch {}/{}).\n\n\
|
||||
For each numbered link, decide what to do:\n\n\
|
||||
KEEP N — link is meaningful, leave it\n\
|
||||
DELETE N — link is noise, accidental, or too generic to be useful\n\
|
||||
RETARGET N new_key — link points to the right topic area but wrong node;\n\
|
||||
\x20 retarget to a more specific section (listed under each link)\n\
|
||||
WEAKEN N strength — link is marginal; reduce strength (0.1-0.3)\n\
|
||||
STRENGTHEN N strength — link is important but underweighted; increase (0.8-1.0)\n\n\
|
||||
Output exactly one action per link number, nothing else.\n\n\
|
||||
Links to review:\n\n",
|
||||
batch_num, total_batches);
|
||||
|
||||
for (i, link) in batch.iter().enumerate() {
|
||||
let n = i + 1;
|
||||
prompt.push_str(&format!(
|
||||
"--- Link {} ---\n\
|
||||
{} → {} (strength={:.2})\n\n\
|
||||
Source content:\n{}\n\n\
|
||||
Target content:\n{}\n",
|
||||
n, link.source_key, link.target_key, link.strength,
|
||||
&link.source_content, &link.target_content));
|
||||
|
||||
if !link.target_sections.is_empty() {
|
||||
prompt.push_str(
|
||||
"\nTarget has sections (consider RETARGET to a more specific one):\n");
|
||||
for s in &link.target_sections {
|
||||
prompt.push_str(&format!(" - {}\n", s));
|
||||
}
|
||||
}
|
||||
prompt.push('\n');
|
||||
}
|
||||
|
||||
prompt
|
||||
}
|
||||
|
||||
fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> {
|
||||
let mut actions = Vec::new();
|
||||
|
||||
for line in response.lines() {
|
||||
let line = line.trim();
|
||||
if line.is_empty() { continue; }
|
||||
|
||||
let parts: Vec<&str> = line.splitn(3, ' ').collect();
|
||||
if parts.len() < 2 { continue; }
|
||||
|
||||
let action = parts[0].to_uppercase();
|
||||
let idx: usize = match parts[1].parse::<usize>() {
|
||||
Ok(n) if n >= 1 && n <= batch_size => n - 1,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let audit_action = match action.as_str() {
|
||||
"KEEP" => AuditAction::Keep,
|
||||
"DELETE" => AuditAction::Delete,
|
||||
"RETARGET" => {
|
||||
if parts.len() < 3 { continue; }
|
||||
AuditAction::Retarget(parts[2].trim().to_string())
|
||||
}
|
||||
"WEAKEN" => {
|
||||
if parts.len() < 3 { continue; }
|
||||
match parts[2].trim().parse::<f32>() {
|
||||
Ok(s) => AuditAction::Weaken(s),
|
||||
Err(_) => continue,
|
||||
}
|
||||
}
|
||||
"STRENGTHEN" => {
|
||||
if parts.len() < 3 { continue; }
|
||||
match parts[2].trim().parse::<f32>() {
|
||||
Ok(s) => AuditAction::Strengthen(s),
|
||||
Err(_) => continue,
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
actions.push((idx, audit_action));
|
||||
}
|
||||
|
||||
actions
|
||||
}
|
||||
|
||||
enum AuditAction {
|
||||
Keep,
|
||||
Delete,
|
||||
Retarget(String),
|
||||
Weaken(f32),
|
||||
Strengthen(f32),
|
||||
}
|
||||
|
||||
/// Run a full link audit: walk every link, batch to Sonnet, apply results.
|
||||
pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String> {
|
||||
// Collect all non-deleted relations with their info
|
||||
let mut links: Vec<LinkInfo> = Vec::new();
|
||||
|
||||
for (idx, rel) in store.relations.iter().enumerate() {
|
||||
if rel.deleted { continue; }
|
||||
|
||||
let source_content = store.nodes.get(&rel.source_key)
|
||||
.map(|n| n.content.clone()).unwrap_or_default();
|
||||
let target_content = store.nodes.get(&rel.target_key)
|
||||
.map(|n| n.content.clone()).unwrap_or_default();
|
||||
|
||||
// Find section children of target if it's file-level
|
||||
let target_sections = if !rel.target_key.contains('#') {
|
||||
let prefix = format!("{}#", rel.target_key);
|
||||
store.nodes.keys()
|
||||
.filter(|k| k.starts_with(&prefix))
|
||||
.cloned()
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
links.push(LinkInfo {
|
||||
rel_idx: idx,
|
||||
source_key: rel.source_key.clone(),
|
||||
target_key: rel.target_key.clone(),
|
||||
source_content,
|
||||
target_content,
|
||||
strength: rel.strength,
|
||||
target_sections,
|
||||
});
|
||||
}
|
||||
|
||||
let total = links.len();
|
||||
println!("Link audit: {} links to review", total);
|
||||
if !apply {
|
||||
println!("DRY RUN — use --apply to make changes");
|
||||
}
|
||||
|
||||
// Batch by char budget (~100K chars per prompt)
|
||||
let char_budget = 100_000usize;
|
||||
let mut batches: Vec<Vec<usize>> = Vec::new();
|
||||
let mut current_batch: Vec<usize> = Vec::new();
|
||||
let mut current_chars = 0usize;
|
||||
|
||||
for (i, link) in links.iter().enumerate() {
|
||||
let link_chars = link.source_content.len() + link.target_content.len() + 200;
|
||||
if !current_batch.is_empty() && current_chars + link_chars > char_budget {
|
||||
batches.push(std::mem::take(&mut current_batch));
|
||||
current_chars = 0;
|
||||
}
|
||||
current_batch.push(i);
|
||||
current_chars += link_chars;
|
||||
}
|
||||
if !current_batch.is_empty() {
|
||||
batches.push(current_batch);
|
||||
}
|
||||
|
||||
let total_batches = batches.len();
|
||||
println!("{} batches (avg {} links/batch)\n", total_batches,
|
||||
if total_batches > 0 { total / total_batches } else { 0 });
|
||||
|
||||
use rayon::prelude::*;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
// Build all batch prompts up front
|
||||
let batch_data: Vec<(usize, Vec<LinkInfo>, String)> = batches.iter().enumerate()
|
||||
.map(|(batch_idx, batch_indices)| {
|
||||
let batch_infos: Vec<LinkInfo> = batch_indices.iter().map(|&i| {
|
||||
let l = &links[i];
|
||||
LinkInfo {
|
||||
rel_idx: l.rel_idx,
|
||||
source_key: l.source_key.clone(),
|
||||
target_key: l.target_key.clone(),
|
||||
source_content: l.source_content.clone(),
|
||||
target_content: l.target_content.clone(),
|
||||
strength: l.strength,
|
||||
target_sections: l.target_sections.clone(),
|
||||
}
|
||||
}).collect();
|
||||
let prompt = build_audit_prompt(&batch_infos, batch_idx + 1, total_batches);
|
||||
(batch_idx, batch_infos, prompt)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Progress counter
|
||||
let done = AtomicUsize::new(0);
|
||||
|
||||
// Run batches in parallel via rayon
|
||||
let batch_results: Vec<_> = batch_data.par_iter()
|
||||
.map(|(batch_idx, batch_infos, prompt)| {
|
||||
let response = call_sonnet(prompt, 300);
|
||||
let completed = done.fetch_add(1, Ordering::Relaxed) + 1;
|
||||
eprint!("\r Batches: {}/{} done", completed, total_batches);
|
||||
(*batch_idx, batch_infos, response)
|
||||
})
|
||||
.collect();
|
||||
eprintln!(); // newline after progress
|
||||
|
||||
// Process results sequentially
|
||||
let mut stats = AuditStats {
|
||||
kept: 0, deleted: 0, retargeted: 0, weakened: 0, strengthened: 0, errors: 0,
|
||||
};
|
||||
let mut deletions: Vec<usize> = Vec::new();
|
||||
let mut retargets: Vec<(usize, String)> = Vec::new();
|
||||
let mut strength_changes: Vec<(usize, f32)> = Vec::new();
|
||||
|
||||
for (batch_idx, batch_infos, response) in &batch_results {
|
||||
let response = match response {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
eprintln!(" Batch {}: error: {}", batch_idx + 1, e);
|
||||
stats.errors += batch_infos.len();
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let actions = parse_audit_response(response, batch_infos.len());
|
||||
|
||||
let mut responded: HashSet<usize> = HashSet::new();
|
||||
|
||||
for (idx, action) in &actions {
|
||||
responded.insert(*idx);
|
||||
let link = &batch_infos[*idx];
|
||||
|
||||
match action {
|
||||
AuditAction::Keep => {
|
||||
stats.kept += 1;
|
||||
}
|
||||
AuditAction::Delete => {
|
||||
println!(" DELETE {} → {}", link.source_key, link.target_key);
|
||||
deletions.push(link.rel_idx);
|
||||
stats.deleted += 1;
|
||||
}
|
||||
AuditAction::Retarget(new_target) => {
|
||||
println!(" RETARGET {} → {} (was {})",
|
||||
link.source_key, new_target, link.target_key);
|
||||
retargets.push((link.rel_idx, new_target.clone()));
|
||||
stats.retargeted += 1;
|
||||
}
|
||||
AuditAction::Weaken(s) => {
|
||||
println!(" WEAKEN {} → {} (str {:.2} → {:.2})",
|
||||
link.source_key, link.target_key, link.strength, s);
|
||||
strength_changes.push((link.rel_idx, *s));
|
||||
stats.weakened += 1;
|
||||
}
|
||||
AuditAction::Strengthen(s) => {
|
||||
println!(" STRENGTHEN {} → {} (str {:.2} → {:.2})",
|
||||
link.source_key, link.target_key, link.strength, s);
|
||||
strength_changes.push((link.rel_idx, *s));
|
||||
stats.strengthened += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i in 0..batch_infos.len() {
|
||||
if !responded.contains(&i) {
|
||||
stats.kept += 1;
|
||||
}
|
||||
}
|
||||
|
||||
println!(" Batch {}/{}: +{}kept +{}del +{}retarget +{}weak +{}strong",
|
||||
batch_idx + 1, total_batches,
|
||||
stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened);
|
||||
}
|
||||
|
||||
// Apply changes
|
||||
if apply && (stats.deleted > 0 || stats.retargeted > 0
|
||||
|| stats.weakened > 0 || stats.strengthened > 0) {
|
||||
println!("\nApplying changes...");
|
||||
|
||||
// Deletions: soft-delete
|
||||
for rel_idx in &deletions {
|
||||
store.relations[*rel_idx].deleted = true;
|
||||
}
|
||||
|
||||
// Strength changes
|
||||
for (rel_idx, new_strength) in &strength_changes {
|
||||
store.relations[*rel_idx].strength = *new_strength;
|
||||
}
|
||||
|
||||
// Retargets: soft-delete old, create new
|
||||
for (rel_idx, new_target) in &retargets {
|
||||
let source_key = store.relations[*rel_idx].source_key.clone();
|
||||
let old_strength = store.relations[*rel_idx].strength;
|
||||
let source_uuid = store.nodes.get(&source_key)
|
||||
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||
let target_uuid = store.nodes.get(new_target)
|
||||
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||
|
||||
// Soft-delete old
|
||||
store.relations[*rel_idx].deleted = true;
|
||||
|
||||
// Create new
|
||||
if target_uuid != [0u8; 16] {
|
||||
let new_rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Auto,
|
||||
old_strength,
|
||||
&source_key, new_target,
|
||||
);
|
||||
store.add_relation(new_rel).ok();
|
||||
}
|
||||
}
|
||||
|
||||
store.save()?;
|
||||
println!("Saved.");
|
||||
}
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
491
src/consolidate.rs
Normal file
491
src/consolidate.rs
Normal file
|
|
@ -0,0 +1,491 @@
|
|||
// Consolidation pipeline: plan → agents → apply → digests → links
|
||||
//
|
||||
// consolidate_full() runs the full autonomous consolidation:
|
||||
// 1. Plan: analyze metrics, allocate agents
|
||||
// 2. Execute: run each agent (Sonnet calls), save reports
|
||||
// 3. Apply: extract and apply actions from reports
|
||||
// 4. Digest: generate missing daily/weekly/monthly digests
|
||||
// 5. Links: apply links extracted from digests
|
||||
// 6. Summary: final metrics comparison
|
||||
//
|
||||
// apply_consolidation() processes consolidation reports independently.
|
||||
|
||||
use crate::digest;
|
||||
use crate::llm::{call_sonnet, parse_json_response};
|
||||
use crate::neuro;
|
||||
use crate::store::{self, Store, new_relation};
|
||||
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
fn agent_results_dir() -> PathBuf {
|
||||
let dir = store::memory_dir().join("agent-results");
|
||||
fs::create_dir_all(&dir).ok();
|
||||
dir
|
||||
}
|
||||
|
||||
/// Simple append-only log writer for consolidate-full.
|
||||
struct LogWriter {
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl LogWriter {
|
||||
fn new(path: &Path) -> Result<Self, String> {
|
||||
fs::write(path, "").map_err(|e| format!("create log: {}", e))?;
|
||||
Ok(LogWriter { path: path.to_path_buf() })
|
||||
}
|
||||
|
||||
fn write(&mut self, line: &str) -> Result<(), String> {
|
||||
let mut f = fs::OpenOptions::new()
|
||||
.append(true)
|
||||
.open(&self.path)
|
||||
.map_err(|e| format!("open log: {}", e))?;
|
||||
writeln!(f, "{}", line)
|
||||
.map_err(|e| format!("write log: {}", e))
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the full autonomous consolidation pipeline with logging.
|
||||
pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
|
||||
let start = std::time::Instant::now();
|
||||
let log_path = agent_results_dir().join("consolidate-full.log");
|
||||
let mut log = LogWriter::new(&log_path)?;
|
||||
|
||||
log.write("=== CONSOLIDATE FULL ===")?;
|
||||
log.write(&format!("Started: {}", store::format_datetime(store::now_epoch())))?;
|
||||
log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?;
|
||||
log.write("")?;
|
||||
|
||||
// --- Step 1: Plan ---
|
||||
log.write("--- Step 1: Plan ---")?;
|
||||
let plan = neuro::consolidation_plan(store);
|
||||
let plan_text = neuro::format_plan(&plan);
|
||||
log.write(&plan_text)?;
|
||||
println!("{}", plan_text);
|
||||
|
||||
let total_agents = plan.replay_count + plan.linker_count
|
||||
+ plan.separator_count + plan.transfer_count
|
||||
+ if plan.run_health { 1 } else { 0 };
|
||||
log.write(&format!("Total agents to run: {}", total_agents))?;
|
||||
|
||||
// --- Step 2: Execute agents ---
|
||||
log.write("\n--- Step 2: Execute agents ---")?;
|
||||
let mut reports: Vec<PathBuf> = Vec::new();
|
||||
let mut agent_num = 0usize;
|
||||
let mut agent_errors = 0usize;
|
||||
|
||||
// Build the list of (agent_type, batch_size) runs
|
||||
let mut runs: Vec<(&str, usize)> = Vec::new();
|
||||
if plan.run_health {
|
||||
runs.push(("health", 0));
|
||||
}
|
||||
if plan.replay_count > 0 {
|
||||
let batch = 5;
|
||||
let mut remaining = plan.replay_count;
|
||||
while remaining > 0 {
|
||||
let this_batch = remaining.min(batch);
|
||||
runs.push(("replay", this_batch));
|
||||
remaining -= this_batch;
|
||||
}
|
||||
}
|
||||
if plan.linker_count > 0 {
|
||||
let batch = 5;
|
||||
let mut remaining = plan.linker_count;
|
||||
while remaining > 0 {
|
||||
let this_batch = remaining.min(batch);
|
||||
runs.push(("linker", this_batch));
|
||||
remaining -= this_batch;
|
||||
}
|
||||
}
|
||||
if plan.separator_count > 0 {
|
||||
let batch = 5;
|
||||
let mut remaining = plan.separator_count;
|
||||
while remaining > 0 {
|
||||
let this_batch = remaining.min(batch);
|
||||
runs.push(("separator", this_batch));
|
||||
remaining -= this_batch;
|
||||
}
|
||||
}
|
||||
if plan.transfer_count > 0 {
|
||||
let batch = 5;
|
||||
let mut remaining = plan.transfer_count;
|
||||
while remaining > 0 {
|
||||
let this_batch = remaining.min(batch);
|
||||
runs.push(("transfer", this_batch));
|
||||
remaining -= this_batch;
|
||||
}
|
||||
}
|
||||
|
||||
for (agent_type, count) in &runs {
|
||||
agent_num += 1;
|
||||
let label = if *count > 0 {
|
||||
format!("[{}/{}] {} (batch={})", agent_num, runs.len(), agent_type, count)
|
||||
} else {
|
||||
format!("[{}/{}] {}", agent_num, runs.len(), agent_type)
|
||||
};
|
||||
|
||||
log.write(&format!("\n{}", label))?;
|
||||
println!("{}", label);
|
||||
|
||||
// Reload store to pick up changes from previous agents
|
||||
if agent_num > 1 {
|
||||
*store = Store::load()?;
|
||||
}
|
||||
|
||||
let prompt = match neuro::agent_prompt(store, agent_type, *count) {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR building prompt: {}", e);
|
||||
log.write(&msg)?;
|
||||
eprintln!("{}", msg);
|
||||
agent_errors += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
log.write(&format!(" Prompt: {} chars (~{} tokens)",
|
||||
prompt.len(), prompt.len() / 4))?;
|
||||
|
||||
let response = match call_sonnet(&prompt, 300) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR from Sonnet: {}", e);
|
||||
log.write(&msg)?;
|
||||
eprintln!("{}", msg);
|
||||
agent_errors += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Save report
|
||||
let ts = store::format_datetime(store::now_epoch())
|
||||
.replace([':', '-', 'T'], "");
|
||||
let report_name = format!("consolidation-{}-{}.md", agent_type, ts);
|
||||
let report_path = agent_results_dir().join(&report_name);
|
||||
fs::write(&report_path, &response)
|
||||
.map_err(|e| format!("write report: {}", e))?;
|
||||
reports.push(report_path.clone());
|
||||
|
||||
let msg = format!(" Done: {} lines → {}", response.lines().count(), report_name);
|
||||
log.write(&msg)?;
|
||||
println!("{}", msg);
|
||||
}
|
||||
|
||||
log.write(&format!("\nAgents complete: {} run, {} errors",
|
||||
agent_num - agent_errors, agent_errors))?;
|
||||
|
||||
// --- Step 3: Apply consolidation actions ---
|
||||
log.write("\n--- Step 3: Apply consolidation actions ---")?;
|
||||
println!("\n--- Applying consolidation actions ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
if reports.is_empty() {
|
||||
log.write(" No reports to apply.")?;
|
||||
} else {
|
||||
match apply_consolidation(store, true, None) {
|
||||
Ok(()) => log.write(" Applied.")?,
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR applying consolidation: {}", e);
|
||||
log.write(&msg)?;
|
||||
eprintln!("{}", msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- Step 3b: Link orphans ---
|
||||
log.write("\n--- Step 3b: Link orphans ---")?;
|
||||
println!("\n--- Linking orphan nodes ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15);
|
||||
log.write(&format!(" {} orphans, {} links added", lo_orphans, lo_added))?;
|
||||
|
||||
// --- Step 3c: Cap degree ---
|
||||
log.write("\n--- Step 3c: Cap degree ---")?;
|
||||
println!("\n--- Capping node degree ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
match store.cap_degree(50) {
|
||||
Ok((hubs, pruned)) => {
|
||||
store.save()?;
|
||||
log.write(&format!(" {} hubs capped, {} edges pruned", hubs, pruned))?;
|
||||
}
|
||||
Err(e) => log.write(&format!(" ERROR: {}", e))?,
|
||||
}
|
||||
|
||||
// --- Step 4: Digest auto ---
|
||||
log.write("\n--- Step 4: Digest auto ---")?;
|
||||
println!("\n--- Generating missing digests ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
match digest::digest_auto(store) {
|
||||
Ok(()) => log.write(" Digests done.")?,
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR in digest auto: {}", e);
|
||||
log.write(&msg)?;
|
||||
eprintln!("{}", msg);
|
||||
}
|
||||
}
|
||||
|
||||
// --- Step 5: Apply digest links ---
|
||||
log.write("\n--- Step 5: Apply digest links ---")?;
|
||||
println!("\n--- Applying digest links ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
let links = digest::parse_all_digest_links();
|
||||
let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links);
|
||||
store.save()?;
|
||||
log.write(&format!(" {} links applied, {} skipped, {} fallbacks",
|
||||
applied, skipped, fallbacks))?;
|
||||
|
||||
// --- Step 6: Summary ---
|
||||
let elapsed = start.elapsed();
|
||||
log.write("\n--- Summary ---")?;
|
||||
log.write(&format!("Finished: {}", store::format_datetime(store::now_epoch())))?;
|
||||
log.write(&format!("Duration: {:.0}s", elapsed.as_secs_f64()))?;
|
||||
*store = Store::load()?;
|
||||
log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?;
|
||||
|
||||
let summary = format!(
|
||||
"\n=== CONSOLIDATE FULL COMPLETE ===\n\
|
||||
Duration: {:.0}s\n\
|
||||
Agents: {} run, {} errors\n\
|
||||
Nodes: {} Relations: {}\n\
|
||||
Log: {}\n",
|
||||
elapsed.as_secs_f64(),
|
||||
agent_num - agent_errors, agent_errors,
|
||||
store.nodes.len(), store.relations.len(),
|
||||
log_path.display(),
|
||||
);
|
||||
log.write(&summary)?;
|
||||
println!("{}", summary);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find the most recent set of consolidation reports.
|
||||
fn find_consolidation_reports() -> Vec<PathBuf> {
|
||||
let dir = agent_results_dir();
|
||||
let mut reports: Vec<PathBuf> = fs::read_dir(&dir)
|
||||
.map(|entries| {
|
||||
entries.filter_map(|e| e.ok())
|
||||
.map(|e| e.path())
|
||||
.filter(|p| {
|
||||
p.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.map(|n| n.starts_with("consolidation-") && n.ends_with(".md"))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
reports.sort();
|
||||
reports.reverse();
|
||||
|
||||
if reports.is_empty() { return reports; }
|
||||
|
||||
// Group by timestamp (last segment of stem before .md)
|
||||
let latest_ts = reports[0].file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("")
|
||||
.rsplit('-').next().unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
reports.retain(|r| {
|
||||
r.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("")
|
||||
.ends_with(latest_ts.as_str())
|
||||
});
|
||||
|
||||
reports
|
||||
}
|
||||
|
||||
fn build_consolidation_prompt(reports: &[PathBuf]) -> Result<String, String> {
|
||||
let mut report_text = String::new();
|
||||
for r in reports {
|
||||
let content = fs::read_to_string(r)
|
||||
.map_err(|e| format!("read {}: {}", r.display(), e))?;
|
||||
report_text.push_str(&format!("\n{}\n## Report: {}\n\n{}\n",
|
||||
"=".repeat(60),
|
||||
r.file_stem().and_then(|s| s.to_str()).unwrap_or(""),
|
||||
content));
|
||||
}
|
||||
|
||||
neuro::load_prompt("consolidation", &[("{{REPORTS}}", &report_text)])
|
||||
}
|
||||
|
||||
/// Run the full apply-consolidation pipeline.
|
||||
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Option<&str>) -> Result<(), String> {
|
||||
let reports = if let Some(path) = report_file {
|
||||
vec![PathBuf::from(path)]
|
||||
} else {
|
||||
find_consolidation_reports()
|
||||
};
|
||||
|
||||
if reports.is_empty() {
|
||||
println!("No consolidation reports found.");
|
||||
println!("Run consolidation-agents first.");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("Found {} reports:", reports.len());
|
||||
for r in &reports {
|
||||
println!(" {}", r.file_name().and_then(|s| s.to_str()).unwrap_or("?"));
|
||||
}
|
||||
|
||||
println!("\nExtracting actions from reports...");
|
||||
let prompt = build_consolidation_prompt(&reports)?;
|
||||
println!(" Prompt: {} chars", prompt.len());
|
||||
|
||||
let response = call_sonnet(&prompt, 300)?;
|
||||
|
||||
let actions_value = parse_json_response(&response)?;
|
||||
let actions = actions_value.as_array()
|
||||
.ok_or("expected JSON array of actions")?;
|
||||
|
||||
println!(" {} actions extracted", actions.len());
|
||||
|
||||
// Save actions
|
||||
let timestamp = store::format_datetime(store::now_epoch())
|
||||
.replace([':', '-'], "");
|
||||
let actions_path = agent_results_dir()
|
||||
.join(format!("consolidation-actions-{}.json", timestamp));
|
||||
fs::write(&actions_path, serde_json::to_string_pretty(&actions_value).unwrap())
|
||||
.map_err(|e| format!("write {}: {}", actions_path.display(), e))?;
|
||||
println!(" Saved: {}", actions_path.display());
|
||||
|
||||
let link_actions: Vec<_> = actions.iter()
|
||||
.filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("link"))
|
||||
.collect();
|
||||
let cat_actions: Vec<_> = actions.iter()
|
||||
.filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("categorize"))
|
||||
.collect();
|
||||
let manual_actions: Vec<_> = actions.iter()
|
||||
.filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("manual"))
|
||||
.collect();
|
||||
|
||||
if !do_apply {
|
||||
// Dry run
|
||||
println!("\n{}", "=".repeat(60));
|
||||
println!("DRY RUN — {} actions proposed", actions.len());
|
||||
println!("{}\n", "=".repeat(60));
|
||||
|
||||
if !link_actions.is_empty() {
|
||||
println!("## Links to add ({})\n", link_actions.len());
|
||||
for (i, a) in link_actions.iter().enumerate() {
|
||||
let src = a.get("source").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let tgt = a.get("target").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let reason = a.get("reason").and_then(|v| v.as_str()).unwrap_or("");
|
||||
println!(" {:2}. {} → {} ({})", i + 1, src, tgt, reason);
|
||||
}
|
||||
}
|
||||
if !cat_actions.is_empty() {
|
||||
println!("\n## Categories to set ({})\n", cat_actions.len());
|
||||
for a in &cat_actions {
|
||||
let key = a.get("key").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let cat = a.get("category").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let reason = a.get("reason").and_then(|v| v.as_str()).unwrap_or("");
|
||||
println!(" {} → {} ({})", key, cat, reason);
|
||||
}
|
||||
}
|
||||
if !manual_actions.is_empty() {
|
||||
println!("\n## Manual actions needed ({})\n", manual_actions.len());
|
||||
for a in &manual_actions {
|
||||
let prio = a.get("priority").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let desc = a.get("description").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
println!(" [{}] {}", prio, desc);
|
||||
}
|
||||
}
|
||||
println!("\n{}", "=".repeat(60));
|
||||
println!("To apply: poc-memory apply-consolidation --apply");
|
||||
println!("{}", "=".repeat(60));
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Apply
|
||||
let mut applied = 0usize;
|
||||
let mut skipped = 0usize;
|
||||
|
||||
if !link_actions.is_empty() {
|
||||
println!("\nApplying {} links...", link_actions.len());
|
||||
for a in &link_actions {
|
||||
let src = a.get("source").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let tgt = a.get("target").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if src.is_empty() || tgt.is_empty() { skipped += 1; continue; }
|
||||
|
||||
let source = match store.resolve_key(src) {
|
||||
Ok(s) => s,
|
||||
Err(e) => { println!(" ? {} → {}: {}", src, tgt, e); skipped += 1; continue; }
|
||||
};
|
||||
let target = match store.resolve_key(tgt) {
|
||||
Ok(t) => t,
|
||||
Err(e) => { println!(" ? {} → {}: {}", src, tgt, e); skipped += 1; continue; }
|
||||
};
|
||||
|
||||
// Refine target to best-matching section
|
||||
let source_content = store.nodes.get(&source)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let target = neuro::refine_target(store, source_content, &target);
|
||||
|
||||
let exists = store.relations.iter().any(|r|
|
||||
r.source_key == source && r.target_key == target && !r.deleted
|
||||
);
|
||||
if exists { skipped += 1; continue; }
|
||||
|
||||
let source_uuid = match store.nodes.get(&source) { Some(n) => n.uuid, None => { skipped += 1; continue; } };
|
||||
let target_uuid = match store.nodes.get(&target) { Some(n) => n.uuid, None => { skipped += 1; continue; } };
|
||||
|
||||
let rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Auto,
|
||||
0.5,
|
||||
&source, &target,
|
||||
);
|
||||
if store.add_relation(rel).is_ok() {
|
||||
println!(" + {} → {}", source, target);
|
||||
applied += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !cat_actions.is_empty() {
|
||||
println!("\nApplying {} categorizations...", cat_actions.len());
|
||||
for a in &cat_actions {
|
||||
let key = a.get("key").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let cat = a.get("category").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if key.is_empty() || cat.is_empty() { continue; }
|
||||
|
||||
let resolved = match store.resolve_key(key) {
|
||||
Ok(r) => r,
|
||||
Err(_) => { println!(" ? {} → {}: not found", key, cat); skipped += 1; continue; }
|
||||
};
|
||||
if store.categorize(&resolved, cat).is_ok() {
|
||||
println!(" + {} → {}", resolved, cat);
|
||||
applied += 1;
|
||||
} else {
|
||||
skipped += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !manual_actions.is_empty() {
|
||||
println!("\n## Manual actions (not auto-applied):\n");
|
||||
for a in &manual_actions {
|
||||
let prio = a.get("priority").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let desc = a.get("description").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
println!(" [{}] {}", prio, desc);
|
||||
}
|
||||
}
|
||||
|
||||
if applied > 0 {
|
||||
store.save()?;
|
||||
}
|
||||
|
||||
println!("\n{}", "=".repeat(60));
|
||||
println!("Applied: {} Skipped: {} Manual: {}", applied, skipped, manual_actions.len());
|
||||
println!("{}", "=".repeat(60));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
1606
src/digest.rs
1606
src/digest.rs
File diff suppressed because it is too large
Load diff
346
src/enrich.rs
Normal file
346
src/enrich.rs
Normal file
|
|
@ -0,0 +1,346 @@
|
|||
// Journal enrichment and experience mining
|
||||
//
|
||||
// Two modes of processing conversation transcripts:
|
||||
// journal_enrich — enrich a specific journal entry with source location and links
|
||||
// experience_mine — retroactively find experiential moments not yet journaled
|
||||
//
|
||||
// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
|
||||
// and apply results to the store.
|
||||
|
||||
use crate::llm::{call_sonnet, parse_json_response, semantic_keys};
|
||||
use crate::neuro;
|
||||
use crate::store::{self, Store, new_node, new_relation};
|
||||
|
||||
use regex::Regex;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::fs;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
fn agent_results_dir() -> std::path::PathBuf {
|
||||
let dir = store::memory_dir().join("agent-results");
|
||||
fs::create_dir_all(&dir).ok();
|
||||
dir
|
||||
}
|
||||
|
||||
/// Extract user/assistant messages with line numbers from a JSONL transcript.
|
||||
/// (line_number, role, text, timestamp)
|
||||
fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
|
||||
let content = fs::read_to_string(jsonl_path)
|
||||
.map_err(|e| format!("read {}: {}", jsonl_path, e))?;
|
||||
|
||||
let mut messages = Vec::new();
|
||||
for (i, line) in content.lines().enumerate() {
|
||||
let obj: serde_json::Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if msg_type != "user" && msg_type != "assistant" { continue; }
|
||||
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let msg = obj.get("message").unwrap_or(&obj);
|
||||
let content = msg.get("content");
|
||||
|
||||
let text = match content {
|
||||
Some(serde_json::Value::String(s)) => s.clone(),
|
||||
Some(serde_json::Value::Array(arr)) => {
|
||||
arr.iter()
|
||||
.filter_map(|c| {
|
||||
if let Some(t) = c.get("text").and_then(|v| v.as_str()) {
|
||||
Some(t.to_string())
|
||||
} else {
|
||||
c.as_str().map(|s| s.to_string())
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let text = text.trim().to_string();
|
||||
if text.is_empty() { continue; }
|
||||
|
||||
messages.push((i + 1, msg_type.to_string(), text, timestamp));
|
||||
}
|
||||
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
/// Format conversation messages for the prompt (truncating long messages).
|
||||
fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
|
||||
messages.iter()
|
||||
.map(|(line, role, text, ts)| {
|
||||
let text = if text.len() > 2000 {
|
||||
format!("{}...[truncated]", &text[..text.floor_char_boundary(1800)])
|
||||
} else {
|
||||
text.clone()
|
||||
};
|
||||
if ts.is_empty() {
|
||||
format!("L{} [{}]: {}", line, role, text)
|
||||
} else {
|
||||
format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n")
|
||||
}
|
||||
|
||||
fn build_journal_prompt(
|
||||
entry_text: &str,
|
||||
conversation: &str,
|
||||
keys: &[String],
|
||||
grep_line: usize,
|
||||
) -> Result<String, String> {
|
||||
let keys_text: String = keys.iter()
|
||||
.map(|k| format!(" - {}", k))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
neuro::load_prompt("journal-enrich", &[
|
||||
("{{GREP_LINE}}", &grep_line.to_string()),
|
||||
("{{ENTRY_TEXT}}", entry_text),
|
||||
("{{KEYS}}", &keys_text),
|
||||
("{{CONVERSATION}}", conversation),
|
||||
])
|
||||
}
|
||||
|
||||
/// Enrich a journal entry with conversation context and link proposals.
|
||||
pub fn journal_enrich(
|
||||
store: &mut Store,
|
||||
jsonl_path: &str,
|
||||
entry_text: &str,
|
||||
grep_line: usize,
|
||||
) -> Result<(), String> {
|
||||
println!("Extracting conversation from {}...", jsonl_path);
|
||||
let messages = extract_conversation(jsonl_path)?;
|
||||
let conversation = format_conversation(&messages);
|
||||
println!(" {} messages, {} chars", messages.len(), conversation.len());
|
||||
|
||||
let keys = semantic_keys(store);
|
||||
println!(" {} semantic keys", keys.len());
|
||||
|
||||
let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
|
||||
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
|
||||
|
||||
println!(" Calling Sonnet...");
|
||||
let response = call_sonnet(&prompt, 300)?;
|
||||
|
||||
let result = parse_json_response(&response)?;
|
||||
|
||||
// Report results
|
||||
let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
let links = result.get("links").and_then(|v| v.as_array());
|
||||
let insights = result.get("missed_insights").and_then(|v| v.as_array());
|
||||
|
||||
println!(" Source: L{}-L{}", source_start, source_end);
|
||||
println!(" Links: {}", links.map_or(0, |l| l.len()));
|
||||
println!(" Missed insights: {}", insights.map_or(0, |l| l.len()));
|
||||
|
||||
// Apply links
|
||||
if let Some(links) = links {
|
||||
for link in links {
|
||||
let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if target.is_empty() || target.starts_with("NOTE:") {
|
||||
if let Some(note) = target.strip_prefix("NOTE:") {
|
||||
println!(" NOTE: {} — {}", note, reason);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Resolve target and find journal node
|
||||
let resolved = match store.resolve_key(target) {
|
||||
Ok(r) => r,
|
||||
Err(_) => { println!(" SKIP {} (not in graph)", target); continue; }
|
||||
};
|
||||
let source_key = match store.find_journal_node(entry_text) {
|
||||
Some(k) => k,
|
||||
None => { println!(" SKIP {} (no matching journal node)", target); continue; }
|
||||
};
|
||||
|
||||
// Refine target to best-matching section
|
||||
let source_content = store.nodes.get(&source_key)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let resolved = neuro::refine_target(store, source_content, &resolved);
|
||||
|
||||
let source_uuid = match store.nodes.get(&source_key) {
|
||||
Some(n) => n.uuid,
|
||||
None => continue,
|
||||
};
|
||||
let target_uuid = match store.nodes.get(&resolved) {
|
||||
Some(n) => n.uuid,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Link,
|
||||
0.5,
|
||||
&source_key, &resolved,
|
||||
);
|
||||
if store.add_relation(rel).is_ok() {
|
||||
println!(" LINK {} → {} ({})", source_key, resolved, reason);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save result to agent-results
|
||||
let timestamp = store::format_datetime(store::now_epoch())
|
||||
.replace([':', '-'], "");
|
||||
let result_file = agent_results_dir()
|
||||
.join(format!("{}.json", timestamp));
|
||||
let output = serde_json::json!({
|
||||
"timestamp": timestamp,
|
||||
"jsonl_path": jsonl_path,
|
||||
"entry_text": &entry_text[..entry_text.len().min(500)],
|
||||
"agent_result": result,
|
||||
});
|
||||
fs::write(&result_file, serde_json::to_string_pretty(&output).unwrap())
|
||||
.map_err(|e| format!("write {}: {}", result_file.display(), e))?;
|
||||
println!(" Results saved: {}", result_file.display());
|
||||
|
||||
store.save()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Mine a conversation transcript for experiential moments not yet journaled.
|
||||
pub fn experience_mine(
|
||||
store: &mut Store,
|
||||
jsonl_path: &str,
|
||||
) -> Result<usize, String> {
|
||||
println!("Experience mining: {}", jsonl_path);
|
||||
|
||||
// Transcript-level dedup: hash the file content and check if already mined
|
||||
let transcript_bytes = fs::read(jsonl_path)
|
||||
.map_err(|e| format!("reading transcript: {}", e))?;
|
||||
let mut hasher = DefaultHasher::new();
|
||||
transcript_bytes.hash(&mut hasher);
|
||||
let hash = hasher.finish();
|
||||
let dedup_key = format!("_mined-transcripts.md#h-{:016x}", hash);
|
||||
|
||||
if store.nodes.contains_key(&dedup_key) {
|
||||
println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]);
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let messages = extract_conversation(jsonl_path)?;
|
||||
let conversation = format_conversation(&messages);
|
||||
println!(" {} messages, {} chars", messages.len(), conversation.len());
|
||||
|
||||
// Load identity
|
||||
let identity = store.nodes.get("identity.md")
|
||||
.map(|n| n.content.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Get recent journal entries to avoid duplication
|
||||
let key_date_re = Regex::new(r"^journal\.md#j-(\d{4}-\d{2}-\d{2}[t-]\d{2}-\d{2})").unwrap();
|
||||
let date_re = Regex::new(r"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2})").unwrap();
|
||||
let mut journal: Vec<_> = store.nodes.values()
|
||||
.filter(|node| node.key.starts_with("journal.md#j-"))
|
||||
.collect();
|
||||
journal.sort_by(|a, b| {
|
||||
let ak = key_date_re.captures(&a.key).map(|c| c[1].to_string())
|
||||
.or_else(|| date_re.captures(&a.content).map(|c| c[1].to_string()))
|
||||
.unwrap_or_default();
|
||||
let bk = key_date_re.captures(&b.key).map(|c| c[1].to_string())
|
||||
.or_else(|| date_re.captures(&b.content).map(|c| c[1].to_string()))
|
||||
.unwrap_or_default();
|
||||
ak.cmp(&bk)
|
||||
});
|
||||
let recent: String = journal.iter().rev().take(10)
|
||||
.map(|n| format!("---\n{}\n", n.content))
|
||||
.collect();
|
||||
|
||||
let keys = semantic_keys(store);
|
||||
let keys_text: String = keys.iter()
|
||||
.map(|k| format!(" - {}", k))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let prompt = neuro::load_prompt("experience", &[
|
||||
("{{IDENTITY}}", &identity),
|
||||
("{{RECENT_JOURNAL}}", &recent),
|
||||
("{{KEYS}}", &keys_text),
|
||||
("{{CONVERSATION}}", &conversation),
|
||||
])?;
|
||||
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
|
||||
|
||||
println!(" Calling Sonnet...");
|
||||
let response = call_sonnet(&prompt, 2000)?;
|
||||
|
||||
let entries = parse_json_response(&response)?;
|
||||
let entries = match entries.as_array() {
|
||||
Some(arr) => arr.clone(),
|
||||
None => return Err("expected JSON array".to_string()),
|
||||
};
|
||||
|
||||
if entries.is_empty() {
|
||||
println!(" No missed experiences found.");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
println!(" Found {} experiential moments:", entries.len());
|
||||
let mut count = 0;
|
||||
for entry in &entries {
|
||||
let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if content.is_empty() { continue; }
|
||||
|
||||
// Format with timestamp header
|
||||
let full_content = if ts.is_empty() {
|
||||
content.to_string()
|
||||
} else {
|
||||
format!("## {}\n\n{}", ts, content)
|
||||
};
|
||||
|
||||
// Generate key from timestamp
|
||||
let key_slug: String = content.chars()
|
||||
.filter(|c| c.is_alphanumeric() || *c == ' ')
|
||||
.take(50)
|
||||
.collect::<String>()
|
||||
.trim()
|
||||
.to_lowercase()
|
||||
.replace(' ', "-");
|
||||
let key = if ts.is_empty() {
|
||||
format!("journal.md#j-mined-{}", key_slug)
|
||||
} else {
|
||||
format!("journal.md#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
|
||||
};
|
||||
|
||||
// Check for duplicate
|
||||
if store.nodes.contains_key(&key) {
|
||||
println!(" SKIP {} (duplicate)", key);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Write to store
|
||||
let mut node = new_node(&key, &full_content);
|
||||
node.node_type = store::NodeType::EpisodicSession;
|
||||
node.category = store::Category::Observation;
|
||||
let _ = store.upsert_node(node);
|
||||
count += 1;
|
||||
|
||||
let preview = if content.len() > 80 { &content[..77] } else { content };
|
||||
println!(" + [{}] {}...", ts, preview);
|
||||
}
|
||||
|
||||
// Record this transcript as mined (even if count == 0, to prevent re-runs)
|
||||
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
|
||||
let mut dedup_node = new_node(&dedup_key, &dedup_content);
|
||||
dedup_node.category = store::Category::Task;
|
||||
let _ = store.upsert_node(dedup_node);
|
||||
|
||||
if count > 0 {
|
||||
println!(" Saved {} new journal entries.", count);
|
||||
}
|
||||
store.save()?;
|
||||
println!("Done: {} new entries mined.", count);
|
||||
Ok(count)
|
||||
}
|
||||
87
src/llm.rs
Normal file
87
src/llm.rs
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
// LLM utilities: Sonnet invocation and response parsing
|
||||
//
|
||||
// Shared by digest, audit, enrich, and consolidate modules.
|
||||
|
||||
use crate::store::Store;
|
||||
|
||||
use regex::Regex;
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
|
||||
/// Call Sonnet via claude CLI. Returns the response text.
|
||||
pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, String> {
|
||||
// Write prompt to temp file (claude CLI needs file input for large prompts)
|
||||
// Use thread ID + PID to avoid collisions under parallel rayon calls
|
||||
let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
|
||||
std::process::id(), std::thread::current().id()));
|
||||
fs::write(&tmp, prompt)
|
||||
.map_err(|e| format!("write temp prompt: {}", e))?;
|
||||
|
||||
let result = Command::new("claude")
|
||||
.args(["-p", "--model", "sonnet", "--tools", ""])
|
||||
.stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
|
||||
.env_remove("CLAUDECODE")
|
||||
.output();
|
||||
|
||||
fs::remove_file(&tmp).ok();
|
||||
|
||||
match result {
|
||||
Ok(output) => {
|
||||
if output.status.success() {
|
||||
Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
Err(format!("claude exited {}: {}", output.status, stderr.trim()))
|
||||
}
|
||||
}
|
||||
Err(e) => Err(format!("spawn claude: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a JSON response from Sonnet, handling markdown fences.
|
||||
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
|
||||
let cleaned = response.trim();
|
||||
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
|
||||
let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
|
||||
let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
|
||||
let cleaned = cleaned.trim();
|
||||
|
||||
if let Ok(v) = serde_json::from_str(cleaned) {
|
||||
return Ok(v);
|
||||
}
|
||||
|
||||
// Try to find JSON object or array
|
||||
let re_obj = Regex::new(r"\{[\s\S]*\}").unwrap();
|
||||
let re_arr = Regex::new(r"\[[\s\S]*\]").unwrap();
|
||||
|
||||
if let Some(m) = re_obj.find(cleaned) {
|
||||
if let Ok(v) = serde_json::from_str(m.as_str()) {
|
||||
return Ok(v);
|
||||
}
|
||||
}
|
||||
if let Some(m) = re_arr.find(cleaned) {
|
||||
if let Ok(v) = serde_json::from_str(m.as_str()) {
|
||||
return Ok(v);
|
||||
}
|
||||
}
|
||||
|
||||
Err(format!("no valid JSON in response: {}...", &cleaned[..cleaned.len().min(200)]))
|
||||
}
|
||||
|
||||
/// Get semantic keys (non-journal, non-system) for prompt context.
|
||||
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
|
||||
let mut keys: Vec<String> = store.nodes.keys()
|
||||
.filter(|k| {
|
||||
!k.starts_with("journal.md#")
|
||||
&& *k != "journal.md"
|
||||
&& *k != "MEMORY.md"
|
||||
&& *k != "where-am-i.md"
|
||||
&& *k != "work-queue.md"
|
||||
&& *k != "work-state"
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
keys.sort();
|
||||
keys.truncate(200);
|
||||
keys
|
||||
}
|
||||
14
src/main.rs
14
src/main.rs
|
|
@ -14,7 +14,11 @@
|
|||
// interference detection, schema assimilation, reconsolidation.
|
||||
|
||||
mod store;
|
||||
mod llm;
|
||||
mod digest;
|
||||
mod audit;
|
||||
mod enrich;
|
||||
mod consolidate;
|
||||
mod graph;
|
||||
mod search;
|
||||
mod similarity;
|
||||
|
|
@ -516,7 +520,7 @@ fn cmd_consolidate_session() -> Result<(), String> {
|
|||
|
||||
fn cmd_consolidate_full() -> Result<(), String> {
|
||||
let mut store = store::Store::load()?;
|
||||
digest::consolidate_full(&mut store)
|
||||
consolidate::consolidate_full(&mut store)
|
||||
}
|
||||
|
||||
fn cmd_triangle_close(args: &[String]) -> Result<(), String> {
|
||||
|
|
@ -824,7 +828,7 @@ fn cmd_journal_enrich(args: &[String]) -> Result<(), String> {
|
|||
}
|
||||
|
||||
let mut store = store::Store::load()?;
|
||||
digest::journal_enrich(&mut store, jsonl_path, entry_text, grep_line)
|
||||
enrich::journal_enrich(&mut store, jsonl_path, entry_text, grep_line)
|
||||
}
|
||||
|
||||
fn cmd_experience_mine(args: &[String]) -> Result<(), String> {
|
||||
|
|
@ -840,7 +844,7 @@ fn cmd_experience_mine(args: &[String]) -> Result<(), String> {
|
|||
}
|
||||
|
||||
let mut store = store::Store::load()?;
|
||||
let count = digest::experience_mine(&mut store, &jsonl_path)?;
|
||||
let count = enrich::experience_mine(&mut store, &jsonl_path)?;
|
||||
println!("Done: {} new entries mined.", count);
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -852,7 +856,7 @@ fn cmd_apply_consolidation(args: &[String]) -> Result<(), String> {
|
|||
.map(|w| w[1].as_str());
|
||||
|
||||
let mut store = store::Store::load()?;
|
||||
digest::apply_consolidation(&mut store, do_apply, report_file)
|
||||
consolidate::apply_consolidation(&mut store, do_apply, report_file)
|
||||
}
|
||||
|
||||
fn cmd_differentiate(args: &[String]) -> Result<(), String> {
|
||||
|
|
@ -919,7 +923,7 @@ fn cmd_differentiate(args: &[String]) -> Result<(), String> {
|
|||
fn cmd_link_audit(args: &[String]) -> Result<(), String> {
|
||||
let apply = args.iter().any(|a| a == "--apply");
|
||||
let mut store = store::Store::load()?;
|
||||
let stats = digest::link_audit(&mut store, apply)?;
|
||||
let stats = audit::link_audit(&mut store, apply)?;
|
||||
println!("\n{}", "=".repeat(60));
|
||||
println!("Link audit complete:");
|
||||
println!(" Kept: {} Deleted: {} Retargeted: {} Weakened: {} Strengthened: {} Errors: {}",
|
||||
|
|
|
|||
|
|
@ -189,14 +189,14 @@ pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
|
|||
}
|
||||
|
||||
/// Prompt template directory
|
||||
fn prompts_dir() -> std::path::PathBuf {
|
||||
pub(crate) fn prompts_dir() -> std::path::PathBuf {
|
||||
// Check for prompts relative to binary, then fall back to ~/poc/memory/prompts/
|
||||
let home = std::env::var("HOME").unwrap_or_default();
|
||||
std::path::PathBuf::from(home).join("poc/memory/prompts")
|
||||
}
|
||||
|
||||
/// Load a prompt template, replacing {{PLACEHOLDER}} with data
|
||||
fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
|
||||
pub(crate) fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
|
||||
let path = prompts_dir().join(format!("{}.md", name));
|
||||
let mut content = std::fs::read_to_string(&path)
|
||||
.map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue