// consciousness/poc-memory/src/agents/digest.rs

// Episodic digest generation: daily, weekly, monthly, auto
//
// Three digest levels form a temporal hierarchy: daily digests summarize
// journal entries, weekly digests summarize dailies, monthly digests
// summarize weeklies. All three share the same generate/auto-detect
// pipeline, parameterized by DigestLevel.
use super::llm::{call_sonnet, semantic_keys};
use crate::store::{self, Store, new_relation};
use crate::neuro;
use chrono::{Datelike, Duration, Local, NaiveDate};
use regex::Regex;
use std::collections::BTreeSet;
// --- Digest level descriptors ---
/// Static description of one digest level (daily, weekly or monthly).
///
/// All levels share the same gather/generate pipeline; the fields below are
/// the only things that differ between them.
// NOTE(review): the allow is presumably for the fn-pointer field types below.
#[allow(clippy::type_complexity)]
struct DigestLevel {
/// Level identifier; used as the store-key prefix and matched against
/// `child_name` of the parent level.
name: &'static str,
/// Substituted for `{{LEVEL}}` in the prompt template.
title: &'static str,
/// Substituted for `{{PERIOD}}` in the prompt template.
period: &'static str,
/// Substituted for `{{INPUT_TITLE}}` in the prompt template.
input_title: &'static str,
/// `None` for the leaf level, whose inputs are episodic session nodes;
/// `Some(name)` when inputs are the stored digests of the level called `name`.
child_name: Option<&'static str>, // None = journal (leaf), Some = child digest files
/// Expand an arg into (canonical_label, dates covered).
label_dates: fn(&str) -> Result<(String, Vec<String>), String>,
/// Map a YYYY-MM-DD date to this level's label.
date_to_label: fn(&str) -> Option<String>,
}
/// Daily level: the leaf of the hierarchy, fed directly by journal entries.
const DAILY: DigestLevel = DigestLevel {
    name: "daily",
    title: "Daily",
    period: "Date",
    input_title: "Journal entries",
    child_name: None,
    // The argument is used verbatim as both the label and the single date covered.
    label_dates: |date| {
        let day = String::from(date);
        Ok((day.clone(), vec![day]))
    },
    // A date is its own daily label.
    date_to_label: |date| Some(String::from(date)),
};
/// Compute the ISO week label (`YYYY-Www`) and the seven `YYYY-MM-DD` dates,
/// Monday through Sunday, of the week that contains `date`.
///
/// Returns an error string when `date` is not a valid `YYYY-MM-DD` date.
fn week_dates(date: &str) -> Result<(String, Vec<String>), String> {
    let day = match NaiveDate::parse_from_str(date, "%Y-%m-%d") {
        Ok(parsed) => parsed,
        Err(e) => return Err(format!("bad date '{}': {}", date, e)),
    };

    // Step back to the Monday of the containing week.
    let days_since_monday = i64::from(day.weekday().num_days_from_monday());
    let week_start = day - Duration::days(days_since_monday);

    let mut dates = Vec::with_capacity(7);
    for offset in 0..7 {
        let current = week_start + Duration::days(offset);
        dates.push(current.format("%Y-%m-%d").to_string());
    }

    // The label uses the ISO week-numbering year, not the calendar year.
    let iso_week = day.iso_week();
    Ok((format!("{}-W{:02}", iso_week.year(), iso_week.week()), dates))
}
/// Weekly level: summarizes the daily digests of one ISO week.
const WEEKLY: DigestLevel = DigestLevel {
    name: "weekly",
    title: "Weekly",
    period: "Week",
    input_title: "Daily digests",
    child_name: Some("daily"),
    // Accepts either a `YYYY-MM-DD` date (any day in the week) or an explicit
    // `YYYY-Www` week label.
    label_dates: |arg| {
        if !arg.contains('W') {
            return week_dates(arg);
        }
        let (y, w) = arg
            .split_once("-W")
            .ok_or_else(|| format!("bad week label: {}", arg))?;
        let year: i32 = y.parse().map_err(|_| format!("bad week year: {}", arg))?;
        let week: u32 = w.parse().map_err(|_| format!("bad week number: {}", arg))?;
        let monday = NaiveDate::from_isoywd_opt(year, week, chrono::Weekday::Mon)
            .ok_or_else(|| format!("invalid week: {}", arg))?;
        let dates = (0..7)
            .map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
            .collect();
        // Re-format rather than echoing `arg`, so a label such as "2026-W9"
        // is canonicalized to "2026-W09" — the same form `week_dates` (and
        // therefore `date_to_label`) produces. Otherwise the digest would be
        // stored under a key that date-derived lookups never match.
        Ok((format!("{}-W{:02}", year, week), dates))
    },
    date_to_label: |date| week_dates(date).ok().map(|(l, _)| l),
};
/// Monthly level: summarizes the weekly digests that touch one calendar month.
const MONTHLY: DigestLevel = DigestLevel {
    name: "monthly",
    title: "Monthly",
    period: "Month",
    input_title: "Weekly digests",
    child_name: Some("weekly"),
    // Accepts `YYYY-MM` (anything up to 7 bytes, completed with "-01") or a
    // full `YYYY-MM-DD` date inside the month.
    label_dates: |arg| {
        let parsed = if arg.len() <= 7 {
            NaiveDate::parse_from_str(&format!("{}-01", arg), "%Y-%m-%d")
                .map_err(|e| format!("bad month '{}': {}", arg, e))
        } else {
            NaiveDate::parse_from_str(arg, "%Y-%m-%d")
                .map_err(|e| format!("bad date '{}': {}", arg, e))
        };
        let anchor = parsed?;
        let (year, month) = (anchor.year(), anchor.month());

        // Enumerate days from the 1st until the calendar rejects the day number.
        let mut dates = Vec::new();
        for day in 1u32.. {
            match NaiveDate::from_ymd_opt(year, month, day) {
                Some(date) if date.month() == month => {
                    dates.push(date.format("%Y-%m-%d").to_string());
                }
                _ => break,
            }
        }
        Ok((format!("{}-{:02}", year, month), dates))
    },
    date_to_label: |date| {
        let parsed = NaiveDate::parse_from_str(date, "%Y-%m-%d").ok()?;
        Some(format!("{}-{:02}", parsed.year(), parsed.month()))
    },
};
/// All digest levels, ordered from finest to coarsest.
///
/// `digest_auto` walks this slice in order, so a level's digests are written
/// to the store before the parent level tries to load them as its inputs.
/// `gather` and `generate` also look levels up here by `name`.
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
/// Build the store key of a digest node by joining the level name and the
/// period label with a dash, e.g. "daily-2026-03-04" or "weekly-2026-W09".
fn digest_node_key(level_name: &str, label: &str) -> String {
    let mut key = String::with_capacity(level_name.len() + 1 + label.len());
    key.push_str(level_name);
    key.push('-');
    key.push_str(label);
    key
}
// --- Input gathering ---
/// Result of gathering inputs for a digest.
///
/// Produced by `gather` and consumed by `generate_digest`.
struct GatherResult {
/// Canonical period label returned by the level's `label_dates`.
label: String,
/// (display_label, content) pairs for the prompt.
/// For the leaf level the display label is the entry's formatted datetime;
/// for higher levels it is the child digest's label.
inputs: Vec<(String, String)>,
/// Store keys of source nodes — used to create structural links.
source_keys: Vec<String>,
}
/// Look up the stored digest for each of `labels` at the level named `prefix`.
///
/// Labels without a stored node are silently dropped. Returns the
/// `(label, content)` pairs together with the store keys that were found,
/// both in the order of `labels`.
fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> (Vec<(String, String)>, Vec<String>) {
    labels
        .iter()
        .filter_map(|label| {
            let key = digest_node_key(prefix, label);
            let node = store.nodes.get(&key)?;
            Some(((label.clone(), node.content.clone()), key))
        })
        .unzip()
}
/// Collect the prompt inputs for one digest of `level`, identified by `arg`.
///
/// `arg` is expanded by the level's `label_dates` into a label and the dates
/// it covers. Non-leaf levels load the stored child digests for those dates;
/// the leaf level scans the store for episodic session nodes whose date
/// equals the label, sorted by formatted datetime.
///
/// Returns an error if `label_dates` rejects `arg`. Panics if the level names
/// a child that is not in `LEVELS`.
fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<GatherResult, String> {
    let (label, dates) = (level.label_dates)(arg)?;

    let (inputs, source_keys) = match level.child_name {
        Some(child_name) => {
            let child = LEVELS
                .iter()
                .find(|candidate| candidate.name == child_name)
                .expect("invalid child_name");

            // Several parent dates map to the same child label; the set
            // removes duplicates and yields the labels in sorted order.
            let mut unique_labels = BTreeSet::new();
            for date in &dates {
                if let Some(child_label) = (child.date_to_label)(date) {
                    unique_labels.insert(child_label);
                }
            }
            let child_labels: Vec<String> = unique_labels.into_iter().collect();
            load_child_digests(store, child_name, &child_labels)
        }
        None => {
            // Leaf level: pick the timestamped session nodes dated `label`.
            let mut entries = Vec::new();
            for (key, node) in store.nodes.iter() {
                if node.node_type == store::NodeType::EpisodicSession
                    && node.timestamp > 0
                    && store::format_date(node.timestamp) == label
                {
                    entries.push((
                        store::format_datetime(node.timestamp),
                        node.content.clone(),
                        key.clone(),
                    ));
                }
            }
            entries.sort_by(|a, b| a.0.cmp(&b.0));

            let mut inputs = Vec::with_capacity(entries.len());
            let mut keys = Vec::with_capacity(entries.len());
            for (when, content, key) in entries {
                inputs.push((when, content));
                keys.push(key);
            }
            (inputs, keys)
        }
    };

    Ok(GatherResult { label, inputs, source_keys })
}
/// List the labels of `level` that auto-generation should consider.
///
/// Every date in `dates` is mapped to its label (dates the level cannot map
/// are ignored), duplicates are removed, and the label containing `today` is
/// excluded because that period is still in progress. The result is sorted.
fn find_candidates(level: &DigestLevel, dates: &[String], today: &str) -> Vec<String> {
    let mut labels = BTreeSet::new();
    for date in dates {
        if let Some(label) = (level.date_to_label)(date) {
            labels.insert(label);
        }
    }
    if let Some(current) = (level.date_to_label)(today) {
        labels.remove(&current);
    }
    labels.into_iter().collect()
}
// --- Unified generator ---
/// Render the `(label, content)` pairs as the body of the prompt.
///
/// With `daily` set each input gets a `###` heading; otherwise inputs are
/// separated by a `---` rule and get a `##` heading.
fn format_inputs(inputs: &[(String, String)], daily: bool) -> String {
    inputs
        .iter()
        .map(|(label, content)| {
            if daily {
                format!("\n### {}\n\n{}\n", label, content)
            } else {
                format!("\n---\n## {}\n{}\n", label, content)
            }
        })
        .collect()
}
/// Generate one digest, store it, and link its sources to it.
///
/// Builds the prompt from the "digest" agent definition (falling back to
/// `digest.md` in the configured prompts directory), calls the model, upserts
/// the result under `digest_node_key(level.name, label)`, adds a structural
/// link from every key in `source_keys` to the digest node, and saves the
/// store.
///
/// Returns `Ok(())` without doing anything when `inputs` is empty.
///
/// # Errors
/// Propagates failures from loading the prompt, the model call, and the
/// store's upsert, add-relation and save operations.
fn generate_digest(
    store: &mut Store,
    level: &DigestLevel,
    label: &str,
    inputs: &[(String, String)],
    source_keys: &[String],
) -> Result<(), String> {
    println!("Generating {} digest for {}...", level.name, label);
    if inputs.is_empty() {
        println!(" No inputs found for {}", label);
        return Ok(());
    }
    println!(" {} inputs", inputs.len());

    let keys = semantic_keys(store);
    let keys_text = keys.iter()
        .map(|k| format!(" - {}", k))
        .collect::<Vec<_>>()
        .join("\n");
    let content = format_inputs(inputs, level.child_name.is_none());
    let covered = inputs.iter()
        .map(|(l, _)| l.as_str())
        .collect::<Vec<_>>()
        .join(", ");

    // Load prompt from agent file; fall back to prompts dir
    let def = super::defs::get_def("digest");
    let template = match &def {
        Some(d) => d.prompt.clone(),
        None => {
            let path = crate::config::get().prompts_dir.join("digest.md");
            std::fs::read_to_string(&path)
                .map_err(|e| format!("load digest prompt: {}", e))?
        }
    };

    // `{{CONTENT}}` is substituted last: the inputs are free-form text, and
    // substituting them earlier would let a literal "{{KEYS}}" or
    // "{{COVERED}}" inside an entry be expanded by the later replacements.
    let prompt = template
        .replace("{{LEVEL}}", level.title)
        .replace("{{PERIOD}}", level.period)
        .replace("{{INPUT_TITLE}}", level.input_title)
        .replace("{{LABEL}}", label)
        .replace("{{COVERED}}", &covered)
        .replace("{{KEYS}}", &keys_text)
        .replace("{{CONTENT}}", &content);
    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
    println!(" Calling Sonnet...");
    let digest = call_sonnet("digest", &prompt)?;

    let key = digest_node_key(level.name, label);
    store.upsert_provenance(&key, &digest, "digest:write")?;

    // Structural links: connect all source entries to this digest.
    // The digest's UUID is the same for every link, so look it up once.
    let mut linked = 0;
    match store.nodes.get(&key).map(|n| n.uuid) {
        Some(target_uuid) => {
            for source_key in source_keys {
                // Skip if link already exists
                let exists = store.relations.iter().any(|r|
                    !r.deleted && r.source_key == *source_key && r.target_key == key);
                if exists { continue; }

                // A source that is not in the store is skipped rather than
                // linked with a placeholder all-zero UUID, matching what
                // `apply_digest_links` does for unknown nodes.
                let source_uuid = match store.nodes.get(source_key) {
                    Some(n) => n.uuid,
                    None => continue,
                };

                let mut rel = new_relation(
                    source_uuid, target_uuid,
                    store::RelationType::Link, 0.8,
                    source_key, &key,
                );
                rel.provenance = "digest:structural".to_string();
                store.add_relation(rel)?;
                linked += 1;
            }
        }
        None => {
            // Not expected after a successful upsert; never link to a zero UUID.
            eprintln!(" Warning: digest node {} not found after upsert; no links created", key);
        }
    }
    if linked > 0 {
        println!(" Linked {} source entries → {}", linked, key);
    }

    store.save()?;
    println!(" Stored: {}", key);
    println!(" Done: {} lines", digest.lines().count());
    Ok(())
}
// --- Public API ---
pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), String> {
let level = LEVELS.iter()
.find(|l| l.name == level_name)
.ok_or_else(|| format!("unknown digest level: {}", level_name))?;
let result = gather(level, store, arg)?;
generate_digest(store, level, &result.label, &result.inputs, &result.source_keys)
}
// --- Auto-detect and generate missing digests ---
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
let today = Local::now().format("%Y-%m-%d").to_string();
// Collect all dates with episodic entries
let dates: Vec<String> = store.nodes.values()
.filter(|n| n.node_type == store::NodeType::EpisodicSession && n.timestamp > 0)
.map(|n| store::format_date(n.timestamp))
.collect::<BTreeSet<_>>()
.into_iter()
.collect();
let mut total = 0u32;
for level in LEVELS {
let candidates = find_candidates(level, &dates, &today);
let mut generated = 0u32;
let mut skipped = 0u32;
for arg in &candidates {
let result = gather(level, store, arg)?;
let key = digest_node_key(level.name, &result.label);
if store.nodes.contains_key(&key) {
skipped += 1;
continue;
}
if result.inputs.is_empty() { continue; }
println!("[auto] Missing {} digest for {}", level.name, result.label);
generate_digest(store, level, &result.label, &result.inputs, &result.source_keys)?;
generated += 1;
}
println!("[auto] {}: {} generated, {} existed", level.name, generated, skipped);
total += generated;
}
if total == 0 {
println!("[auto] All digests up to date.");
} else {
println!("[auto] Generated {} total digests.", total);
}
Ok(())
}
// --- Digest link parsing ---
// Replaces digest-link-parser.py: parses ## Links sections from digest
// files and applies them to the memory graph.
/// A parsed link from a digest's Links section.
///
/// `source` and `target` are normalized keys; they are not yet resolved
/// against the store (that happens in `apply_digest_links`).
pub struct DigestLink {
/// Normalized key on the left-hand side of the arrow.
pub source: String,
/// Normalized key on the right-hand side of the arrow.
pub target: String,
/// Text of the trailing parenthesized note, or empty when there is none.
pub reason: String,
/// Store key of the digest node the link was parsed from.
pub file: String,
}
/// Normalize a raw link target to a poc-memory key.
///
/// Steps, in order:
/// 1. Trim whitespace and surrounding backticks.
/// 2. Return an empty string for empty input and for self-references
///    (text starting with "this ", case-insensitively); the caller
///    substitutes the digest's own key.
/// 3. Strip a trailing `.md`; otherwise turn `file.md#section` into
///    `file-section`.
/// 4. Turn `daily/…`, `weekly/…`, `monthly/…` into `daily-…` etc.
/// 5. Prefix a bare `YYYY-MM-DD` date with `daily-`.
///
/// Date detection is a plain byte check instead of a regex that was
/// recompiled on every call. It accepts ASCII digits only, which is the form
/// digest keys are written in.
fn normalize_link_key(raw: &str) -> String {
    /// True when `s` is exactly `DDDD-DD-DD` with ASCII digits.
    fn is_bare_date(s: &str) -> bool {
        let bytes = s.as_bytes();
        bytes.len() == 10
            && bytes.iter().enumerate().all(|(i, b)| match i {
                4 | 7 => *b == b'-',
                _ => b.is_ascii_digit(),
            })
    }

    let trimmed = raw.trim().trim_matches('`').trim();
    // Empty input and self-references both yield the empty key.
    if trimmed.is_empty() || trimmed.to_lowercase().starts_with("this ") {
        return String::new();
    }

    // Strip .md suffix if present, else fold a `file.md#section` anchor.
    let mut key = match trimmed.strip_suffix(".md") {
        Some(stripped) => stripped.to_string(),
        None => match trimmed.split_once('#') {
            Some((file, section)) => match file.strip_suffix(".md") {
                Some(bare) => format!("{}-{}", bare, section),
                None => trimmed.to_string(),
            },
            None => trimmed.to_string(),
        },
    };

    // weekly/2026-W06 → weekly-2026-W06, etc.
    if let Some((prefix, rest)) = key.split_once('/') {
        if matches!(prefix, "daily" | "weekly" | "monthly") {
            key = format!("{}-{}", prefix, rest);
        }
    }

    // Bare date → daily digest
    if is_bare_date(&key) {
        key.insert_str(0, "daily-");
    }
    key
}
/// Extract the links listed under a digest's `## Links` heading.
///
/// `key` is the store key of the digest; it replaces self-references and is
/// recorded as the `file` of every link. Only bullet lines of the form
/// `- source → target (reason)` (arrows `→`, `↔` or `←`) inside the section
/// are considered. Links whose endpoints start with `NEW:` and links from a
/// key to itself are dropped.
fn parse_digest_node_links(key: &str, content: &str) -> Vec<DigestLink> {
    let header_re = Regex::new(r"^##\s+Links").unwrap();
    let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();

    // "this daily/weekly/monthly" anywhere in the raw text means the digest itself.
    let names_this_digest = |raw: &str| {
        let lower = raw.to_lowercase();
        ["this daily", "this weekly", "this monthly"]
            .iter()
            .any(|phrase| lower.contains(*phrase))
    };
    // Normalize one endpoint, substituting the digest key for self-references.
    let endpoint = |raw: &str| {
        let normalized = normalize_link_key(raw);
        if normalized.is_empty() || names_this_digest(raw) {
            key.to_string()
        } else {
            normalized
        }
    };

    let mut links = Vec::new();
    let mut in_links = false;
    for line in content.lines() {
        if header_re.is_match(line) {
            in_links = true;
            continue;
        }
        if !in_links {
            continue;
        }
        // Any other level-2 heading closes the section.
        if line.starts_with("## ") {
            in_links = false;
            continue;
        }
        // Sub-headings and bold captions inside the section carry no links.
        if line.starts_with("###") || line.starts_with("**") {
            continue;
        }

        let cap = match link_re.captures(line) {
            Some(found) => found,
            None => continue,
        };
        let source = endpoint(cap[1].trim());
        let target = endpoint(cap[2].trim());

        // Skip NEW: and self-links
        if source.starts_with("NEW:") || target.starts_with("NEW:") || source == target {
            continue;
        }

        let reason = cap.get(3).map(|m| m.as_str().to_string()).unwrap_or_default();
        links.push(DigestLink { source, target, reason, file: key.to_string() });
    }
    links
}
/// Parse the Links sections of every daily, weekly and monthly digest node.
///
/// Nodes are visited in key order. When several links share the same
/// (source, target) pair only the first one encountered is kept.
pub fn parse_all_digest_links(store: &Store) -> Vec<DigestLink> {
    let mut digests: Vec<_> = store.nodes.iter()
        .filter(|(_, node)| matches!(node.node_type,
            store::NodeType::EpisodicDaily
            | store::NodeType::EpisodicWeekly
            | store::NodeType::EpisodicMonthly))
        .collect();
    digests.sort_by(|a, b| a.0.cmp(b.0));

    // Deduplicate by (source, target) pair while collecting.
    let mut seen = std::collections::HashSet::new();
    let mut unique = Vec::new();
    for (key, node) in digests {
        for link in parse_digest_node_links(key, &node.content) {
            if seen.insert((link.source.clone(), link.target.clone())) {
                unique.push(link);
            }
        }
    }
    unique
}
/// Apply parsed digest links to the store.
///
/// Each link's endpoints are resolved to store keys (retrying with the part
/// before the first `#` when the full key does not resolve), the target is
/// refined via `neuro::refine_target`, and a `Link` relation of strength 0.5
/// is added unless an undeleted relation between the two keys already exists.
///
/// Returns `(applied, skipped, fallbacks)`:
/// - `applied`: relations added;
/// - `skipped`: links dropped because an endpoint could not be resolved, the
///   endpoints coincide, the relation already exists, a node is missing, or
///   the store rejected the relation;
/// - `fallbacks`: endpoints that only resolved on the retry.
pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, usize, usize) {
    /// Resolve `raw`; on failure retry with the text before the first `#`.
    /// Bumps `fallbacks` when the retry is what succeeded.
    fn resolve_with_fallback(store: &mut Store, raw: &str, fallbacks: &mut usize) -> Option<String> {
        if let Ok(resolved) = store.resolve_key(raw) {
            return Some(resolved);
        }
        // `split` always yields at least one piece, so there is always a
        // base to retry with (it equals `raw` when there is no anchor).
        let base = raw.split('#').next().unwrap_or(raw);
        let resolved = store.resolve_key(base).ok()?;
        *fallbacks += 1;
        Some(resolved)
    }

    let mut applied = 0usize;
    let mut skipped = 0usize;
    let mut fallbacks = 0usize;

    for link in links {
        // Try resolving both keys
        let source = match resolve_with_fallback(store, &link.source, &mut fallbacks) {
            Some(resolved) => resolved,
            None => { skipped += 1; continue; }
        };
        let target = match resolve_with_fallback(store, &link.target, &mut fallbacks) {
            Some(resolved) => resolved,
            None => { skipped += 1; continue; }
        };

        // Refine target to best-matching section if available
        let source_content = store.nodes.get(&source)
            .map(|n| n.content.as_str()).unwrap_or("");
        let target = neuro::refine_target(store, source_content, &target);
        if source == target { skipped += 1; continue; }

        // Check if link already exists
        let exists = store.relations.iter().any(|r|
            r.source_key == source && r.target_key == target && !r.deleted
        );
        if exists { skipped += 1; continue; }

        let source_uuid = match store.nodes.get(&source) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };
        let target_uuid = match store.nodes.get(&target) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };

        let rel = new_relation(
            source_uuid, target_uuid,
            store::RelationType::Link,
            0.5,
            &source, &target,
        );
        match store.add_relation(rel) {
            Ok(_) => {
                println!(" + {} → {}", source, target);
                applied += 1;
            }
            // Count the failure so every link ends up in one of the tallies.
            Err(_) => skipped += 1,
        }
    }
    (applied, skipped, fallbacks)
}