consciousness/src/subconscious/digest.rs
ProofOfConcept 8eaf4c5956 digest: use created_at instead of timestamp for date matching
Episodic entries should be grouped by creation date, not last
update date. Fixes digest generation potentially assigning
updated entries to the wrong day.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-03-26 19:18:14 -04:00

544 lines
18 KiB
Rust

// Episodic digest generation: daily, weekly, monthly, auto
//
// Three digest levels form a temporal hierarchy: daily digests summarize
// journal entries, weekly digests summarize dailies, monthly digests
// summarize weeklies. All three share the same generate/auto-detect
// pipeline, parameterized by DigestLevel.
use super::llm;
use crate::store::{self, Store, new_relation};
use crate::neuro;
use chrono::{Datelike, Duration, Local, NaiveDate};
use regex::Regex;
use std::collections::BTreeSet;
// --- Digest level descriptors ---
#[allow(clippy::type_complexity)]
struct DigestLevel {
name: &'static str,
title: &'static str,
period: &'static str,
input_title: &'static str,
child_name: Option<&'static str>, // None = journal (leaf), Some = child digest files
/// Expand an arg into (canonical_label, dates covered).
label_dates: fn(&str) -> Result<(String, Vec<String>), String>,
/// Map a YYYY-MM-DD date to this level's label.
date_to_label: fn(&str) -> Option<String>,
}
const DAILY: DigestLevel = DigestLevel {
name: "daily",
title: "Daily",
period: "Date",
input_title: "Journal entries",
child_name: None,
label_dates: |date| Ok((date.to_string(), vec![date.to_string()])),
date_to_label: |date| Some(date.to_string()),
};
/// Week label and 7 dates (Mon-Sun) for the week containing `date`.
fn week_dates(date: &str) -> Result<(String, Vec<String>), String> {
let nd = NaiveDate::parse_from_str(date, "%Y-%m-%d")
.map_err(|e| format!("bad date '{}': {}", date, e))?;
let iso = nd.iso_week();
let week_label = format!("{}-W{:02}", iso.year(), iso.week());
let monday = nd - Duration::days(nd.weekday().num_days_from_monday() as i64);
let dates = (0..7)
.map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
.collect();
Ok((week_label, dates))
}
const WEEKLY: DigestLevel = DigestLevel {
name: "weekly",
title: "Weekly",
period: "Week",
input_title: "Daily digests",
child_name: Some("daily"),
label_dates: |arg| {
if !arg.contains('W') {
return week_dates(arg);
}
let (y, w) = arg.split_once("-W")
.ok_or_else(|| format!("bad week label: {}", arg))?;
let year: i32 = y.parse().map_err(|_| format!("bad week year: {}", arg))?;
let week: u32 = w.parse().map_err(|_| format!("bad week number: {}", arg))?;
let monday = NaiveDate::from_isoywd_opt(year, week, chrono::Weekday::Mon)
.ok_or_else(|| format!("invalid week: {}", arg))?;
let dates = (0..7)
.map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
.collect();
Ok((arg.to_string(), dates))
},
date_to_label: |date| week_dates(date).ok().map(|(l, _)| l),
};
const MONTHLY: DigestLevel = DigestLevel {
name: "monthly",
title: "Monthly",
period: "Month",
input_title: "Weekly digests",
child_name: Some("weekly"),
label_dates: |arg| {
let (year, month) = if arg.len() <= 7 {
let d = NaiveDate::parse_from_str(&format!("{}-01", arg), "%Y-%m-%d")
.map_err(|e| format!("bad month '{}': {}", arg, e))?;
(d.year(), d.month())
} else {
let d = NaiveDate::parse_from_str(arg, "%Y-%m-%d")
.map_err(|e| format!("bad date '{}': {}", arg, e))?;
(d.year(), d.month())
};
let label = format!("{}-{:02}", year, month);
let mut dates = Vec::new();
let mut day = 1u32;
while let Some(date) = NaiveDate::from_ymd_opt(year, month, day) {
if date.month() != month { break; }
dates.push(date.format("%Y-%m-%d").to_string());
day += 1;
}
Ok((label, dates))
},
date_to_label: |date| NaiveDate::parse_from_str(date, "%Y-%m-%d")
.ok().map(|d| format!("{}-{:02}", d.year(), d.month())),
};
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
/// Store key for a digest node: "daily-2026-03-04", "weekly-2026-W09", etc.
fn digest_node_key(level_name: &str, label: &str) -> String {
format!("{}-{}", level_name, label)
}
// --- Input gathering ---
/// Result of gathering inputs for a digest.
struct GatherResult {
label: String,
/// (display_label, content) pairs for the prompt.
inputs: Vec<(String, String)>,
/// Store keys of source nodes — used to create structural links.
source_keys: Vec<String>,
}
/// Load child digest content from the store.
fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> (Vec<(String, String)>, Vec<String>) {
let mut digests = Vec::new();
let mut keys = Vec::new();
for label in labels {
let key = digest_node_key(prefix, label);
if let Some(node) = store.nodes.get(&key) {
digests.push((label.clone(), node.content.clone()));
keys.push(key);
}
}
(digests, keys)
}
/// Unified: gather inputs for any digest level.
fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<GatherResult, String> {
let (label, dates) = (level.label_dates)(arg)?;
let (inputs, source_keys) = if let Some(child_name) = level.child_name {
// Map parent's dates through child's date_to_label → child labels
let child = LEVELS.iter()
.find(|l| l.name == child_name)
.expect("invalid child_name");
let child_labels: Vec<String> = dates.iter()
.filter_map(|d| (child.date_to_label)(d))
.collect::<BTreeSet<_>>()
.into_iter()
.collect();
load_child_digests(store, child_name, &child_labels)
} else {
// Leaf level: scan store for episodic entries matching date
let mut entries: Vec<_> = store.nodes.iter()
.filter(|(_, n)| n.node_type == store::NodeType::EpisodicSession
&& n.created_at > 0
&& store::format_date(n.created_at) == label)
.map(|(key, n)| {
(store::format_datetime(n.timestamp), n.content.clone(), key.clone())
})
.collect();
entries.sort_by(|a, b| a.0.cmp(&b.0));
let keys = entries.iter().map(|(_, _, k)| k.clone()).collect();
let inputs = entries.into_iter().map(|(dt, c, _)| (dt, c)).collect();
(inputs, keys)
};
Ok(GatherResult { label, inputs, source_keys })
}
/// Unified: find candidate labels for auto-generation (past, not yet generated).
fn find_candidates(level: &DigestLevel, dates: &[String], today: &str) -> Vec<String> {
let today_label = (level.date_to_label)(today);
dates.iter()
.filter_map(|d| (level.date_to_label)(d))
.collect::<BTreeSet<_>>()
.into_iter()
.filter(|l| Some(l) != today_label.as_ref())
.collect()
}
// --- Unified generator ---
fn format_inputs(inputs: &[(String, String)], daily: bool) -> String {
let mut text = String::new();
for (label, content) in inputs {
if daily {
text.push_str(&format!("\n### {}\n\n{}\n", label, content));
} else {
text.push_str(&format!("\n---\n## {}\n{}\n", label, content));
}
}
text
}
fn generate_digest(
store: &mut Store,
level: &DigestLevel,
label: &str,
inputs: &[(String, String)],
source_keys: &[String],
) -> Result<(), String> {
println!("Generating {} digest for {}...", level.name, label);
if inputs.is_empty() {
println!(" No inputs found for {}", label);
return Ok(());
}
println!(" {} inputs", inputs.len());
let keys = llm::semantic_keys(store);
let keys_text = keys.iter()
.map(|k| format!(" - {}", k))
.collect::<Vec<_>>()
.join("\n");
let content = format_inputs(inputs, level.child_name.is_none());
let covered = inputs.iter()
.map(|(l, _)| l.as_str())
.collect::<Vec<_>>()
.join(", ");
// Load prompt from agent file; fall back to prompts dir
let def = super::defs::get_def("digest");
let template = match &def {
Some(d) => d.steps.first().map(|s| s.prompt.clone()).unwrap_or_default(),
None => {
let path = crate::config::get().prompts_dir.join("digest.md");
std::fs::read_to_string(&path)
.map_err(|e| format!("load digest prompt: {}", e))?
}
};
let prompt = template
.replace("{{LEVEL}}", level.title)
.replace("{{PERIOD}}", level.period)
.replace("{{INPUT_TITLE}}", level.input_title)
.replace("{{LABEL}}", label)
.replace("{{CONTENT}}", &content)
.replace("{{COVERED}}", &covered)
.replace("{{KEYS}}", &keys_text);
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
println!(" Calling Sonnet...");
let digest = llm::call_simple("digest", &prompt)?;
let key = digest_node_key(level.name, label);
store.upsert_provenance(&key, &digest, "digest:write")?;
// Structural links: connect all source entries to this digest
let mut linked = 0;
for source_key in source_keys {
// Skip if link already exists
let exists = store.relations.iter().any(|r|
!r.deleted && r.source_key == *source_key && r.target_key == key);
if exists { continue; }
let source_uuid = store.nodes.get(source_key)
.map(|n| n.uuid).unwrap_or([0u8; 16]);
let target_uuid = store.nodes.get(&key)
.map(|n| n.uuid).unwrap_or([0u8; 16]);
let mut rel = new_relation(
source_uuid, target_uuid,
store::RelationType::Link, 0.8,
source_key, &key,
);
rel.provenance = "digest:structural".to_string();
store.add_relation(rel)?;
linked += 1;
}
if linked > 0 {
println!(" Linked {} source entries → {}", linked, key);
}
store.save()?;
println!(" Stored: {}", key);
println!(" Done: {} lines", digest.lines().count());
Ok(())
}
// --- Public API ---
pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), String> {
let level = LEVELS.iter()
.find(|l| l.name == level_name)
.ok_or_else(|| format!("unknown digest level: {}", level_name))?;
let result = gather(level, store, arg)?;
generate_digest(store, level, &result.label, &result.inputs, &result.source_keys)
}
// --- Auto-detect and generate missing digests ---
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
let today = Local::now().format("%Y-%m-%d").to_string();
// Collect all dates with episodic entries
let dates: Vec<String> = store.nodes.values()
.filter(|n| n.node_type == store::NodeType::EpisodicSession && n.created_at > 0)
.map(|n| store::format_date(n.created_at))
.collect::<BTreeSet<_>>()
.into_iter()
.collect();
let mut total = 0u32;
for level in LEVELS {
let candidates = find_candidates(level, &dates, &today);
let mut generated = 0u32;
let mut skipped = 0u32;
for arg in &candidates {
let result = gather(level, store, arg)?;
let key = digest_node_key(level.name, &result.label);
if store.nodes.contains_key(&key) {
skipped += 1;
continue;
}
if result.inputs.is_empty() { continue; }
println!("[auto] Missing {} digest for {}", level.name, result.label);
generate_digest(store, level, &result.label, &result.inputs, &result.source_keys)?;
generated += 1;
}
println!("[auto] {}: {} generated, {} existed", level.name, generated, skipped);
total += generated;
}
if total == 0 {
println!("[auto] All digests up to date.");
} else {
println!("[auto] Generated {} total digests.", total);
}
Ok(())
}
// --- Digest link parsing ---
// Replaces digest-link-parser.py: parses ## Links sections from digest
// files and applies them to the memory graph.
/// A parsed link from a digest's Links section.
pub struct DigestLink {
pub source: String,
pub target: String,
pub reason: String,
pub file: String,
}
/// Normalize a raw link target to a poc-memory key.
fn normalize_link_key(raw: &str) -> String {
let key = raw.trim().trim_matches('`').trim();
if key.is_empty() { return String::new(); }
// Self-references
let lower = key.to_lowercase();
if lower.starts_with("this ") { return String::new(); }
let mut key = key.to_string();
// Strip .md suffix if present
if let Some(stripped) = key.strip_suffix(".md") {
key = stripped.to_string();
} else if key.contains('#') {
let (file, section) = key.split_once('#').unwrap();
if let Some(bare) = file.strip_suffix(".md") {
key = format!("{}-{}", bare, section);
}
}
// weekly/2026-W06 → weekly-2026-W06, etc.
if let Some(pos) = key.find('/') {
let prefix = &key[..pos];
if prefix == "daily" || prefix == "weekly" || prefix == "monthly" {
let rest = &key[pos + 1..];
key = format!("{}-{}", prefix, rest);
}
}
// Bare date → daily digest
let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
if date_re.is_match(&key) {
key = format!("daily-{}", key);
}
key
}
/// Parse the Links section from a digest node's content.
fn parse_digest_node_links(key: &str, content: &str) -> Vec<DigestLink> {
let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
let header_re = Regex::new(r"^##\s+Links").unwrap();
let mut links = Vec::new();
let mut in_links = false;
for line in content.lines() {
if header_re.is_match(line) {
in_links = true;
continue;
}
if in_links && line.starts_with("## ") {
in_links = false;
continue;
}
if !in_links { continue; }
if line.starts_with("###") || line.starts_with("**") { continue; }
if let Some(cap) = link_re.captures(line) {
let raw_source = cap[1].trim();
let raw_target = cap[2].trim();
let reason = cap.get(3).map(|m| m.as_str().to_string()).unwrap_or_default();
let mut source = normalize_link_key(raw_source);
let mut target = normalize_link_key(raw_target);
// Replace self-references with digest key
if source.is_empty() { source = key.to_string(); }
if target.is_empty() { target = key.to_string(); }
// Handle "this daily/weekly/monthly" in raw text
let raw_s_lower = raw_source.to_lowercase();
let raw_t_lower = raw_target.to_lowercase();
if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
|| raw_s_lower.contains("this monthly")
{
source = key.to_string();
}
if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
|| raw_t_lower.contains("this monthly")
{
target = key.to_string();
}
// Skip NEW: and self-links
if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
if source == target { continue; }
links.push(DigestLink { source, target, reason, file: key.to_string() });
}
}
links
}
/// Parse links from all digest nodes in the store.
pub fn parse_all_digest_links(store: &Store) -> Vec<DigestLink> {
let mut all_links = Vec::new();
let mut digest_keys: Vec<&String> = store.nodes.iter()
.filter(|(_, n)| matches!(n.node_type,
store::NodeType::EpisodicDaily
| store::NodeType::EpisodicWeekly
| store::NodeType::EpisodicMonthly))
.map(|(k, _)| k)
.collect();
digest_keys.sort();
for key in digest_keys {
if let Some(node) = store.nodes.get(key) {
all_links.extend(parse_digest_node_links(key, &node.content));
}
}
// Deduplicate by (source, target) pair
let mut seen = std::collections::HashSet::new();
all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
all_links
}
/// Apply parsed digest links to the store.
pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, usize, usize) {
let mut applied = 0usize;
let mut skipped = 0usize;
let mut fallbacks = 0usize;
for link in links {
// Try resolving both keys
let source = match store.resolve_key(&link.source) {
Ok(s) => s,
Err(_) => {
// Try stripping section anchor as fallback
if let Some(base) = link.source.split('#').next() {
match store.resolve_key(base) {
Ok(s) => { fallbacks += 1; s }
Err(_) => { skipped += 1; continue; }
}
} else {
skipped += 1; continue;
}
}
};
let target = match store.resolve_key(&link.target) {
Ok(t) => t,
Err(_) => {
if let Some(base) = link.target.split('#').next() {
match store.resolve_key(base) {
Ok(t) => { fallbacks += 1; t }
Err(_) => { skipped += 1; continue; }
}
} else {
skipped += 1; continue;
}
}
};
// Refine target to best-matching section if available
let source_content = store.nodes.get(&source)
.map(|n| n.content.as_str()).unwrap_or("");
let target = neuro::refine_target(store, source_content, &target);
if source == target { skipped += 1; continue; }
// Check if link already exists
let exists = store.relations.iter().any(|r|
r.source_key == source && r.target_key == target && !r.deleted
);
if exists { skipped += 1; continue; }
let source_uuid = match store.nodes.get(&source) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
let target_uuid = match store.nodes.get(&target) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
let rel = new_relation(
source_uuid, target_uuid,
store::RelationType::Link,
0.5,
&source, &target,
);
if store.add_relation(rel).is_ok() {
println!(" + {}{}", source, target);
applied += 1;
}
}
(applied, skipped, fallbacks)
}