// consciousness/src/digest.rs

// Episodic digest generation: daily, weekly, monthly, auto
//
// Three digest levels form a temporal hierarchy: daily digests summarize
// journal entries, weekly digests summarize dailies, monthly digests
// summarize weeklies. All three share the same generate/auto-detect
// pipeline, parameterized by DigestLevel.
use crate::llm::{call_sonnet, semantic_keys};
use crate::store::{self, Store, new_relation};
use crate::neuro;
use crate::util::memory_subdir;
use chrono::{Datelike, Duration, Local, NaiveDate};
use regex::Regex;
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::{Path, PathBuf};
// --- Digest level descriptors ---
/// Descriptor for one level of the digest hierarchy.
///
/// The three `const` instances below (DAILY/WEEKLY/MONTHLY) parameterize
/// the shared generate/auto-detect pipeline with level-specific strings,
/// timeouts, and function pointers.
struct DigestLevel {
    name: &'static str, // lowercase, used for filenames and display
    title: &'static str, // capitalized, used in prompts
    period: &'static str, // "Date", "Week", "Month"
    input_title: &'static str, // heading for the inputs section of the prompt
    timeout: u64, // LLM call timeout in seconds
    journal_input: bool, // true for daily (journal entries), false for child digests
    // Collect (label, inputs) for a given argument (a date or "YYYY-MM").
    gather: fn(&Store, &str) -> Result<(String, Vec<(String, String)>), String>,
    // From all journal dates plus today, compute which args still need digests.
    find_args: fn(&[String], &str) -> Vec<String>,
}
// Daily digests summarize the raw journal entries of a single date.
const DAILY: DigestLevel = DigestLevel {
    name: "daily",
    title: "Daily",
    period: "Date",
    input_title: "Journal entries",
    timeout: 300,
    journal_input: true,
    gather: gather_daily,
    find_args: find_daily_args,
};
// Weekly digests summarize the daily digests of one ISO week.
const WEEKLY: DigestLevel = DigestLevel {
    name: "weekly",
    title: "Weekly",
    period: "Week",
    input_title: "Daily digests",
    timeout: 300,
    journal_input: false,
    gather: gather_weekly,
    find_args: find_weekly_args,
};
// Monthly digests summarize the weekly digests overlapping one month.
// Longer timeout: monthly prompts aggregate the most input text.
const MONTHLY: DigestLevel = DigestLevel {
    name: "monthly",
    title: "Monthly",
    period: "Month",
    input_title: "Weekly digests",
    timeout: 600,
    journal_input: false,
    gather: gather_monthly,
    find_args: find_monthly_args,
};
// --- Input gathering ---
/// Load child digest files (`{prefix}-{label}.md`) from the episodic
/// directory. Labels whose file is missing or unreadable are silently
/// skipped (best-effort).
fn load_child_digests(prefix: &str, labels: &[String]) -> Result<Vec<(String, String)>, String> {
    let dir = memory_subdir("episodic")?;
    let collected = labels
        .iter()
        .filter_map(|label| {
            let candidate = dir.join(format!("{}-{}.md", prefix, label));
            fs::read_to_string(&candidate)
                .ok()
                .map(|body| (label.clone(), body))
        })
        .collect();
    Ok(collected)
}
/// Every journal date except today — today's journal may still be
/// receiving entries, so its daily digest is deferred.
fn find_daily_args(dates: &[String], today: &str) -> Vec<String> {
    let mut pending = Vec::with_capacity(dates.len());
    for d in dates {
        if d.as_str() != today {
            pending.push(d.clone());
        }
    }
    pending
}
/// One representative date per ISO week that has journal entries,
/// restricted to weeks fully in the past (their Sunday is strictly
/// before today, lexicographic on "YYYY-MM-DD").
fn find_weekly_args(dates: &[String], today: &str) -> Vec<String> {
    // First journal date seen per week label; BTreeMap keeps weeks ordered.
    let mut representative: BTreeMap<String, String> = BTreeMap::new();
    for d in dates {
        if let Ok((label, _)) = week_dates(d) {
            representative.entry(label).or_insert_with(|| d.clone());
        }
    }
    let mut due = Vec::new();
    for date in representative.into_values() {
        let complete = match week_dates(&date) {
            Ok((_, days)) => days.last().map_or(false, |sunday| sunday.as_str() < today),
            Err(_) => false,
        };
        if complete {
            due.push(date);
        }
    }
    due
}
/// Month labels ("YYYY-MM") for every month that has journal entries and
/// is strictly before the current month.
fn find_monthly_args(dates: &[String], _today: &str) -> Vec<String> {
    let now = Local::now();
    let current = (now.year(), now.month());
    // Distinct (year, month) pairs, sorted by the BTreeSet.
    let seen: BTreeSet<(i32, u32)> = dates
        .iter()
        .filter_map(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
        .map(|nd| (nd.year(), nd.month()))
        .collect();
    seen.into_iter()
        .filter(|&ym| ym < current)
        .map(|(year, month)| format!("{}-{:02}", year, month))
        .collect()
}
/// Collect journal entries for `date` from the store, labeled by the
/// part of the key after "journal.md#j-", sorted by label.
fn gather_daily(store: &Store, date: &str) -> Result<(String, Vec<(String, String)>), String> {
    // Keys look like "journal.md#j-YYYY-MM-DD..."; anchor on the date.
    let pattern = format!(r"^journal\.md#j-{}", regex::escape(date));
    let date_re = Regex::new(&pattern).unwrap();
    let mut entries: Vec<(String, String)> = Vec::new();
    for node in store.nodes.values() {
        if !date_re.is_match(&node.key) {
            continue;
        }
        let label = node.key.strip_prefix("journal.md#j-").unwrap_or(&node.key);
        entries.push((label.to_string(), node.content.clone()));
    }
    entries.sort_by(|a, b| a.0.cmp(&b.0));
    Ok((date.to_string(), entries))
}
/// Inputs for a weekly digest: the seven daily digests of the ISO week
/// containing `date` (missing dailies are skipped by the loader).
fn gather_weekly(_store: &Store, date: &str) -> Result<(String, Vec<(String, String)>), String> {
    let (label, day_list) = week_dates(date)?;
    load_child_digests("daily", &day_list).map(|inputs| (label, inputs))
}
/// Inputs for a monthly digest: the weekly digests overlapping the month.
/// `arg` is "YYYY-MM", or empty to mean the current month.
fn gather_monthly(_store: &Store, arg: &str) -> Result<(String, Vec<(String, String)>), String> {
    let (year, month) = if arg.is_empty() {
        let now = Local::now();
        (now.year(), now.month())
    } else {
        // Validate by parsing "{arg}-01" as a full date.
        let first_day = NaiveDate::parse_from_str(&format!("{}-01", arg), "%Y-%m-%d")
            .map_err(|e| format!("bad month '{}': {} (expected YYYY-MM)", arg, e))?;
        (first_day.year(), first_day.month())
    };
    let label = format!("{}-{:02}", year, month);
    let weeks = weeks_in_month(year, month);
    let inputs = load_child_digests("weekly", &weeks)?;
    Ok((label, inputs))
}
// --- Unified generator ---
/// Concatenate (label, content) inputs into one prompt section.
///
/// Journal entries (`daily == true`) become `### label` subsections;
/// child digests become `---`-separated `## label` sections.
fn format_inputs(inputs: &[(String, String)], daily: bool) -> String {
    inputs
        .iter()
        .map(|(label, content)| {
            if daily {
                format!("\n### {}\n\n{}\n", label, content)
            } else {
                format!("\n---\n## {}\n{}\n", label, content)
            }
        })
        .collect()
}
/// Generate a single digest for `label` from pre-gathered `inputs`.
///
/// Formats the inputs, fills the shared "digest" prompt template, calls
/// Sonnet with the level's timeout, writes the result to
/// `episodic/{level}-{label}.md`, then imports the file into the store
/// and saves. Empty `inputs` is a no-op success.
fn generate_digest(
    store: &mut Store,
    level: &DigestLevel,
    label: &str,
    inputs: &[(String, String)],
) -> Result<(), String> {
    println!("Generating {} digest for {}...", level.name, label);
    if inputs.is_empty() {
        println!(" No inputs found for {}", label);
        return Ok(());
    }
    println!(" {} inputs", inputs.len());
    // Existing semantic keys are offered to the LLM so its links land on
    // real nodes instead of invented ones.
    let keys = semantic_keys(store);
    let keys_text = keys.iter()
        .map(|k| format!(" - {}", k))
        .collect::<Vec<_>>()
        .join("\n");
    let content = format_inputs(inputs, level.journal_input);
    // Comma-separated input labels: tells the prompt what period this
    // digest covers.
    let covered = inputs.iter()
        .map(|(l, _)| l.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    let prompt = neuro::load_prompt("digest", &[
        ("{{LEVEL}}", level.title),
        ("{{PERIOD}}", level.period),
        ("{{INPUT_TITLE}}", level.input_title),
        ("{{LABEL}}", label),
        ("{{CONTENT}}", &content),
        ("{{COVERED}}", &covered),
        ("{{KEYS}}", &keys_text),
    ])?;
    // ~4 chars/token is a rough heuristic for English prose.
    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
    println!(" Calling Sonnet...");
    let digest = call_sonnet(&prompt, level.timeout)?;
    let output_path = memory_subdir("episodic")?
        .join(format!("{}-{}.md", level.name, label));
    fs::write(&output_path, &digest)
        .map_err(|e| format!("write {}: {}", output_path.display(), e))?;
    println!(" Written: {}", output_path.display());
    // Import immediately so the new digest's sections become store nodes.
    store.import_file(&output_path)?;
    store.save()?;
    println!(" Done: {} lines", digest.lines().count());
    Ok(())
}
// --- Public API ---
/// Generate the daily digest for `date` ("YYYY-MM-DD") from that day's
/// journal entries in the store.
pub fn generate_daily(store: &mut Store, date: &str) -> Result<(), String> {
    let (label, inputs) = (DAILY.gather)(store, date)?;
    generate_digest(store, &DAILY, &label, &inputs)
}
/// Generate the weekly digest for the ISO week containing `date`
/// ("YYYY-MM-DD"), summarizing that week's daily digest files.
pub fn generate_weekly(store: &mut Store, date: &str) -> Result<(), String> {
    let (label, inputs) = (WEEKLY.gather)(store, date)?;
    generate_digest(store, &WEEKLY, &label, &inputs)
}
/// Generate the monthly digest for `month_arg` ("YYYY-MM", or empty for
/// the current month), summarizing the overlapping weekly digests.
pub fn generate_monthly(store: &mut Store, month_arg: &str) -> Result<(), String> {
    let (label, inputs) = (MONTHLY.gather)(store, month_arg)?;
    generate_digest(store, &MONTHLY, &label, &inputs)
}
// --- Date helpers ---
/// Get the ISO week label ("YYYY-Www") and the 7 dates (Mon-Sun) of the
/// week containing `date` ("YYYY-MM-DD").
fn week_dates(date: &str) -> Result<(String, Vec<String>), String> {
    let parsed = NaiveDate::parse_from_str(date, "%Y-%m-%d")
        .map_err(|e| format!("bad date '{}': {}", date, e))?;
    let iso = parsed.iso_week();
    let label = format!("{}-W{:02}", iso.year(), iso.week());
    // Walk back to Monday, then enumerate the full week.
    let back = parsed.weekday().num_days_from_monday() as i64;
    let monday = parsed - Duration::days(back);
    let mut days = Vec::with_capacity(7);
    for offset in 0..7 {
        days.push((monday + Duration::days(offset)).format("%Y-%m-%d").to_string());
    }
    Ok((label, days))
}
/// ISO week labels ("YYYY-Www") for every week overlapping the given
/// month, in sorted order.
///
/// Iterates candidate days 1..=31; `from_ymd_opt` yields `None` for days
/// past the month's end, which handles 28/29/30/31-day months uniformly.
/// (The old loop's `date.month() != month` break was unreachable:
/// `from_ymd_opt(year, month, d)` can only produce a date in `month`.)
fn weeks_in_month(year: i32, month: u32) -> Vec<String> {
    let weeks: BTreeSet<String> = (1u32..=31)
        .filter_map(|d| NaiveDate::from_ymd_opt(year, month, d))
        .map(|date| {
            let iso = date.iso_week();
            format!("{}-W{:02}", iso.year(), iso.week())
        })
        .collect();
    weeks.into_iter().collect()
}
// --- Auto-detect and generate missing digests ---
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
/// Detect and generate all missing digests across every level.
///
/// Scans the store for journal entry dates, then for each level (daily
/// through monthly, so child digests exist before parents need them)
/// computes which digests are due, skips those whose output file already
/// exists, and generates the rest.
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
    let today = Local::now().format("%Y-%m-%d").to_string();
    let epi = memory_subdir("episodic")?;
    // Collect all dates with journal entries
    let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}").unwrap();
    let dates: Vec<String> = store.nodes.keys()
        .filter_map(|key| {
            // Keys look like "journal.md#j-YYYY-MM-DD..."; keep the
            // 10-char date. The regex match guarantees those 10 bytes
            // are ASCII, so the slice is char-boundary-safe.
            key.strip_prefix("journal.md#j-")
                .filter(|rest| rest.len() >= 10 && date_re.is_match(rest))
                .map(|rest| rest[..10].to_string())
        })
        .collect::<BTreeSet<_>>() // dedupe + sort
        .into_iter()
        .collect();
    let mut total = 0u32;
    for level in LEVELS {
        let args = (level.find_args)(&dates, &today);
        let mut generated = 0u32;
        let mut skipped = 0u32;
        for arg in &args {
            // Gather before the existence check: the output filename
            // depends on the derived label (e.g. a week label), not on
            // the raw arg.
            let (label, inputs) = (level.gather)(store, arg)?;
            if epi.join(format!("{}-{}.md", level.name, label)).exists() {
                skipped += 1;
                continue;
            }
            if inputs.is_empty() { continue; }
            println!("[auto] Missing {} digest for {}", level.name, label);
            generate_digest(store, level, &label, &inputs)?;
            generated += 1;
        }
        println!("[auto] {}: {} generated, {} existed", level.name, generated, skipped);
        total += generated;
    }
    if total == 0 {
        println!("[auto] All digests up to date.");
    } else {
        println!("[auto] Generated {} total digests.", total);
    }
    Ok(())
}
// --- Digest link parsing ---
// Replaces digest-link-parser.py: parses ## Links sections from digest
// files and applies them to the memory graph.
/// A parsed link from a digest's Links section.
pub struct DigestLink {
    pub source: String, // normalized memory key of the link source
    pub target: String, // normalized memory key of the link target
    pub reason: String, // optional parenthesized rationale, "" if absent
    pub file: String, // digest filename the link was parsed from
}
/// Normalize a raw link target from a digest's Links section into a
/// memory key (e.g. `daily-2026-02-04.md`, `notes.md#section`).
///
/// Returns an empty string for self-references ("this ..."); the caller
/// substitutes the digest's own key in that case.
fn normalize_link_key(raw: &str) -> String {
    let trimmed = raw.trim().trim_matches('`').trim();
    if trimmed.is_empty() {
        return String::new();
    }
    // Self-references are resolved by the caller.
    if trimmed.to_lowercase().starts_with("this ") {
        return String::new();
    }
    let mut key = trimmed.to_string();
    // weekly/2026-W06 → weekly-2026-W06, etc.
    if let Some(slash) = key.find('/') {
        let head = &key[..slash];
        if matches!(head, "daily" | "weekly" | "monthly") {
            key = format!("{}-{}", head, &key[slash + 1..]);
        }
    }
    // daily-2026-02-04 → daily-2026-02-04.md
    let digest_re = Regex::new(r"^(daily|weekly|monthly)-\d{4}").unwrap();
    if digest_re.is_match(&key) && !key.ends_with(".md") {
        key.push_str(".md");
    }
    // A bare date refers to that day's daily digest.
    let bare_date = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
    if bare_date.is_match(key.strip_suffix(".md").unwrap_or(&key)) {
        let date = key.strip_suffix(".md").unwrap_or(&key);
        key = format!("daily-{}.md", date);
    }
    // Ensure the file part carries a .md extension.
    if key.contains('#') {
        let (file, section) = key.split_once('#').unwrap();
        if !file.ends_with(".md") {
            key = format!("{}.md#{}", file, section);
        }
    } else if !key.ends_with(".md") && !key.contains('/') && !key.starts_with("NEW:") {
        key.push_str(".md");
    }
    key
}
/// Parse the Links section from a single digest file.
///
/// Scans lines between a `## Links` header and the next `## ` header.
/// Link lines look like `- A → B (reason)` (also ↔ / ←); the reason is
/// optional. Keys are normalized via `normalize_link_key`; empty
/// normalizations and "this daily/weekly/monthly" phrases resolve to
/// the digest's own key. `NEW:` placeholders and self-links are dropped.
fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
    // Unreadable file → no links (best-effort).
    let content = match fs::read_to_string(path) {
        Ok(c) => c,
        Err(_) => return Vec::new(),
    };
    // e.g. "daily-2026-02-04.md" → digest key "daily-2026-02-04.md"
    // built from the stem; used to substitute self-references.
    let digest_name = path.file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("");
    let digest_key = format!("{}.md", digest_name);
    let filename = path.file_name()
        .and_then(|s| s.to_str())
        .unwrap_or("")
        .to_string();
    // `- <source> →|↔|← <target> (optional reason)`
    let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
    let header_re = Regex::new(r"^##\s+Links").unwrap();
    let mut links = Vec::new();
    let mut in_links = false;
    for line in content.lines() {
        if header_re.is_match(line) {
            in_links = true;
            continue;
        }
        // Any other H2 header ends the Links section.
        if in_links && line.starts_with("## ") {
            in_links = false;
            continue;
        }
        if !in_links { continue; }
        // Skip sub-headers and bold annotations inside the section.
        if line.starts_with("###") || line.starts_with("**") { continue; }
        if let Some(cap) = link_re.captures(line) {
            let raw_source = cap[1].trim();
            let raw_target = cap[2].trim();
            let reason = cap.get(3).map(|m| m.as_str().to_string()).unwrap_or_default();
            let mut source = normalize_link_key(raw_source);
            let mut target = normalize_link_key(raw_target);
            // Replace self-references with digest key
            if source.is_empty() { source = digest_key.clone(); }
            if target.is_empty() { target = digest_key.clone(); }
            // Handle "this daily/weekly/monthly" in raw text
            let raw_s_lower = raw_source.to_lowercase();
            let raw_t_lower = raw_target.to_lowercase();
            if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
                || raw_s_lower.contains("this monthly")
            {
                source = digest_key.clone();
            }
            if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
                || raw_t_lower.contains("this monthly")
            {
                target = digest_key.clone();
            }
            // Skip NEW: and self-links
            if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
            if source == target { continue; }
            links.push(DigestLink { source, target, reason, file: filename.clone() });
        }
    }
    links
}
/// Parse links from all digest files in the episodic dir.
///
/// Files are processed in level order (daily, weekly, monthly) and by
/// name within each level; duplicate (source, target) pairs keep only
/// the first occurrence, so earlier files win attribution.
pub fn parse_all_digest_links() -> Result<Vec<DigestLink>, String> {
    let dir = memory_subdir("episodic")?;
    // Read the directory once instead of re-listing it for every level
    // prefix (the old loop called read_dir three times). Unreadable dir
    // → no links, preserving the old best-effort behavior.
    let mut paths: Vec<PathBuf> = match fs::read_dir(&dir) {
        Ok(entries) => entries.filter_map(|e| e.ok()).map(|e| e.path()).collect(),
        Err(_) => Vec::new(),
    };
    paths.sort();
    let mut all_links = Vec::new();
    for prefix in &["daily-", "weekly-", "monthly-"] {
        for path in &paths {
            let is_digest = path
                .file_name()
                .and_then(|n| n.to_str())
                .map(|n| n.starts_with(*prefix) && n.ends_with(".md"))
                .unwrap_or(false);
            if is_digest {
                all_links.extend(parse_digest_file_links(path));
            }
        }
    }
    // Deduplicate by (source, target) pair
    let mut seen = std::collections::HashSet::new();
    all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
    Ok(all_links)
}
/// Apply parsed digest links to the store.
///
/// For each link, resolves both keys (falling back to the file part when
/// a `#section` anchor fails to resolve), refines the target to its
/// best-matching section, and adds a Link relation unless an equivalent
/// live one exists. Returns `(applied, skipped, fallbacks)`.
pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, usize, usize) {
    let mut applied = 0usize;
    let mut skipped = 0usize;
    let mut fallbacks = 0usize;
    for link in links {
        // Resolve the source; on failure, retry with the section anchor
        // stripped. The previous code used `split('#').next()`, which
        // never returns None — its else-branch was dead, and keys
        // without '#' were pointlessly re-resolved with the same string.
        let source = match store.resolve_key(&link.source) {
            Ok(s) => s,
            Err(_) => match link.source.split_once('#') {
                Some((base, _)) => match store.resolve_key(base) {
                    Ok(s) => { fallbacks += 1; s }
                    Err(_) => { skipped += 1; continue; }
                },
                None => { skipped += 1; continue; }
            },
        };
        let target = match store.resolve_key(&link.target) {
            Ok(t) => t,
            Err(_) => match link.target.split_once('#') {
                Some((base, _)) => match store.resolve_key(base) {
                    Ok(t) => { fallbacks += 1; t }
                    Err(_) => { skipped += 1; continue; }
                },
                None => { skipped += 1; continue; }
            },
        };
        // Refine target to best-matching section if available
        let source_content = store.nodes.get(&source)
            .map(|n| n.content.as_str()).unwrap_or("");
        let target = neuro::refine_target(store, source_content, &target);
        if source == target { skipped += 1; continue; }
        // Check if link already exists
        let exists = store.relations.iter().any(|r|
            r.source_key == source && r.target_key == target && !r.deleted
        );
        if exists { skipped += 1; continue; }
        let source_uuid = match store.nodes.get(&source) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };
        let target_uuid = match store.nodes.get(&target) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };
        let rel = new_relation(
            source_uuid, target_uuid,
            store::RelationType::Link,
            0.5, // default weight for digest-derived links
            &source, &target,
        );
        if store.add_relation(rel).is_ok() {
            // Restored the " → " separator: the old format string
            // concatenated the two keys with nothing between them.
            println!(" + {} → {}", source, target);
            applied += 1;
        }
    }
    (applied, skipped, fallbacks)
}