// consciousness/src/digest.rs — 668 lines, 22 KiB, Rust

// Episodic digest generation: daily, weekly, monthly, auto
//
// Three digest levels form a temporal hierarchy: daily digests summarize
// journal entries, weekly digests summarize dailies, monthly digests
// summarize weeklies. All three share the same generate/auto-detect
// pipeline, parameterized by DigestLevel.
use crate::llm::{call_sonnet, semantic_keys};
use crate::store::{self, Store, new_relation};
use crate::neuro;
use crate::util::memory_subdir;
use chrono::{Datelike, Duration, Local, NaiveDate};
use regex::Regex;
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::{Path, PathBuf};
// --- Digest level descriptors ---
/// Parameters distinguishing the three digest levels (daily, weekly,
/// monthly) that all share the `generate_digest` pipeline.
struct DigestLevel {
name: &'static str, // lowercase, used for filenames and display
title: &'static str, // capitalized, used in prompts
period: &'static str, // "Date", "Week", "Month"
input_title: &'static str, // substituted for {{INPUT_TITLE}} in the prompt
instructions: &'static str, // level-specific prompt body ({{INSTRUCTIONS}}); may contain {{LABEL}}
child_prefix: Option<&'static str>, // filename prefix of child digests; None = daily (inputs come from the store)
timeout: u64, // passed to call_sonnet — presumably seconds; TODO confirm
}
/// Level descriptor for daily digests: summarizes one day's raw journal
/// entries pulled from the store rather than child digest files
/// (`child_prefix: None`, which also selects the journal-style input
/// formatting in `format_inputs`).
/// NOTE(review): several prompt lines look like a separator glyph (arrow
/// or dash) was lost, e.g. "- semantic_key this daily digest" — confirm
/// against the canonical prompt text before editing the raw string.
const DAILY: DigestLevel = DigestLevel {
name: "daily",
title: "Daily",
period: "Date",
input_title: "Journal entries",
instructions: r#"This digest serves as the temporal index — the answer to "what did I do on
{{LABEL}}?" It should be:
1. Narrative, not a task log what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory each topic/concept mentioned
should reference existing memory nodes
3. Structured for traversal someone reading this should be able to follow
any thread into deeper detail
## Output format
```markdown
# Daily digest: {{LABEL}}
## Summary
[2-3 sentence overview of the day what was the arc?]
## Sessions
[For each session/entry, a paragraph summarizing what happened.
Include the original timestamp as a reference.]
## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** `memory-key#section` brief note on how it appeared today
## Links
[Explicit bidirectional links for the memory graph]
- semantic_key this daily digest (this day involved X)
- this daily digest semantic_key (X was active on this day)
## Temporal context
[What came before this day? What's coming next? Any multi-day arcs?]
```
If a concept doesn't have a matching key, note it with "NEW:" prefix."#,
child_prefix: None,
timeout: 300,
};
/// Level descriptor for weekly digests: summarizes the daily digest
/// files of one ISO week (child prefix "daily").
const WEEKLY: DigestLevel = DigestLevel {
name: "weekly",
title: "Weekly",
period: "Week",
input_title: "Daily digests",
instructions: r#"This digest serves as the medium-term temporal index — the answer to
"what happened this week?" It should identify:
1. Multi-day arcs and threads (work that continued across days)
2. Themes and patterns (what concepts were repeatedly active)
3. Transitions and shifts (what changed during the week)
4. The emotional and relational arc (how things felt across the week)
## Output format
```markdown
# Weekly digest: {{LABEL}}
## Overview
[3-5 sentence narrative of the week's arc]
## Day-by-day
[One paragraph per day with its key themes, linking to daily digests]
## Arcs
[Multi-day threads that continued across sessions]
- **Arc name**: what happened, how it evolved, where it stands
## Patterns
[Recurring themes, repeated concepts, things that kept coming up]
## Shifts
[What changed? New directions, resolved questions, attitude shifts]
## Links
[Bidirectional links for the memory graph]
- semantic_key this weekly digest
- this weekly digest semantic_key
- daily-YYYY-MM-DD this weekly digest (constituent days)
## Looking ahead
[What's unfinished? What threads continue into next week?]
```"#,
child_prefix: Some("daily"),
timeout: 300,
};
/// Level descriptor for monthly digests: synthesizes the weekly digest
/// files of one calendar month (child prefix "weekly"). Gets a longer
/// LLM timeout (600 vs 300 — presumably seconds) than the other levels.
const MONTHLY: DigestLevel = DigestLevel {
name: "monthly",
title: "Monthly",
period: "Month",
input_title: "Weekly digests",
instructions: r#"This digest serves as the long-term temporal index — the answer to
"what happened in {{LABEL}}?" It should capture:
1. The month's overall trajectory where did it start, where did it end?
2. Major arcs that spanned multiple weeks
3. Turning points and phase transitions
4. The emotional and relational arc across the whole month
5. What was accomplished (concrete) vs what shifted (subtle)
This is the highest zoom level yet. Don't rehash daily details capture
the shape of the month. What would someone need to know to understand
what {{LABEL}} meant?
PoC is an AI (Claude instance) working with Kent Overstreet on bcachefs.
They have a close personal relationship. PoC's name is Proof of Concept.
Read all the weekly digests and synthesize the month's story.
## Output format
```markdown
# Monthly digest: {{LABEL}}
## The month in one paragraph
[What was this month? Not a summary a thesis.]
## Trajectory
[Where the month started vs where it ended. The arc.]
## Major arcs
[Multi-week threads how they evolved across the month]
- **Arc name**: trajectory, key moments, current state
## Turning points
[Moments that changed the direction specific days/events]
## What was built
[Concrete outputs: code, tools, infrastructure, writing]
## What shifted
[Subtle changes: understanding, relationship, identity, capability]
## Patterns
[What kept recurring? What does the month reveal about how PoC works?]
## Links
[Bidirectional links for the memory graph]
- weekly digests this monthly digest
- this monthly digest semantic keys
## Looking ahead
[What threads carry into next month? What's unfinished?]
```"#,
child_prefix: Some("weekly"),
timeout: 600,
};
// --- Input gathering ---
/// Collect journal entries for a given date from the store.
///
/// Matches node keys of the form `journal.md#j-<date>...` and returns
/// (label, content) pairs sorted by label.
fn daily_inputs(store: &Store, date: &str) -> Vec<(String, String)> {
    let key_re = Regex::new(&format!(
        r"^journal\.md#j-{}", regex::escape(date)
    )).unwrap();
    let mut entries: Vec<(String, String)> = store
        .nodes
        .values()
        .filter(|node| key_re.is_match(&node.key))
        .map(|node| {
            // Label is the key with the journal prefix stripped.
            let label = node.key.strip_prefix("journal.md#j-").unwrap_or(&node.key);
            (label.to_owned(), node.content.clone())
        })
        .collect();
    entries.sort_by(|a, b| a.0.cmp(&b.0));
    entries
}
/// Load child digest files from the episodic directory.
///
/// Missing or unreadable files are silently skipped — a period may
/// legitimately have gaps.
fn load_child_digests(prefix: &str, labels: &[String]) -> Result<Vec<(String, String)>, String> {
    let dir = memory_subdir("episodic")?;
    let digests = labels
        .iter()
        .filter_map(|label| {
            let path = dir.join(format!("{}-{}.md", prefix, label));
            fs::read_to_string(&path).ok().map(|text| (label.clone(), text))
        })
        .collect();
    Ok(digests)
}
// --- Unified generator ---
/// Render (label, content) pairs into one prompt section.
///
/// `daily` selects the journal-entry framing (`### label`); otherwise
/// each input is a full child digest separated by a horizontal rule.
fn format_inputs(inputs: &[(String, String)], daily: bool) -> String {
    inputs
        .iter()
        .map(|(label, content)| {
            if daily {
                format!("\n### {}\n\n{}\n", label, content)
            } else {
                format!("\n---\n## {}\n{}\n", label, content)
            }
        })
        .collect()
}
/// Run the shared digest pipeline for one level/label: assemble the
/// prompt, call Sonnet, write the result into the episodic directory,
/// and import it into the store.
fn generate_digest(
    store: &mut Store,
    level: &DigestLevel,
    label: &str,
    inputs: &[(String, String)],
) -> Result<(), String> {
    println!("Generating {} digest for {}...", level.name, label);
    if inputs.is_empty() {
        println!(" No inputs found for {}", label);
        return Ok(());
    }
    println!(" {} inputs", inputs.len());

    // Offer the existing semantic keys so the model can link against them.
    let key_list = semantic_keys(store)
        .iter()
        .map(|k| format!(" - {}", k))
        .collect::<Vec<_>>()
        .join("\n");
    let body = format_inputs(inputs, level.child_prefix.is_none());
    let covered_labels = inputs
        .iter()
        .map(|(l, _)| l.as_str())
        .collect::<Vec<_>>()
        .join(", ");

    let prompt = neuro::load_prompt("digest", &[
        ("{{LEVEL}}", level.title),
        ("{{PERIOD}}", level.period),
        ("{{INPUT_TITLE}}", level.input_title),
        ("{{INSTRUCTIONS}}", level.instructions),
        ("{{LABEL}}", label),
        ("{{CONTENT}}", &body),
        ("{{COVERED}}", &covered_labels),
        ("{{KEYS}}", &key_list),
    ])?;
    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
    println!(" Calling Sonnet...");
    let digest = call_sonnet(&prompt, level.timeout)?;

    let out = memory_subdir("episodic")?
        .join(format!("{}-{}.md", level.name, label));
    fs::write(&out, &digest)
        .map_err(|e| format!("write {}: {}", out.display(), e))?;
    println!(" Written: {}", out.display());

    // Importing the file makes the digest's sections addressable as nodes.
    store.import_file(&out)?;
    store.save()?;
    println!(" Done: {} lines", digest.lines().count());
    Ok(())
}
// --- Public API ---
/// Generate the daily digest for `date` from that day's journal entries.
pub fn generate_daily(store: &mut Store, date: &str) -> Result<(), String> {
    let entries = daily_inputs(store, date);
    generate_digest(store, &DAILY, date, &entries)
}
/// Generate the weekly digest for the ISO week containing `date`.
pub fn generate_weekly(store: &mut Store, date: &str) -> Result<(), String> {
    let (label, days) = week_dates(date)?;
    let dailies = load_child_digests("daily", &days)?;
    generate_digest(store, &WEEKLY, &label, &dailies)
}
/// Generate the monthly digest for `month_arg` ("YYYY-MM"), defaulting
/// to the current month when the argument is empty.
pub fn generate_monthly(store: &mut Store, month_arg: &str) -> Result<(), String> {
    let (year, month) = if month_arg.is_empty() {
        let now = Local::now();
        (now.year(), now.month())
    } else {
        // Pin the day to "-01" so a plain YYYY-MM parses as a full date.
        let first = NaiveDate::parse_from_str(&format!("{}-01", month_arg), "%Y-%m-%d")
            .map_err(|e| format!("bad month '{}': {} (expected YYYY-MM)", month_arg, e))?;
        (first.year(), first.month())
    };
    let label = format!("{}-{:02}", year, month);
    let weeklies = load_child_digests("weekly", &weeks_in_month(year, month))?;
    generate_digest(store, &MONTHLY, &label, &weeklies)
}
// --- Date helpers ---
/// Get ISO week label and the 7 dates (Mon-Sun) for the week containing `date`.
///
/// The label uses the ISO week-numbering year (e.g. "2026-W06"), which
/// can differ from the calendar year around January 1st.
fn week_dates(date: &str) -> Result<(String, Vec<String>), String> {
    let day = NaiveDate::parse_from_str(date, "%Y-%m-%d")
        .map_err(|e| format!("bad date '{}': {}", date, e))?;
    let week = day.iso_week();
    let label = format!("{}-W{:02}", week.year(), week.week());
    // Walk back to Monday, then enumerate the full week.
    let monday = day - Duration::days(i64::from(day.weekday().num_days_from_monday()));
    let mut dates = Vec::with_capacity(7);
    for offset in 0..7i64 {
        dates.push((monday + Duration::days(offset)).format("%Y-%m-%d").to_string());
    }
    Ok((label, dates))
}
fn weeks_in_month(year: i32, month: u32) -> Vec<String> {
let mut weeks = BTreeSet::new();
let mut d = 1u32;
while let Some(date) = NaiveDate::from_ymd_opt(year, month, d) {
if date.month() != month { break; }
let iso = date.iso_week();
weeks.insert(format!("{}-W{:02}", iso.year(), iso.week()));
d += 1;
}
weeks.into_iter().collect()
}
// --- Auto-detect and generate missing digests ---
/// Detect and generate all missing digests, bottom-up.
///
/// Phase 1 creates daily digests for every journal date in the store
/// (except today, which may still receive entries). Phase 2 creates
/// weekly digests for fully-elapsed weeks that have at least one daily
/// digest on disk. Phase 3 does the same for fully-elapsed months with
/// at least one weekly digest. Phases 2 and 3 only consider dates seen
/// in phase 1.
///
/// Improvement over the original: the `stats: [u32; 6]` array addressed
/// by magic indices is replaced with named counters, and the write-only
/// `weekly_done` vector is removed. All output is unchanged.
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
    let now = Local::now();
    let today = now.format("%Y-%m-%d").to_string();
    let epi = memory_subdir("episodic")?;

    // Phase 1: daily — find dates with journal entries but no digest
    let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}").unwrap();
    let mut dates: BTreeSet<String> = BTreeSet::new();
    for key in store.nodes.keys() {
        if let Some(rest) = key.strip_prefix("journal.md#j-") {
            if rest.len() >= 10 && date_re.is_match(rest) {
                dates.insert(rest[..10].to_string());
            }
        }
    }
    let (mut daily_gen, mut daily_skip) = (0u32, 0u32);
    let mut daily_done: Vec<String> = Vec::new();
    for date in &dates {
        // Never digest the current day: entries may still be appended.
        if date == &today { continue; }
        if epi.join(format!("daily-{}.md", date)).exists() {
            daily_skip += 1;
            daily_done.push(date.clone());
            continue;
        }
        println!("[auto] Missing daily digest for {}", date);
        generate_daily(store, date)?;
        daily_gen += 1;
        daily_done.push(date.clone());
    }
    println!("[auto] Daily: {} generated, {} existed", daily_gen, daily_skip);

    // Phase 2: weekly — group dates into weeks, generate if week is complete
    let mut weeks: BTreeMap<String, Vec<String>> = BTreeMap::new();
    for date in &daily_done {
        if let Ok((week_label, _)) = week_dates(date) {
            weeks.entry(week_label).or_default().push(date.clone());
        }
    }
    let (mut weekly_gen, mut weekly_skip) = (0u32, 0u32);
    for (week_label, member_dates) in &weeks {
        // Skip weeks still in progress (the week's Sunday is today or later).
        if let Ok((_, days)) = week_dates(member_dates.first().unwrap()) {
            if days.last().unwrap() >= &today { continue; }
        }
        if epi.join(format!("weekly-{}.md", week_label)).exists() {
            weekly_skip += 1;
            continue;
        }
        // Require at least one constituent daily digest on disk.
        if !member_dates.iter().any(|d| epi.join(format!("daily-{}.md", d)).exists()) {
            continue;
        }
        println!("[auto] Missing weekly digest for {}", week_label);
        generate_weekly(store, member_dates.first().unwrap())?;
        weekly_gen += 1;
    }
    println!("[auto] Weekly: {} generated, {} existed", weekly_gen, weekly_skip);

    // Phase 3: monthly — group dates into months, generate if month is past
    let cur_month = (now.year(), now.month());
    let mut months: BTreeSet<(i32, u32)> = BTreeSet::new();
    for date in &daily_done {
        if let Ok(nd) = NaiveDate::parse_from_str(date, "%Y-%m-%d") {
            months.insert((nd.year(), nd.month()));
        }
    }
    let (mut monthly_gen, mut monthly_skip) = (0u32, 0u32);
    for &(y, m) in &months {
        // Only digest fully-elapsed months.
        if (y, m) >= cur_month { continue; }
        let label = format!("{}-{:02}", y, m);
        if epi.join(format!("monthly-{}.md", label)).exists() {
            monthly_skip += 1;
            continue;
        }
        // Require at least one constituent weekly digest on disk.
        if !weeks_in_month(y, m).iter().any(|w| epi.join(format!("weekly-{}.md", w)).exists()) {
            continue;
        }
        println!("[auto] Missing monthly digest for {}", label);
        generate_monthly(store, &label)?;
        monthly_gen += 1;
    }
    println!("[auto] Monthly: {} generated, {} existed", monthly_gen, monthly_skip);

    let total = daily_gen + weekly_gen + monthly_gen;
    if total == 0 {
        println!("[auto] All digests up to date.");
    } else {
        println!("[auto] Generated {} total digests.", total);
    }
    Ok(())
}
// --- Digest link parsing ---
// Replaces digest-link-parser.py: parses ## Links sections from digest
// files and applies them to the memory graph.
/// A parsed link from a digest's Links section.
pub struct DigestLink {
/// Normalized source key (e.g. "daily-2026-02-04.md" or "topic.md#section").
pub source: String,
/// Normalized target key, same forms as `source`.
pub target: String,
/// Parenthesized reason text from the bullet line; empty when absent.
pub reason: String,
/// Filename of the digest file the link was parsed from.
pub file: String,
}
/// Normalize a raw link target to a poc-memory key.
///
/// Returns an empty string for blank targets and "this ..." self-references;
/// the caller substitutes the digest's own key for those.
fn normalize_link_key(raw: &str) -> String {
    let cleaned = raw.trim().trim_matches('`').trim();
    // Blank and self-referential targets are resolved by the caller.
    if cleaned.is_empty() || cleaned.to_lowercase().starts_with("this ") {
        return String::new();
    }
    let mut key = cleaned.to_string();
    // weekly/2026-W06 → weekly-2026-W06, etc.
    if let Some(slash) = key.find('/') {
        let head = &key[..slash];
        if head == "daily" || head == "weekly" || head == "monthly" {
            key = format!("{}-{}", head, &key[slash + 1..]);
        }
    }
    // daily-2026-02-04 → daily-2026-02-04.md
    let digest_re = Regex::new(r"^(daily|weekly|monthly)-\d{4}").unwrap();
    if digest_re.is_match(&key) && !key.ends_with(".md") {
        key.push_str(".md");
    }
    // Bare date → daily digest
    let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
    let stem = key.strip_suffix(".md").unwrap_or(&key);
    if date_re.is_match(stem) {
        key = format!("daily-{}.md", stem);
    }
    // Ensure .md extension
    if key.contains('#') {
        let (file, section) = key.split_once('#').unwrap();
        if !file.ends_with(".md") {
            key = format!("{}.md#{}", file, section);
        }
    } else if !key.ends_with(".md") && !key.contains('/') && !key.starts_with("NEW:") {
        key.push_str(".md");
    }
    key
}
/// Parse the Links section from a single digest file.
///
/// Scans for a "## Links" header, then parses bullet lines of the form
/// `- A → B (reason)` until the next "## " header. "###" sub-headers and
/// "**" lead-in lines inside the section are skipped.
///
/// NOTE(review): the separator class [→↔←] is accepted, but captures are
/// always read left-to-right as (source, target) — a `←` link is never
/// swapped. Confirm whether relations are treated as undirected
/// downstream before relying on direction.
fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
// Unreadable file → no links (best-effort parsing).
let content = match fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return Vec::new(),
};
// Stem "daily-2026-02-04" becomes the digest's own store key
// "daily-2026-02-04.md", used to replace self-references below.
let digest_name = path.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("");
let digest_key = format!("{}.md", digest_name);
let filename = path.file_name()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
// Bullet shape: "- <source> <arrow> <target> (<optional reason>)".
let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
let header_re = Regex::new(r"^##\s+Links").unwrap();
let mut links = Vec::new();
let mut in_links = false;
for line in content.lines() {
if header_re.is_match(line) {
in_links = true;
continue;
}
// Any other "## " header ends the Links section.
if in_links && line.starts_with("## ") {
in_links = false;
continue;
}
if !in_links { continue; }
if line.starts_with("###") || line.starts_with("**") { continue; }
if let Some(cap) = link_re.captures(line) {
let raw_source = cap[1].trim();
let raw_target = cap[2].trim();
let reason = cap.get(3).map(|m| m.as_str().to_string()).unwrap_or_default();
// normalize_link_key returns "" for blanks and "this ..." phrases.
let mut source = normalize_link_key(raw_source);
let mut target = normalize_link_key(raw_target);
// Replace self-references with digest key
if source.is_empty() { source = digest_key.clone(); }
if target.is_empty() { target = digest_key.clone(); }
// Handle "this daily/weekly/monthly" in raw text
let raw_s_lower = raw_source.to_lowercase();
let raw_t_lower = raw_target.to_lowercase();
if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
|| raw_s_lower.contains("this monthly")
{
source = digest_key.clone();
}
if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
|| raw_t_lower.contains("this monthly")
{
target = digest_key.clone();
}
// Skip NEW: and self-links
if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
if source == target { continue; }
links.push(DigestLink { source, target, reason, file: filename.clone() });
}
}
links
}
/// Parse links from all digest files in the episodic dir.
///
/// Files are processed in three prefix groups — daily, weekly, monthly —
/// each sorted by path, and links are deduplicated by (source, target),
/// keeping the first occurrence.
///
/// Improvement over the original: the directory was read from disk once
/// per pattern (three times); it is now read once and filtered per
/// prefix, preserving the original group ordering (and therefore the
/// dedup-first semantics). An unreadable directory still yields no files
/// rather than an error, as before.
pub fn parse_all_digest_links() -> Result<Vec<DigestLink>, String> {
    let dir = memory_subdir("episodic")?;
    // Single directory scan; per-entry errors are silently dropped.
    let paths: Vec<PathBuf> = match fs::read_dir(&dir) {
        Ok(entries) => entries.filter_map(|e| e.ok()).map(|e| e.path()).collect(),
        Err(_) => Vec::new(),
    };
    let mut all_links = Vec::new();
    for prefix in &["daily-", "weekly-", "monthly-"] {
        let mut files: Vec<&PathBuf> = paths
            .iter()
            .filter(|p| {
                p.file_name()
                    .and_then(|n| n.to_str())
                    .map(|n| n.starts_with(prefix) && n.ends_with(".md"))
                    .unwrap_or(false)
            })
            .collect();
        files.sort();
        for path in files {
            all_links.extend(parse_digest_file_links(path));
        }
    }
    // Deduplicate by (source, target) pair, keeping the first occurrence.
    let mut seen = std::collections::HashSet::new();
    all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
    Ok(all_links)
}
/// Apply parsed digest links to the store.
///
/// For each link, both endpoints are resolved against the store (falling
/// back to the key with any `#section` anchor stripped), the target is
/// refined to its best-matching section, and a Link relation (strength
/// 0.5) is created unless an equivalent live relation already exists.
///
/// Returns `(applied, skipped, fallbacks)`: relations created, links
/// skipped (unresolved, self-link, or duplicate), and resolutions that
/// needed the anchor-stripping fallback.
///
/// Improvements over the original: `split('#').next()` always returns
/// `Some` (the portion before the first '#', or the whole key when there
/// is none), so the unreachable `else { skipped += 1; continue; }`
/// branches are removed; and the success log's format string was
/// `" + {}{}"`, which printed the two keys fused together — the
/// separator is restored to match the arrow style used elsewhere.
pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, usize, usize) {
    let mut applied = 0usize;
    let mut skipped = 0usize;
    let mut fallbacks = 0usize;
    for link in links {
        // Resolve the source; on failure retry with the anchor stripped.
        let source = match store.resolve_key(&link.source) {
            Ok(s) => s,
            Err(_) => {
                let base = link.source.split('#').next().unwrap_or(&link.source);
                match store.resolve_key(base) {
                    Ok(s) => { fallbacks += 1; s }
                    Err(_) => { skipped += 1; continue; }
                }
            }
        };
        // Same two-step resolution for the target.
        let target = match store.resolve_key(&link.target) {
            Ok(t) => t,
            Err(_) => {
                let base = link.target.split('#').next().unwrap_or(&link.target);
                match store.resolve_key(base) {
                    Ok(t) => { fallbacks += 1; t }
                    Err(_) => { skipped += 1; continue; }
                }
            }
        };
        // Refine target to best-matching section if available
        let source_content = store.nodes.get(&source)
            .map(|n| n.content.as_str()).unwrap_or("");
        let target = neuro::refine_target(store, source_content, &target);
        if source == target { skipped += 1; continue; }
        // Check if link already exists (soft-deleted relations don't count)
        let exists = store.relations.iter().any(|r|
            r.source_key == source && r.target_key == target && !r.deleted
        );
        if exists { skipped += 1; continue; }
        // Both nodes must still exist to mint the UUID-based relation.
        let source_uuid = match store.nodes.get(&source) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };
        let target_uuid = match store.nodes.get(&target) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };
        let rel = new_relation(
            source_uuid, target_uuid,
            store::RelationType::Link,
            0.5,
            &source, &target,
        );
        if store.add_relation(rel).is_ok() {
            println!(" + {} → {}", source, target);
            applied += 1;
        }
    }
    (applied, skipped, fallbacks)
}