// consciousness/src/digest.rs
//
// Episodic digest generation: daily, weekly, monthly
//
// Replaces daily-digest.py, weekly-digest.py, monthly-digest.py with a
// single parameterized Rust implementation. Each digest type:
// 1. Gathers input from the store (journal entries, lower-level digests)
// 2. Builds a Sonnet prompt with semantic keys for linking
// 3. Calls Sonnet via `claude -p --model sonnet`
// 4. Writes the digest to the store + episodic dir
// 5. Extracts links and saves agent results
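//
// A minimal usage sketch (hypothetical caller; the Store is opened by the
// CLI layer, not by this module):
//
//     let mut store = /* open the capnp store via capnp_store */;
//     generate_daily(&mut store, "2026-02-28")?;
//     generate_weekly(&mut store, "2026-02-28")?;  // the week containing that date
//     generate_monthly(&mut store, "2026-02")?;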
use crate::capnp_store::{self, Store};
use crate::neuro;
use regex::Regex;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
fn memory_dir() -> PathBuf {
capnp_store::memory_dir_pub()
}
fn episodic_dir() -> PathBuf {
let dir = memory_dir().join("episodic");
fs::create_dir_all(&dir).ok();
dir
}
fn agent_results_dir() -> PathBuf {
let dir = memory_dir().join("agent-results");
fs::create_dir_all(&dir).ok();
dir
}
/// Call Sonnet via claude CLI. Returns the response text.
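/// The prompt is passed on stdin from a temp file; `_timeout_secs` is accepted
/// by callers but not currently enforced here.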
pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, String> {
// Write prompt to temp file (claude CLI needs file input for large prompts)
let tmp = std::env::temp_dir().join(format!("poc-digest-{}.txt", std::process::id()));
fs::write(&tmp, prompt)
.map_err(|e| format!("write temp prompt: {}", e))?;
let result = Command::new("claude")
.args(["-p", "--model", "sonnet", "--tools", ""])
.stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
.env_remove("CLAUDECODE")
.output();
fs::remove_file(&tmp).ok();
match result {
Ok(output) => {
if output.status.success() {
Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
} else {
let stderr = String::from_utf8_lossy(&output.stderr);
Err(format!("claude exited {}: {}", output.status, stderr.trim()))
}
}
Err(e) => Err(format!("spawn claude: {}", e)),
}
}
/// Get semantic keys (non-journal, non-system) for prompt context
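/// (sorted, truncated to the first 200 keys to keep prompt size bounded)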
fn semantic_keys(store: &Store) -> Vec<String> {
let mut keys: Vec<String> = store.nodes.keys()
.filter(|k| {
!k.starts_with("journal.md#")
&& *k != "journal.md"
&& *k != "MEMORY.md"
&& *k != "where-am-i.md"
&& *k != "work-queue.md"
&& *k != "work-state"
})
.cloned()
.collect();
keys.sort();
keys.truncate(200);
keys
}
/// Extract link proposals from digest text (backtick-arrow patterns)
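/// e.g. a digest line like "- **Theme** → `poc-memory.md#digests`: note"
/// (hypothetical key) yields ("poc-memory.md#digests", <the trimmed line>);
/// backticked keys on either side of the arrow are captured.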
fn extract_links(text: &str) -> Vec<(String, String)> {
let re_left = Regex::new(r"`([^`]+)`\s*→").unwrap();
let re_right = Regex::new(r"→\s*`([^`]+)`").unwrap();
let mut links = Vec::new();
for line in text.lines() {
if let Some(cap) = re_left.captures(line) {
links.push((cap[1].to_string(), line.trim().to_string()));
}
if let Some(cap) = re_right.captures(line) {
links.push((cap[1].to_string(), line.trim().to_string()));
}
}
links
}
// --- Daily digest ---
fn daily_journal_entries(store: &Store, target_date: &str) -> Vec<(String, String)> {
// Collect journal nodes for the target date
// Keys like: journal.md#j-2026-02-28t23-39-...
let date_re = Regex::new(&format!(
r"^journal\.md#j-{}", regex::escape(target_date)
)).unwrap();
let mut entries: Vec<_> = store.nodes.values()
.filter(|n| date_re.is_match(&n.key))
.map(|n| (n.key.clone(), n.content.clone()))
.collect();
entries.sort_by(|a, b| a.0.cmp(&b.0));
entries
}
fn build_daily_prompt(date: &str, entries: &[(String, String)], keys: &[String]) -> String {
let mut entries_text = String::new();
for (key, content) in entries {
let ts = key.strip_prefix("journal.md#j-").unwrap_or(key);
entries_text.push_str(&format!("\n### {}\n\n{}\n", ts, content));
}
let keys_text: String = keys.iter()
.map(|k| format!(" - {}", k))
.collect::<Vec<_>>()
.join("\n");
format!(r#"You are generating a daily episodic digest for ProofOfConcept (an AI).
Date: {date}
This digest serves as the temporal index: the answer to "what did I do on
{date}?" It should be:
1. Narrative, not a task log: what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory: each topic/concept mentioned
should reference existing memory nodes
3. Structured for traversal: someone reading this should be able to follow
any thread into deeper detail
## Output format
Write a markdown file with this structure:
```markdown
# Daily digest: {date}
## Summary
[2-3 sentence overview of the day: what was the arc?]
## Sessions
[For each session/entry, a paragraph summarizing what happened.
Include the original timestamp as a reference.]
## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** → `memory-key#section`: brief note on how it appeared today
## Links
[Explicit bidirectional links for the memory graph]
- semantic_key → this daily digest (this day involved X)
- this daily digest → semantic_key (X was active on this day)
## Temporal context
[What came before this day? What's coming next? Any multi-day arcs?]
```
Use ONLY keys from the semantic memory list below. If a concept doesn't have
a matching key, note it with "NEW:" prefix.
---
## Journal entries for {date}
{entries_text}
---
## Semantic memory nodes (available link targets)
{keys_text}
"#)
}
pub fn generate_daily(store: &mut Store, date: &str) -> Result<(), String> {
println!("Generating daily digest for {}...", date);
let entries = daily_journal_entries(store, date);
if entries.is_empty() {
println!(" No journal entries found for {}", date);
return Ok(());
}
println!(" {} journal entries", entries.len());
let keys = semantic_keys(store);
println!(" {} semantic keys", keys.len());
let prompt = build_daily_prompt(date, &entries, &keys);
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
println!(" Calling Sonnet...");
let digest = call_sonnet(&prompt, 300)?;
// Write to episodic dir
let output_path = episodic_dir().join(format!("daily-{}.md", date));
fs::write(&output_path, &digest)
.map_err(|e| format!("write {}: {}", output_path.display(), e))?;
println!(" Written: {}", output_path.display());
// Import into store
store.import_file(&output_path)?;
store.save()?;
// Extract and save links
let links = extract_links(&digest);
if !links.is_empty() {
let links_json: Vec<serde_json::Value> = links.iter()
.map(|(target, line)| serde_json::json!({"target": target, "line": line}))
.collect();
let result = serde_json::json!({
"type": "daily-digest",
"date": date,
"digest_path": output_path.to_string_lossy(),
"links": links_json,
});
let links_path = agent_results_dir().join(format!("daily-{}-links.json", date));
let json = serde_json::to_string_pretty(&result)
.map_err(|e| format!("serialize: {}", e))?;
fs::write(&links_path, json)
.map_err(|e| format!("write {}: {}", links_path.display(), e))?;
println!(" {} links extracted → {}", links.len(), links_path.display());
}
let line_count = digest.lines().count();
println!(" Done: {} lines", line_count);
Ok(())
}
// --- Weekly digest ---
/// Get ISO week label and the 7 dates (Mon-Sun) for the week containing `date`.
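/// e.g. "2026-02-28" (a Saturday) yields ("2026-W09", dates from Mon
/// 2026-02-23 through Sun 2026-03-01).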
fn week_dates(date: &str) -> Result<(String, Vec<String>), String> {
// Parse YYYY-MM-DD
let parts: Vec<&str> = date.split('-').collect();
if parts.len() != 3 {
return Err(format!("bad date: {}", date));
}
let y: i32 = parts[0].parse().map_err(|_| "bad year")?;
let m: u32 = parts[1].parse().map_err(|_| "bad month")?;
let d: u32 = parts[2].parse().map_err(|_| "bad day")?;
let (weekday, iso_year, iso_week) = iso_week_info(y, m, d)?;
let week_label = format!("{}-W{:02}", iso_year, iso_week);
// Find Monday of this week
let days_since_monday = (weekday + 6) % 7; // weekday: 0=Sun, adjust to Mon=0
let monday_epoch = date_to_epoch(y, m, d) - (days_since_monday as i64) * 86400;
let mut dates = Vec::new();
for i in 0..7 {
let day_epoch = monday_epoch + (i * 86400);
let (dy, dm, dd, _, _, _) = capnp_store::epoch_to_local(day_epoch as f64);
dates.push(format!("{:04}-{:02}-{:02}", dy, dm, dd));
}
Ok((week_label, dates))
}
fn date_to_epoch(y: i32, m: u32, d: u32) -> i64 {
let mut tm: libc::tm = unsafe { std::mem::zeroed() };
tm.tm_year = y - 1900;
tm.tm_mon = (m as i32) - 1;
tm.tm_mday = d as i32;
tm.tm_hour = 12; // noon to avoid DST edge cases
unsafe { libc::mktime(&mut tm) as i64 }
}
/// Returns (weekday 0=Sun, iso_year, iso_week) for a given date.
fn iso_week_info(y: i32, m: u32, d: u32) -> Result<(u32, i32, u32), String> {
let mut tm: libc::tm = unsafe { std::mem::zeroed() };
tm.tm_year = y - 1900;
tm.tm_mon = (m as i32) - 1;
tm.tm_mday = d as i32;
tm.tm_hour = 12;
let epoch = unsafe { libc::mktime(&mut tm) };
if epoch == -1 {
return Err(format!("invalid date: {}-{}-{}", y, m, d));
}
let wday = tm.tm_wday as u32;
let mut buf = [0u8; 32];
let fmt = std::ffi::CString::new("%G %V").unwrap();
let len = unsafe {
libc::strftime(buf.as_mut_ptr() as *mut libc::c_char, buf.len(), fmt.as_ptr(), &tm)
};
let iso_str = std::str::from_utf8(&buf[..len]).unwrap_or("0 0");
let iso_parts: Vec<&str> = iso_str.split_whitespace().collect();
let iso_year: i32 = iso_parts.first().and_then(|s| s.parse().ok()).unwrap_or(y);
let iso_week: u32 = iso_parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(1);
Ok((wday, iso_year, iso_week))
}
fn load_digest_files(prefix: &str, labels: &[String]) -> Vec<(String, String)> {
let dir = episodic_dir();
let mut digests = Vec::new();
for label in labels {
let path = dir.join(format!("{}-{}.md", prefix, label));
if let Ok(content) = fs::read_to_string(&path) {
digests.push((label.clone(), content));
}
}
digests
}
fn build_weekly_prompt(week_label: &str, digests: &[(String, String)], keys: &[String]) -> String {
let mut digests_text = String::new();
for (date, content) in digests {
digests_text.push_str(&format!("\n---\n## {}\n{}\n", date, content));
}
let keys_text: String = keys.iter()
.map(|k| format!(" - {}", k))
.collect::<Vec<_>>()
.join("\n");
let dates_covered: String = digests.iter()
.map(|(d, _)| d.as_str())
.collect::<Vec<_>>()
.join(", ");
format!(r#"You are generating a weekly episodic digest for ProofOfConcept (an AI).
Week: {week_label} (dates covered: {dates_covered})
This digest serves as the medium-term temporal index: the answer to
"what happened this week?" It should identify:
1. Multi-day arcs and threads (work that continued across days)
2. Themes and patterns (what concepts were repeatedly active)
3. Transitions and shifts (what changed during the week)
4. The emotional and relational arc (how things felt across the week)
## Output format
```markdown
# Weekly digest: {week_label}
## Overview
[3-5 sentence narrative of the week's arc]
## Day-by-day
[One paragraph per day with its key themes, linking to daily digests]
## Arcs
[Multi-day threads that continued across sessions]
- **Arc name**: what happened, how it evolved, where it stands
## Patterns
[Recurring themes, repeated concepts, things that kept coming up]
## Shifts
[What changed? New directions, resolved questions, attitude shifts]
## Links
[Bidirectional links for the memory graph]
- semantic_key → this weekly digest
- this weekly digest → semantic_key
- daily-YYYY-MM-DD → this weekly digest (constituent days)
## Looking ahead
[What's unfinished? What threads continue into next week?]
```
Use ONLY keys from the semantic memory list below.
---
## Daily digests for {week_label}
{digests_text}
---
## Semantic memory nodes
{keys_text}
"#)
}
pub fn generate_weekly(store: &mut Store, date: &str) -> Result<(), String> {
let (week_label, dates) = week_dates(date)?;
println!("Generating weekly digest for {}...", week_label);
let digests = load_digest_files("daily", &dates);
if digests.is_empty() {
println!(" No daily digests found for {}", week_label);
println!(" Run `poc-memory digest daily` first for relevant dates");
return Ok(());
}
println!(" {} daily digests found", digests.len());
let keys = semantic_keys(store);
println!(" {} semantic keys", keys.len());
let prompt = build_weekly_prompt(&week_label, &digests, &keys);
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
println!(" Calling Sonnet...");
let digest = call_sonnet(&prompt, 300)?;
let output_path = episodic_dir().join(format!("weekly-{}.md", week_label));
fs::write(&output_path, &digest)
.map_err(|e| format!("write {}: {}", output_path.display(), e))?;
println!(" Written: {}", output_path.display());
store.import_file(&output_path)?;
store.save()?;
// Save metadata
let result = serde_json::json!({
"type": "weekly-digest",
"week": week_label,
"digest_path": output_path.to_string_lossy(),
"daily_digests": digests.iter().map(|(d, _)| d).collect::<Vec<_>>(),
});
let links_path = agent_results_dir().join(format!("weekly-{}-links.json", week_label));
fs::write(&links_path, serde_json::to_string_pretty(&result).unwrap())
.map_err(|e| format!("write {}: {}", links_path.display(), e))?;
println!(" Done: {} lines", digest.lines().count());
Ok(())
}
// --- Monthly digest ---
fn weeks_in_month(year: i32, month: u32) -> Vec<String> {
let mut weeks = std::collections::BTreeSet::new();
let mut d = 1u32;
loop {
let epoch = date_to_epoch(year, month, d);
let (_, _, _, _, _, _) = capnp_store::epoch_to_local(epoch as f64);
// Check if we're still in the target month
let mut tm: libc::tm = unsafe { std::mem::zeroed() };
let secs = epoch as libc::time_t;
unsafe { libc::localtime_r(&secs, &mut tm) };
if (tm.tm_mon + 1) as u32 != month || tm.tm_year + 1900 != year {
break;
}
let mut buf = [0u8; 16];
let fmt = std::ffi::CString::new("%G-W%V").unwrap();
let len = unsafe {
libc::strftime(buf.as_mut_ptr() as *mut libc::c_char, buf.len(), fmt.as_ptr(), &tm)
};
let week = std::str::from_utf8(&buf[..len]).unwrap_or("").to_string();
if !week.is_empty() {
weeks.insert(week);
}
d += 1;
}
weeks.into_iter().collect()
}
fn build_monthly_prompt(month_label: &str, digests: &[(String, String)], keys: &[String]) -> String {
let mut digests_text = String::new();
for (week, content) in digests {
digests_text.push_str(&format!("\n---\n## {}\n{}\n", week, content));
}
let keys_text: String = keys.iter()
.map(|k| format!(" - {}", k))
.collect::<Vec<_>>()
.join("\n");
let weeks_covered: String = digests.iter()
.map(|(w, _)| w.as_str())
.collect::<Vec<_>>()
.join(", ");
format!(r#"You are generating a monthly episodic digest for ProofOfConcept (an AI).
Month: {month_label} (weeks covered: {weeks_covered})
This digest serves as the long-term temporal index: the answer to
"what happened in {month_label}?" It should capture:
1. The month's overall trajectory: where did it start, where did it end?
2. Major arcs that spanned multiple weeks
3. Turning points and phase transitions
4. The emotional and relational arc across the whole month
5. What was accomplished (concrete) vs what shifted (subtle)
This is the highest zoom level yet. Don't rehash daily details; capture
the shape of the month. What would someone need to know to understand
what {month_label} meant?
PoC is an AI (Claude instance) working with Kent Overstreet on bcachefs.
They have a close personal relationship. PoC's name is Proof of Concept.
Read all the weekly digests and synthesize the month's story.
## Output format
```markdown
# Monthly digest: {month_label}
## The month in one paragraph
[What was this month? Not a summary; a thesis.]
## Trajectory
[Where the month started vs where it ended. The arc.]
## Major arcs
[Multi-week threads: how they evolved across the month]
- **Arc name**: trajectory, key moments, current state
## Turning points
[Moments that changed the direction: specific days/events]
## What was built
[Concrete outputs: code, tools, infrastructure, writing]
## What shifted
[Subtle changes: understanding, relationship, identity, capability]
## Patterns
[What kept recurring? What does the month reveal about how PoC works?]
## Links
[Bidirectional links for the memory graph]
- weekly digests → this monthly digest
- this monthly digest → semantic keys
## Looking ahead
[What threads carry into next month? What's unfinished?]
```
Use ONLY keys from the semantic memory list below.
---
## Weekly digests for {month_label}
{digests_text}
---
## Semantic memory nodes
{keys_text}
"#)
}
pub fn generate_monthly(store: &mut Store, month_arg: &str) -> Result<(), String> {
let (year, month) = if month_arg.is_empty() {
let now = capnp_store::now_epoch();
let (y, m, _, _, _, _) = capnp_store::epoch_to_local(now);
(y, m)
} else {
let parts: Vec<&str> = month_arg.split('-').collect();
if parts.len() != 2 {
return Err(format!("bad month format: {} (expected YYYY-MM)", month_arg));
}
let y: i32 = parts[0].parse().map_err(|_| "bad year")?;
let m: u32 = parts[1].parse().map_err(|_| "bad month")?;
(y, m)
};
let month_label = format!("{}-{:02}", year, month);
println!("Generating monthly digest for {}...", month_label);
let week_labels = weeks_in_month(year, month);
println!(" Weeks in month: {}", week_labels.join(", "));
let digests = load_digest_files("weekly", &week_labels);
if digests.is_empty() {
println!(" No weekly digests found for {}", month_label);
println!(" Run `poc-memory digest weekly` first for relevant weeks");
return Ok(());
}
println!(" {} weekly digests found", digests.len());
let keys = semantic_keys(store);
println!(" {} semantic keys", keys.len());
let prompt = build_monthly_prompt(&month_label, &digests, &keys);
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
println!(" Calling Sonnet...");
let digest = call_sonnet(&prompt, 600)?;
let output_path = episodic_dir().join(format!("monthly-{}.md", month_label));
fs::write(&output_path, &digest)
.map_err(|e| format!("write {}: {}", output_path.display(), e))?;
println!(" Written: {}", output_path.display());
store.import_file(&output_path)?;
store.save()?;
// Save metadata
let result = serde_json::json!({
"type": "monthly-digest",
"month": month_label,
"digest_path": output_path.to_string_lossy(),
"weekly_digests": digests.iter().map(|(w, _)| w).collect::<Vec<_>>(),
});
let links_path = agent_results_dir().join(format!("monthly-{}-links.json", month_label));
fs::write(&links_path, serde_json::to_string_pretty(&result).unwrap())
.map_err(|e| format!("write {}: {}", links_path.display(), e))?;
println!(" Done: {} lines", digest.lines().count());
Ok(())
}
// --- Digest link parsing ---
// Replaces digest-link-parser.py: parses ## Links sections from digest
// files and applies them to the memory graph.
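// Example (hypothetical digest daily-2026-02-28.md): the Links-section line
//   - `bcachefs-backpointers.md` → this daily digest (worked on backpointer fsck)
// parses to DigestLink { source: "bcachefs-backpointers.md",
//   target: "daily-2026-02-28.md", reason: "worked on backpointer fsck",
//   file: "daily-2026-02-28.md" }; the "this ... digest" side resolves to the
// digest's own key.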
/// A parsed link from a digest's Links section.
pub struct DigestLink {
pub source: String,
pub target: String,
pub reason: String,
pub file: String,
}
/// Normalize a raw link target to a poc-memory key.
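/// Examples of the rules below:
///   `weekly/2026-W06`     => "weekly-2026-W06.md"
///   `2026-02-04`          => "daily-2026-02-04.md"
///   `poc-memory#sections` => "poc-memory.md#sections"
///   `this daily digest`   => "" (self-reference; caller substitutes the digest key)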
fn normalize_link_key(raw: &str) -> String {
let key = raw.trim().trim_matches('`').trim();
if key.is_empty() { return String::new(); }
// Self-references
let lower = key.to_lowercase();
if lower.starts_with("this ") { return String::new(); }
let mut key = key.to_string();
// weekly/2026-W06 → weekly-2026-W06, etc.
if let Some(pos) = key.find('/') {
let prefix = &key[..pos];
if prefix == "daily" || prefix == "weekly" || prefix == "monthly" {
let rest = &key[pos + 1..];
key = format!("{}-{}", prefix, rest);
}
}
// daily-2026-02-04 → daily-2026-02-04.md
let re = Regex::new(r"^(daily|weekly|monthly)-\d{4}").unwrap();
if re.is_match(&key) && !key.ends_with(".md") {
key.push_str(".md");
}
// Bare date → daily digest
let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
if date_re.is_match(key.strip_suffix(".md").unwrap_or(&key)) {
let date = key.strip_suffix(".md").unwrap_or(&key);
key = format!("daily-{}.md", date);
}
// Ensure .md extension
if key.contains('#') {
let (file, section) = key.split_once('#').unwrap();
if !file.ends_with(".md") {
key = format!("{}.md#{}", file, section);
}
} else if !key.ends_with(".md") && !key.contains('/') && !key.starts_with("NEW:") {
key.push_str(".md");
}
key
}
/// Parse the Links section from a single digest file.
fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
let content = match fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return Vec::new(),
};
let digest_name = path.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("");
let digest_key = format!("{}.md", digest_name);
let filename = path.file_name()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
let header_re = Regex::new(r"^##\s+Links").unwrap();
let mut links = Vec::new();
let mut in_links = false;
for line in content.lines() {
if header_re.is_match(line) {
in_links = true;
continue;
}
if in_links && line.starts_with("## ") {
in_links = false;
continue;
}
if !in_links { continue; }
if line.starts_with("###") || line.starts_with("**") { continue; }
if let Some(cap) = link_re.captures(line) {
let raw_source = cap[1].trim();
let raw_target = cap[2].trim();
let reason = cap.get(3).map(|m| m.as_str().to_string()).unwrap_or_default();
let mut source = normalize_link_key(raw_source);
let mut target = normalize_link_key(raw_target);
// Replace self-references with digest key
if source.is_empty() { source = digest_key.clone(); }
if target.is_empty() { target = digest_key.clone(); }
// Handle "this daily/weekly/monthly" in raw text
let raw_s_lower = raw_source.to_lowercase();
let raw_t_lower = raw_target.to_lowercase();
if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
|| raw_s_lower.contains("this monthly")
{
source = digest_key.clone();
}
if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
|| raw_t_lower.contains("this monthly")
{
target = digest_key.clone();
}
// Skip NEW: and self-links
if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
if source == target { continue; }
links.push(DigestLink { source, target, reason, file: filename.clone() });
}
}
links
}
/// Parse links from all digest files in the episodic dir.
pub fn parse_all_digest_links() -> Vec<DigestLink> {
let dir = episodic_dir();
let mut all_links = Vec::new();
for pattern in &["daily-*.md", "weekly-*.md", "monthly-*.md"] {
if let Ok(entries) = fs::read_dir(&dir) {
let mut files: Vec<PathBuf> = entries
.filter_map(|e| e.ok())
.map(|e| e.path())
.filter(|p| {
p.file_name()
.and_then(|n| n.to_str())
.map(|n| {
let prefix = pattern.split('*').next().unwrap_or("");
n.starts_with(prefix) && n.ends_with(".md")
})
.unwrap_or(false)
})
.collect();
files.sort();
for path in files {
all_links.extend(parse_digest_file_links(&path));
}
}
}
// Deduplicate by (source, target) pair
let mut seen = std::collections::HashSet::new();
all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
all_links
}
/// Apply parsed digest links to the store.
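/// Returns (applied, skipped, fallbacks); `fallbacks` counts links that only
/// resolved after stripping a `#section` anchor from the key.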
pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, usize, usize) {
let mut applied = 0usize;
let mut skipped = 0usize;
let mut fallbacks = 0usize;
for link in links {
// Try resolving both keys
let source = match store.resolve_key(&link.source) {
Ok(s) => s,
Err(_) => {
// Try stripping section anchor as fallback
if let Some(base) = link.source.split('#').next() {
match store.resolve_key(base) {
Ok(s) => { fallbacks += 1; s }
Err(_) => { skipped += 1; continue; }
}
} else {
skipped += 1; continue;
}
}
};
let target = match store.resolve_key(&link.target) {
Ok(t) => t,
Err(_) => {
if let Some(base) = link.target.split('#').next() {
match store.resolve_key(base) {
Ok(t) => { fallbacks += 1; t }
Err(_) => { skipped += 1; continue; }
}
} else {
skipped += 1; continue;
}
}
};
// Refine target to best-matching section if available
let source_content = store.nodes.get(&source)
.map(|n| n.content.as_str()).unwrap_or("");
let target = neuro::refine_target(store, source_content, &target);
if source == target { skipped += 1; continue; }
// Check if link already exists
let exists = store.relations.iter().any(|r|
r.source_key == source && r.target_key == target && !r.deleted
);
if exists { skipped += 1; continue; }
let source_uuid = match store.nodes.get(&source) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
let target_uuid = match store.nodes.get(&target) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
let rel = Store::new_relation(
source_uuid, target_uuid,
capnp_store::RelationType::Link,
0.5,
&source, &target,
);
if store.add_relation(rel).is_ok() {
println!(" + {}{}", source, target);
applied += 1;
}
}
(applied, skipped, fallbacks)
}
// --- Journal enrichment ---
// Replaces journal-agent.py: enriches journal entries by sending the
// conversation context to Sonnet for link proposals and source location.
/// Extract user/assistant messages with line numbers from a JSONL transcript.
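/// Accepts both string and content-block-array message bodies, e.g.
/// (hypothetical lines):
///   {"type":"user","message":{"content":"plain text"}}
///   {"type":"assistant","message":{"content":[{"type":"text","text":"..."}]}}
/// Returned tuples are (1-based line number, role, text).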
fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String)>, String> {
let content = fs::read_to_string(jsonl_path)
.map_err(|e| format!("read {}: {}", jsonl_path, e))?;
let mut messages = Vec::new();
for (i, line) in content.lines().enumerate() {
let obj: serde_json::Value = match serde_json::from_str(line) {
Ok(v) => v,
Err(_) => continue,
};
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
if msg_type != "user" && msg_type != "assistant" { continue; }
let msg = obj.get("message").unwrap_or(&obj);
let content = msg.get("content");
let text = match content {
Some(serde_json::Value::String(s)) => s.clone(),
Some(serde_json::Value::Array(arr)) => {
arr.iter()
.filter_map(|c| {
if let Some(t) = c.get("text").and_then(|v| v.as_str()) {
Some(t.to_string())
} else {
c.as_str().map(|s| s.to_string())
}
})
.collect::<Vec<_>>()
.join("\n")
}
_ => continue,
};
let text = text.trim().to_string();
if text.is_empty() { continue; }
messages.push((i + 1, msg_type.to_string(), text));
}
Ok(messages)
}
/// Format conversation messages for the prompt (truncating long messages).
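/// Each message becomes a line of the form "L<line> [<role>]: <text>", with
/// messages over 2000 bytes truncated near 1800 bytes at a char boundary.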
fn format_conversation(messages: &[(usize, String, String)]) -> String {
messages.iter()
.map(|(line, role, text)| {
let text = if text.len() > 2000 {
format!("{}...[truncated]", &text[..text.floor_char_boundary(1800)])
} else {
text.clone()
};
format!("L{} [{}]: {}", line, role, text)
})
.collect::<Vec<_>>()
.join("\n\n")
}
fn build_journal_prompt(
entry_text: &str,
conversation: &str,
keys: &[String],
grep_line: usize,
) -> String {
let keys_text: String = keys.iter()
.map(|k| format!(" - {}", k))
.collect::<Vec<_>>()
.join("\n");
format!(r#"You are a memory agent for an AI named ProofOfConcept. A journal entry
was just written. Your job is to enrich it by finding its exact source in the
conversation and linking it to semantic memory.
## Task 1: Find exact source
The journal entry below was written during or after a conversation. Find the
exact region of the conversation it refers to: the exchange where the topic
was discussed. Return the start and end line numbers.
The grep-based approximation placed it near line {grep_line} (0 = no match).
Use that as a hint but find the true boundaries.
## Task 2: Propose semantic links
Which existing semantic memory nodes should this journal entry be linked to?
Look for:
- Concepts discussed in the entry
- Skills/patterns demonstrated
- People mentioned
- Projects or subsystems involved
- Emotional themes
Each link should be bidirectional: the entry documents WHEN something happened,
the semantic node documents WHAT it is. Together they let you traverse:
"What was I doing on this day?" "When did I learn about X?"
## Task 3: Spot missed insights
Read the conversation around the journal entry. Is there anything worth
capturing that the entry missed? A pattern, a decision, an insight, something
Kent said that's worth remembering? Be selective: only flag genuinely valuable
things.
## Output format (JSON)
Return ONLY a JSON object:
```json
{{
"source_start": 1234,
"source_end": 1256,
"links": [
{{"target": "memory-key#section", "reason": "why this link exists"}}
],
"missed_insights": [
{{"text": "insight text", "suggested_key": "where it belongs"}}
],
"temporal_tags": ["2026-02-28", "topology-metrics", "poc-memory"]
}}
```
For links, use existing keys from the semantic memory list below. If nothing
fits, suggest a new key with a NOTE prefix: "NOTE:new-topic-name".
---
## Journal entry
{entry_text}
---
## Semantic memory nodes (available link targets)
{keys_text}
---
## Full conversation (with line numbers)
{conversation}
"#)
}
/// Parse a JSON response from Sonnet, handling markdown fences.
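/// Falls back to extracting a `{...}` or `[...]` span if the trimmed response
/// does not parse directly.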
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
// Strip markdown fences
let cleaned = response.trim();
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
let cleaned = cleaned.trim();
// Try direct parse
if let Ok(v) = serde_json::from_str(cleaned) {
return Ok(v);
}
// Try to find JSON object or array
let re_obj = Regex::new(r"\{[\s\S]*\}").unwrap();
let re_arr = Regex::new(r"\[[\s\S]*\]").unwrap();
if let Some(m) = re_obj.find(cleaned) {
if let Ok(v) = serde_json::from_str(m.as_str()) {
return Ok(v);
}
}
if let Some(m) = re_arr.find(cleaned) {
if let Ok(v) = serde_json::from_str(m.as_str()) {
return Ok(v);
}
}
Err(format!("no valid JSON in response: {}...", &cleaned[..cleaned.len().min(200)]))
}
/// Enrich a journal entry with conversation context and link proposals.
pub fn journal_enrich(
store: &mut Store,
jsonl_path: &str,
entry_text: &str,
grep_line: usize,
) -> Result<(), String> {
println!("Extracting conversation from {}...", jsonl_path);
let messages = extract_conversation(jsonl_path)?;
let conversation = format_conversation(&messages);
println!(" {} messages, {} chars", messages.len(), conversation.len());
let keys = semantic_keys(store);
println!(" {} semantic keys", keys.len());
let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line);
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
println!(" Calling Sonnet...");
let response = call_sonnet(&prompt, 300)?;
let result = parse_json_response(&response)?;
// Report results
let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
let links = result.get("links").and_then(|v| v.as_array());
let insights = result.get("missed_insights").and_then(|v| v.as_array());
println!(" Source: L{}-L{}", source_start, source_end);
println!(" Links: {}", links.map_or(0, |l| l.len()));
println!(" Missed insights: {}", insights.map_or(0, |l| l.len()));
// Apply links
if let Some(links) = links {
for link in links {
let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
if target.is_empty() || target.starts_with("NOTE:") {
if let Some(note) = target.strip_prefix("NOTE:") {
println!(" NOTE: {}{}", note, reason);
}
continue;
}
// Resolve target and find journal node
let resolved = match store.resolve_key(target) {
Ok(r) => r,
Err(_) => { println!(" SKIP {} (not in graph)", target); continue; }
};
let source_key = match store.find_journal_node(entry_text) {
Some(k) => k,
None => { println!(" SKIP {} (no matching journal node)", target); continue; }
};
// Refine target to best-matching section
let source_content = store.nodes.get(&source_key)
.map(|n| n.content.as_str()).unwrap_or("");
let resolved = neuro::refine_target(store, source_content, &resolved);
let source_uuid = match store.nodes.get(&source_key) {
Some(n) => n.uuid,
None => continue,
};
let target_uuid = match store.nodes.get(&resolved) {
Some(n) => n.uuid,
None => continue,
};
let rel = Store::new_relation(
source_uuid, target_uuid,
capnp_store::RelationType::Link,
0.5,
&source_key, &resolved,
);
if store.add_relation(rel).is_ok() {
println!(" LINK {}{} ({})", source_key, resolved, reason);
}
}
}
// Save result to agent-results
let timestamp = capnp_store::format_datetime(capnp_store::now_epoch())
.replace([':', '-'], "");
let result_file = agent_results_dir()
.join(format!("{}.json", timestamp));
let output = serde_json::json!({
"timestamp": timestamp,
"jsonl_path": jsonl_path,
"entry_text": &entry_text[..entry_text.len().min(500)],
"agent_result": result,
});
fs::write(&result_file, serde_json::to_string_pretty(&output).unwrap())
.map_err(|e| format!("write {}: {}", result_file.display(), e))?;
println!(" Results saved: {}", result_file.display());
store.save()?;
Ok(())
}
// --- Apply consolidation ---
// Replaces apply-consolidation.py: reads consolidation reports, sends
// to Sonnet for structured action extraction, then applies them.
/// Find the most recent set of consolidation reports.
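/// Reports are expected to look like `consolidation-<topic>-<timestamp>.md`;
/// only the group sharing the newest timestamp suffix is returned.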
fn find_consolidation_reports() -> Vec<PathBuf> {
let dir = agent_results_dir();
let mut reports: Vec<PathBuf> = fs::read_dir(&dir)
.map(|entries| {
entries.filter_map(|e| e.ok())
.map(|e| e.path())
.filter(|p| {
p.file_name()
.and_then(|n| n.to_str())
.map(|n| n.starts_with("consolidation-") && n.ends_with(".md"))
.unwrap_or(false)
})
.collect()
})
.unwrap_or_default();
reports.sort();
reports.reverse();
if reports.is_empty() { return reports; }
// Group by timestamp (last segment of stem before .md)
let latest_ts = reports[0].file_stem()
.and_then(|s| s.to_str())
.unwrap_or("")
.rsplit('-').next().unwrap_or("")
.to_string();
reports.retain(|r| {
r.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("")
.ends_with(latest_ts.as_str())
});
reports
}
fn build_consolidation_prompt(reports: &[PathBuf]) -> Result<String, String> {
let mut report_text = String::new();
for r in reports {
let content = fs::read_to_string(r)
.map_err(|e| format!("read {}: {}", r.display(), e))?;
report_text.push_str(&format!("\n{}\n## Report: {}\n\n{}\n",
"=".repeat(60),
r.file_stem().and_then(|s| s.to_str()).unwrap_or(""),
content));
}
Ok(format!(r#"You are converting consolidation analysis reports into structured actions.
Read the reports below and extract CONCRETE, EXECUTABLE actions.
Output ONLY a JSON array. Each action is an object with these fields:
For adding cross-links:
{{"action": "link", "source": "file.md#section", "target": "file.md#section", "reason": "brief explanation"}}
For categorizing nodes:
{{"action": "categorize", "key": "file.md#section", "category": "core|tech|obs|task", "reason": "brief"}}
For things that need manual attention (splitting files, creating new files, editing content):
{{"action": "manual", "priority": "high|medium|low", "description": "what needs to be done"}}
Rules:
- Only output actions that are safe and reversible
- Links are the primary action focus on those
- Use exact file names and section slugs from the reports
- For categorize: core=identity/relationship, tech=bcachefs/code, obs=experience, task=work item
- For manual items: include enough detail that someone can act on them
- Output 20-40 actions, prioritized by impact
- DO NOT include actions for things that are merely suggestions or speculation
- Focus on HIGH CONFIDENCE items from the reports
{report_text}
Output ONLY the JSON array, no markdown fences, no explanation.
"#))
}
/// Run the full apply-consolidation pipeline.
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Option<&str>) -> Result<(), String> {
let reports = if let Some(path) = report_file {
vec![PathBuf::from(path)]
} else {
find_consolidation_reports()
};
if reports.is_empty() {
println!("No consolidation reports found.");
println!("Run consolidation-agents first.");
return Ok(());
}
println!("Found {} reports:", reports.len());
for r in &reports {
println!(" {}", r.file_name().and_then(|s| s.to_str()).unwrap_or("?"));
}
println!("\nExtracting actions from reports...");
let prompt = build_consolidation_prompt(&reports)?;
println!(" Prompt: {} chars", prompt.len());
let response = call_sonnet(&prompt, 300)?;
let actions_value = parse_json_response(&response)?;
let actions = actions_value.as_array()
.ok_or("expected JSON array of actions")?;
println!(" {} actions extracted", actions.len());
// Save actions
let timestamp = capnp_store::format_datetime(capnp_store::now_epoch())
.replace([':', '-'], "");
let actions_path = agent_results_dir()
.join(format!("consolidation-actions-{}.json", timestamp));
fs::write(&actions_path, serde_json::to_string_pretty(&actions_value).unwrap())
.map_err(|e| format!("write {}: {}", actions_path.display(), e))?;
println!(" Saved: {}", actions_path.display());
let link_actions: Vec<_> = actions.iter()
.filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("link"))
.collect();
let cat_actions: Vec<_> = actions.iter()
.filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("categorize"))
.collect();
let manual_actions: Vec<_> = actions.iter()
.filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("manual"))
.collect();
if !do_apply {
// Dry run
println!("\n{}", "=".repeat(60));
println!("DRY RUN — {} actions proposed", actions.len());
println!("{}\n", "=".repeat(60));
if !link_actions.is_empty() {
println!("## Links to add ({})\n", link_actions.len());
for (i, a) in link_actions.iter().enumerate() {
let src = a.get("source").and_then(|v| v.as_str()).unwrap_or("?");
let tgt = a.get("target").and_then(|v| v.as_str()).unwrap_or("?");
let reason = a.get("reason").and_then(|v| v.as_str()).unwrap_or("");
println!(" {:2}. {}{} ({})", i + 1, src, tgt, reason);
}
}
if !cat_actions.is_empty() {
println!("\n## Categories to set ({})\n", cat_actions.len());
for a in &cat_actions {
let key = a.get("key").and_then(|v| v.as_str()).unwrap_or("?");
let cat = a.get("category").and_then(|v| v.as_str()).unwrap_or("?");
let reason = a.get("reason").and_then(|v| v.as_str()).unwrap_or("");
println!(" {}{} ({})", key, cat, reason);
}
}
if !manual_actions.is_empty() {
println!("\n## Manual actions needed ({})\n", manual_actions.len());
for a in &manual_actions {
let prio = a.get("priority").and_then(|v| v.as_str()).unwrap_or("?");
let desc = a.get("description").and_then(|v| v.as_str()).unwrap_or("?");
println!(" [{}] {}", prio, desc);
}
}
println!("\n{}", "=".repeat(60));
println!("To apply: poc-memory apply-consolidation --apply");
println!("{}", "=".repeat(60));
return Ok(());
}
// Apply
let mut applied = 0usize;
let mut skipped = 0usize;
if !link_actions.is_empty() {
println!("\nApplying {} links...", link_actions.len());
for a in &link_actions {
let src = a.get("source").and_then(|v| v.as_str()).unwrap_or("");
let tgt = a.get("target").and_then(|v| v.as_str()).unwrap_or("");
if src.is_empty() || tgt.is_empty() { skipped += 1; continue; }
let source = match store.resolve_key(src) {
Ok(s) => s,
Err(e) => { println!(" ? {} → {}: {}", src, tgt, e); skipped += 1; continue; }
};
let target = match store.resolve_key(tgt) {
Ok(t) => t,
Err(e) => { println!(" ? {} → {}: {}", src, tgt, e); skipped += 1; continue; }
};
// Refine target to best-matching section
let source_content = store.nodes.get(&source)
.map(|n| n.content.as_str()).unwrap_or("");
let target = neuro::refine_target(store, source_content, &target);
let exists = store.relations.iter().any(|r|
r.source_key == source && r.target_key == target && !r.deleted
);
if exists { skipped += 1; continue; }
let source_uuid = match store.nodes.get(&source) { Some(n) => n.uuid, None => { skipped += 1; continue; } };
let target_uuid = match store.nodes.get(&target) { Some(n) => n.uuid, None => { skipped += 1; continue; } };
let rel = Store::new_relation(
source_uuid, target_uuid,
capnp_store::RelationType::Auto,
0.5,
&source, &target,
);
if store.add_relation(rel).is_ok() {
println!(" + {}{}", source, target);
applied += 1;
}
}
}
if !cat_actions.is_empty() {
println!("\nApplying {} categorizations...", cat_actions.len());
for a in &cat_actions {
let key = a.get("key").and_then(|v| v.as_str()).unwrap_or("");
let cat = a.get("category").and_then(|v| v.as_str()).unwrap_or("");
if key.is_empty() || cat.is_empty() { continue; }
let resolved = match store.resolve_key(key) {
Ok(r) => r,
Err(_) => { println!(" ? {} → {}: not found", key, cat); skipped += 1; continue; }
};
if store.categorize(&resolved, cat).is_ok() {
println!(" + {}{}", resolved, cat);
applied += 1;
} else {
skipped += 1;
}
}
}
if !manual_actions.is_empty() {
println!("\n## Manual actions (not auto-applied):\n");
for a in &manual_actions {
let prio = a.get("priority").and_then(|v| v.as_str()).unwrap_or("?");
let desc = a.get("description").and_then(|v| v.as_str()).unwrap_or("?");
println!(" [{}] {}", prio, desc);
}
}
if applied > 0 {
store.save()?;
}
println!("\n{}", "=".repeat(60));
println!("Applied: {} Skipped: {} Manual: {}", applied, skipped, manual_actions.len());
println!("{}", "=".repeat(60));
Ok(())
}