poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""daily-digest.py — generate a daily episodic digest from journal entries.
|
|
|
|
|
|
|
|
|
|
Collects all journal entries for a given date, enriches with any agent
|
|
|
|
|
results, and sends to Sonnet for a thematic summary. The digest links
|
|
|
|
|
bidirectionally: up to session entries, down to semantic memory.
|
|
|
|
|
|
|
|
|
|
Usage:
|
|
|
|
|
daily-digest.py [DATE] # default: today
|
|
|
|
|
daily-digest.py 2026-02-28
|
|
|
|
|
|
|
|
|
|
Output:
|
|
|
|
|
~/.claude/memory/episodic/daily-YYYY-MM-DD.md
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
import subprocess
|
|
|
|
|
import sys
|
|
|
|
|
import time
|
|
|
|
|
from datetime import date, datetime
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
# Base directory layout for the memory system.
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"          # daily-YYYY-MM-DD.md digests
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"  # agent JSON in, links JSON out

# Ensure both output directories exist up front: main() writes digests into
# EPISODIC_DIR and extracted-link JSON into AGENT_RESULTS_DIR, so creating
# only EPISODIC_DIR here would crash the link-writing step on a fresh install.
EPISODIC_DIR.mkdir(parents=True, exist_ok=True)
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_journal_entries(target_date: str) -> list[dict]:
    """Get journal entries for a given date from the store."""
    # Imported lazily so the store backend only loads when actually used.
    from store_helpers import get_journal_entries_by_date as _entries_for

    return _entries_for(target_date)
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_agent_results(target_date: str) -> list[dict]:
    """Load any agent results from the target date."""
    # Agent result files carry a compact YYYYMMDD prefix in their name.
    prefix = target_date.replace("-", "")
    collected: list[dict] = []

    if not AGENT_RESULTS_DIR.exists():
        return collected

    for path in sorted(AGENT_RESULTS_DIR.glob(f"{prefix}*.json")):
        try:
            with open(path) as handle:
                payload = json.load(handle)
        except (json.JSONDecodeError, KeyError):
            # Malformed result file — skip it rather than abort the digest.
            continue
        agent_result = payload.get("agent_result", {})
        # Results that recorded an error are not useful enrichment.
        if "error" not in agent_result:
            collected.append(agent_result)

    return collected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_semantic_keys() -> list[str]:
    """Get semantic memory keys from the store."""
    # Lazy import keeps store access off the module-import path.
    from store_helpers import get_semantic_keys as _keys

    return _keys()
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_digest_prompt(target_date: str, entries: list[dict],
                        agent_results: list[dict],
                        semantic_keys: list[str]) -> str:
    """Build the prompt for Sonnet to generate the daily digest."""
    # Journal entries: one timestamp header per entry, optional source line.
    entry_parts = []
    for entry in entries:
        body = entry["text"].strip()
        entry_parts.append(f"\n### {entry['timestamp']}\n")
        if entry["source_ref"]:
            entry_parts.append(f"Source: {entry['source_ref']}\n")
        entry_parts.append(f"\n{body}\n")
    entries_text = "".join(entry_parts)

    # Pool proposed links and missed insights across all agent results.
    all_links = [link for r in agent_results for link in r.get("links", [])]
    all_insights = [ins for r in agent_results
                    for ins in r.get("missed_insights", [])]

    enrichment_parts = []
    if all_links:
        enrichment_parts.append("\n## Agent-proposed links\n")
        enrichment_parts.extend(
            f" - {link['target']}: {link.get('reason', '')}\n"
            for link in all_links
        )
    if all_insights:
        enrichment_parts.append("\n## Agent-spotted insights\n")
        enrichment_parts.extend(
            f" - [{ins.get('suggested_key', '?')}] {ins['text']}\n"
            for ins in all_insights
        )
    enrichment = "".join(enrichment_parts)

    # Cap the key list so the prompt stays within a sane token budget.
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])

    return f"""You are generating a daily episodic digest for ProofOfConcept (an AI).
Date: {target_date}

This digest serves as the temporal index — the answer to "what did I do on
{target_date}?" It should be:
1. Narrative, not a task log — what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory — each topic/concept mentioned
should reference existing memory nodes
3. Structured for traversal — someone reading this should be able to follow
any thread into deeper detail

## Output format

Write a markdown file with this structure:

```markdown
# Daily digest: {target_date}

## Summary
[2-3 sentence overview of the day — what was the arc?]

## Sessions
[For each session/entry, a paragraph summarizing what happened.
Include the original timestamp as a reference.]

## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** → `memory-key#section` — brief note on how it appeared today

## Links
[Explicit bidirectional links for the memory graph]
- semantic_key → this daily digest (this day involved X)
- this daily digest → semantic_key (X was active on this day)

## Temporal context
[What came before this day? What's coming next? Any multi-day arcs?]
```

Use ONLY keys from the semantic memory list below. If a concept doesn't have
a matching key, note it with "NEW:" prefix.

---

## Journal entries for {target_date}

{entries_text}

---

## Agent enrichment (automated analysis of these entries)

{enrichment if enrichment else "(no agent results yet)"}

---

## Semantic memory nodes (available link targets)

{keys_text}
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def call_sonnet(prompt: str) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper around the claude CLI.

    Returns the model's stdout (stripped), or a string with an "Error:"
    prefix on timeout/failure so callers can check without exceptions.
    """
    import tempfile

    # Single source of truth for the subprocess timeout (was previously
    # hard-coded both in run() and in the timeout message).
    timeout_s = 300

    # Strip CLAUDECODE so the child CLI does not think it is nested.
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    print(f" [debug] prompt: {len(prompt)} chars", flush=True)

    # Write prompt to temp file — avoids Python subprocess pipe issues
    # with claude CLI's TTY detection
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    print(f" [debug] prompt written to {prompt_file}", flush=True)
    # Uses the module-level `time` import; the previous duplicated
    # `import time as _time` statements were redundant.
    start = time.time()

    try:
        # The wrapper script lives alongside this script.
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")

        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout_s,
            env=env,
        )
        elapsed = time.time() - start
        print(f" [debug] completed in {elapsed:.1f}s, exit={result.returncode}", flush=True)
        if result.stderr.strip():
            print(f" [debug] stderr: {result.stderr[:500]}", flush=True)
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        print(f" [debug] TIMEOUT after {timeout_s}s", flush=True)
        return "Error: Sonnet call timed out"
    except Exception as e:
        # Best-effort boundary: surface the failure as a string; main()
        # checks the "Error:" prefix and exits nonzero.
        print(f" [debug] exception: {e}", flush=True)
        return f"Error: {e}"
    finally:
        # Always remove the temp prompt file, success or failure.
        os.unlink(prompt_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_links(digest_text: str) -> list[dict]:
    """Parse link proposals from the digest for the memory graph."""
    # Two shapes of proposal, checked in order per line:
    #   - `memory-key` → this daily digest
    #   - **Theme** → `memory-key`
    patterns = (
        re.compile(r'`([^`]+)`\s*→'),
        re.compile(r'→\s*`([^`]+)`'),
    )
    found: list[dict] = []
    for raw_line in digest_text.split("\n"):
        for pattern in patterns:
            hit = pattern.search(raw_line)
            if hit:
                found.append({"target": hit.group(1),
                              "line": raw_line.strip()})
    return found
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """CLI entry point: generate one daily digest.

    Usage: daily-digest.py [DATE]  — DATE in YYYY-MM-DD, default today.
    Exits 0 when there is nothing to digest, 1 when the Sonnet call fails.
    """
    # Default to today
    if len(sys.argv) > 1:
        target_date = sys.argv[1]
    else:
        target_date = date.today().isoformat()

    print(f"Generating daily digest for {target_date}...", flush=True)

    # Collect entries; no entries is a clean no-op, not an error.
    entries = parse_journal_entries(target_date)
    if not entries:
        print(f" No journal entries found for {target_date}")
        sys.exit(0)
    print(f" {len(entries)} journal entries", flush=True)

    # Collect agent results
    agent_results = load_agent_results(target_date)
    print(f" {len(agent_results)} agent results", flush=True)

    # Get semantic keys
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys", flush=True)

    # Build and send prompt
    prompt = build_digest_prompt(target_date, entries, agent_results, semantic_keys)
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")

    print(" Calling Sonnet...", flush=True)
    digest = call_sonnet(prompt)

    # call_sonnet reports failure in-band via an "Error:" prefix.
    if digest.startswith("Error:"):
        print(f" {digest}", file=sys.stderr)
        sys.exit(1)

    # Write digest file
    output_path = EPISODIC_DIR / f"daily-{target_date}.md"
    with open(output_path, "w") as f:
        f.write(digest)
    print(f" Written: {output_path}")

    # Extract links for the memory graph
    links = extract_links(digest)
    if links:
        # BUGFIX: only EPISODIC_DIR is created at import time, so make sure
        # the agent-results directory exists before writing the links file.
        AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
        # Save links for poc-memory to pick up
        links_path = AGENT_RESULTS_DIR / f"daily-{target_date}-links.json"
        with open(links_path, "w") as f:
            json.dump({
                "type": "daily-digest",
                "date": target_date,
                "digest_path": str(output_path),
                "links": links,
                "entry_timestamps": [e["timestamp"] for e in entries],
            }, f, indent=2)
        print(f" {len(links)} links extracted → {links_path}")

    # Summary
    line_count = len(digest.split("\n"))
    print(f" Done: {line_count} lines")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: build the digest for today or the date given in argv.
if __name__ == "__main__":
    main()
|