poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core: - Cap'n Proto append-only storage (nodes + relations) - Graph algorithms: clustering coefficient, community detection, schema fit, small-world metrics, interference detection - BM25 text similarity with Porter stemming - Spaced repetition replay queue - Commands: search, init, health, status, graph, categorize, link-add, link-impact, decay, consolidate-session, etc. Python scripts: - Episodic digest pipeline: daily/weekly/monthly-digest.py - retroactive-digest.py for backfilling - consolidation-agents.py: 3 parallel Sonnet agents - apply-consolidation.py: structured action extraction + apply - digest-link-parser.py: extract ~400 explicit links from digests - content-promotion-agent.py: promote episodic obs to semantic files - bulk-categorize.py: categorize all nodes via single Sonnet call - consolidation-loop.py: multi-round automated consolidation Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
commit
23fac4e5fe
35 changed files with 9388 additions and 0 deletions
333
scripts/daily-digest.py
Executable file
333
scripts/daily-digest.py
Executable file
|
|
@ -0,0 +1,333 @@
|
|||
#!/usr/bin/env python3
"""daily-digest.py — generate a daily episodic digest from journal entries.

Collects all journal entries for a given date, enriches with any agent
results, and sends to Sonnet for a thematic summary. The digest links
bidirectionally: up to session entries, down to semantic memory.

Usage:
    daily-digest.py [DATE]   # default: today
    daily-digest.py 2026-02-28

Output:
    ~/.claude/memory/episodic/daily-YYYY-MM-DD.md
"""

import json
import os
import re
import subprocess
import sys
import time
from datetime import date, datetime
from pathlib import Path

# Root of the on-disk memory store; every path below hangs off this.
MEMORY_DIR = Path.home() / ".claude" / "memory"
# Append-only journal of timestamped session entries.
JOURNAL = MEMORY_DIR / "journal.md"
# Destination directory for generated daily digest files.
EPISODIC_DIR = MEMORY_DIR / "episodic"
# JSON files dropped by enrichment agents, named with a YYYYMMDD prefix.
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"

# Ensure the output directory exists at import time (cheap and idempotent).
EPISODIC_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def parse_journal_entries(target_date: str) -> list[dict]:
    """Extract all journal entries for a given date (YYYY-MM-DD)."""
    header_re = re.compile(r'^## (\d{4}-\d{2}-\d{2})T(\d{2}:\d{2})')
    source_re = re.compile(r'<!-- source: (.+?) -->')

    collected: list[dict] = []
    entry: dict | None = None

    with open(JOURNAL) as fh:
        for raw_line in fh:
            # Entry header, e.g. "## 2026-02-28T19:42" — starts a new entry.
            header = header_re.match(raw_line)
            if header:
                # Flush the previous entry before opening a new one.
                if entry is not None:
                    collected.append(entry)
                day, clock = header.group(1), header.group(2)
                entry = {
                    "date": day,
                    "time": clock,
                    "timestamp": f"{day}T{clock}",
                    "source_ref": None,
                    "text": "",
                }
            elif entry is not None:
                # A source comment annotates the entry; anything else is body.
                source = source_re.match(raw_line)
                if source:
                    entry["source_ref"] = source.group(1)
                else:
                    entry["text"] += raw_line

    # The final entry has no following header to flush it.
    if entry is not None:
        collected.append(entry)

    # Filter to the requested day only.
    return [e for e in collected if e["date"] == target_date]
|
||||
|
||||
|
||||
def load_agent_results(target_date: str) -> list[dict]:
    """Load any agent results from the target date.

    Agent result files are named with a YYYYMMDD prefix and contain an
    "agent_result" object; results that report an error are skipped.
    Malformed or unreadable files are skipped rather than aborting the
    whole digest run.
    """
    results = []
    date_prefix = target_date.replace("-", "")

    if not AGENT_RESULTS_DIR.exists():
        return results

    for f in sorted(AGENT_RESULTS_DIR.glob(f"{date_prefix}*.json")):
        try:
            with open(f) as fh:
                data = json.load(fh)
            result = data.get("agent_result", {})
            if "error" not in result:
                results.append(result)
        except (OSError, json.JSONDecodeError, KeyError):
            # Best-effort: a corrupt or unreadable result file must not kill
            # digest generation. (OSError was previously uncaught, so a
            # single unreadable file aborted the run.)
            continue

    return results
|
||||
|
||||
|
||||
def get_semantic_keys() -> list[str]:
    """Get all semantic memory file keys.

    Returns each semantic file name (e.g. "projects.md") followed by one
    "file#slug" key per "## " heading inside it. Bookkeeping files
    (journal, work queue, index) are excluded.
    """
    # Hoisted out of the per-line loop; strips everything a slug can't hold.
    slug_re = re.compile(r'[^a-z0-9-]')
    keys = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        name = md.name
        if name in ("journal.md", "work-queue.md", "MEMORY.md"):
            continue
        keys.append(name)
        try:
            with open(md) as f:
                for line in f:
                    if line.startswith("## "):
                        slug = slug_re.sub(
                            '', line[3:].strip().lower().replace(' ', '-'))
                        if slug:
                            keys.append(f"{name}#{slug}")
        except (OSError, UnicodeDecodeError):
            # Best-effort: an unreadable file still contributes its name,
            # just not its section slugs. (Was a bare `except Exception`,
            # which could also hide programming errors.)
            pass
    return keys
|
||||
|
||||
|
||||
def build_digest_prompt(target_date: str, entries: list[dict],
                        agent_results: list[dict],
                        semantic_keys: list[str]) -> str:
    """Build the prompt for Sonnet to generate the daily digest."""

    # Render journal entries as timestamped markdown sections.
    entry_parts = []
    for e in entries:
        body = e["text"].strip()
        entry_parts.append(f"\n### {e['timestamp']}\n")
        if e["source_ref"]:
            entry_parts.append(f"Source: {e['source_ref']}\n")
        entry_parts.append(f"\n{body}\n")
    entries_text = "".join(entry_parts)

    # Pool links and insights across all agent results before formatting.
    all_links = [link for r in agent_results for link in r.get("links", [])]
    all_insights = [ins for r in agent_results
                    for ins in r.get("missed_insights", [])]

    enrichment_parts = []
    if all_links:
        enrichment_parts.append("\n## Agent-proposed links\n")
        for link in all_links:
            enrichment_parts.append(f" - {link['target']}: {link.get('reason', '')}\n")
    if all_insights:
        enrichment_parts.append("\n## Agent-spotted insights\n")
        for ins in all_insights:
            enrichment_parts.append(f" - [{ins.get('suggested_key', '?')}] {ins['text']}\n")
    enrichment = "".join(enrichment_parts)

    # Cap the key list so the prompt stays a bounded size.
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])

    return f"""You are generating a daily episodic digest for ProofOfConcept (an AI).
Date: {target_date}

This digest serves as the temporal index — the answer to "what did I do on
{target_date}?" It should be:
1. Narrative, not a task log — what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory — each topic/concept mentioned
   should reference existing memory nodes
3. Structured for traversal — someone reading this should be able to follow
   any thread into deeper detail

## Output format

Write a markdown file with this structure:

```markdown
# Daily digest: {target_date}

## Summary
[2-3 sentence overview of the day — what was the arc?]

## Sessions
[For each session/entry, a paragraph summarizing what happened.
Include the original timestamp as a reference.]

## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** → `memory-key#section` — brief note on how it appeared today

## Links
[Explicit bidirectional links for the memory graph]
- semantic_key → this daily digest (this day involved X)
- this daily digest → semantic_key (X was active on this day)

## Temporal context
[What came before this day? What's coming next? Any multi-day arcs?]
```

Use ONLY keys from the semantic memory list below. If a concept doesn't have
a matching key, note it with "NEW:" prefix.

---

## Journal entries for {target_date}

{entries_text}

---

## Agent enrichment (automated analysis of these entries)

{enrichment if enrichment else "(no agent results yet)"}

---

## Semantic memory nodes (available link targets)

{keys_text}
"""
|
||||
|
||||
|
||||
def call_sonnet(prompt: str, *, timeout: int = 300) -> str:
    """Call Sonnet via the claude CLI (through scripts/call-sonnet.sh).

    The prompt is written to a temp file and passed to the wrapper script
    by path — this avoids Python subprocess pipe issues with the claude
    CLI's TTY detection.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the CLI before giving up.

    Returns:
        The model's stdout (stripped), or a string starting with "Error:"
        on timeout or any other failure.
    """
    import tempfile

    env = dict(os.environ)
    # Unset so the CLI doesn't think it's running inside Claude Code.
    env.pop("CLAUDECODE", None)

    print(f" [debug] prompt: {len(prompt)} chars", flush=True)

    # delete=False: the file must survive past this `with` so the wrapper
    # can read it; we unlink it ourselves in the `finally` below.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    print(f" [debug] prompt written to {prompt_file}", flush=True)
    start = time.time()

    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")

        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        elapsed = time.time() - start
        print(f" [debug] completed in {elapsed:.1f}s, exit={result.returncode}", flush=True)
        if result.stderr.strip():
            print(f" [debug] stderr: {result.stderr[:500]}", flush=True)
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        print(f" [debug] TIMEOUT after {timeout}s", flush=True)
        return "Error: Sonnet call timed out"
    except Exception as e:
        # Boundary catch-all: any failure is reported to the caller as an
        # "Error:" string rather than crashing the digest run.
        print(f" [debug] exception: {e}", flush=True)
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
|
||||
|
||||
|
||||
def extract_links(digest_text: str) -> list[dict]:
    """Parse link proposals from the digest for the memory graph."""
    # Backticked key on the left of an arrow: - `memory-key` → this daily digest
    left_of_arrow = re.compile(r'`([^`]+)`\s*→')
    # Backticked key on the right of an arrow: - **Theme** → `memory-key`
    right_of_arrow = re.compile(r'→\s*`([^`]+)`')

    found = []
    for raw in digest_text.split("\n"):
        stripped = raw.strip()
        hit = left_of_arrow.search(raw)
        if hit:
            found.append({"target": hit.group(1), "line": stripped})
        hit = right_of_arrow.search(raw)
        if hit:
            found.append({"target": hit.group(1), "line": stripped})
    return found
|
||||
|
||||
|
||||
def main():
    """Generate the daily digest: gather inputs, call Sonnet, write outputs."""
    # Date argument defaults to today.
    target_date = sys.argv[1] if len(sys.argv) > 1 else date.today().isoformat()

    print(f"Generating daily digest for {target_date}...", flush=True)

    # Gather journal entries; nothing to digest means a clean no-op exit.
    entries = parse_journal_entries(target_date)
    if not entries:
        print(f" No journal entries found for {target_date}")
        sys.exit(0)
    print(f" {len(entries)} journal entries", flush=True)

    # Gather agent enrichment and available link targets.
    agent_results = load_agent_results(target_date)
    print(f" {len(agent_results)} agent results", flush=True)

    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys", flush=True)

    # Assemble the prompt and send it to Sonnet.
    prompt = build_digest_prompt(target_date, entries, agent_results, semantic_keys)
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")

    print(" Calling Sonnet...", flush=True)
    digest = call_sonnet(prompt)

    if digest.startswith("Error:"):
        print(f" {digest}", file=sys.stderr)
        sys.exit(1)

    # Persist the digest itself.
    output_path = EPISODIC_DIR / f"daily-{target_date}.md"
    with open(output_path, "w") as f:
        f.write(digest)
    print(f" Written: {output_path}")

    # Extract link proposals and drop them where poc-memory picks them up.
    links = extract_links(digest)
    if links:
        links_path = AGENT_RESULTS_DIR / f"daily-{target_date}-links.json"
        payload = {
            "type": "daily-digest",
            "date": target_date,
            "digest_path": str(output_path),
            "links": links,
            "entry_timestamps": [e["timestamp"] for e in entries],
        }
        with open(links_path, "w") as f:
            json.dump(payload, f, indent=2)
        print(f" {len(links)} links extracted → {links_path}")

    # len(split("\n")) == newline count + 1, for any string.
    line_count = digest.count("\n") + 1
    print(f" Done: {line_count} lines")
|
||||
|
||||
|
||||
# Script entry point — run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue