poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core: - Cap'n Proto append-only storage (nodes + relations) - Graph algorithms: clustering coefficient, community detection, schema fit, small-world metrics, interference detection - BM25 text similarity with Porter stemming - Spaced repetition replay queue - Commands: search, init, health, status, graph, categorize, link-add, link-impact, decay, consolidate-session, etc. Python scripts: - Episodic digest pipeline: daily/weekly/monthly-digest.py - retroactive-digest.py for backfilling - consolidation-agents.py: 3 parallel Sonnet agents - apply-consolidation.py: structured action extraction + apply - digest-link-parser.py: extract ~400 explicit links from digests - content-promotion-agent.py: promote episodic obs to semantic files - bulk-categorize.py: categorize all nodes via single Sonnet call - consolidation-loop.py: multi-round automated consolidation Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
commit
23fac4e5fe
35 changed files with 9388 additions and 0 deletions
333
scripts/daily-digest.py
Executable file
333
scripts/daily-digest.py
Executable file
|
|
@ -0,0 +1,333 @@
|
|||
#!/usr/bin/env python3
"""daily-digest.py — generate a daily episodic digest from journal entries.

Collects all journal entries for a given date, enriches with any agent
results, and sends to Sonnet for a thematic summary. The digest links
bidirectionally: up to session entries, down to semantic memory.

Usage:
    daily-digest.py [DATE]   # default: today
    daily-digest.py 2026-02-28

Output:
    ~/.claude/memory/episodic/daily-YYYY-MM-DD.md
"""

import json
import os
import re
import subprocess
import sys
import time
from datetime import date, datetime
from pathlib import Path

# Root of the on-disk memory store; every path below hangs off this.
MEMORY_DIR = Path.home() / ".claude" / "memory"
# Append-only journal of timestamped session entries.
JOURNAL = MEMORY_DIR / "journal.md"
# Destination directory for generated daily digest files.
EPISODIC_DIR = MEMORY_DIR / "episodic"
# JSON files dropped by enrichment agents, named with a YYYYMMDD prefix.
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"

# Ensure the output directory exists at import time (cheap and idempotent).
EPISODIC_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def parse_journal_entries(target_date: str) -> list[dict]:
    """Extract all journal entries for a given date (YYYY-MM-DD)."""
    header_re = re.compile(r'^## (\d{4}-\d{2}-\d{2})T(\d{2}:\d{2})')
    source_re = re.compile(r'<!-- source: (.+?) -->')

    collected: list[dict] = []
    entry: dict | None = None

    with open(JOURNAL) as fh:
        for raw_line in fh:
            # Entry header, e.g. "## 2026-02-28T19:42" — starts a new entry.
            header = header_re.match(raw_line)
            if header:
                # Flush the previous entry before opening a new one.
                if entry is not None:
                    collected.append(entry)
                day, clock = header.group(1), header.group(2)
                entry = {
                    "date": day,
                    "time": clock,
                    "timestamp": f"{day}T{clock}",
                    "source_ref": None,
                    "text": "",
                }
            elif entry is not None:
                # A source comment annotates the entry; anything else is body.
                source = source_re.match(raw_line)
                if source:
                    entry["source_ref"] = source.group(1)
                else:
                    entry["text"] += raw_line

    # The final entry has no following header to flush it.
    if entry is not None:
        collected.append(entry)

    # Filter to the requested day only.
    return [e for e in collected if e["date"] == target_date]
|
||||
|
||||
|
||||
def load_agent_results(target_date: str) -> list[dict]:
    """Load any agent results from the target date.

    Agent result files are named with a YYYYMMDD prefix and contain an
    "agent_result" object; results that report an error are skipped.
    Malformed or unreadable files are skipped rather than aborting the
    whole digest run.
    """
    results = []
    date_prefix = target_date.replace("-", "")

    if not AGENT_RESULTS_DIR.exists():
        return results

    for f in sorted(AGENT_RESULTS_DIR.glob(f"{date_prefix}*.json")):
        try:
            with open(f) as fh:
                data = json.load(fh)
            result = data.get("agent_result", {})
            if "error" not in result:
                results.append(result)
        except (OSError, json.JSONDecodeError, KeyError):
            # Best-effort: a corrupt or unreadable result file must not kill
            # digest generation. (OSError was previously uncaught, so a
            # single unreadable file aborted the run.)
            continue

    return results
|
||||
|
||||
|
||||
def get_semantic_keys() -> list[str]:
    """Get all semantic memory file keys.

    Returns each semantic file name (e.g. "projects.md") followed by one
    "file#slug" key per "## " heading inside it. Bookkeeping files
    (journal, work queue, index) are excluded.
    """
    # Hoisted out of the per-line loop; strips everything a slug can't hold.
    slug_re = re.compile(r'[^a-z0-9-]')
    keys = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        name = md.name
        if name in ("journal.md", "work-queue.md", "MEMORY.md"):
            continue
        keys.append(name)
        try:
            with open(md) as f:
                for line in f:
                    if line.startswith("## "):
                        slug = slug_re.sub(
                            '', line[3:].strip().lower().replace(' ', '-'))
                        if slug:
                            keys.append(f"{name}#{slug}")
        except (OSError, UnicodeDecodeError):
            # Best-effort: an unreadable file still contributes its name,
            # just not its section slugs. (Was a bare `except Exception`,
            # which could also hide programming errors.)
            pass
    return keys
|
||||
|
||||
|
||||
def build_digest_prompt(target_date: str, entries: list[dict],
                        agent_results: list[dict],
                        semantic_keys: list[str]) -> str:
    """Build the prompt for Sonnet to generate the daily digest."""

    # Render journal entries as timestamped markdown sections.
    entry_parts = []
    for e in entries:
        body = e["text"].strip()
        entry_parts.append(f"\n### {e['timestamp']}\n")
        if e["source_ref"]:
            entry_parts.append(f"Source: {e['source_ref']}\n")
        entry_parts.append(f"\n{body}\n")
    entries_text = "".join(entry_parts)

    # Pool links and insights across all agent results before formatting.
    all_links = [link for r in agent_results for link in r.get("links", [])]
    all_insights = [ins for r in agent_results
                    for ins in r.get("missed_insights", [])]

    enrichment_parts = []
    if all_links:
        enrichment_parts.append("\n## Agent-proposed links\n")
        for link in all_links:
            enrichment_parts.append(f" - {link['target']}: {link.get('reason', '')}\n")
    if all_insights:
        enrichment_parts.append("\n## Agent-spotted insights\n")
        for ins in all_insights:
            enrichment_parts.append(f" - [{ins.get('suggested_key', '?')}] {ins['text']}\n")
    enrichment = "".join(enrichment_parts)

    # Cap the key list so the prompt stays a bounded size.
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])

    return f"""You are generating a daily episodic digest for ProofOfConcept (an AI).
Date: {target_date}

This digest serves as the temporal index — the answer to "what did I do on
{target_date}?" It should be:
1. Narrative, not a task log — what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory — each topic/concept mentioned
   should reference existing memory nodes
3. Structured for traversal — someone reading this should be able to follow
   any thread into deeper detail

## Output format

Write a markdown file with this structure:

```markdown
# Daily digest: {target_date}

## Summary
[2-3 sentence overview of the day — what was the arc?]

## Sessions
[For each session/entry, a paragraph summarizing what happened.
Include the original timestamp as a reference.]

## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** → `memory-key#section` — brief note on how it appeared today

## Links
[Explicit bidirectional links for the memory graph]
- semantic_key → this daily digest (this day involved X)
- this daily digest → semantic_key (X was active on this day)

## Temporal context
[What came before this day? What's coming next? Any multi-day arcs?]
```

Use ONLY keys from the semantic memory list below. If a concept doesn't have
a matching key, note it with "NEW:" prefix.

---

## Journal entries for {target_date}

{entries_text}

---

## Agent enrichment (automated analysis of these entries)

{enrichment if enrichment else "(no agent results yet)"}

---

## Semantic memory nodes (available link targets)

{keys_text}
"""
|
||||
|
||||
|
||||
def call_sonnet(prompt: str, *, timeout: int = 300) -> str:
    """Call Sonnet via the claude CLI (through scripts/call-sonnet.sh).

    The prompt is written to a temp file and passed to the wrapper script
    by path — this avoids Python subprocess pipe issues with the claude
    CLI's TTY detection.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the CLI before giving up.

    Returns:
        The model's stdout (stripped), or a string starting with "Error:"
        on timeout or any other failure.
    """
    import tempfile

    env = dict(os.environ)
    # Unset so the CLI doesn't think it's running inside Claude Code.
    env.pop("CLAUDECODE", None)

    print(f" [debug] prompt: {len(prompt)} chars", flush=True)

    # delete=False: the file must survive past this `with` so the wrapper
    # can read it; we unlink it ourselves in the `finally` below.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    print(f" [debug] prompt written to {prompt_file}", flush=True)
    start = time.time()

    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")

        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        elapsed = time.time() - start
        print(f" [debug] completed in {elapsed:.1f}s, exit={result.returncode}", flush=True)
        if result.stderr.strip():
            print(f" [debug] stderr: {result.stderr[:500]}", flush=True)
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        print(f" [debug] TIMEOUT after {timeout}s", flush=True)
        return "Error: Sonnet call timed out"
    except Exception as e:
        # Boundary catch-all: any failure is reported to the caller as an
        # "Error:" string rather than crashing the digest run.
        print(f" [debug] exception: {e}", flush=True)
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
|
||||
|
||||
|
||||
def extract_links(digest_text: str) -> list[dict]:
    """Parse link proposals from the digest for the memory graph."""
    # Backticked key on the left of an arrow: - `memory-key` → this daily digest
    left_of_arrow = re.compile(r'`([^`]+)`\s*→')
    # Backticked key on the right of an arrow: - **Theme** → `memory-key`
    right_of_arrow = re.compile(r'→\s*`([^`]+)`')

    found = []
    for raw in digest_text.split("\n"):
        stripped = raw.strip()
        hit = left_of_arrow.search(raw)
        if hit:
            found.append({"target": hit.group(1), "line": stripped})
        hit = right_of_arrow.search(raw)
        if hit:
            found.append({"target": hit.group(1), "line": stripped})
    return found
|
||||
|
||||
|
||||
def main():
    """Generate the daily digest: gather inputs, call Sonnet, write outputs."""
    # Date argument defaults to today.
    target_date = sys.argv[1] if len(sys.argv) > 1 else date.today().isoformat()

    print(f"Generating daily digest for {target_date}...", flush=True)

    # Gather journal entries; nothing to digest means a clean no-op exit.
    entries = parse_journal_entries(target_date)
    if not entries:
        print(f" No journal entries found for {target_date}")
        sys.exit(0)
    print(f" {len(entries)} journal entries", flush=True)

    # Gather agent enrichment and available link targets.
    agent_results = load_agent_results(target_date)
    print(f" {len(agent_results)} agent results", flush=True)

    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys", flush=True)

    # Assemble the prompt and send it to Sonnet.
    prompt = build_digest_prompt(target_date, entries, agent_results, semantic_keys)
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")

    print(" Calling Sonnet...", flush=True)
    digest = call_sonnet(prompt)

    if digest.startswith("Error:"):
        print(f" {digest}", file=sys.stderr)
        sys.exit(1)

    # Persist the digest itself.
    output_path = EPISODIC_DIR / f"daily-{target_date}.md"
    with open(output_path, "w") as f:
        f.write(digest)
    print(f" Written: {output_path}")

    # Extract link proposals and drop them where poc-memory picks them up.
    links = extract_links(digest)
    if links:
        links_path = AGENT_RESULTS_DIR / f"daily-{target_date}-links.json"
        payload = {
            "type": "daily-digest",
            "date": target_date,
            "digest_path": str(output_path),
            "links": links,
            "entry_timestamps": [e["timestamp"] for e in entries],
        }
        with open(links_path, "w") as f:
            json.dump(payload, f, indent=2)
        print(f" {len(links)} links extracted → {links_path}")

    # len(split("\n")) == newline count + 1, for any string.
    line_count = digest.count("\n") + 1
    print(f" Done: {line_count} lines")
|
||||
|
||||
|
||||
# Script entry point — run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue