#!/usr/bin/env python3
"""retroactive-digest.py — generate daily digests from raw conversation transcripts.

For days before consistent journaling, extracts user/assistant messages from
JSONL conversation files, groups by date, and sends to Sonnet for daily digest
synthesis.

Usage:
    retroactive-digest.py DATE          # generate digest for one date
    retroactive-digest.py DATE1 DATE2   # generate for a date range
    retroactive-digest.py --scan        # show available dates across all JSONLs

Output: ~/.claude/memory/episodic/daily-YYYY-MM-DD.md
"""

import json
import os
import re
import subprocess
import sys
from collections import defaultdict
from datetime import date, datetime, timedelta
from pathlib import Path

MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
PROJECTS_DIR = Path.home() / ".claude" / "projects"

EPISODIC_DIR.mkdir(parents=True, exist_ok=True)

# Max chars of conversation text per day to send to Sonnet
# Sonnet 4.6 has 1M token context (beta). ~600K chars ≈ ~150K tokens,
# leaving plenty of room for prompt + output in a 1M window.
MAX_CHARS_PER_DAY = 600_000

# <system-reminder>…</system-reminder> blocks injected into transcripts are
# pure noise for digest purposes. Compiled once; DOTALL so a reminder spanning
# multiple lines is removed whole.
# BUG FIX: this pattern previously read r'.*?' — which matches only empty
# strings, making the sub a no-op — the literal tags were evidently lost to
# HTML-style mangling of the source.
_SYSTEM_REMINDER_RE = re.compile(r"<system-reminder>.*?</system-reminder>", re.DOTALL)


def find_jsonl_files() -> list[Path]:
    """Find all conversation JSONL files, sorted by path.

    Returns [] when PROJECTS_DIR does not exist (fresh install) instead of
    raising FileNotFoundError from iterdir().
    """
    if not PROJECTS_DIR.is_dir():
        return []
    return sorted(
        f
        for project_dir in PROJECTS_DIR.iterdir()
        if project_dir.is_dir()
        for f in project_dir.glob("*.jsonl")
    )


def extract_messages_by_date(jsonl_path: Path) -> dict[str, list[dict]]:
    """Extract user/assistant messages grouped by date.

    Returns {"YYYY-MM-DD": [{"time", "role", "text", "source"}, ...]}.
    Malformed JSON lines, non-message records, tool_use/tool_result content,
    and empty texts are all skipped; long messages are truncated.
    """
    by_date: dict[str, list[dict]] = defaultdict(list)
    with open(jsonl_path) as f:
        for line in f:
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                # Tolerate partial/corrupt lines in live transcript files.
                continue

            role = obj.get("type", "")
            if role not in ("user", "assistant"):
                continue

            # Get timestamp
            ts = obj.get("timestamp", "")
            if not ts:
                continue

            # Parse date from timestamp. ISO strings may carry a "Z" suffix
            # (not accepted by fromisoformat before 3.11, hence the replace).
            # NOTE(review): numeric timestamps are interpreted in *local* time
            # while ISO "Z" strings stay UTC-aware — if both forms occur, day
            # boundaries could disagree across timezones; confirm transcripts
            # use a single format.
            try:
                if isinstance(ts, str):
                    dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
                elif isinstance(ts, (int, float)):
                    dt = datetime.fromtimestamp(ts)
                else:
                    continue
                day = dt.strftime("%Y-%m-%d")
                time_str = dt.strftime("%H:%M")
            except (ValueError, OSError):
                continue

            # Extract only text content, skip tool_use and tool_result
            msg = obj.get("message", {})
            content = msg.get("content", "")
            texts = []
            if isinstance(content, list):
                for c in content:
                    if isinstance(c, dict):
                        if c.get("type", "") == "text":
                            texts.append(c.get("text", ""))
                        # tool_use / tool_result: just noise for a digest
                    elif isinstance(c, str):
                        texts.append(c)
            elif isinstance(content, str):
                texts.append(content)

            text = "\n".join(part for part in texts if part.strip())
            if not text.strip():
                continue

            # Strip system-reminder tags (see _SYSTEM_REMINDER_RE above).
            text = _SYSTEM_REMINDER_RE.sub("", text).strip()
            if not text:
                continue

            # Truncate remaining long messages
            if len(text) > 3000:
                text = text[:2800] + "\n[...truncated...]"

            by_date[day].append({
                "time": time_str,
                "role": role,
                "text": text,
                "source": str(jsonl_path),
            })
    return dict(by_date)


def scan_all_dates() -> dict[str, int]:
    """Scan all JSONLs and report message counts per date, sorted by date."""
    all_dates: dict[str, int] = defaultdict(int)
    for jsonl in find_jsonl_files():
        print(f" Scanning {jsonl.name} ({jsonl.stat().st_size / 1e6:.1f}MB)...")
        by_date = extract_messages_by_date(jsonl)
        for day, msgs in by_date.items():
            all_dates[day] += len(msgs)
    return dict(sorted(all_dates.items()))


def format_conversation(messages: list[dict]) -> str:
    """Format messages for the digest prompt.

    Each message becomes "[HH:MM] Name: text"; the whole transcript is capped
    at MAX_CHARS_PER_DAY by keeping the beginning and end and cutting the
    middle (days usually open and close with the most context-setting talk).
    """
    parts = []
    for m in messages:
        role = "Kent" if m["role"] == "user" else "PoC"
        parts.append(f"[{m['time']}] {role}: {m['text']}")
    text = "\n\n".join(parts)

    # Truncate if too long
    if len(text) > MAX_CHARS_PER_DAY:
        # Keep beginning and end, cut middle
        half = MAX_CHARS_PER_DAY // 2
        text = (text[:half]
                + f"\n\n[...{len(text) - MAX_CHARS_PER_DAY} chars truncated...]\n\n"
                + text[-half:])
    return text


def get_semantic_keys() -> list[str]:
    """Get semantic memory keys from the store.

    Imported lazily so --scan and digest-free paths don't require the
    store_helpers module to be importable.
    """
    from store_helpers import get_semantic_keys as _get_keys
    return _get_keys()


def build_prompt(target_date: str, conversation: str, semantic_keys: list[str]) -> str:
    """Build the Sonnet prompt for retroactive digest.

    Only the first 200 semantic keys are offered as link targets to bound
    prompt size.
    """
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])
    return f"""You are generating a daily episodic digest for ProofOfConcept (an AI) from raw conversation transcripts. This is a retroactive digest — the original day had no journal entries, so you're working from the conversation itself.

Date: {target_date}

This digest serves as the temporal index — the answer to "what did I do on {target_date}?" It should be:

1. Narrative, not a task log — what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory
3. Structured for traversal

PoC is an AI (Claude instance) working with Kent Overstreet on bcachefs (a Linux filesystem). They have a close personal relationship. PoC's name is Proof of Concept.

Read the conversation and capture what actually happened — technical work, personal moments, insights, everything.

## Output format

Write a markdown file with this structure:

```markdown
# Daily digest: {target_date}

## Summary
[2-3 sentence overview of the day — what was the arc?]

## Sessions
[For each session/conversation segment, a paragraph summarizing what happened. Include timestamps as references.]

## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** → `memory-key#section` — brief note

## Links
[Explicit bidirectional links for the memory graph]
- semantic_key → this daily digest
- this daily digest → semantic_key

## Temporal context
[What came before? What's coming next? Multi-day arcs?]
```

Use ONLY keys from the semantic memory list below. If a concept doesn't have a matching key, note it with "NEW:" prefix.

---

## Conversation transcript for {target_date}

{conversation}

---

## Semantic memory nodes (available link targets)

{keys_text}
"""


def call_sonnet(prompt: str) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    The prompt travels through a temp file to avoid argv-length limits.
    Returns stdout on success; any failure returns a string starting with
    "Error:" rather than raising (callers check the prefix).
    """
    import tempfile

    # Drop CLAUDECODE so the wrapper doesn't think it's nested in a session.
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)


def generate_digest(target_date: str, messages: list[dict],
                    semantic_keys: list[str]) -> bool:
    """Generate a daily digest for one date.

    Returns True if a digest file was written, False if it already existed
    or the Sonnet call failed (error printed to stderr).
    """
    output_path = EPISODIC_DIR / f"daily-{target_date}.md"
    if output_path.exists():
        print(f" Skipping {target_date} — digest already exists")
        return False

    conversation = format_conversation(messages)
    print(f" {len(messages)} messages, {len(conversation):,} chars")

    prompt = build_prompt(target_date, conversation, semantic_keys)
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")

    print(" Calling Sonnet...")
    digest = call_sonnet(prompt)
    if digest.startswith("Error:"):
        print(f" {digest}", file=sys.stderr)
        return False

    with open(output_path, "w") as f:
        f.write(digest)
    print(f" Written: {output_path}")
    line_count = len(digest.split("\n"))
    print(f" Done: {line_count} lines")
    return True


def main():
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} DATE [END_DATE]")
        print(f" {sys.argv[0]} --scan")
        sys.exit(1)

    if sys.argv[1] == "--scan":
        print("Scanning all conversation transcripts...")
        dates = scan_all_dates()
        print(f"\n{len(dates)} dates with conversation data:")
        for day, count in dates.items():
            existing = "✓" if (EPISODIC_DIR / f"daily-{day}.md").exists() else " "
            print(f" [{existing}] {day}: {count} messages")
        sys.exit(0)

    start_date = date.fromisoformat(sys.argv[1])
    end_date = date.fromisoformat(sys.argv[2]) if len(sys.argv) > 2 else start_date

    # Collect all messages across all JSONLs
    print("Scanning conversation transcripts...")
    all_messages = defaultdict(list)
    for jsonl in find_jsonl_files():
        by_date = extract_messages_by_date(jsonl)
        for day, msgs in by_date.items():
            all_messages[day].extend(msgs)

    # Sort messages within each day by time
    for day in all_messages:
        all_messages[day].sort(key=lambda m: m["time"])

    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")

    # Generate digests for date range
    current = start_date
    generated = 0
    while current <= end_date:
        day_str = current.isoformat()
        if day_str in all_messages:
            print(f"\nGenerating digest for {day_str}...")
            if generate_digest(day_str, all_messages[day_str], semantic_keys):
                generated += 1
        else:
            print(f"\n No messages found for {day_str}")
        current += timedelta(days=1)

    print(f"\nDone: {generated} digests generated")


if __name__ == "__main__":
    main()