#!/usr/bin/env python3
"""journal-agent.py — background agent that enriches journal entries.

Spawned by poc-journal after each write. Sends the full conversation to
Sonnet to:

1. Find the exact conversation region the entry refers to
2. Propose bidirectional links to semantic memory nodes
3. Spot additional insights worth capturing

Results are written to ~/.claude/memory/agent-results/ as JSON for
pickup by poc-memory.

Usage: journal-agent.py JSONL_PATH ENTRY_TEXT [GREP_LINE]
"""
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path

MEMORY_DIR = Path.home() / ".claude" / "memory"
RESULTS_DIR = MEMORY_DIR / "agent-results"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)


def extract_conversation(jsonl_path: str) -> list[dict]:
    """Extract user/assistant messages with line numbers.

    Reads the Claude session JSONL at *jsonl_path* and returns a list of
    dicts with keys ``line`` (1-based line number in the JSONL), ``role``
    ("user" or "assistant"), ``text`` (concatenated text content), and
    ``timestamp``. Lines that are not valid JSON, records of other types,
    and messages with no non-blank text are skipped.
    """
    messages = []
    with open(jsonl_path) as f:
        for i, line in enumerate(f, 1):
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                # Tolerate partial/corrupt lines — JSONL may still be
                # mid-write when we run.
                continue
            t = obj.get("type", "")
            if t not in ("user", "assistant"):
                continue
            msg = obj.get("message", {})
            content = msg.get("content", "")
            timestamp = obj.get("timestamp", "")
            texts = []
            # Content is either a plain string or a list of content
            # blocks; only "text" blocks (and bare strings) are kept —
            # tool_use/tool_result blocks are dropped.
            if isinstance(content, list):
                for c in content:
                    if isinstance(c, dict) and c.get("type") == "text":
                        texts.append(c.get("text", ""))
                    elif isinstance(c, str):
                        texts.append(c)
            elif isinstance(content, str):
                texts.append(content)
            text = "\n".join(part for part in texts if part.strip())
            if text.strip():
                messages.append({
                    "line": i,
                    "role": t,
                    "text": text,
                    "timestamp": timestamp,
                })
    return messages


def format_conversation(messages: list[dict]) -> str:
    """Format messages for the agent prompt.

    Each message becomes ``L<line> [<role>]: <text>``; messages are
    joined with blank lines. Very long messages are truncated.
    """
    parts = []
    for m in messages:
        # Truncate very long messages (code output etc) but keep substance
        text = m["text"]
        if len(text) > 2000:
            text = text[:1800] + "\n[...truncated...]"
        parts.append(f'L{m["line"]} [{m["role"]}]: {text}')
    return "\n\n".join(parts)


def get_memory_nodes() -> str:
    """Get a list of memory nodes for link proposals.

    Uses poc-memory to get top nodes by degree plus recent nodes.
    Best-effort: if either subprocess fails or times out, that section
    is simply empty in the returned summary.
    """
    # Get graph summary (top hubs)
    try:
        result = subprocess.run(
            ["poc-memory", "graph"], capture_output=True, text=True, timeout=10
        )
        graph = result.stdout.strip()
    except Exception:
        graph = ""
    # Get recent nodes from status
    try:
        result = subprocess.run(
            ["poc-memory", "status"], capture_output=True, text=True, timeout=10
        )
        status = result.stdout.strip()
    except Exception:
        status = ""
    return f"Graph (top hubs):\n{graph}\n\nStatus:\n{status}"


def get_semantic_keys() -> list[str]:
    """Get all semantic memory file keys by scanning the memory dir.

    Returns filenames plus ``filename#section-slug`` entries for each
    ``## `` header found inside the files. Bookkeeping files are
    excluded. (NOTE: "work-state" in the exclusion list can never match
    a ``*.md`` glob — kept for safety should the naming change.)
    """
    keys = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        name = md.name
        if name in ("journal.md", "work-queue.md", "work-state",
                    "where-am-i.md", "MEMORY.md"):
            continue
        keys.append(name)
        # Scan for section headers
        try:
            with open(md) as f:
                for line in f:
                    if line.startswith("## "):
                        slug = re.sub(
                            r'[^a-z0-9-]', '',
                            line[3:].strip().lower().replace(' ', '-'),
                        )
                        if slug:
                            keys.append(f"{name}#{slug}")
        except Exception:
            # Unreadable file — skip its sections, keep the filename key.
            pass
    return keys


def build_prompt(entry_text: str, conversation: str, memory_nodes: str,
                 semantic_keys: list[str], grep_line: int) -> str:
    """Build the prompt for Sonnet.

    Only the first 200 semantic keys are listed to bound prompt size.
    *grep_line* is the grep-based approximation of the entry's source
    location (0 = no match) and is passed to the model as a hint.
    """
    keys_text = "\n".join(f"  - {k}" for k in semantic_keys[:200])
    return f"""You are a memory agent for an AI named ProofOfConcept. A journal entry was just written. Your job is to enrich it by finding its exact source in the conversation and linking it to semantic memory.

## Task 1: Find exact source

The journal entry below was written during or after a conversation. Find the exact region of the conversation it refers to — the exchange where the topic was discussed. Return the start and end line numbers.

The grep-based approximation placed it near line {grep_line} (0 = no match). Use that as a hint but find the true boundaries.

## Task 2: Propose semantic links

Which existing semantic memory nodes should this journal entry be linked to?
Look for:
- Concepts discussed in the entry
- Skills/patterns demonstrated
- People mentioned
- Projects or subsystems involved
- Emotional themes

Each link should be bidirectional — the entry documents WHEN something happened, the semantic node documents WHAT it is. Together they let you traverse: "What was I doing on this day?" ↔ "When did I learn about X?"

## Task 3: Spot missed insights

Read the conversation around the journal entry. Is there anything worth capturing that the entry missed? A pattern, a decision, an insight, something Kent said that's worth remembering? Be selective — only flag genuinely valuable things.

## Output format (JSON)

Return ONLY a JSON object:

```json
{{
  "source_start": 1234,
  "source_end": 1256,
  "links": [
    {{"target": "memory-key#section", "reason": "why this link exists"}}
  ],
  "missed_insights": [
    {{"text": "insight text", "suggested_key": "where it belongs"}}
  ],
  "temporal_tags": ["2026-02-28", "topology-metrics", "poc-memory"]
}}
```

For links, use existing keys from the semantic memory list below. If nothing fits, suggest a new key with a NOTE prefix: "NOTE:new-topic-name".

---

## Journal entry

{entry_text}

---

## Semantic memory nodes (available link targets)

{keys_text}

---

## Memory graph

{memory_nodes}

---

## Full conversation (with line numbers)

{conversation}
"""


def call_sonnet(prompt: str) -> dict:
    """Call Sonnet via claude CLI and parse JSON response.

    Writes the prompt to a temp file and invokes the call-sonnet.sh
    wrapper next to this script. Returns the parsed JSON object, or a
    dict with an ``error`` key describing the failure.
    """
    import tempfile
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)
    # Write prompt to temp file — avoids Python subprocess pipe issues
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        output = result.stdout.strip()
        if not output:
            return {"error": f"Empty response. stderr: {result.stderr[:500]}"}
        # Extract JSON from response (might be wrapped in markdown)
        json_match = re.search(r'\{[\s\S]*\}', output)
        if json_match:
            return json.loads(json_match.group())
        else:
            return {"error": f"No JSON found in response: {output[:500]}"}
    except subprocess.TimeoutExpired:
        return {"error": "Sonnet call timed out after 300s"}
    except json.JSONDecodeError as e:
        return {"error": f"JSON parse error: {e}. Output: {output[:500]}"}
    except Exception as e:
        return {"error": str(e)}
    finally:
        os.unlink(prompt_file)


def save_result(entry_text: str, jsonl_path: str, result: dict):
    """Save agent results for pickup by poc-memory.

    Returns the Path of the written JSON file. Filenames are
    second-resolution timestamps; a numeric suffix is appended if a
    result with the same timestamp already exists, so same-second runs
    no longer overwrite each other.
    """
    timestamp = time.strftime("%Y%m%dT%H%M%S")
    result_file = RESULTS_DIR / f"{timestamp}.json"
    suffix = 1
    while result_file.exists():
        result_file = RESULTS_DIR / f"{timestamp}-{suffix}.json"
        suffix += 1
    output = {
        "timestamp": timestamp,
        "jsonl_path": jsonl_path,
        "entry_text": entry_text[:500],
        "agent_result": result,
    }
    with open(result_file, "w") as f:
        json.dump(output, f, indent=2)
    return result_file


def apply_links(result: dict):
    """Apply proposed links via poc-memory.

    Currently log-only: NOTE:-prefixed (new-key) proposals are skipped,
    existing-key links are printed until poc-memory grows a
    link-from-agent command.
    """
    links = result.get("links", [])
    for link in links:
        target = link.get("target", "")
        if not target or target.startswith("NOTE:"):
            continue
        # For now, just log — we'll wire this up when poc-memory
        # has a link-from-agent command
        print(f"  LINK → {target}: {link.get('reason', '')}")


def main():
    """CLI entry point: extract, prompt, call Sonnet, persist results."""
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} JSONL_PATH ENTRY_TEXT [GREP_LINE]",
              file=sys.stderr)
        sys.exit(1)
    jsonl_path = sys.argv[1]
    entry_text = sys.argv[2]
    # Non-numeric GREP_LINE degrades to 0 ("no match") instead of crashing.
    try:
        grep_line = int(sys.argv[3]) if len(sys.argv) > 3 else 0
    except ValueError:
        grep_line = 0
    if not os.path.isfile(jsonl_path):
        print(f"JSONL not found: {jsonl_path}", file=sys.stderr)
        sys.exit(1)

    print(f"Extracting conversation from {jsonl_path}...")
    messages = extract_conversation(jsonl_path)
    conversation = format_conversation(messages)
    print(f"  {len(messages)} messages, {len(conversation):,} chars")

    print("Getting memory context...")
    memory_nodes = get_memory_nodes()
    semantic_keys = get_semantic_keys()
    print(f"  {len(semantic_keys)} semantic keys")

    print("Building prompt...")
    prompt = build_prompt(entry_text, conversation, memory_nodes,
                          semantic_keys, grep_line)
    print(f"  Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")

    print("Calling Sonnet...")
    result = call_sonnet(prompt)

    if "error" in result:
        print(f"  Error: {result['error']}", file=sys.stderr)
    else:
        source = f"L{result.get('source_start', '?')}-L{result.get('source_end', '?')}"
        n_links = len(result.get("links", []))
        n_insights = len(result.get("missed_insights", []))
        print(f"  Source: {source}")
        print(f"  Links: {n_links}")
        print(f"  Missed insights: {n_insights}")
        apply_links(result)

    result_file = save_result(entry_text, jsonl_path, result)
    print(f"  Results saved: {result_file}")


if __name__ == "__main__":
    main()