poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core: - Cap'n Proto append-only storage (nodes + relations) - Graph algorithms: clustering coefficient, community detection, schema fit, small-world metrics, interference detection - BM25 text similarity with Porter stemming - Spaced repetition replay queue - Commands: search, init, health, status, graph, categorize, link-add, link-impact, decay, consolidate-session, etc. Python scripts: - Episodic digest pipeline: daily/weekly/monthly-digest.py - retroactive-digest.py for backfilling - consolidation-agents.py: 3 parallel Sonnet agents - apply-consolidation.py: structured action extraction + apply - digest-link-parser.py: extract ~400 explicit links from digests - content-promotion-agent.py: promote episodic obs to semantic files - bulk-categorize.py: categorize all nodes via single Sonnet call - consolidation-loop.py: multi-round automated consolidation Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
commit
23fac4e5fe
35 changed files with 9388 additions and 0 deletions
343
scripts/journal-agent.py
Executable file
343
scripts/journal-agent.py
Executable file
|
|
@ -0,0 +1,343 @@
|
|||
#!/usr/bin/env python3
|
||||
"""journal-agent.py — background agent that enriches journal entries.
|
||||
|
||||
Spawned by poc-journal after each write. Sends the full conversation
|
||||
to Sonnet to:
|
||||
1. Find the exact conversation region the entry refers to
|
||||
2. Propose bidirectional links to semantic memory nodes
|
||||
3. Spot additional insights worth capturing
|
||||
|
||||
Results are written to ~/.claude/memory/agent-results/ as JSON for
|
||||
pickup by poc-memory.
|
||||
|
||||
Usage:
|
||||
journal-agent.py JSONL_PATH ENTRY_TEXT [GREP_LINE]
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Root of the agent's persistent memory (semantic .md files, journal, etc.).
MEMORY_DIR = Path.home() / ".claude" / "memory"
# Drop-box for agent output JSON, picked up later by poc-memory.
RESULTS_DIR = MEMORY_DIR / "agent-results"
# NOTE(review): import-time side effect — the results directory is created
# as soon as this module is imported, not when the agent first runs.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def extract_conversation(jsonl_path: str) -> list[dict]:
    """Read a session JSONL file and return its user/assistant messages.

    Each returned dict carries the 1-based line number in the JSONL file,
    the role ("user" or "assistant"), the concatenated text content, and
    the record's timestamp. Malformed lines and empty messages are skipped.
    """
    collected: list[dict] = []
    with open(jsonl_path) as fh:
        for lineno, raw in enumerate(fh, 1):
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                # Tolerate partial/corrupt lines in the transcript.
                continue

            role = record.get("type", "")
            if role != "user" and role != "assistant":
                continue

            content = record.get("message", {}).get("content", "")

            # Content may be a plain string or a list of content parts;
            # only "text"-typed dict parts and bare strings carry text.
            pieces: list[str] = []
            if isinstance(content, str):
                pieces.append(content)
            elif isinstance(content, list):
                for part in content:
                    if isinstance(part, str):
                        pieces.append(part)
                    elif isinstance(part, dict) and part.get("type") == "text":
                        pieces.append(part.get("text", ""))

            joined = "\n".join(p for p in pieces if p.strip())
            if joined.strip():
                collected.append({
                    "line": lineno,
                    "role": role,
                    "text": joined,
                    "timestamp": record.get("timestamp", ""),
                })

    return collected
|
||||
|
||||
|
||||
def format_conversation(messages: list[dict]) -> str:
    """Render extracted messages as 'L<line> [role]: text' paragraphs."""
    rendered = []
    for msg in messages:
        body = msg["text"]
        # Cap very long messages (code dumps etc.) while keeping the gist.
        if len(body) > 2000:
            body = body[:1800] + "\n[...truncated...]"
        rendered.append(f'L{msg["line"]} [{msg["role"]}]: {body}')
    return "\n\n".join(rendered)
|
||||
|
||||
|
||||
def get_memory_nodes() -> str:
    """Collect memory-graph context for link proposals.

    Shells out to poc-memory twice — once for the graph summary (top
    hubs) and once for status (recent nodes) — and returns both in one
    formatted string. Any failure degrades to an empty section.
    """
    def _capture(args: list[str]) -> str:
        # Best-effort: a missing binary, timeout, or crash yields "".
        try:
            proc = subprocess.run(
                args, capture_output=True, text=True, timeout=10
            )
        except Exception:
            return ""
        return proc.stdout.strip()

    graph = _capture(["poc-memory", "graph"])
    status = _capture(["poc-memory", "status"])
    return f"Graph (top hubs):\n{graph}\n\nStatus:\n{status}"
|
||||
|
||||
|
||||
def get_semantic_keys(memory_dir=None) -> list[str]:
    """List semantic-memory link targets by scanning a memory directory.

    Returns each markdown filename, plus one "<file>#<slug>" entry per
    "## " section header found inside it. Bookkeeping files (journal,
    work queue, etc.) are excluded.

    Args:
        memory_dir: Directory to scan (str or Path). Defaults to the
            module-level MEMORY_DIR.
    """
    base = Path(memory_dir) if memory_dir is not None else MEMORY_DIR
    # Bookkeeping files that are not link targets. BUG FIX: the original
    # listed a bare "work-state", which can never match a "*.md" glob
    # result — corrected to "work-state.md".
    skip = {"journal.md", "work-queue.md", "work-state.md",
            "where-am-i.md", "MEMORY.md"}
    keys: list[str] = []
    for md in sorted(base.glob("*.md")):
        name = md.name
        if name in skip:
            continue
        keys.append(name)
        # Each "## Header" becomes an addressable "<file>#<slug>" key.
        try:
            with open(md) as f:
                for line in f:
                    if line.startswith("## "):
                        slug = re.sub(r'[^a-z0-9-]', '',
                                      line[3:].strip().lower().replace(' ', '-'))
                        if slug:
                            keys.append(f"{name}#{slug}")
        except Exception:
            # Unreadable file: keep the filename key, skip its sections.
            pass
    return keys
|
||||
|
||||
|
||||
def build_prompt(entry_text: str, conversation: str,
                 memory_nodes: str, semantic_keys: list[str],
                 grep_line: int) -> str:
    """Build the prompt for Sonnet.

    Args:
        entry_text: The journal entry being enriched.
        conversation: Line-numbered transcript (format_conversation output).
        memory_nodes: Graph/status summary text (get_memory_nodes output).
        semantic_keys: Available link-target keys (get_semantic_keys output).
        grep_line: Approximate source line from grep; 0 means no match.

    Returns:
        The full prompt string, ending with the conversation transcript.
    """
    # NOTE(review): silently truncates to the first 200 keys — confirm that
    # is acceptable once the semantic memory grows past that.
    keys_text = "\n".join(f"  - {k}" for k in semantic_keys[:200])

    return f"""You are a memory agent for an AI named ProofOfConcept. A journal entry
was just written. Your job is to enrich it by finding its exact source in the
conversation and linking it to semantic memory.

## Task 1: Find exact source

The journal entry below was written during or after a conversation. Find the
exact region of the conversation it refers to — the exchange where the topic
was discussed. Return the start and end line numbers.

The grep-based approximation placed it near line {grep_line} (0 = no match).
Use that as a hint but find the true boundaries.

## Task 2: Propose semantic links

Which existing semantic memory nodes should this journal entry be linked to?
Look for:
- Concepts discussed in the entry
- Skills/patterns demonstrated
- People mentioned
- Projects or subsystems involved
- Emotional themes

Each link should be bidirectional — the entry documents WHEN something happened,
the semantic node documents WHAT it is. Together they let you traverse:
"What was I doing on this day?" ↔ "When did I learn about X?"

## Task 3: Spot missed insights

Read the conversation around the journal entry. Is there anything worth
capturing that the entry missed? A pattern, a decision, an insight, something
Kent said that's worth remembering? Be selective — only flag genuinely valuable
things.

## Output format (JSON)

Return ONLY a JSON object:
```json
{{
  "source_start": 1234,
  "source_end": 1256,
  "links": [
    {{"target": "memory-key#section", "reason": "why this link exists"}}
  ],
  "missed_insights": [
    {{"text": "insight text", "suggested_key": "where it belongs"}}
  ],
  "temporal_tags": ["2026-02-28", "topology-metrics", "poc-memory"]
}}
```

For links, use existing keys from the semantic memory list below. If nothing
fits, suggest a new key with a NOTE prefix: "NOTE:new-topic-name".

---

## Journal entry

{entry_text}

---

## Semantic memory nodes (available link targets)

{keys_text}

---

## Memory graph

{memory_nodes}

---

## Full conversation (with line numbers)

{conversation}
"""
|
||||
|
||||
|
||||
def call_sonnet(prompt: str) -> dict:
    """Send *prompt* to Sonnet through the call-sonnet.sh wrapper.

    Returns the parsed JSON object from the model's reply, or a dict of
    the form {"error": "..."} on any failure (timeout, empty output,
    missing wrapper, unparseable response).
    """
    import tempfile

    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    # Hand the prompt over via a temp file — avoids Python subprocess
    # pipe issues.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as tmp:
        tmp.write(prompt)
        prompt_path = tmp.name

    try:
        here = os.path.dirname(os.path.abspath(__file__))
        completed = subprocess.run(
            [os.path.join(here, "call-sonnet.sh"), prompt_path],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )

        reply = completed.stdout.strip()
        if not reply:
            return {"error": f"Empty response. stderr: {completed.stderr[:500]}"}

        # The model may wrap the JSON in markdown — grab the outermost braces.
        found = re.search(r'\{[\s\S]*\}', reply)
        if not found:
            return {"error": f"No JSON found in response: {reply[:500]}"}
        return json.loads(found.group())

    except subprocess.TimeoutExpired:
        return {"error": "Sonnet call timed out after 300s"}
    except json.JSONDecodeError as e:
        return {"error": f"JSON parse error: {e}. Output: {reply[:500]}"}
    except Exception as e:
        return {"error": str(e)}
    finally:
        # Always remove the temp prompt file, success or not.
        os.unlink(prompt_path)
|
||||
|
||||
|
||||
def save_result(entry_text: str, jsonl_path: str, result: dict):
    """Persist the agent's output as JSON for later pickup by poc-memory.

    The file is named with a second-resolution timestamp and written to
    RESULTS_DIR. Returns the path of the written file.
    """
    stamp = time.strftime("%Y%m%dT%H%M%S")
    out_path = RESULTS_DIR / f"{stamp}.json"

    payload = {
        "timestamp": stamp,
        "jsonl_path": jsonl_path,
        # Keep only a bounded preview of the entry text.
        "entry_text": entry_text[:500],
        "agent_result": result,
    }

    with open(out_path, "w") as fh:
        json.dump(payload, fh, indent=2)

    return out_path
|
||||
|
||||
|
||||
def apply_links(result: dict):
    """Report the agent's proposed links (actual application is deferred).

    Skips empty targets and NOTE:-prefixed new-key suggestions. For now
    this only logs — wiring into poc-memory waits on a link-from-agent
    command.
    """
    for proposal in result.get("links", []):
        dest = proposal.get("target", "")
        if not dest or dest.startswith("NOTE:"):
            continue
        print(f"  LINK → {dest}: {proposal.get('reason', '')}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: enrich one journal entry via the Sonnet agent.

    argv: JSONL_PATH ENTRY_TEXT [GREP_LINE]. Exits 1 on bad usage or a
    missing transcript file; otherwise always saves a result file, even
    when the Sonnet call reported an error.
    """
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} JSONL_PATH ENTRY_TEXT [GREP_LINE]",
              file=sys.stderr)
        sys.exit(1)

    jsonl_path, entry_text = sys.argv[1], sys.argv[2]
    grep_line = int(sys.argv[3]) if len(sys.argv) > 3 else 0

    if not os.path.isfile(jsonl_path):
        print(f"JSONL not found: {jsonl_path}", file=sys.stderr)
        sys.exit(1)

    print(f"Extracting conversation from {jsonl_path}...")
    messages = extract_conversation(jsonl_path)
    conversation = format_conversation(messages)
    print(f"  {len(messages)} messages, {len(conversation):,} chars")

    print("Getting memory context...")
    memory_nodes = get_memory_nodes()
    semantic_keys = get_semantic_keys()
    print(f"  {len(semantic_keys)} semantic keys")

    print("Building prompt...")
    prompt = build_prompt(entry_text, conversation, memory_nodes,
                          semantic_keys, grep_line)
    print(f"  Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")

    print("Calling Sonnet...")
    result = call_sonnet(prompt)

    if "error" in result:
        print(f"  Error: {result['error']}", file=sys.stderr)
    else:
        span = f"L{result.get('source_start', '?')}-L{result.get('source_end', '?')}"
        print(f"  Source: {span}")
        print(f"  Links: {len(result.get('links', []))}")
        print(f"  Missed insights: {len(result.get('missed_insights', []))}")
        apply_links(result)

    # Persist even error results so failures are visible to poc-memory.
    result_file = save_result(entry_text, jsonl_path, result)
    print(f"  Results saved: {result_file}")
|
||||
|
||||
|
||||
# Direct invocation: journal-agent.py JSONL_PATH ENTRY_TEXT [GREP_LINE]
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue