consciousness/scripts/journal-agent.py
ProofOfConcept 23fac4e5fe poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
  schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
  link-add, link-impact, decay, consolidate-session, etc.

Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00

343 lines
10 KiB
Python
Executable file

#!/usr/bin/env python3
"""journal-agent.py — background agent that enriches journal entries.
Spawned by poc-journal after each write. Sends the full conversation
to Sonnet to:
1. Find the exact conversation region the entry refers to
2. Propose bidirectional links to semantic memory nodes
3. Spot additional insights worth capturing
Results are written to ~/.claude/memory/agent-results/ as JSON for
pickup by poc-memory.
Usage:
journal-agent.py JSONL_PATH ENTRY_TEXT [GREP_LINE]
"""
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path
# Root of the persistent memory store shared with poc-memory.
MEMORY_DIR = Path.home() / ".claude" / "memory"
# Drop-box for agent output JSON, picked up later by poc-memory.
RESULTS_DIR = MEMORY_DIR / "agent-results"
# Created eagerly at import time so later writes cannot fail on a missing dir.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
def extract_conversation(jsonl_path: str) -> list[dict]:
    """Extract user/assistant messages with line numbers.

    Args:
        jsonl_path: Path to a session transcript in JSONL form.

    Returns:
        A list of dicts with keys "line" (1-based line number in the file),
        "role" ("user" or "assistant"), "text", and "timestamp". Lines that
        are not valid JSON, are not user/assistant messages, or carry no
        non-whitespace text are skipped.
    """
    messages = []
    # JSONL transcripts are UTF-8; be explicit so a non-UTF-8 locale default
    # (e.g. cp1252 on Windows) cannot corrupt or reject the decode.
    with open(jsonl_path, encoding="utf-8") as f:
        for i, line in enumerate(f, 1):
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue  # tolerate partial/garbage lines
            t = obj.get("type", "")
            if t not in ("user", "assistant"):
                continue
            msg = obj.get("message", {})
            content = msg.get("content", "")
            timestamp = obj.get("timestamp", "")
            # Content may be a plain string or a list of content parts
            # (text blocks, tool calls, ...); keep only the textual parts.
            texts = []
            if isinstance(content, list):
                for c in content:
                    if isinstance(c, dict) and c.get("type") == "text":
                        texts.append(c.get("text", ""))
                    elif isinstance(c, str):
                        texts.append(c)
            elif isinstance(content, str):
                texts.append(content)
            # Renamed loop variable (was `t`) to avoid shadowing the role.
            text = "\n".join(s for s in texts if s.strip())
            if text.strip():
                messages.append({
                    "line": i,
                    "role": t,
                    "text": text,
                    "timestamp": timestamp,
                })
    return messages
def format_conversation(messages: list[dict]) -> str:
    """Format messages for the agent prompt.

    Each message becomes "L<line> [<role>]: <text>"; messages over 2000
    chars are cut to 1800 with a truncation marker. Entries are joined
    with blank lines.
    """
    def render(m: dict) -> str:
        body = m["text"]
        # Trim very long messages (code output etc) but keep the substance.
        if len(body) > 2000:
            body = body[:1800] + "\n[...truncated...]"
        return f'L{m["line"]} [{m["role"]}]: {body}'

    return "\n\n".join(render(m) for m in messages)
def get_memory_nodes() -> str:
    """Get a list of memory nodes for link proposals.

    Uses poc-memory to get top nodes by degree plus recent nodes.
    """
    def run_quiet(subcommand: str) -> str:
        # Best-effort: a missing binary, timeout, or any other failure
        # simply yields an empty section rather than aborting the agent.
        try:
            proc = subprocess.run(
                ["poc-memory", subcommand],
                capture_output=True, text=True, timeout=10,
            )
        except Exception:
            return ""
        return proc.stdout.strip()

    graph = run_quiet("graph")    # graph summary (top hubs)
    status = run_quiet("status")  # recent nodes
    return f"Graph (top hubs):\n{graph}\n\nStatus:\n{status}"
def get_semantic_keys(base_dir=None) -> list[str]:
    """Get all semantic memory file keys by scanning a memory dir.

    Args:
        base_dir: Directory to scan; defaults to MEMORY_DIR. (Parameter
            added for testability; default behavior is unchanged.)

    Returns:
        File names like "notes.md" plus anchored section keys like
        "notes.md#section-slug" derived from "## " headers, skipping
        operational files that are not semantic memory.
    """
    root = MEMORY_DIR if base_dir is None else Path(base_dir)
    # Operational state files, not semantic memory.
    # NOTE(review): "work-state" has no .md suffix and can never match a
    # *.md glob hit — confirm whether "work-state.md" was intended.
    skip = {"journal.md", "work-queue.md", "work-state",
            "where-am-i.md", "MEMORY.md"}
    keys: list[str] = []
    for md in sorted(root.glob("*.md")):
        name = md.name
        if name in skip:
            continue
        keys.append(name)
        # Also expose "## " section headers as anchored link targets.
        try:
            with open(md, encoding="utf-8") as f:
                for line in f:
                    if line.startswith("## "):
                        slug = re.sub(r'[^a-z0-9-]', '',
                                      line[3:].strip().lower().replace(' ', '-'))
                        if slug:
                            keys.append(f"{name}#{slug}")
        except Exception:
            # Unreadable file: keep the file-level key, skip its sections.
            pass
    return keys
def build_prompt(entry_text: str, conversation: str,
                 memory_nodes: str, semantic_keys: list[str],
                 grep_line: int) -> str:
    """Build the prompt for Sonnet.

    Assembles the journal entry, up to 200 semantic keys, the memory graph
    summary, and the numbered conversation into one instruction prompt.
    """
    # Cap at 200 keys to keep the prompt size bounded.
    key_list = "\n".join(" - " + key for key in semantic_keys[:200])
    return f"""You are a memory agent for an AI named ProofOfConcept. A journal entry
was just written. Your job is to enrich it by finding its exact source in the
conversation and linking it to semantic memory.
## Task 1: Find exact source
The journal entry below was written during or after a conversation. Find the
exact region of the conversation it refers to — the exchange where the topic
was discussed. Return the start and end line numbers.
The grep-based approximation placed it near line {grep_line} (0 = no match).
Use that as a hint but find the true boundaries.
## Task 2: Propose semantic links
Which existing semantic memory nodes should this journal entry be linked to?
Look for:
- Concepts discussed in the entry
- Skills/patterns demonstrated
- People mentioned
- Projects or subsystems involved
- Emotional themes
Each link should be bidirectional — the entry documents WHEN something happened,
the semantic node documents WHAT it is. Together they let you traverse:
"What was I doing on this day?""When did I learn about X?"
## Task 3: Spot missed insights
Read the conversation around the journal entry. Is there anything worth
capturing that the entry missed? A pattern, a decision, an insight, something
Kent said that's worth remembering? Be selective — only flag genuinely valuable
things.
## Output format (JSON)
Return ONLY a JSON object:
```json
{{
"source_start": 1234,
"source_end": 1256,
"links": [
{{"target": "memory-key#section", "reason": "why this link exists"}}
],
"missed_insights": [
{{"text": "insight text", "suggested_key": "where it belongs"}}
],
"temporal_tags": ["2026-02-28", "topology-metrics", "poc-memory"]
}}
```
For links, use existing keys from the semantic memory list below. If nothing
fits, suggest a new key with a NOTE prefix: "NOTE:new-topic-name".
---
## Journal entry
{entry_text}
---
## Semantic memory nodes (available link targets)
{key_list}
---
## Memory graph
{memory_nodes}
---
## Full conversation (with line numbers)
{conversation}
"""
def call_sonnet(prompt: str) -> dict:
    """Call Sonnet via claude CLI and parse JSON response.

    Returns the parsed JSON object on success, otherwise a dict with a
    single "error" key describing the failure. The prompt temp file is
    always removed.
    """
    import tempfile

    env = dict(os.environ)
    env.pop("CLAUDECODE", None)
    # Write prompt to temp file — avoids Python subprocess pipe issues
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as tmp:
        tmp.write(prompt)
        prompt_file = tmp.name
    output = ""
    try:
        wrapper = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "call-sonnet.sh")
        proc = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        output = proc.stdout.strip()
        if not output:
            return {"error": f"Empty response. stderr: {proc.stderr[:500]}"}
        # Extract JSON from response (might be wrapped in markdown)
        json_match = re.search(r'\{[\s\S]*\}', output)
        if json_match is None:
            return {"error": f"No JSON found in response: {output[:500]}"}
        return json.loads(json_match.group())
    except subprocess.TimeoutExpired:
        return {"error": "Sonnet call timed out after 300s"}
    except json.JSONDecodeError as e:
        return {"error": f"JSON parse error: {e}. Output: {output[:500]}"}
    except Exception as e:
        return {"error": str(e)}
    finally:
        os.unlink(prompt_file)
def save_result(entry_text: str, jsonl_path: str, result: dict):
    """Save agent results for pickup by poc-memory.

    Writes a timestamped JSON file into RESULTS_DIR containing the source
    transcript path, the first 500 chars of the entry, and the agent's
    result. Returns the path of the written file.
    """
    stamp = time.strftime("%Y%m%dT%H%M%S")
    payload = {
        "timestamp": stamp,
        "jsonl_path": jsonl_path,
        "entry_text": entry_text[:500],  # cap stored entry text
        "agent_result": result,
    }
    out_path = RESULTS_DIR / f"{stamp}.json"
    with open(out_path, "w") as f:
        json.dump(payload, f, indent=2)
    return out_path
def apply_links(result: dict):
    """Apply proposed links via poc-memory.

    Skips empty targets and "NOTE:"-prefixed suggestions (keys that do not
    exist yet). Currently only logs each accepted link.
    """
    for link in result.get("links", []):
        target = link.get("target", "")
        if not target or target.startswith("NOTE:"):
            continue
        # For now, just log — we'll wire this up when poc-memory
        # has a link-from-agent command
        reason = link.get("reason", "")
        print(f" LINK → {target}: {reason}")
def main():
    """CLI entry point: enrich one journal entry from a transcript.

    argv: JSONL_PATH ENTRY_TEXT [GREP_LINE]. Exits 1 on bad usage or a
    missing transcript file.
    """
    argv = sys.argv
    if len(argv) < 3:
        print(f"Usage: {argv[0]} JSONL_PATH ENTRY_TEXT [GREP_LINE]",
              file=sys.stderr)
        sys.exit(1)
    jsonl_path, entry_text = argv[1], argv[2]
    grep_line = int(argv[3]) if len(argv) > 3 else 0
    if not os.path.isfile(jsonl_path):
        print(f"JSONL not found: {jsonl_path}", file=sys.stderr)
        sys.exit(1)

    print(f"Extracting conversation from {jsonl_path}...")
    messages = extract_conversation(jsonl_path)
    conversation = format_conversation(messages)
    print(f" {len(messages)} messages, {len(conversation):,} chars")

    print("Getting memory context...")
    memory_nodes = get_memory_nodes()
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")

    print("Building prompt...")
    prompt = build_prompt(entry_text, conversation, memory_nodes,
                          semantic_keys, grep_line)
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")

    print("Calling Sonnet...")
    result = call_sonnet(prompt)
    if "error" in result:
        print(f" Error: {result['error']}", file=sys.stderr)
    else:
        span = f"L{result.get('source_start', '?')}-L{result.get('source_end', '?')}"
        print(f" Source: {span}")
        print(f" Links: {len(result.get('links', []))}")
        print(f" Missed insights: {len(result.get('missed_insights', []))}")
        apply_links(result)

    # Results are persisted even on error, for later inspection.
    result_file = save_result(entry_text, jsonl_path, result)
    print(f" Results saved: {result_file}")


if __name__ == "__main__":
    main()