poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""journal-agent.py — background agent that enriches journal entries.
|
|
|
|
|
|
|
|
|
|
Spawned by poc-journal after each write. Sends the full conversation
|
|
|
|
|
to Sonnet to:
|
|
|
|
|
1. Find the exact conversation region the entry refers to
|
|
|
|
|
2. Propose bidirectional links to semantic memory nodes
|
|
|
|
|
3. Spot additional insights worth capturing
|
|
|
|
|
|
|
|
|
|
Results are written to ~/.claude/memory/agent-results/ as JSON for
|
|
|
|
|
pickup by poc-memory.
|
|
|
|
|
|
|
|
|
|
Usage:
|
|
|
|
|
journal-agent.py JSONL_PATH ENTRY_TEXT [GREP_LINE]
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
import subprocess
|
|
|
|
|
import sys
|
|
|
|
|
import time
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
# Root of the persistent memory store shared with poc-memory.
MEMORY_DIR = Path.home() / ".claude" / "memory"
# Drop-box for agent output; poc-memory picks up JSON files from here.
RESULTS_DIR = MEMORY_DIR / "agent-results"
# Created eagerly at import time so save_result() can always write.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_conversation(jsonl_path: str) -> list[dict]:
    """Extract user/assistant messages with line numbers.

    Each returned dict has "line" (1-based line in the JSONL), "role"
    ("user" or "assistant"), "text" (joined text content), and
    "timestamp". Unparseable lines, other record types, and messages
    with no non-blank text are skipped.
    """
    collected: list[dict] = []
    with open(jsonl_path) as fh:
        for lineno, raw in enumerate(fh, 1):
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue  # tolerate malformed lines in the transcript

            role = record.get("type", "")
            if role != "user" and role != "assistant":
                continue

            content = record.get("message", {}).get("content", "")

            # Normalize content to a list of text pieces: it is either a
            # plain string or a list of strings / {"type": "text"} dicts.
            if isinstance(content, str):
                pieces = [content]
            elif isinstance(content, list):
                pieces = []
                for chunk in content:
                    if isinstance(chunk, str):
                        pieces.append(chunk)
                    elif isinstance(chunk, dict) and chunk.get("type") == "text":
                        pieces.append(chunk.get("text", ""))
            else:
                pieces = []

            body = "\n".join(p for p in pieces if p.strip())
            if not body.strip():
                continue

            collected.append({
                "line": lineno,
                "role": role,
                "text": body,
                "timestamp": record.get("timestamp", ""),
            })
    return collected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def format_conversation(messages: list[dict]) -> str:
    """Format messages for the agent prompt.

    Renders each message as 'L<line> [<role>]: <text>', clipping very
    long ones (code output etc) at 1800 chars while keeping substance.
    Entries are separated by blank lines.
    """

    def render(entry: dict) -> str:
        body = entry["text"]
        # Clip oversized messages but leave a marker so the agent knows.
        if len(body) > 2000:
            body = body[:1800] + "\n[...truncated...]"
        return f'L{entry["line"]} [{entry["role"]}]: {body}'

    return "\n\n".join(render(m) for m in messages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_memory_nodes() -> str:
    """Get a list of memory nodes for link proposals.

    Uses poc-memory to get top nodes by degree plus recent nodes.
    """

    def run_poc(command: str) -> str:
        # Best-effort: a missing binary, timeout, or any other failure
        # degrades to an empty section rather than aborting the agent.
        try:
            proc = subprocess.run(
                ["poc-memory", command],
                capture_output=True, text=True, timeout=10
            )
        except Exception:
            return ""
        return proc.stdout.strip()

    graph = run_poc("graph")    # graph summary (top hubs)
    status = run_poc("status")  # recent nodes

    return f"Graph (top hubs):\n{graph}\n\nStatus:\n{status}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_semantic_keys() -> list[str]:
    """Get all semantic memory keys from the store."""
    # Imported lazily so loading this module never requires store_helpers.
    from store_helpers import get_semantic_keys as fetch_keys
    return fetch_keys()
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_prompt(entry_text: str, conversation: str,
                 memory_nodes: str, semantic_keys: list[str],
                 grep_line: int) -> str:
    """Build the prompt for Sonnet.

    entry_text:    the journal entry being enriched.
    conversation:  transcript formatted with L<n> line markers.
    memory_nodes:  graph/status text from get_memory_nodes().
    semantic_keys: candidate link targets; only the first 200 are sent.
    grep_line:     grep-based source-line hint (0 = no match).

    Returns the full prompt string; the model is asked to reply with a
    single JSON object (source span, links, missed insights, tags).
    """
    # One bullet per key; capped at 200 to bound prompt size.
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])

    return f"""You are a memory agent for an AI named ProofOfConcept. A journal entry
was just written. Your job is to enrich it by finding its exact source in the
conversation and linking it to semantic memory.

## Task 1: Find exact source

The journal entry below was written during or after a conversation. Find the
exact region of the conversation it refers to — the exchange where the topic
was discussed. Return the start and end line numbers.

The grep-based approximation placed it near line {grep_line} (0 = no match).
Use that as a hint but find the true boundaries.

## Task 2: Propose semantic links

Which existing semantic memory nodes should this journal entry be linked to?
Look for:
- Concepts discussed in the entry
- Skills/patterns demonstrated
- People mentioned
- Projects or subsystems involved
- Emotional themes

Each link should be bidirectional — the entry documents WHEN something happened,
the semantic node documents WHAT it is. Together they let you traverse:
"What was I doing on this day?" ↔ "When did I learn about X?"

## Task 3: Spot missed insights

Read the conversation around the journal entry. Is there anything worth
capturing that the entry missed? A pattern, a decision, an insight, something
Kent said that's worth remembering? Be selective — only flag genuinely valuable
things.

## Output format (JSON)

Return ONLY a JSON object:
```json
{{
"source_start": 1234,
"source_end": 1256,
"links": [
{{"target": "memory-key#section", "reason": "why this link exists"}}
],
"missed_insights": [
{{"text": "insight text", "suggested_key": "where it belongs"}}
],
"temporal_tags": ["2026-02-28", "topology-metrics", "poc-memory"]
}}
```

For links, use existing keys from the semantic memory list below. If nothing
fits, suggest a new key with a NOTE prefix: "NOTE:new-topic-name".

---

## Journal entry

{entry_text}

---

## Semantic memory nodes (available link targets)

{keys_text}

---

## Memory graph

{memory_nodes}

---

## Full conversation (with line numbers)

{conversation}
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def call_sonnet(prompt: str) -> dict:
    """Call Sonnet via the call-sonnet.sh wrapper and parse the JSON reply.

    The prompt is written to a temp file (the wrapper takes a file path)
    and the wrapper is run with CLAUDECODE stripped from the environment.
    The first {...} span in stdout is parsed as JSON.

    Always returns a dict; every failure mode (non-zero exit, timeout,
    empty output, unparseable JSON, missing wrapper) is reported as
    {"error": "..."} rather than raised.
    """
    import tempfile

    # Strip CLAUDECODE so the CLI doesn't believe it is nested in a session.
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    # Write prompt to temp file — avoids Python subprocess pipe issues.
    # encoding='utf-8' is explicit: the prompt contains non-ASCII (em
    # dashes, ↔) and the locale default encoding may not accept it.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     encoding='utf-8',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    output = ""  # pre-bind so except clauses can safely reference it
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")

        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )

        # Surface wrapper failures instead of parsing possibly-junk stdout.
        if result.returncode != 0:
            return {"error": f"Wrapper exited {result.returncode}. "
                             f"stderr: {result.stderr[:500]}"}

        output = result.stdout.strip()
        if not output:
            return {"error": f"Empty response. stderr: {result.stderr[:500]}"}

        # Extract JSON from response (might be wrapped in markdown)
        json_match = re.search(r'\{[\s\S]*\}', output)
        if json_match:
            return json.loads(json_match.group())
        return {"error": f"No JSON found in response: {output[:500]}"}

    except subprocess.TimeoutExpired:
        return {"error": "Sonnet call timed out after 300s"}
    except json.JSONDecodeError as e:
        return {"error": f"JSON parse error: {e}. Output: {output[:500]}"}
    except Exception as e:
        return {"error": str(e)}
    finally:
        # The temp file is ours to clean up regardless of outcome.
        os.unlink(prompt_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_result(entry_text: str, jsonl_path: str, result: dict):
    """Save agent results for pickup by poc-memory.

    Writes a timestamped JSON file into RESULTS_DIR and returns its path.
    Only the first 500 chars of the entry text are kept in the record.
    """
    stamp = time.strftime("%Y%m%dT%H%M%S")
    out_path = RESULTS_DIR / f"{stamp}.json"

    payload = {
        "timestamp": stamp,
        "jsonl_path": jsonl_path,
        "entry_text": entry_text[:500],  # keep the record compact
        "agent_result": result,
    }
    out_path.write_text(json.dumps(payload, indent=2))

    return out_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def apply_links(result: dict):
    """Apply proposed links via poc-memory."""
    for entry in result.get("links", []):
        dest = entry.get("target", "")
        # Skip empty targets and NOTE: placeholders (new-key suggestions).
        if not dest or dest.startswith("NOTE:"):
            continue
        # For now, just log — we'll wire this up when poc-memory
        # has a link-from-agent command
        print(f" LINK → {dest}: {entry.get('reason', '')}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Entry point: parse args, gather context, run the agent, save output."""
    argv = sys.argv
    if len(argv) < 3:
        print(f"Usage: {argv[0]} JSONL_PATH ENTRY_TEXT [GREP_LINE]",
              file=sys.stderr)
        sys.exit(1)

    jsonl_path, entry_text = argv[1], argv[2]
    grep_line = int(argv[3]) if len(argv) > 3 else 0

    if not os.path.isfile(jsonl_path):
        print(f"JSONL not found: {jsonl_path}", file=sys.stderr)
        sys.exit(1)

    # Stage 1: pull the transcript out of the session JSONL.
    print(f"Extracting conversation from {jsonl_path}...")
    messages = extract_conversation(jsonl_path)
    conversation = format_conversation(messages)
    print(f" {len(messages)} messages, {len(conversation):,} chars")

    # Stage 2: memory context for link proposals.
    print("Getting memory context...")
    memory_nodes = get_memory_nodes()
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")

    # Stage 3: assemble the prompt.
    print("Building prompt...")
    prompt = build_prompt(entry_text, conversation, memory_nodes,
                          semantic_keys, grep_line)
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")

    # Stage 4: model call.
    print("Calling Sonnet...")
    result = call_sonnet(prompt)

    if "error" in result:
        print(f" Error: {result['error']}", file=sys.stderr)
    else:
        source = f"L{result.get('source_start', '?')}-L{result.get('source_end', '?')}"
        n_links = len(result.get("links", []))
        n_insights = len(result.get("missed_insights", []))
        print(f" Source: {source}")
        print(f" Links: {n_links}")
        print(f" Missed insights: {n_insights}")
        apply_links(result)

    # Persist even error results so failures can be inspected later.
    result_file = save_result(entry_text, jsonl_path, result)
    print(f" Results saved: {result_file}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point — spawned by poc-journal after each journal write.
if __name__ == "__main__":
    main()
|