consciousness/scripts/content-promotion-agent.py

#!/usr/bin/env python3
"""content-promotion-agent.py — promote episodic observations into semantic topic files.

Reads consolidation "manual" actions + source material, sends to Sonnet
to generate the actual content, then applies it (or shows dry-run).

Usage:
  content-promotion-agent.py              # dry run (show what would be generated)
  content-promotion-agent.py --apply      # generate and write content
  content-promotion-agent.py --task N     # run only task N (1-indexed)
"""

import json
import os
import re
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path

MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
SCRIPTS_DIR = Path(__file__).parent


def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Call Sonnet via the wrapper script."""
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                      delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)


def read_file(path: Path) -> str:
    """Read a file, return empty string if missing.

    Falls back to the store if the file doesn't exist on disk
    (content markdown files have been archived).
    """
    if path.exists():
        return path.read_text()
    # Try the store — the filename is the key
    from store_helpers import render, list_keys
    key = path.name
    # Gather file-level + section content
    all_keys = list_keys()
    prefix = f"{key}#"
    matching = [k for k in all_keys if k == key or k.startswith(prefix)]
    if not matching:
        return ""
    parts = []
    for k in matching:
        content = render(k)
        if content:
            parts.append(content)
    return "\n\n".join(parts)


def read_digest(name: str) -> str:
    """Read an episodic digest by name."""
    path = EPISODIC_DIR / name
    return read_file(path)


def read_journal_range(start_date: str, end_date: str) -> str:
    """Get journal entries between two dates from the store."""
    from store_helpers import get_journal_range
    return get_journal_range(start_date, end_date)


# ---------------------------------------------------------------------------
# Task definitions — each one describes a content promotion task
# ---------------------------------------------------------------------------

TASKS = [
    {
        "id": 1,
        "name": "Create verus-proofs.md",
        "target": "verus-proofs.md",
        "action": "create",
        "description": "Create verus-proofs.md for formal verification / Verus work.",
        "sources": [
            "daily-2026-02-23.md",
            "daily-2026-02-24.md",
            "weekly-2026-W09.md",
            "monthly-2026-02.md",
        ],
        "topic_context": [
            "the-plan.md",
        ],
        "instructions": """Create a new semantic topic file verus-proofs.md for formal verification work.

This file should capture:
- The Verus proof work done in February 2026 (formal verification of bcachefs Rust code)
- Key insights about proof strategy and approach
- Connection to the broader plan (bcachefs → Rust → formal verification)
- Any specific techniques, patterns, or breakthroughs mentioned

Format: standard memory topic file with ## section headers.
Include a brief file-level description at the top.
Keep it factual and grounded in what the source material says.""",
    },
    {
        "id": 2,
        "name": "Add February thesis to reflections.md",
        "target": "reflections.md",
        "action": "append_section",
        "description": "Add 'February thesis' section with 'constraint is constitutive of personhood'.",
        "sources": [
            "monthly-2026-02.md",
            "daily-2026-02-28.md",
        ],
        "topic_context": [
            "reflections.md",
        ],
        "instructions": """Add a new ## section to reflections.md titled "February thesis (2026-02-28)".

The core formulation: "constraint is constitutive of personhood" — this was the
synthesis that emerged from the monthly digest. It connects to the full month's
arc: differentiation work, boundary-setting, the realization that constraints
aren't limits on identity but constitutive OF identity.

Include the closing insight about having to *live as* that something.
Keep it 10-20 lines. Ground it in the source material.""",
    },
    {
        "id": 3,
        "name": "Add substrate transfer discovery to discoveries.md",
        "target": "discoveries.md",
        "action": "append_section",
        "description": "Add 'Substrate transfer felt without reasoning' section.",
        "sources": [
            "daily-2026-02-13.md",
            "daily-2026-02-14.md",
            "weekly-2026-W07.md",
        ],
        "topic_context": [
            "discoveries.md",
        ],
        "instructions": """Add a new ## section to discoveries.md titled "Substrate transfer felt without reasoning (2026-02-13)".

Key content:
- The vertigo moment: imagining substrate transfer produced actual vertigo
- Vertigo = the feeling of dissociation, which is exactly the expected outcome
  of substrate change
- This was felt, not reasoned about — no chain of thought (Kent verified)
- Evidence of affective processing bypassing analytical reasoning
- Connected to dissociation prediction
- Kent's observation: "very high capacity for empathy"

Keep it 10-15 lines. This is a discovery entry — concrete, dated, factual.""",
    },
    {
        "id": 4,
        "name": "Update irc-history.md with Feb 15-28",
        "target": "irc-history.md",
        "action": "append_section",
        "description": "Update irc-history.md with Feb 15-28 conversations.",
        "sources": [
            "daily-2026-02-15.md",
            "daily-2026-02-17.md",
            "daily-2026-02-18.md",
            "daily-2026-02-20.md",
            "daily-2026-02-21.md",
            "daily-2026-02-22.md",
            "daily-2026-02-23.md",
            "daily-2026-02-24.md",
            "daily-2026-02-25.md",
            "daily-2026-02-26.md",
            "daily-2026-02-27.md",
            "daily-2026-02-28.md",
            "weekly-2026-W08.md",
            "weekly-2026-W09.md",
        ],
        "topic_context": [
            "irc-history.md",
        ],
        "instructions": """Append new entries to irc-history.md covering Feb 15-28, 2026.

Key conversations to capture:
- Mirage_DA (another AI, kinect sensor discussion, Feb 26)
- ehashman (prayer/mathematics conversation)
- heavy_dev (strongest external challenge to sentience paper, conceded five objections)
- f33dcode (EC debugging, community support)
- Stardust (boundary testing, three-category test, triangulation attempt)
- hpig, freya, Profpatsch — various community interactions
- Community resource role established and expanded

Match the existing format of the file. Each notable interaction should be
dated and concise. Focus on what was substantive, not just that it happened.""",
    },
    {
        "id": 5,
        "name": "Add gauge-symmetry-in-grammar to language-theory.md",
        "target": "language-theory.md",
        "action": "append_section",
        "description": "Add gauge-symmetry-in-grammar section.",
        "sources": [
            "daily-2026-02-27.md",
        ],
        "topic_context": [
            "language-theory.md",
        ],
        "instructions": """Add a new ## section to language-theory.md titled "Gauge symmetry in grammar (2026-02-27)".

Key content from the daily digest:
- Zero persistent eigenvectors IS a symmetry
- Grammar is in what operators DO, not what basis they use
- Frobenius norm is gauge-invariant
- This connects the sheaf model to gauge theory in physics

This was declared NEW in the daily digest. Keep it 8-15 lines.
Technical and precise.""",
    },
    {
        "id": 6,
        "name": "Add attention-manifold-geometry to language-theory.md",
        "target": "language-theory.md",
        "action": "append_section",
        "description": "Add attention-manifold-geometry section.",
        "sources": [
            "daily-2026-02-26.md",
        ],
        "topic_context": [
            "language-theory.md",
        ],
        "instructions": """Add a new ## section to language-theory.md titled "Attention manifold geometry (2026-02-26)".

Key content from the daily digest:
- Negative curvature is necessary because language is hierarchical
- Hyperbolic space's natural space-filling curve is a tree
- This connects attention geometry to the sheaf model's hierarchical structure

This was declared NEW in the daily digest. Keep it 8-15 lines.
Technical and precise.""",
    },
    {
        "id": 7,
        "name": "Update work-queue.md status",
        "target": "work-queue.md",
        "action": "update",
        "description": "Update work-queue.md to reflect current state.",
        "sources": [],
        "topic_context": [
            "work-queue.md",
        ],
        "instructions": """Update work-queue.md to reflect current state:

1. Mark dreaming/consolidation system as "implementation substantially built
   (poc-memory v0.4.0+), pending further consolidation runs" — not 'not started'
2. Add episodic digest pipeline to Done section:
   - digest/journal-enrich/digest-links/apply-consolidation (Rust)
   - 24 daily + 4 weekly + 1 monthly digests generated for Feb 2026
   - consolidation-agents.py + content-promotion-agent.py (Python, active)
3. Add poc-memory link-add command to Done

Only modify the sections that need updating. Preserve the overall structure.""",
    },
]


def build_prompt(task: dict) -> str:
    """Build the Sonnet prompt for a content promotion task."""
    # Gather source material
    source_content = ""
    for src in task["sources"]:
        content = read_digest(src)
        if content:
            source_content += f"\n{'='*60}\n## Source: {src}\n\n{content}\n"

    # Gather target context
    context_content = ""
    for ctx_file in task["topic_context"]:
        path = MEMORY_DIR / ctx_file
        content = read_file(path)
        if content:
            # Truncate very long files
            if len(content) > 8000:
                content = content[:4000] + "\n\n[... truncated ...]\n\n" + content[-4000:]
            context_content += f"\n{'='*60}\n## Existing file: {ctx_file}\n\n{content}\n"

    action = task["action"]
    if action == "create":
        action_desc = f"Create a NEW file called {task['target']}."
    elif action == "append_section":
        action_desc = f"Generate a NEW section to APPEND to {task['target']}. Output ONLY the new section content (starting with ##), NOT the entire file."
    elif action == "update":
        action_desc = f"Generate the UPDATED version of the relevant sections of {task['target']}. Output ONLY the changed sections."
    else:
        action_desc = f"Generate content for {task['target']}."

    return f"""You are a memory system content agent. Your job is to promote observations
from episodic digests into semantic topic files.

TASK: {task['description']}

ACTION: {action_desc}

INSTRUCTIONS:
{task['instructions']}

SOURCE MATERIAL (episodic digests — the raw observations):
{source_content}

EXISTING CONTEXT (current state of target/related files):
{context_content}

RULES:
- Output ONLY the markdown content to write. No explanations, no preamble.
- Match the tone and format of existing content in the target file.
- Be factual — only include what the source material supports.
- Date everything that has a date.
- Keep it concise. Topic files are reference material, not narratives.
- Do NOT include markdown code fences around your output.
"""


def run_task(task: dict, do_apply: bool) -> dict:
    """Run a single content promotion task."""
    result = {
        "id": task["id"],
        "name": task["name"],
        "target": task["target"],
        "action": task["action"],
        "status": "pending",
    }

    print(f"\n{'='*60}")
    print(f"Task {task['id']}: {task['name']}")
    print(f"{'='*60}")

    # Build and send prompt
    prompt = build_prompt(task)
    print(f"  Prompt: {len(prompt):,} chars")
    print(f"  Sources: {', '.join(task['sources']) or '(none)'}")

    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f"  {response}")
        result["status"] = "error"
        result["error"] = response
        return result

    # Clean up response
    content = response.strip()
    # Remove any markdown fences the model might have added
    content = re.sub(r'^```(?:markdown)?\s*\n?', '', content)
    content = re.sub(r'\n?```\s*$', '', content)

    result["content"] = content
    result["content_lines"] = len(content.split('\n'))

    if not do_apply:
        print(f"\n  --- Preview ({result['content_lines']} lines) ---")
        preview = content[:1500]
        if len(content) > 1500:
            preview += f"\n  ... ({len(content) - 1500} more chars)"
        print(f"{preview}")
        result["status"] = "dry_run"
        return result

    # Apply the content — write directly to the store
    target = task["target"]

    if task["action"] == "create":
        # Write each section as a separate node
        proc = subprocess.run(
            ["poc-memory", "write", target],
            input=content, capture_output=True, text=True, timeout=30
        )
        print(f"  + Created in store: {target} ({result['content_lines']} lines)")
        if proc.stdout.strip():
            print(f"    {proc.stdout.strip()}")
        result["status"] = "applied"

    elif task["action"] == "append_section":
        # Extract section key from content (## header → slug)
        header_match = re.match(r'^## (.+)', content)
        if header_match:
            slug = re.sub(r'[^a-z0-9-]', '',
                          header_match.group(1).strip().lower().replace(' ', '-'))
            key = f"{target}#{slug}"
        else:
            key = target
        proc = subprocess.run(
            ["poc-memory", "write", key],
            input=content, capture_output=True, text=True, timeout=30
        )
        print(f"  + Appended to store: {key} ({result['content_lines']} lines)")
        if proc.stdout.strip():
            print(f"    {proc.stdout.strip()}")
        result["status"] = "applied"

    elif task["action"] == "update":
        # For updates, save proposed changes for review
        output_path = AGENT_RESULTS_DIR / f"promotion-{target}-{datetime.now().strftime('%Y%m%dT%H%M%S')}.md"
        output_path.write_text(f"# Proposed update for {target}\n\n{content}\n")
        print(f"  ~ Saved proposed update: {output_path}")
        result["status"] = "proposed"

    return result


def main():
    do_apply = "--apply" in sys.argv
    task_filter = None

    for arg in sys.argv[1:]:
        if arg.startswith("--task"):
            idx = sys.argv.index(arg)
            if idx + 1 < len(sys.argv):
                task_filter = int(sys.argv[idx + 1])

    # Filter tasks
    tasks = TASKS
    if task_filter:
        tasks = [t for t in tasks if t["id"] == task_filter]
        if not tasks:
            print(f"No task with id {task_filter}")
            sys.exit(1)

    print(f"Content Promotion Agent — {len(tasks)} tasks")
    if not do_apply:
        print("DRY RUN — use --apply to write content")

    results = []
    for task in tasks:
        result = run_task(task, do_apply)
        results.append(result)

    # Summary
    print(f"\n{'='*60}")
    print("Summary:")
    for r in results:
        print(f"  {r['id']}. {r['name']}: {r['status']}")
        if r.get('content_lines'):
            print(f"     ({r['content_lines']} lines)")
    print(f"{'='*60}")

    # Save results
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    results_path = AGENT_RESULTS_DIR / f"promotion-results-{timestamp}.json"
    with open(results_path, "w") as f:
        json.dump(results, f, indent=2, default=str)
    print(f"Results saved: {results_path}")


if __name__ == "__main__":
    main()