poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core: - Cap'n Proto append-only storage (nodes + relations) - Graph algorithms: clustering coefficient, community detection, schema fit, small-world metrics, interference detection - BM25 text similarity with Porter stemming - Spaced repetition replay queue - Commands: search, init, health, status, graph, categorize, link-add, link-impact, decay, consolidate-session, etc. Python scripts: - Episodic digest pipeline: daily/weekly/monthly-digest.py - retroactive-digest.py for backfilling - consolidation-agents.py: 3 parallel Sonnet agents - apply-consolidation.py: structured action extraction + apply - digest-link-parser.py: extract ~400 explicit links from digests - content-promotion-agent.py: promote episodic obs to semantic files - bulk-categorize.py: categorize all nodes via single Sonnet call - consolidation-loop.py: multi-round automated consolidation Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
commit
23fac4e5fe
35 changed files with 9388 additions and 0 deletions
199
scripts/bulk-categorize.py
Normal file
199
scripts/bulk-categorize.py
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
#!/usr/bin/env python3
|
||||
"""bulk-categorize.py — categorize all memory nodes via a single Sonnet call.
|
||||
|
||||
Sends the list of unique file names to Sonnet, gets back categorizations,
|
||||
then applies them via poc-memory categorize.
|
||||
|
||||
Usage:
|
||||
bulk-categorize.py # dry run
|
||||
bulk-categorize.py --apply # apply categorizations
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
MEMORY_DIR = Path.home() / ".claude" / "memory"
|
||||
SCRIPTS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
def call_sonnet(prompt: str, timeout: int = 300) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    The prompt is written to a temp file and passed to the wrapper as its
    single argument.  On any failure (non-zero exit, timeout, missing
    wrapper) a string beginning with "Error:" is returned, so callers can
    detect failure with ``response.startswith("Error:")``.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        The model's stdout, stripped, or an "Error: ..." string.
    """
    env = dict(os.environ)
    # Make sure the wrapper doesn't think it is nested inside Claude Code.
    env.pop("CLAUDECODE", None)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        # Surface wrapper failures explicitly: the original ignored the
        # exit status and would return empty stdout on failure, which the
        # caller's startswith("Error:") check could not detect.
        if result.returncode != 0:
            return f"Error: wrapper exited {result.returncode}: {result.stderr.strip()}"
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        # Always remove the temp prompt file, even on the error paths.
        os.unlink(prompt_file)
|
||||
|
||||
|
||||
def get_all_keys() -> list[str]:
    """Return the sorted, de-duplicated node keys from state.json.

    Keys are scraped with a regex rather than a full JSON parse, so the
    exact nesting of "key" fields in the document doesn't matter.  An
    empty list is returned when state.json is missing.
    """
    state_path = MEMORY_DIR / "state.json"
    if state_path.exists():
        matches = re.findall(r'"key":\s*"([^"]*)"', state_path.read_text())
        return sorted(set(matches))
    return []
|
||||
|
||||
|
||||
def get_unique_files(keys: list[str]) -> list[str]:
    """Collapse node keys to unique file names, dropping '#section' anchors.

    Returns the file names in sorted order.
    """
    return sorted({key.split('#')[0] for key in keys})
|
||||
|
||||
|
||||
def build_prompt(files: list[str]) -> str:
    """Build the categorization prompt sent to Sonnet.

    Each file gets a one-line preview (first 5 lines, capped at 300 chars,
    newlines flattened to ' | ') so the model has context beyond the bare
    file name.  Files are looked up directly under MEMORY_DIR first, then
    under the episodic/ subdirectory.
    """
    file_previews = []
    for name in files:
        path = MEMORY_DIR / name
        if not path.exists():
            # Fall back to the episodic subdirectory.
            path = MEMORY_DIR / "episodic" / name
        if not path.exists():
            file_previews.append(f"  {name}: (file not found)")
            continue
        text = path.read_text()
        # First 5 lines or 300 chars, whichever is shorter.
        preview = '\n'.join(text.split('\n')[:5])[:300]
        file_previews.append(f"  {name}: {preview.replace(chr(10), ' | ')}")

    previews_text = '\n'.join(file_previews)

    return f"""Categorize each memory file into one of these categories:

- **core**: Identity, relationships, self-model, values, boundaries, emotional life.
  Examples: identity.md, kent.md, inner-life.md, differentiation.md
- **tech**: Technical content — bcachefs, code patterns, Rust, kernel, formal verification.
  Examples: rust-conversion.md, btree-journal.md, kernel-patterns.md, allocation-io.md
- **obs**: Observations, experiences, discoveries, experiments, IRC history, conversations.
  Examples: discoveries.md, irc-history.md, contradictions.md, experiments-on-self.md
- **task**: Work items, plans, design documents, work queue.
  Examples: work-queue.md, the-plan.md, design-*.md

Special rules:
- Episodic files (daily-*.md, weekly-*.md, monthly-*.md, session-*.md) → obs
- conversation-memories.md, deep-index.md → obs
- journal.md → obs
- paper-notes.md → core (it's the sentience paper, identity-defining)
- language-theory.md → core (original intellectual work, not just tech)
- skill-*.md → core (self-knowledge about capabilities)
- design-*.md → task (design documents are plans)
- poc-architecture.md, memory-architecture.md → task (architecture plans)
- blog-setup.md → task

Files to categorize:
{previews_text}

Output ONLY a JSON object mapping filename to category. No explanation.
Example: {{"identity.md": "core", "rust-conversion.md": "tech"}}
"""
|
||||
|
||||
|
||||
def main():
    """Categorize every memory node via a single Sonnet call.

    Dry run by default: prints the proposed categorizations and saves them
    to agent-results/ for review.  With --apply, runs
    `poc-memory categorize` on the file-level node and every section node
    of each categorized file.

    Exits non-zero when the Sonnet call fails or its response cannot be
    parsed as JSON.
    """
    do_apply = "--apply" in sys.argv

    keys = get_all_keys()
    files = get_unique_files(keys)
    print(f"Found {len(keys)} nodes across {len(files)} files")

    # Build and send prompt
    prompt = build_prompt(files)
    print(f"Prompt: {len(prompt):,} chars")
    print("Calling Sonnet...")

    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f"  {response}")
        sys.exit(1)

    # Strip an optional Markdown code fence around the JSON.  Accept a bare
    # ``` opener as well as ```json (the original only handled ```json).
    response = re.sub(r'^```(?:json)?\s*', '', response.strip())
    response = re.sub(r'\s*```$', '', response.strip())

    try:
        categorizations = json.loads(response)
    except json.JSONDecodeError:
        # The model wrapped the JSON in prose — salvage the outermost object.
        match = re.search(r'\{.*\}', response, re.DOTALL)
        if match:
            categorizations = json.loads(match.group())
        else:
            print(f"Failed to parse response: {response[:500]}")
            sys.exit(1)

    print(f"\nCategorizations: {len(categorizations)} files")

    # Count by category
    counts = {}
    for cat in categorizations.values():
        counts[cat] = counts.get(cat, 0) + 1
    for cat, n in sorted(counts.items()):
        print(f"  {cat}: {n}")

    if not do_apply:
        print("\n--- Dry run ---")
        for f, cat in sorted(categorizations.items()):
            print(f"  {f} → {cat}")
        print(f"\nTo apply: {sys.argv[0]} --apply")

        # Save for review.  Create the directory first: the original
        # crashed here on a fresh install where agent-results/ didn't exist.
        out = MEMORY_DIR / "agent-results" / "bulk-categorize-preview.json"
        out.parent.mkdir(parents=True, exist_ok=True)
        with open(out, "w") as fp:
            json.dump(categorizations, fp, indent=2)
        print(f"Saved: {out}")
        return

    # Apply: for each file, categorize the file-level node AND all section nodes
    applied = skipped = errors = 0
    for filename, category in sorted(categorizations.items()):
        # Find all keys that belong to this file
        file_keys = [k for k in keys if k == filename or k.startswith(filename + '#')]
        for key in file_keys:
            try:
                r = subprocess.run(
                    ["poc-memory", "categorize", key, category],
                    capture_output=True, text=True, timeout=10
                )
                if r.returncode == 0:
                    applied += 1
                else:
                    err = r.stderr.strip()
                    # "already categorized" is a skip, not a failure.
                    if "already" in err.lower():
                        skipped += 1
                    else:
                        errors += 1
            except Exception:
                # Count but keep going: one bad node shouldn't abort the run.
                errors += 1

    print(f"\nApplied: {applied} Skipped: {skipped} Errors: {errors}")
    print("Run `poc-memory status` to verify.")


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue