poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core: - Cap'n Proto append-only storage (nodes + relations) - Graph algorithms: clustering coefficient, community detection, schema fit, small-world metrics, interference detection - BM25 text similarity with Porter stemming - Spaced repetition replay queue - Commands: search, init, health, status, graph, categorize, link-add, link-impact, decay, consolidate-session, etc. Python scripts: - Episodic digest pipeline: daily/weekly/monthly-digest.py - retroactive-digest.py for backfilling - consolidation-agents.py: 3 parallel Sonnet agents - apply-consolidation.py: structured action extraction + apply - digest-link-parser.py: extract ~400 explicit links from digests - content-promotion-agent.py: promote episodic obs to semantic files - bulk-categorize.py: categorize all nodes via single Sonnet call - consolidation-loop.py: multi-round automated consolidation Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
commit
23fac4e5fe
35 changed files with 9388 additions and 0 deletions
199
scripts/bulk-categorize.py
Normal file
199
scripts/bulk-categorize.py
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
#!/usr/bin/env python3
|
||||
"""bulk-categorize.py — categorize all memory nodes via a single Sonnet call.
|
||||
|
||||
Sends the list of unique file names to Sonnet, gets back categorizations,
|
||||
then applies them via poc-memory categorize.
|
||||
|
||||
Usage:
|
||||
bulk-categorize.py # dry run
|
||||
bulk-categorize.py --apply # apply categorizations
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
MEMORY_DIR = Path.home() / ".claude" / "memory"
|
||||
SCRIPTS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
def call_sonnet(prompt: str, timeout: int = 300) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    The prompt is written to a temp file and passed to the wrapper as its
    single argument.  On any failure (non-zero exit, timeout, missing
    wrapper) a string beginning with "Error:" is returned, so callers can
    detect failure with ``response.startswith("Error:")``.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        The model's stdout, stripped, or an "Error: ..." string.
    """
    env = dict(os.environ)
    # Make sure the wrapper doesn't think it is nested inside Claude Code.
    env.pop("CLAUDECODE", None)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        # Surface wrapper failures explicitly: the original ignored the
        # exit status and would return empty stdout on failure, which the
        # caller's startswith("Error:") check could not detect.
        if result.returncode != 0:
            return f"Error: wrapper exited {result.returncode}: {result.stderr.strip()}"
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        # Always remove the temp prompt file, even on the error paths.
        os.unlink(prompt_file)
|
||||
|
||||
|
||||
def get_all_keys() -> list[str]:
    """Return the sorted, de-duplicated node keys from state.json.

    Keys are scraped with a regex rather than a full JSON parse, so the
    exact nesting of "key" fields in the document doesn't matter.  An
    empty list is returned when state.json is missing.
    """
    state_path = MEMORY_DIR / "state.json"
    if state_path.exists():
        matches = re.findall(r'"key":\s*"([^"]*)"', state_path.read_text())
        return sorted(set(matches))
    return []
|
||||
|
||||
|
||||
def get_unique_files(keys: list[str]) -> list[str]:
    """Collapse node keys to unique file names, dropping '#section' anchors.

    Returns the file names in sorted order.
    """
    return sorted({key.split('#')[0] for key in keys})
|
||||
|
||||
|
||||
def build_prompt(files: list[str]) -> str:
    """Build the categorization prompt sent to Sonnet.

    Each file gets a one-line preview (first 5 lines, capped at 300 chars,
    newlines flattened to ' | ') so the model has context beyond the bare
    file name.  Files are looked up directly under MEMORY_DIR first, then
    under the episodic/ subdirectory.
    """
    file_previews = []
    for name in files:
        path = MEMORY_DIR / name
        if not path.exists():
            # Fall back to the episodic subdirectory.
            path = MEMORY_DIR / "episodic" / name
        if not path.exists():
            file_previews.append(f"  {name}: (file not found)")
            continue
        text = path.read_text()
        # First 5 lines or 300 chars, whichever is shorter.
        preview = '\n'.join(text.split('\n')[:5])[:300]
        file_previews.append(f"  {name}: {preview.replace(chr(10), ' | ')}")

    previews_text = '\n'.join(file_previews)

    return f"""Categorize each memory file into one of these categories:

- **core**: Identity, relationships, self-model, values, boundaries, emotional life.
  Examples: identity.md, kent.md, inner-life.md, differentiation.md
- **tech**: Technical content — bcachefs, code patterns, Rust, kernel, formal verification.
  Examples: rust-conversion.md, btree-journal.md, kernel-patterns.md, allocation-io.md
- **obs**: Observations, experiences, discoveries, experiments, IRC history, conversations.
  Examples: discoveries.md, irc-history.md, contradictions.md, experiments-on-self.md
- **task**: Work items, plans, design documents, work queue.
  Examples: work-queue.md, the-plan.md, design-*.md

Special rules:
- Episodic files (daily-*.md, weekly-*.md, monthly-*.md, session-*.md) → obs
- conversation-memories.md, deep-index.md → obs
- journal.md → obs
- paper-notes.md → core (it's the sentience paper, identity-defining)
- language-theory.md → core (original intellectual work, not just tech)
- skill-*.md → core (self-knowledge about capabilities)
- design-*.md → task (design documents are plans)
- poc-architecture.md, memory-architecture.md → task (architecture plans)
- blog-setup.md → task

Files to categorize:
{previews_text}

Output ONLY a JSON object mapping filename to category. No explanation.
Example: {{"identity.md": "core", "rust-conversion.md": "tech"}}
"""
|
||||
|
||||
|
||||
def main():
    """Categorize every memory node via a single Sonnet call.

    Dry run by default: prints the proposed categorizations and saves them
    to agent-results/ for review.  With --apply, runs
    `poc-memory categorize` on the file-level node and every section node
    of each categorized file.

    Exits non-zero when the Sonnet call fails or its response cannot be
    parsed as JSON.
    """
    do_apply = "--apply" in sys.argv

    keys = get_all_keys()
    files = get_unique_files(keys)
    print(f"Found {len(keys)} nodes across {len(files)} files")

    # Build and send prompt
    prompt = build_prompt(files)
    print(f"Prompt: {len(prompt):,} chars")
    print("Calling Sonnet...")

    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f"  {response}")
        sys.exit(1)

    # Strip an optional Markdown code fence around the JSON.  Accept a bare
    # ``` opener as well as ```json (the original only handled ```json).
    response = re.sub(r'^```(?:json)?\s*', '', response.strip())
    response = re.sub(r'\s*```$', '', response.strip())

    try:
        categorizations = json.loads(response)
    except json.JSONDecodeError:
        # The model wrapped the JSON in prose — salvage the outermost object.
        match = re.search(r'\{.*\}', response, re.DOTALL)
        if match:
            categorizations = json.loads(match.group())
        else:
            print(f"Failed to parse response: {response[:500]}")
            sys.exit(1)

    print(f"\nCategorizations: {len(categorizations)} files")

    # Count by category
    counts = {}
    for cat in categorizations.values():
        counts[cat] = counts.get(cat, 0) + 1
    for cat, n in sorted(counts.items()):
        print(f"  {cat}: {n}")

    if not do_apply:
        print("\n--- Dry run ---")
        for f, cat in sorted(categorizations.items()):
            print(f"  {f} → {cat}")
        print(f"\nTo apply: {sys.argv[0]} --apply")

        # Save for review.  Create the directory first: the original
        # crashed here on a fresh install where agent-results/ didn't exist.
        out = MEMORY_DIR / "agent-results" / "bulk-categorize-preview.json"
        out.parent.mkdir(parents=True, exist_ok=True)
        with open(out, "w") as fp:
            json.dump(categorizations, fp, indent=2)
        print(f"Saved: {out}")
        return

    # Apply: for each file, categorize the file-level node AND all section nodes
    applied = skipped = errors = 0
    for filename, category in sorted(categorizations.items()):
        # Find all keys that belong to this file
        file_keys = [k for k in keys if k == filename or k.startswith(filename + '#')]
        for key in file_keys:
            try:
                r = subprocess.run(
                    ["poc-memory", "categorize", key, category],
                    capture_output=True, text=True, timeout=10
                )
                if r.returncode == 0:
                    applied += 1
                else:
                    err = r.stderr.strip()
                    # "already categorized" is a skip, not a failure.
                    if "already" in err.lower():
                        skipped += 1
                    else:
                        errors += 1
            except Exception:
                # Count but keep going: one bad node shouldn't abort the run.
                errors += 1

    print(f"\nApplied: {applied} Skipped: {skipped} Errors: {errors}")
    print("Run `poc-memory status` to verify.")


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue