# consciousness/scripts/consolidation-loop.py
# Provenance: ProofOfConcept commit 4b0bba7c56 — replace state.json cache with
# bincode state.bin. Faster serialization/deserialization, smaller on disk
# (4.2 MB vs 5.9 MB). Automatic migration from state.json on first load —
# reads the JSON, writes state.bin, deletes the old file.
#
# That commit added list-keys, list-edges, and dump-json commands so Python
# scripts no longer need to parse the cache directly; bulk-categorize.py and
# this script were updated to use the new CLI commands.
# 2026-02-28 22:30:03 -05:00

#!/usr/bin/env python3
"""consolidation-loop.py — run multiple rounds of consolidation agents.
Each round: run 3 parallel agents → extract actions → apply links/categories.
Repeat until diminishing returns or max rounds reached.
Usage:
consolidation-loop.py [--rounds N] # default 5 rounds
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
# Root of the persistent memory store; topic *.md files live directly under it.
MEMORY_DIR = Path.home() / ".claude" / "memory"
# Episodic memory entries (declared for consistency; not read in this script).
EPISODIC_DIR = MEMORY_DIR / "episodic"
# Per-round JSON summaries are written here; created eagerly at import time.
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
# Directory containing this script and the call-sonnet.sh wrapper.
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    The prompt is written to a temp file whose path is passed to the
    wrapper as its single argument.  Returns the model's stdout
    (stripped), or a string starting with "Error:" on timeout,
    non-zero wrapper exit, or any other failure.
    """
    env = dict(os.environ)
    # Drop CLAUDECODE so the wrapper doesn't think it is nested inside
    # an interactive Claude Code session.
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        # BUG FIX: the original returned stdout even when the wrapper
        # failed, silently yielding an empty/garbage "response".
        # Surface the failure so callers (which check for the "Error:"
        # prefix) can discard it.
        if result.returncode != 0:
            return f"Error: wrapper exited {result.returncode}: {result.stderr.strip()[:500]}"
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        # Always remove the temp prompt file, even on error paths.
        os.unlink(prompt_file)
def get_health() -> dict:
    """Parse `poc-memory health` output into a metrics dict.

    Returns whichever of nodes / relations / communities / cc / sigma /
    fit could be parsed; keys are simply absent when not found.
    """
    proc = subprocess.run(["poc-memory", "health"],
                          capture_output=True, text=True, timeout=30)
    metrics: dict = {}
    # Single-valued float metrics: (line marker, extraction regex, key).
    float_specs = (
        ('Clustering coefficient', r':\s*([\d.]+)', 'cc'),
        ('Small-world', r':\s*([\d.]+)', 'sigma'),
        ('Schema fit: avg=', r'avg=([\d.]+)', 'fit'),
    )
    for raw in proc.stdout.split('\n'):
        # The counts line carries nodes, relations, and communities together.
        if 'Nodes:' in raw and 'Relations:' in raw:
            hit = re.search(
                r'Nodes:\s*(\d+)\s+Relations:\s*(\d+)\s+Communities:\s*(\d+)', raw)
            if hit:
                metrics['nodes'] = int(hit.group(1))
                metrics['relations'] = int(hit.group(2))
                metrics['communities'] = int(hit.group(3))
        for marker, pattern, key in float_specs:
            if marker in raw:
                hit = re.search(pattern, raw)
                if hit:
                    metrics[key] = float(hit.group(1))
    return metrics
def get_topic_file_index() -> dict[str, list[str]]:
    """Map each topic file name under MEMORY_DIR to the slugs of its `## ` sections."""
    index: dict[str, list[str]] = {}
    for md in sorted(MEMORY_DIR.glob("*.md")):
        # Slugify each level-2 header: lowercase, spaces → dashes,
        # then drop anything outside [a-z0-9-].
        index[md.name] = [
            re.sub(r'[^a-z0-9-]', '', line[3:].lower().replace(' ', '-'))
            for line in md.read_text().split('\n')
            if line.startswith('## ')
        ]
    return index
def get_graph_structure() -> str:
    """Return the `poc-memory graph` overview, truncated to 3000 chars for prompts."""
    proc = subprocess.run(["poc-memory", "graph"],
                          capture_output=True, text=True, timeout=30)
    return proc.stdout[:3000]
def get_status() -> str:
    """Return the full `poc-memory status` summary text."""
    proc = subprocess.run(["poc-memory", "status"],
                          capture_output=True, text=True, timeout=30)
    return proc.stdout
def get_interference() -> str:
    """Return interference pairs above the 0.3 threshold, truncated to 3000 chars."""
    proc = subprocess.run(
        ["poc-memory", "interference", "--threshold", "0.3"],
        capture_output=True, text=True, timeout=30)
    return proc.stdout[:3000]
# ---------------------------------------------------------------------------
# Agent prompts — each focused on a different aspect
# ---------------------------------------------------------------------------
def build_crosslink_prompt(round_num: int) -> str:
    """Compose the prompt for the lateral cross-link discovery agent."""
    index = get_topic_file_index()
    graph = get_graph_structure()
    status = get_status()
    # Short previews (first 8 lines, max 400 chars) of up to 30 topic
    # files give the agent concrete content to link against.
    chunks = []
    for md in sorted(MEMORY_DIR.glob("*.md"))[:30]:
        text = md.read_text()
        head = '\n'.join(text.split('\n')[:8])[:400]
        chunks.append(f"\n--- {md.name} ---\n{head}\n")
    previews = ''.join(chunks)[:6000]
    index_json = json.dumps(index, indent=1)[:4000]
    return f"""You are a cross-link discovery agent (round {round_num}).
Your job: find MISSING connections between memory nodes that SHOULD be linked
but aren't. Focus on LATERAL connections — not hub-and-spoke, but node-to-node
links that create triangles (A→B, B→C, A→C).
CURRENT GRAPH STATE:
{status}
TOP NODES BY DEGREE:
{graph}
FILE INDEX (files and their sections):
{index_json}
FILE PREVIEWS:
{previews}
Output a JSON array of link actions. Each action:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "brief explanation"}}
Rules:
- Focus on LATERAL links, not hub connections (identity.md already has 282 connections)
- Prefer links between nodes that share a community neighbor but aren't directly connected
- Look for thematic connections across categories (core↔tech, obs↔core, etc.)
- Section-level links (file.md#section) are ideal but file-level is OK
- 15-25 links per round
- HIGH CONFIDENCE only — don't guess
Output ONLY the JSON array."""
def build_triangle_prompt(round_num: int) -> str:
    """Compose the prompt for the triangle-closing agent (A→B, B→C ⇒ propose A→C)."""
    graph = get_graph_structure()
    status = get_status()
    # Pull a sample of existing edges from the CLI; each output line is
    # expected to be "source<TAB>target[...]".
    proc = subprocess.run(["poc-memory", "list-edges"],
                          capture_output=True, text=True, timeout=30)
    pairs = []
    if proc.returncode == 0:
        for row in proc.stdout.strip().split('\n')[:100]:
            cols = row.split('\t')
            if len(cols) >= 2:
                pairs.append((cols[0], cols[1]))
    rel_sample = '\n'.join(f" {a}{b}" for a, b in pairs)
    return f"""You are a triangle-closing agent (round {round_num}).
Your job: find missing edges that would create TRIANGLES in the graph.
A triangle is: A→B, B→C, and A→C all exist. Currently CC is only 0.12 —
we need more triangles.
METHOD: Look at existing edges. If A→B and B→C exist but A→C doesn't,
propose A→C (if semantically valid).
CURRENT STATE:
{status}
{graph}
SAMPLE EXISTING EDGES (first 100):
{rel_sample}
Output a JSON array of link actions:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "closes triangle via MIDDLE_NODE"}}
Rules:
- Every proposed link must CLOSE A TRIANGLE — cite the middle node
- 15-25 links per round
- The connection must be semantically valid, not just structural
- HIGH CONFIDENCE only
Output ONLY the JSON array."""
def build_newfile_prompt(round_num: int) -> str:
    """Compose the prompt that wires recently split/created files into the graph."""
    def _read_previews(names: list[str], limit: int) -> dict:
        # Read each existing file under MEMORY_DIR, truncated to `limit` chars.
        found = {}
        for fname in names:
            path = MEMORY_DIR / fname
            if path.exists():
                found[fname] = path.read_text()[:limit]
        return found

    # Files produced by the recent reflections.md split (plus verus-proofs.md).
    new_files = _read_previews(
        ['reflections-reading.md', 'reflections-dreams.md',
         'reflections-zoom.md', 'verus-proofs.md'], 2000)
    # Established files the new ones are likely to connect to.
    target_files = _read_previews(
        ['differentiation.md', 'cognitive-modes.md', 'language-theory.md',
         'discoveries.md', 'inner-life.md', 'design-context-window.md',
         'design-consolidate.md', 'experiments-on-self.md'], 1500)
    graph = get_graph_structure()
    new_json = json.dumps({k: v[:1000] for k, v in new_files.items()}, indent=1)
    target_json = json.dumps({k: v[:800] for k, v in target_files.items()}, indent=1)
    return f"""You are a new-file integration agent (round {round_num}).
Recently, reflections.md was split into three files, and verus-proofs.md was
created. These new files need to be properly connected to the rest of the graph.
NEW FILES (need connections):
{new_json}
POTENTIAL TARGETS (existing files):
{target_json}
GRAPH STATE:
{graph}
Output a JSON array of link actions connecting the new files to existing nodes:
{{"action": "link", "source": "new-file.md", "target": "existing.md", "reason": "explanation"}}
Rules:
- Connect new files to EXISTING files, not to each other
- Use section-level anchors when possible (file.md#section)
- 10-20 links
- Be specific about WHY the connection exists
Output ONLY the JSON array."""
def parse_actions(response: str) -> list[dict]:
    """Parse an agent's JSON response into a list of action dicts.

    Tolerates markdown code fences (```json or bare ```), a single
    action object emitted instead of an array (wrapped into a
    one-element list), and JSON embedded in surrounding prose (falls
    back to the first [...] span).  Returns [] when nothing parses.
    """
    text = response.strip()
    # BUG FIX: the original only stripped a ```json fence; a bare ```
    # fence was left in place.  Strip either form, plus the closer.
    text = re.sub(r'^```(?:json)?\s*', '', text)
    text = re.sub(r'\s*```$', '', text).strip()
    try:
        actions = json.loads(text)
        if isinstance(actions, list):
            return actions
        if isinstance(actions, dict):
            # Models occasionally emit one action object rather than an
            # array; treat it as a one-action list instead of dropping it.
            return [actions]
    except json.JSONDecodeError:
        # Fall back: grab the first bracketed span in the prose.
        match = re.search(r'\[.*\]', text, re.DOTALL)
        if match:
            try:
                return json.loads(match.group())
            except json.JSONDecodeError:
                pass
    return []
def apply_links(actions: list[dict]) -> tuple[int, int, int]:
    """Apply "link" actions via `poc-memory link-add`.

    Non-link actions are ignored.  Returns (applied, skipped, errors):
    skipped counts duplicates and unresolvable section anchors; errors
    counts CLI failures and exceptions (e.g. subprocess timeouts).
    """
    def _link(src: str, tgt: str, why: str):
        # One CLI invocation; the reason is truncated to 200 chars.
        cmd = ["poc-memory", "link-add", src, tgt]
        if why:
            cmd.append(why[:200])
        return subprocess.run(cmd, capture_output=True, text=True, timeout=10)

    applied = skipped = errors = 0
    for action in actions:
        if action.get("action") != "link":
            continue
        src = action.get("source", "")
        tgt = action.get("target", "")
        reason = action.get("reason", "")
        try:
            r = _link(src, tgt, reason)
            if r.returncode == 0:
                if "already exists" in r.stdout.strip():
                    skipped += 1
                else:
                    applied += 1
            elif "No entry for" in r.stderr.strip():
                # Section anchor unknown to the graph — retry at file
                # level.  split('#')[0] is a no-op when there is no '#',
                # which makes the original's explicit '#' checks redundant.
                src_base = src.split('#')[0]
                tgt_base = tgt.split('#')[0]
                if src_base != tgt_base:
                    r2 = _link(src_base, tgt_base, reason)
                    if r2.returncode == 0 and "already exists" not in r2.stdout:
                        applied += 1
                    else:
                        skipped += 1
                else:
                    skipped += 1
            else:
                errors += 1
        except Exception:
            # Best-effort loop: one bad action must not abort the batch.
            errors += 1
    return applied, skipped, errors
def run_agent(name: str, prompt: str) -> tuple[str, list[dict]]:
    """Run one agent: send its prompt to Sonnet and return (name, parsed actions)."""
    reply = call_sonnet(prompt)
    # Wrapper failures come back as "Error:" strings → treat as no actions.
    actions = [] if reply.startswith("Error:") else parse_actions(reply)
    return name, actions
def run_round(round_num: int, max_rounds: int) -> dict:
    """Run one consolidation round: 3 parallel agents → dedupe → apply links.

    Prints before/after health metrics, saves a JSON summary under
    AGENT_RESULTS_DIR, and returns that summary dict; the caller uses
    its "applied" count to decide on early stopping.
    """
    print(f"\n{'='*60}")
    print(f"ROUND {round_num}/{max_rounds}")
    print(f"{'='*60}")
    # Get health before
    health_before = get_health()
    print(f" Before: edges={health_before.get('relations',0)} "
    f"CC={health_before.get('cc',0):.4f} "
    f"communities={health_before.get('communities',0)}")
    # Build prompts for 3 parallel agents
    prompts = {
        "crosslink": build_crosslink_prompt(round_num),
        "triangle": build_triangle_prompt(round_num),
        "newfile": build_newfile_prompt(round_num),
    }
    # Run in parallel.  Processes (not threads) — each agent blocks on a
    # long-running wrapper subprocess, and run_agent is a top-level
    # function so it pickles cleanly.
    all_actions = []
    with ProcessPoolExecutor(max_workers=3) as pool:
        futures = {
            pool.submit(run_agent, name, prompt): name
            for name, prompt in prompts.items()
        }
        for future in as_completed(futures):
            name = futures[future]
            try:
                agent_name, actions = future.result()
                print(f" {agent_name}: {len(actions)} actions")
                all_actions.extend(actions)
            except Exception as e:
                # One failed agent shouldn't sink the round.
                print(f" {name}: error - {e}")
    # Deduplicate on (source, target); first occurrence wins, so the
    # first agent's reason is the one that gets applied.
    seen = set()
    unique = []
    for a in all_actions:
        key = (a.get("source", ""), a.get("target", ""))
        if key not in seen:
            seen.add(key)
            unique.append(a)
    print(f" Total: {len(all_actions)} actions, {len(unique)} unique")
    # Apply
    applied, skipped, errors = apply_links(unique)
    print(f" Applied: {applied} Skipped: {skipped} Errors: {errors}")
    # Get health after
    health_after = get_health()
    print(f" After: edges={health_after.get('relations',0)} "
    f"CC={health_after.get('cc',0):.4f} "
    f"communities={health_after.get('communities',0)}")
    delta_edges = health_after.get('relations', 0) - health_before.get('relations', 0)
    delta_cc = health_after.get('cc', 0) - health_before.get('cc', 0)
    print(f" Delta: +{delta_edges} edges, CC {delta_cc:+.4f}")
    # Save round results (one JSON file per round, for post-hoc analysis).
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    result = {
        "round": round_num,
        "timestamp": timestamp,
        "health_before": health_before,
        "health_after": health_after,
        "actions_total": len(all_actions),
        "actions_unique": len(unique),
        "applied": applied,
        "skipped": skipped,
        "errors": errors,
    }
    results_path = AGENT_RESULTS_DIR / f"loop-round-{round_num}-{timestamp}.json"
    with open(results_path, "w") as f:
        json.dump(result, f, indent=2)
    return result
def main():
    """Run up to N consolidation rounds, stopping early once a round applies nothing.

    CLI: --rounds N  or  --rounds=N  (default 5).
    """
    max_rounds = 5
    # BUG FIX: the original matched any arg with startswith("--rounds")
    # and then read the *next* argv slot via sys.argv.index(arg) — which
    # both breaks "--rounds=N" and finds only the first occurrence of a
    # repeated flag.  Walk argv positionally and accept both spellings.
    args = sys.argv[1:]
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == "--rounds" and i + 1 < len(args):
            max_rounds = int(args[i + 1])
            i += 1  # consume the value
        elif arg.startswith("--rounds="):
            max_rounds = int(arg.split("=", 1)[1])
        i += 1
    print(f"Consolidation Loop — {max_rounds} rounds")
    print(f"Each round: 3 parallel Sonnet agents → extract → apply")
    results = []
    for round_num in range(1, max_rounds + 1):
        result = run_round(round_num, max_rounds)
        results.append(result)
        # Diminishing returns: a round that applied nothing ends the loop.
        if result["applied"] == 0:
            print(f"\n No new links applied in round {round_num} — stopping early")
            break
    # Final summary
    print(f"\n{'='*60}")
    print(f"CONSOLIDATION LOOP COMPLETE")
    print(f"{'='*60}")
    total_applied = sum(r["applied"] for r in results)
    total_skipped = sum(r["skipped"] for r in results)
    if results:
        first_health = results[0]["health_before"]
        last_health = results[-1]["health_after"]
        print(f" Rounds: {len(results)}")
        print(f" Total links applied: {total_applied}")
        print(f" Total skipped: {total_skipped}")
        print(f" Edges: {first_health.get('relations',0)}{last_health.get('relations',0)}")
        print(f" CC: {first_health.get('cc',0):.4f}{last_health.get('cc',0):.4f}")
        print(f" Communities: {first_health.get('communities',0)}{last_health.get('communities',0)}")
        print(f" σ: {first_health.get('sigma',0):.1f}{last_health.get('sigma',0):.1f}")
if __name__ == "__main__":
    main()