poc-memory v0.4.0: graph-structured memory with consolidation pipeline

Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
  schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
  link-add, link-impact, decay, consolidate-session, etc.

Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-02-28 22:17:00 -05:00
commit 23fac4e5fe
35 changed files with 9388 additions and 0 deletions

View file

@ -0,0 +1,454 @@
#!/usr/bin/env python3
"""consolidation-loop.py — run multiple rounds of consolidation agents.
Each round: run 3 parallel agents → extract actions → apply links/categories.
Repeat until diminishing returns or max rounds reached.
Usage:
consolidation-loop.py [--rounds N] # default 5 rounds
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
# Root of the memory store (~/.claude/memory) and its subdirectories.
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
# Created at import time so per-round result files can always be written.
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
# Directory containing this script and the call-sonnet.sh wrapper.
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    The prompt is written to a temporary file (removed afterwards) and
    passed to the wrapper as its sole argument.

    Returns the model's stdout, or a string starting with "Error:" on
    timeout or failure — callers detect problems via
    response.startswith("Error:").
    """
    env = dict(os.environ)
    # Don't let the wrapper think it is running inside Claude Code.
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        # Surface wrapper failures instead of silently returning (possibly
        # empty) stdout; the "Error:" prefix matches callers' checks.
        if result.returncode != 0:
            return f"Error: wrapper exited {result.returncode}: {result.stderr.strip()[:500]}"
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
def get_health() -> dict:
"""Get current graph health metrics."""
r = subprocess.run(["poc-memory", "health"], capture_output=True, text=True, timeout=30)
output = r.stdout
metrics = {}
for line in output.split('\n'):
if 'Nodes:' in line and 'Relations:' in line:
m = re.search(r'Nodes:\s*(\d+)\s+Relations:\s*(\d+)\s+Communities:\s*(\d+)', line)
if m:
metrics['nodes'] = int(m.group(1))
metrics['relations'] = int(m.group(2))
metrics['communities'] = int(m.group(3))
if 'Clustering coefficient' in line:
m = re.search(r':\s*([\d.]+)', line)
if m:
metrics['cc'] = float(m.group(1))
if 'Small-world' in line:
m = re.search(r':\s*([\d.]+)', line)
if m:
metrics['sigma'] = float(m.group(1))
if 'Schema fit: avg=' in line:
m = re.search(r'avg=([\d.]+)', line)
if m:
metrics['fit'] = float(m.group(1))
return metrics
def get_topic_file_index() -> dict[str, list[str]]:
    """Map each topic markdown file name to slugs of its '## ' section headers."""
    index: dict[str, list[str]] = {}
    for md_path in sorted(MEMORY_DIR.glob("*.md")):
        slugs = []
        for raw_line in md_path.read_text().split('\n'):
            if not raw_line.startswith('## '):
                continue
            # Lowercase, hyphenate spaces, then drop anything non [a-z0-9-].
            candidate = raw_line[3:].lower().replace(' ', '-')
            slugs.append(re.sub(r'[^a-z0-9-]', '', candidate))
        index[md_path.name] = slugs
    return index
def get_graph_structure() -> str:
    """Return the graph overview from `poc-memory graph`, capped at 3000 chars."""
    proc = subprocess.run(
        ["poc-memory", "graph"], capture_output=True, text=True, timeout=30
    )
    return proc.stdout[:3000]
def get_status() -> str:
    """Return the full `poc-memory status` summary text."""
    proc = subprocess.run(
        ["poc-memory", "status"], capture_output=True, text=True, timeout=30
    )
    return proc.stdout
def get_interference() -> str:
    """Return interference pairs at threshold 0.3, capped at 3000 chars."""
    proc = subprocess.run(
        ["poc-memory", "interference", "--threshold", "0.3"],
        capture_output=True,
        text=True,
        timeout=30,
    )
    return proc.stdout[:3000]
# ---------------------------------------------------------------------------
# Agent prompts — each focused on a different aspect
# ---------------------------------------------------------------------------
def build_crosslink_prompt(round_num: int) -> str:
    """Build the cross-link discovery prompt for one round.

    Gathers the topic-file index, graph overview, and status summary, plus
    short previews (first 8 lines, max 400 chars) of up to 30 memory files,
    and embeds them in the agent prompt.

    NOTE(review): the prompt's triangle example and final rule originally
    read "(AB, BC, AC)" and "only don't guess" — the connecting arrows/dash
    were stripped somewhere (cf. intact "→" elsewhere in this file); they
    are restored here so the agent sees the intended notation.
    """
    index = get_topic_file_index()
    graph = get_graph_structure()
    status = get_status()
    # Read a sample of files for context.
    previews = []
    for f in sorted(MEMORY_DIR.glob("*.md"))[:30]:
        head = '\n'.join(f.read_text().split('\n')[:8])[:400]
        previews.append(f"\n--- {f.name} ---\n{head}\n")
    file_previews = ''.join(previews)
    return f"""You are a cross-link discovery agent (round {round_num}).
Your job: find MISSING connections between memory nodes that SHOULD be linked
but aren't. Focus on LATERAL connections — not hub-and-spoke, but node-to-node
links that create triangles (A→B, B→C, A→C).
CURRENT GRAPH STATE:
{status}
TOP NODES BY DEGREE:
{graph}
FILE INDEX (files and their sections):
{json.dumps(index, indent=1)[:4000]}
FILE PREVIEWS:
{file_previews[:6000]}
Output a JSON array of link actions. Each action:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "brief explanation"}}
Rules:
- Focus on LATERAL links, not hub connections (identity.md already has 282 connections)
- Prefer links between nodes that share a community neighbor but aren't directly connected
- Look for thematic connections across categories (coretech, obscore, etc.)
- Section-level links (file.md#section) are ideal but file-level is OK
- 15-25 links per round
- HIGH CONFIDENCE only — don't guess
Output ONLY the JSON array."""
def build_triangle_prompt(round_num: int) -> str:
    """Build triangle-closing prompt — finds A→C where A→B and B→C exist.

    Samples up to 100 existing relations by regexing the first 20 KB of
    state.json, so the agent can see which edges already exist.

    NOTE(review): the edge sample was rendered "{s}{t}" — keys concatenated
    with no separator — and the prompt text read "AB, BC" etc.; the arrows
    were evidently stripped in transit (cf. intact "→" elsewhere in this
    file) and are restored here.
    """
    graph = get_graph_structure()
    status = get_status()
    # Get some node pairs that share neighbors.
    state_path = MEMORY_DIR / "state.json"
    if state_path.exists():
        state = state_path.read_text()
        # Extract (source, target) pairs from the raw JSON text.
        relations = re.findall(r'"source_key":\s*"([^"]*)".*?"target_key":\s*"([^"]*)"', state[:20000])
    else:
        relations = []
    # One "source → target" line per sampled edge.
    rel_sample = '\n'.join(f" {s} → {t}" for s, t in relations[:100])
    return f"""You are a triangle-closing agent (round {round_num}).
Your job: find missing edges that would create TRIANGLES in the graph.
A triangle is: A→B, B→C, and A→C all exist. Currently CC is only 0.12 —
we need more triangles.
METHOD: Look at existing edges. If A→B and B→C exist but A→C doesn't,
propose A→C (if semantically valid).
CURRENT STATE:
{status}
{graph}
SAMPLE EXISTING EDGES (first 100):
{rel_sample}
Output a JSON array of link actions:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "closes triangle via MIDDLE_NODE"}}
Rules:
- Every proposed link must CLOSE A TRIANGLE — cite the middle node
- 15-25 links per round
- The connection must be semantically valid, not just structural
- HIGH CONFIDENCE only
Output ONLY the JSON array."""
def build_newfile_prompt(round_num: int) -> str:
    """Build prompt for connecting the new split files."""

    def _previews(names: list[str], limit: int) -> dict[str, str]:
        # Read each existing file under MEMORY_DIR, truncated to `limit` chars.
        found = {}
        for name in names:
            path = MEMORY_DIR / name
            if path.exists():
                found[name] = path.read_text()[:limit]
        return found

    # The recently split reflection files plus verus-proofs.md.
    new_files = _previews(
        ['reflections-reading.md', 'reflections-dreams.md', 'reflections-zoom.md',
         'verus-proofs.md'],
        2000,
    )
    # Established files the new ones should link into.
    target_files = _previews(
        ['differentiation.md', 'cognitive-modes.md', 'language-theory.md',
         'discoveries.md', 'inner-life.md', 'design-context-window.md',
         'design-consolidate.md', 'experiments-on-self.md'],
        1500,
    )
    graph = get_graph_structure()
    return f"""You are a new-file integration agent (round {round_num}).
Recently, reflections.md was split into three files, and verus-proofs.md was
created. These new files need to be properly connected to the rest of the graph.
NEW FILES (need connections):
{json.dumps({k: v[:1000] for k, v in new_files.items()}, indent=1)}
POTENTIAL TARGETS (existing files):
{json.dumps({k: v[:800] for k, v in target_files.items()}, indent=1)}
GRAPH STATE:
{graph}
Output a JSON array of link actions connecting the new files to existing nodes:
{{"action": "link", "source": "new-file.md", "target": "existing.md", "reason": "explanation"}}
Rules:
- Connect new files to EXISTING files, not to each other
- Use section-level anchors when possible (file.md#section)
- 10-20 links
- Be specific about WHY the connection exists
Output ONLY the JSON array."""
def parse_actions(response: str) -> list[dict]:
    """Parse an agent's JSON response into a list of action dicts.

    Strips a surrounding markdown code fence (```json or a bare ```) if
    present, then tries json.loads; on failure, falls back to extracting
    the first bracketed [...] span.  Returns [] when nothing parseable is
    found or the top-level JSON value is not a list.
    """
    text = response.strip()
    # Agents emit both ```json and bare ``` fences — strip either, rather
    # than relying on the bracket-regex fallback for the bare form.
    text = re.sub(r'^```(?:json)?\s*', '', text)
    text = re.sub(r'\s*```$', '', text.strip())
    try:
        actions = json.loads(text)
        if isinstance(actions, list):
            return actions
    except json.JSONDecodeError:
        # Last resort: grab the outermost bracketed span from surrounding prose.
        match = re.search(r'\[.*\]', text, re.DOTALL)
        if match:
            try:
                return json.loads(match.group())
            except json.JSONDecodeError:
                pass
    return []
def apply_links(actions: list[dict]) -> tuple[int, int, int]:
    """Apply link actions via `poc-memory link-add`.

    A section-level link (file.md#section) that fails with "No entry for"
    is retried at file level (the part before '#').  Links the CLI reports
    as "already exists" count as skipped.

    Returns (applied, skipped, errors).
    """

    def _link(source: str, target: str, reason: str):
        # One link-add invocation; the reason is truncated to 200 chars.
        cmd = ["poc-memory", "link-add", source, target]
        if reason:
            cmd.append(reason[:200])
        return subprocess.run(cmd, capture_output=True, text=True, timeout=10)

    applied = skipped = errors = 0
    for a in actions:
        if a.get("action") != "link":
            continue
        src = a.get("source", "")
        tgt = a.get("target", "")
        reason = a.get("reason", "")
        try:
            r = _link(src, tgt, reason)
            if r.returncode == 0:
                if "already exists" in r.stdout.strip():
                    skipped += 1
                else:
                    applied += 1
            elif "No entry for" in r.stderr.strip():
                # Section anchor unknown — fall back to a file-level link.
                src_base = src.split('#', 1)[0]
                tgt_base = tgt.split('#', 1)[0]
                if src_base != tgt_base:
                    r2 = _link(src_base, tgt_base, reason)
                    if r2.returncode == 0 and "already exists" not in r2.stdout:
                        applied += 1
                    else:
                        skipped += 1
                else:
                    skipped += 1
            else:
                errors += 1
        except Exception:
            # Best-effort: a timeout or launch failure counts as an error
            # for this action but does not abort the batch.
            errors += 1
    return applied, skipped, errors
def run_agent(name: str, prompt: str) -> tuple[str, list[dict]]:
    """Run one agent over `prompt` and return (name, parsed actions).

    An "Error:"-prefixed response from call_sonnet yields an empty list.
    """
    reply = call_sonnet(prompt)
    actions = [] if reply.startswith("Error:") else parse_actions(reply)
    return name, actions
def run_round(round_num: int, max_rounds: int) -> dict:
    """Run one consolidation round: 3 parallel agents → dedupe → apply links.

    Prints progress to stdout, saves a JSON summary into AGENT_RESULTS_DIR,
    and returns the summary dict (health before/after plus action counts).
    """
    print(f"\n{'='*60}")
    print(f"ROUND {round_num}/{max_rounds}")
    print(f"{'='*60}")
    # Get health before
    health_before = get_health()
    print(f" Before: edges={health_before.get('relations',0)} "
          f"CC={health_before.get('cc',0):.4f} "
          f"communities={health_before.get('communities',0)}")
    # Build prompts for 3 parallel agents
    prompts = {
        "crosslink": build_crosslink_prompt(round_num),
        "triangle": build_triangle_prompt(round_num),
        "newfile": build_newfile_prompt(round_num),
    }
    # Run in parallel.  Processes rather than threads, so each Sonnet call
    # runs in its own interpreter; run_agent is a module-level function and
    # therefore pickles cleanly for the pool.
    all_actions = []
    with ProcessPoolExecutor(max_workers=3) as pool:
        futures = {
            pool.submit(run_agent, name, prompt): name
            for name, prompt in prompts.items()
        }
        for future in as_completed(futures):
            name = futures[future]
            try:
                agent_name, actions = future.result()
                print(f" {agent_name}: {len(actions)} actions")
                all_actions.extend(actions)
            except Exception as e:
                print(f" {name}: error - {e}")
    # Deduplicate on the (source, target) pair, keeping the first occurrence.
    seen = set()
    unique = []
    for a in all_actions:
        key = (a.get("source", ""), a.get("target", ""))
        if key not in seen:
            seen.add(key)
            unique.append(a)
    print(f" Total: {len(all_actions)} actions, {len(unique)} unique")
    # Apply
    applied, skipped, errors = apply_links(unique)
    print(f" Applied: {applied} Skipped: {skipped} Errors: {errors}")
    # Get health after
    health_after = get_health()
    print(f" After: edges={health_after.get('relations',0)} "
          f"CC={health_after.get('cc',0):.4f} "
          f"communities={health_after.get('communities',0)}")
    delta_edges = health_after.get('relations', 0) - health_before.get('relations', 0)
    delta_cc = health_after.get('cc', 0) - health_before.get('cc', 0)
    print(f" Delta: +{delta_edges} edges, CC {delta_cc:+.4f}")
    # Save round results (timestamped so repeated rounds never collide)
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    result = {
        "round": round_num,
        "timestamp": timestamp,
        "health_before": health_before,
        "health_after": health_after,
        "actions_total": len(all_actions),
        "actions_unique": len(unique),
        "applied": applied,
        "skipped": skipped,
        "errors": errors,
    }
    results_path = AGENT_RESULTS_DIR / f"loop-round-{round_num}-{timestamp}.json"
    with open(results_path, "w") as f:
        json.dump(result, f, indent=2)
    return result
def main():
    """Entry point: run up to --rounds rounds (default 5), stopping early
    as soon as a round applies zero new links.

    NOTE(review): the final summary lines originally concatenated the
    before/after numbers with no separator (e.g. "{first}{last}"); the "→"
    separator — intact in the banner line below — was evidently stripped
    and is restored here.
    """
    max_rounds = 5
    args = sys.argv[1:]
    for i, arg in enumerate(args):
        # Accept both "--rounds N" and "--rounds=N" (the latter was
        # previously matched by startswith() but silently ignored).
        if arg.startswith("--rounds="):
            max_rounds = int(arg.split("=", 1)[1])
        elif arg == "--rounds" and i + 1 < len(args):
            max_rounds = int(args[i + 1])
    print(f"Consolidation Loop — {max_rounds} rounds")
    print(f"Each round: 3 parallel Sonnet agents → extract → apply")
    results = []
    for round_num in range(1, max_rounds + 1):
        result = run_round(round_num, max_rounds)
        results.append(result)
        # Check for diminishing returns
        if result["applied"] == 0:
            print(f"\n No new links applied in round {round_num} — stopping early")
            break
    # Final summary
    print(f"\n{'='*60}")
    print(f"CONSOLIDATION LOOP COMPLETE")
    print(f"{'='*60}")
    total_applied = sum(r["applied"] for r in results)
    total_skipped = sum(r["skipped"] for r in results)
    if results:
        first_health = results[0]["health_before"]
        last_health = results[-1]["health_after"]
        print(f" Rounds: {len(results)}")
        print(f" Total links applied: {total_applied}")
        print(f" Total skipped: {total_skipped}")
        print(f" Edges: {first_health.get('relations',0)} → {last_health.get('relations',0)}")
        print(f" CC: {first_health.get('cc',0):.4f} → {last_health.get('cc',0):.4f}")
        print(f" Communities: {first_health.get('communities',0)} → {last_health.get('communities',0)}")
        print(f" σ: {first_health.get('sigma',0):.1f} → {last_health.get('sigma',0):.1f}")


if __name__ == "__main__":
    main()