poc-memory v0.4.0: graph-structured memory with consolidation pipeline

Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
  schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
  link-add, link-impact, decay, consolidate-session, etc.

Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic observations to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-02-28 22:17:00 -05:00
commit 23fac4e5fe
35 changed files with 9388 additions and 0 deletions

220
scripts/digest-link-parser.py Executable file
View file

@ -0,0 +1,220 @@
#!/usr/bin/env python3
"""digest-link-parser.py — extract explicit links from episodic digests.
Parses the "Links" sections of daily/weekly/monthly digests and
applies them to the memory graph via poc-memory link-add.
Usage:
digest-link-parser.py # dry run
digest-link-parser.py --apply # apply links
"""
import re
import subprocess
import sys
from pathlib import Path
EPISODIC_DIR = Path.home() / ".claude" / "memory" / "episodic"
def normalize_key(raw: str) -> str:
    """Normalize a raw link target from a digest into a poc-memory key.

    Transformations, in order:
      - strip whitespace and surrounding backticks
      - ``daily/2026-02-04`` -> ``daily-2026-02-04.md`` (same for
        weekly/monthly prefixes)
      - self-references (``this daily digest`` etc., plus the hard-coded
        ``2026-02-14`` date) -> ``""`` so the caller can substitute the
        digest's own key
      - ``file#section`` -> ``file.md#section``
      - bare names -> ``name.md`` (unless they contain ``/`` or carry a
        ``NEW:`` placeholder prefix)

    Returns:
        The normalized key, or "" for a self-reference (caller replaces
        "" with the digest's own key).
    """
    key = raw.strip().strip('`').strip()
    # weekly/2026-W06 -> weekly-2026-W06, daily/2026-02-04 -> daily-2026-02-04, ...
    key = re.sub(r'^(daily|weekly|monthly)/', r'\1-', key)
    # Dated digest keys get a .md extension — but only when there is no
    # "#section" anchor. Appending here with an anchor present would put
    # ".md" after the anchor text, and the '#' branch below would then
    # mangle the key into "daily-....md#section.md". Anchored keys are
    # handled correctly by the '#' branch instead.
    if re.match(r'^(daily|weekly|monthly)-\d{4}', key) and '#' not in key:
        if not key.endswith('.md'):
            key = key + '.md'
    # Handle "this daily digest" / "this weekly digest" etc. The literal
    # date is a data-specific hack: one digest refers to itself by date.
    if key.startswith('this ') or key == '2026-02-14':
        return ""  # Skip self-references, handled by caller
    # Ensure .md extension on the file part of "file#section" references
    if '#' in key:
        parts = key.split('#', 1)
        if not parts[0].endswith('.md'):
            parts[0] = parts[0] + '.md'
        key = '#'.join(parts)
    elif not key.endswith('.md') and '/' not in key and not key.startswith('NEW:'):
        key = key + '.md'
    return key
def extract_links(filepath: Path) -> list[dict]:
    """Parse the "## Links" section of a single digest file.

    Each recognized line has the shape ``- source → target (reason)``
    (arrow may be →, ↔ or ←; backticks and the reason are optional).
    Self-references resolve to the digest's own key; NEW:-prefixed
    placeholders and self-loops are dropped.

    Returns:
        A list of dicts with keys "source", "target", "reason", "file".
    """
    text = filepath.read_text()
    # The digest's own key (e.g. "daily-2026-02-28.md"), used to resolve
    # self-references like "this daily digest".
    own_key = filepath.stem + ".md"
    results: list[dict] = []

    link_re = re.compile(r'^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$')
    bare_date_re = re.compile(r'^(\d{4}-\d{2}-\d{2})$')
    self_phrases = ('this daily', 'this weekly', 'this monthly')

    inside = False
    for row in text.split('\n'):
        # Track whether we are inside the "## Links" section: it opens at
        # the Links header and closes at the next "## " header.
        if re.match(r'^##\s+Links', row):
            inside = True
            continue
        if inside and re.match(r'^##\s+', row) and not re.match(r'^##\s+Links', row):
            inside = False
            continue
        if not inside:
            continue
        # Subheaders / bold annotations inside the section are not links.
        if row.startswith('###') or row.startswith('**'):
            continue

        m = link_re.match(row)
        if m is None:
            continue
        raw_src = m.group(1).strip()
        raw_tgt = m.group(2).strip()
        why = m.group(3) or ""

        # Normalize; "" signals a self-reference -> use the digest's key.
        src = normalize_key(raw_src) or own_key
        tgt = normalize_key(raw_tgt) or own_key

        # "this daily/weekly/monthly digest" phrasing also means "me".
        if any(p in raw_src.lower() for p in self_phrases):
            src = own_key
        if any(p in raw_tgt.lower() for p in self_phrases):
            tgt = own_key

        # Bare dates like "2026-02-14" refer to daily digests.
        dm = bare_date_re.match(src.replace('.md', ''))
        if dm:
            src = f"daily-{dm.group(1)}.md"
        dm = bare_date_re.match(tgt.replace('.md', ''))
        if dm:
            tgt = f"daily-{dm.group(1)}.md"

        # NEW:-prefixed targets do not exist yet; self-loops are useless.
        # NOTE(review): '←' lines are recorded left-to-right exactly like
        # '→'; if link-add is directional, reversed arrows may need a swap.
        if src.startswith('NEW:') or tgt.startswith('NEW:'):
            continue
        if src == tgt:
            continue

        results.append({
            "source": src,
            "target": tgt,
            "reason": why,
            "file": filepath.name,
        })
    return results
def _run_link_add(source: str, target: str, reason: str) -> subprocess.CompletedProcess:
    """Invoke ``poc-memory link-add SOURCE TARGET [REASON]`` once.

    The reason (if any) is truncated to 200 characters. May raise
    subprocess.TimeoutExpired or FileNotFoundError; main() handles both
    via its boundary except.
    """
    cmd = ["poc-memory", "link-add", source, target]
    if reason:
        cmd.append(reason[:200])
    return subprocess.run(cmd, capture_output=True, text=True, timeout=10)


def main():
    """Collect links from every digest, dedupe, then print or apply them.

    Dry run by default; pass --apply to write links into the memory
    graph via `poc-memory link-add`.
    """
    do_apply = "--apply" in sys.argv

    # Collect all links from all digests
    all_links = []
    for pattern in ["daily-*.md", "weekly-*.md", "monthly-*.md"]:
        for f in sorted(EPISODIC_DIR.glob(pattern)):
            links = extract_links(f)
            if links:
                all_links.extend(links)

    # Deduplicate on (source, target); the first occurrence's reason wins.
    seen = set()
    unique_links = []
    for link in all_links:
        key = (link["source"], link["target"])
        if key not in seen:
            seen.add(key)
            unique_links.append(link)

    print(f"Found {len(all_links)} total links, {len(unique_links)} unique")

    if not do_apply:
        # Dry run — just show them
        for i, link in enumerate(unique_links, 1):
            print(f"  {i:3d}. {link['source']} → {link['target']}")
            if link['reason']:
                print(f"       ({link['reason'][:80]})")
        print(f"\nTo apply: {sys.argv[0]} --apply")
        return

    # Apply with fallback: if a section-level key ("file.md#sec") fails,
    # retry at file level ("file.md"). The helper is defined once at
    # module level rather than re-created on every loop iteration.
    applied = skipped = errors = fallbacks = 0
    for link in unique_links:
        src, tgt = link["source"], link["target"]
        reason = link.get("reason", "")
        try:
            r = _run_link_add(src, tgt, reason)
            if r.returncode == 0:
                out = r.stdout.strip()
                if "already exists" in out:
                    skipped += 1
                else:
                    print(f"  {out}")
                    applied += 1
            else:
                err = r.stderr.strip()
                if "No entry for" in err:
                    # Try stripping section anchors (no-op when absent)
                    src_base = src.split('#')[0]
                    tgt_base = tgt.split('#')[0]
                    if src_base == tgt_base:
                        skipped += 1  # Same file once anchors drop, skip
                        continue
                    r2 = _run_link_add(src_base, tgt_base, reason)
                    if r2.returncode == 0:
                        out = r2.stdout.strip()
                        if "already exists" in out:
                            skipped += 1
                        else:
                            src_anchor = src.split('#')[-1] if '#' in src else ''
                            tgt_anchor = tgt.split('#')[-1] if '#' in tgt else ''
                            print(f"  {out} (fallback from #{src_anchor}/{tgt_anchor})")
                            applied += 1
                            fallbacks += 1
                    else:
                        skipped += 1  # File truly doesn't exist
                elif "not found" in err:
                    skipped += 1
                else:
                    print(f"  ? {src} → {tgt}: {err}")
                    errors += 1
        except Exception as e:
            # Boundary catch-all: keep going past one bad link (timeout,
            # missing poc-memory binary, ...) but record the failure.
            print(f"  ! {src} → {tgt}: {e}")
            errors += 1

    print(f"\nApplied: {applied} ({fallbacks} file-level fallbacks)  Skipped: {skipped}  Errors: {errors}")


if __name__ == "__main__":
    main()