poc-memory v0.4.0: graph-structured memory with consolidation pipeline

Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
  schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
  link-add, link-impact, decay, consolidate-session, etc.

Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic observations to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-02-28 22:17:00 -05:00
commit 23fac4e5fe
35 changed files with 9388 additions and 0 deletions

220
scripts/digest-link-parser.py Executable file
View file

@ -0,0 +1,220 @@
#!/usr/bin/env python3
"""digest-link-parser.py — extract explicit links from episodic digests.
Parses the "Links" sections of daily/weekly/monthly digests and
applies them to the memory graph via poc-memory link-add.
Usage:
digest-link-parser.py # dry run
digest-link-parser.py --apply # apply links
"""
import re
import subprocess
import sys
from pathlib import Path
EPISODIC_DIR = Path.home() / ".claude" / "memory" / "episodic"
def normalize_key(raw: str) -> str:
    """Normalize a raw link target from a digest into a poc-memory key.

    Transformations, in order:
      - strip whitespace and surrounding backticks
      - ``daily/2026-02-04`` -> ``daily-2026-02-04.md`` (same for
        weekly/monthly prefixes)
      - self-references (``this daily digest`` etc., plus the hard-coded
        ``2026-02-14`` date) -> ``""`` so the caller can substitute the
        digest's own key
      - ``file#section`` -> ``file.md#section``
      - bare names -> ``name.md`` (unless they contain ``/`` or carry a
        ``NEW:`` placeholder prefix)

    Returns:
        The normalized key, or "" for a self-reference (caller replaces
        "" with the digest's own key).
    """
    key = raw.strip().strip('`').strip()
    # weekly/2026-W06 -> weekly-2026-W06, daily/2026-02-04 -> daily-2026-02-04, ...
    key = re.sub(r'^(daily|weekly|monthly)/', r'\1-', key)
    # Dated digest keys get a .md extension — but only when there is no
    # "#section" anchor. Appending here with an anchor present would put
    # ".md" after the anchor text, and the '#' branch below would then
    # mangle the key into "daily-....md#section.md". Anchored keys are
    # handled correctly by the '#' branch instead.
    if re.match(r'^(daily|weekly|monthly)-\d{4}', key) and '#' not in key:
        if not key.endswith('.md'):
            key = key + '.md'
    # Handle "this daily digest" / "this weekly digest" etc. The literal
    # date is a data-specific hack: one digest refers to itself by date.
    if key.startswith('this ') or key == '2026-02-14':
        return ""  # Skip self-references, handled by caller
    # Ensure .md extension on the file part of "file#section" references
    if '#' in key:
        parts = key.split('#', 1)
        if not parts[0].endswith('.md'):
            parts[0] = parts[0] + '.md'
        key = '#'.join(parts)
    elif not key.endswith('.md') and '/' not in key and not key.startswith('NEW:'):
        key = key + '.md'
    return key
def extract_links(filepath: Path) -> list[dict]:
    """Parse the "## Links" section of a single digest file.

    Each recognized line has the shape ``- source → target (reason)``
    (arrow may be →, ↔ or ←; backticks and the reason are optional).
    Self-references resolve to the digest's own key; NEW:-prefixed
    placeholders and self-loops are dropped.

    Returns:
        A list of dicts with keys "source", "target", "reason", "file".
    """
    text = filepath.read_text()
    # The digest's own key (e.g. "daily-2026-02-28.md"), used to resolve
    # self-references like "this daily digest".
    own_key = filepath.stem + ".md"
    results: list[dict] = []

    link_re = re.compile(r'^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$')
    bare_date_re = re.compile(r'^(\d{4}-\d{2}-\d{2})$')
    self_phrases = ('this daily', 'this weekly', 'this monthly')

    inside = False
    for row in text.split('\n'):
        # Track whether we are inside the "## Links" section: it opens at
        # the Links header and closes at the next "## " header.
        if re.match(r'^##\s+Links', row):
            inside = True
            continue
        if inside and re.match(r'^##\s+', row) and not re.match(r'^##\s+Links', row):
            inside = False
            continue
        if not inside:
            continue
        # Subheaders / bold annotations inside the section are not links.
        if row.startswith('###') or row.startswith('**'):
            continue

        m = link_re.match(row)
        if m is None:
            continue
        raw_src = m.group(1).strip()
        raw_tgt = m.group(2).strip()
        why = m.group(3) or ""

        # Normalize; "" signals a self-reference -> use the digest's key.
        src = normalize_key(raw_src) or own_key
        tgt = normalize_key(raw_tgt) or own_key

        # "this daily/weekly/monthly digest" phrasing also means "me".
        if any(p in raw_src.lower() for p in self_phrases):
            src = own_key
        if any(p in raw_tgt.lower() for p in self_phrases):
            tgt = own_key

        # Bare dates like "2026-02-14" refer to daily digests.
        dm = bare_date_re.match(src.replace('.md', ''))
        if dm:
            src = f"daily-{dm.group(1)}.md"
        dm = bare_date_re.match(tgt.replace('.md', ''))
        if dm:
            tgt = f"daily-{dm.group(1)}.md"

        # NEW:-prefixed targets do not exist yet; self-loops are useless.
        # NOTE(review): '←' lines are recorded left-to-right exactly like
        # '→'; if link-add is directional, reversed arrows may need a swap.
        if src.startswith('NEW:') or tgt.startswith('NEW:'):
            continue
        if src == tgt:
            continue

        results.append({
            "source": src,
            "target": tgt,
            "reason": why,
            "file": filepath.name,
        })
    return results
def _run_link_add(source: str, target: str, reason: str) -> subprocess.CompletedProcess:
    """Invoke ``poc-memory link-add SOURCE TARGET [REASON]`` once.

    The reason (if any) is truncated to 200 characters. May raise
    subprocess.TimeoutExpired or FileNotFoundError; main() handles both
    via its boundary except.
    """
    cmd = ["poc-memory", "link-add", source, target]
    if reason:
        cmd.append(reason[:200])
    return subprocess.run(cmd, capture_output=True, text=True, timeout=10)


def main():
    """Collect links from every digest, dedupe, then print or apply them.

    Dry run by default; pass --apply to write links into the memory
    graph via `poc-memory link-add`.
    """
    do_apply = "--apply" in sys.argv

    # Collect all links from all digests
    all_links = []
    for pattern in ["daily-*.md", "weekly-*.md", "monthly-*.md"]:
        for f in sorted(EPISODIC_DIR.glob(pattern)):
            links = extract_links(f)
            if links:
                all_links.extend(links)

    # Deduplicate on (source, target); the first occurrence's reason wins.
    seen = set()
    unique_links = []
    for link in all_links:
        key = (link["source"], link["target"])
        if key not in seen:
            seen.add(key)
            unique_links.append(link)

    print(f"Found {len(all_links)} total links, {len(unique_links)} unique")

    if not do_apply:
        # Dry run — just show them
        for i, link in enumerate(unique_links, 1):
            print(f"  {i:3d}. {link['source']} → {link['target']}")
            if link['reason']:
                print(f"       ({link['reason'][:80]})")
        print(f"\nTo apply: {sys.argv[0]} --apply")
        return

    # Apply with fallback: if a section-level key ("file.md#sec") fails,
    # retry at file level ("file.md"). The helper is defined once at
    # module level rather than re-created on every loop iteration.
    applied = skipped = errors = fallbacks = 0
    for link in unique_links:
        src, tgt = link["source"], link["target"]
        reason = link.get("reason", "")
        try:
            r = _run_link_add(src, tgt, reason)
            if r.returncode == 0:
                out = r.stdout.strip()
                if "already exists" in out:
                    skipped += 1
                else:
                    print(f"  {out}")
                    applied += 1
            else:
                err = r.stderr.strip()
                if "No entry for" in err:
                    # Try stripping section anchors (no-op when absent)
                    src_base = src.split('#')[0]
                    tgt_base = tgt.split('#')[0]
                    if src_base == tgt_base:
                        skipped += 1  # Same file once anchors drop, skip
                        continue
                    r2 = _run_link_add(src_base, tgt_base, reason)
                    if r2.returncode == 0:
                        out = r2.stdout.strip()
                        if "already exists" in out:
                            skipped += 1
                        else:
                            src_anchor = src.split('#')[-1] if '#' in src else ''
                            tgt_anchor = tgt.split('#')[-1] if '#' in tgt else ''
                            print(f"  {out} (fallback from #{src_anchor}/{tgt_anchor})")
                            applied += 1
                            fallbacks += 1
                    else:
                        skipped += 1  # File truly doesn't exist
                elif "not found" in err:
                    skipped += 1
                else:
                    print(f"  ? {src} → {tgt}: {err}")
                    errors += 1
        except Exception as e:
            # Boundary catch-all: keep going past one bad link (timeout,
            # missing poc-memory binary, ...) but record the failure.
            print(f"  ! {src} → {tgt}: {e}")
            errors += 1

    print(f"\nApplied: {applied} ({fallbacks} file-level fallbacks)  Skipped: {skipped}  Errors: {errors}")


if __name__ == "__main__":
    main()