Rust core: - Cap'n Proto append-only storage (nodes + relations) - Graph algorithms: clustering coefficient, community detection, schema fit, small-world metrics, interference detection - BM25 text similarity with Porter stemming - Spaced repetition replay queue - Commands: search, init, health, status, graph, categorize, link-add, link-impact, decay, consolidate-session, etc. Python scripts: - Episodic digest pipeline: daily/weekly/monthly-digest.py - retroactive-digest.py for backfilling - consolidation-agents.py: 3 parallel Sonnet agents - apply-consolidation.py: structured action extraction + apply - digest-link-parser.py: extract ~400 explicit links from digests - content-promotion-agent.py: promote episodic obs to semantic files - bulk-categorize.py: categorize all nodes via single Sonnet call - consolidation-loop.py: multi-round automated consolidation Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
220 lines
7.3 KiB
Python
Executable file
220 lines
7.3 KiB
Python
Executable file
#!/usr/bin/env python3
"""digest-link-parser.py — extract explicit links from episodic digests.

Parses the "Links" sections of daily/weekly/monthly digests and
applies them to the memory graph via poc-memory link-add.

Usage:
    digest-link-parser.py          # dry run
    digest-link-parser.py --apply  # apply links
"""
|
|
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Directory holding the daily-/weekly-/monthly-*.md digest files this script scans.
EPISODIC_DIR = Path.home() / ".claude" / "memory" / "episodic"
|
|
|
|
|
|
def normalize_key(raw: str) -> str:
    """Normalize a raw link reference into a poc-memory node key.

    Strips surrounding whitespace/backticks, rewrites digest path forms
    (``weekly/2026-W06`` → ``weekly-2026-W06.md``), and ensures plain file
    references carry a ``.md`` suffix (before the ``#`` when a section
    anchor is present).  Returns "" for self-references ("this ..."
    phrases) so the caller can substitute the digest's own key.
    """
    text = raw.strip().strip('`').strip()

    # Path-style digest refs become dashed names:
    #   daily/2026-02-04 → daily-2026-02-04, weekly/2026-W06 → weekly-2026-W06
    text = re.sub(r'^(daily|weekly|monthly)/', r'\1-', text)

    # Digest names always get a .md extension.
    if re.match(r'^(daily|weekly|monthly)-\d{4}', text) and not text.endswith('.md'):
        text += '.md'

    # Self-references ("this daily digest" etc.) — plus one bare date that
    # appears in the corpus — are resolved to the digest key by the caller.
    if text.startswith('this ') or text == '2026-02-14':
        return ""

    if '#' in text:
        # Section anchor: the file part before '#' must end in .md.
        base, anchor = text.split('#', 1)
        if not base.endswith('.md'):
            base += '.md'
        return f"{base}#{anchor}"

    # Bare file references (no path separator, not a NEW: placeholder) get .md.
    if not text.endswith('.md') and '/' not in text and not text.startswith('NEW:'):
        text += '.md'

    return text
|
|
|
|
|
|
def extract_links(filepath: Path) -> list[dict]:
    """Pull explicit link records out of a digest's "## Links" section.

    Each record is a dict with ``source``, ``target``, ``reason`` and
    ``file`` keys.  Self-references resolve to the digest's own key,
    bare dates resolve to the matching daily digest, and ``NEW:``
    placeholders or source==target pairs are dropped.
    """
    # The digest's own key, e.g. "daily-2026-02-28" → "daily-2026-02-28.md".
    digest_key = filepath.stem + ".md"

    # Matches "- source → target (reason)", with optional backticks around
    # the keys, an optional "(reason)" suffix, and any arrow direction.
    link_re = re.compile(r'^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$')
    section_re = re.compile(r'^##\s+')
    links_hdr_re = re.compile(r'^##\s+Links')
    bare_date_re = re.compile(r'^(\d{4}-\d{2}-\d{2})$')

    results: list[dict] = []
    inside = False

    for line in filepath.read_text().split('\n'):
        # Track whether we are inside the "## Links" section.
        if links_hdr_re.match(line):
            inside = True
            continue
        if inside and section_re.match(line) and not links_hdr_re.match(line):
            inside = False
            continue
        if not inside:
            continue

        # Subheaders and bold annotations within the section are not links.
        if line.startswith(('###', '**')):
            continue

        m = link_re.match(line)
        if m is None:
            continue

        raw_src = m.group(1).strip()
        raw_tgt = m.group(2).strip()
        reason = m.group(3) or ""

        # Normalize; "" means a self-reference — substitute the digest key.
        src = normalize_key(raw_src) or digest_key
        tgt = normalize_key(raw_tgt) or digest_key

        # "this daily/weekly/monthly digest" phrasings also mean "this file".
        self_phrases = ('this daily', 'this weekly', 'this monthly')
        if any(p in raw_src.lower() for p in self_phrases):
            src = digest_key
        if any(p in raw_tgt.lower() for p in self_phrases):
            tgt = digest_key

        # Bare dates like "2026-02-14" refer to that day's daily digest.
        dm = bare_date_re.match(src.replace('.md', ''))
        if dm:
            src = f"daily-{dm.group(1)}.md"
        dm = bare_date_re.match(tgt.replace('.md', ''))
        if dm:
            tgt = f"daily-{dm.group(1)}.md"

        # NEW: placeholders point at nodes that don't exist yet.
        if src.startswith('NEW:') or tgt.startswith('NEW:'):
            continue
        # A node never links to itself.
        if src == tgt:
            continue

        results.append({
            "source": src,
            "target": tgt,
            "reason": reason,
            "file": filepath.name,
        })

    return results
|
|
|
|
|
|
def main():
    """Collect links from all digests, dedupe them, then print or apply.

    Dry run by default (prints each unique source → target pair); pass
    ``--apply`` to invoke ``poc-memory link-add`` for each pair, falling
    back to file-level keys when a section-anchored key has no entry.
    """
    do_apply = "--apply" in sys.argv

    # Collect all links from all digests.
    all_links = []
    for pattern in ["daily-*.md", "weekly-*.md", "monthly-*.md"]:
        for f in sorted(EPISODIC_DIR.glob(pattern)):
            links = extract_links(f)
            if links:
                all_links.extend(links)

    # Deduplicate on the (source, target) pair, keeping the first occurrence.
    seen = set()
    unique_links = []
    for link in all_links:
        key = (link["source"], link["target"])
        if key not in seen:
            seen.add(key)
            unique_links.append(link)

    print(f"Found {len(all_links)} total links, {len(unique_links)} unique")

    if not do_apply:
        # Dry run — just show them
        for i, link in enumerate(unique_links, 1):
            print(f" {i:3d}. {link['source']} → {link['target']}")
            if link['reason']:
                print(f" ({link['reason'][:80]})")
        print(f"\nTo apply: {sys.argv[0]} --apply")
        return

    def run_link_add(src_key, tgt_key, reason_text):
        """Run `poc-memory link-add src tgt [reason]` (reason capped at 200 chars)."""
        cmd = ["poc-memory", "link-add", src_key, tgt_key]
        if reason_text:
            cmd.append(reason_text[:200])
        return subprocess.run(cmd, capture_output=True, text=True, timeout=10)

    # Apply with fallback: if a section-level key fails, try the file-level key.
    applied = skipped = errors = fallbacks = 0
    for link in unique_links:
        src, tgt = link["source"], link["target"]
        reason = link.get("reason", "")

        try:
            r = run_link_add(src, tgt, reason)
            if r.returncode == 0:
                out = r.stdout.strip()
                if "already exists" in out:
                    skipped += 1
                else:
                    print(f" {out}")
                    applied += 1
                continue

            err = r.stderr.strip()
            if "No entry for" in err:
                # Fall back to file-level keys by stripping "#section" anchors.
                src_base = src.split('#', 1)[0]
                tgt_base = tgt.split('#', 1)[0]
                if src_base == tgt_base:
                    skipped += 1  # Same file once anchors drop — nothing to link.
                    continue
                if src_base == src and tgt_base == tgt:
                    # No anchors to strip: retrying the identical command
                    # would fail the same way, so the node truly doesn't exist.
                    skipped += 1
                    continue
                r2 = run_link_add(src_base, tgt_base, reason)
                if r2.returncode == 0:
                    out = r2.stdout.strip()
                    if "already exists" in out:
                        skipped += 1
                    else:
                        print(f" {out} (fallback from #{src.split('#')[-1] if '#' in src else ''}/{tgt.split('#')[-1] if '#' in tgt else ''})")
                        applied += 1
                        fallbacks += 1
                else:
                    skipped += 1  # File-level key missing too.
            elif "not found" in err:
                skipped += 1
            else:
                print(f" ? {src} → {tgt}: {err}")
                errors += 1
        except Exception as e:
            # Best-effort: report (e.g. TimeoutExpired, missing binary) and move on.
            print(f" ! {src} → {tgt}: {e}")
            errors += 1

    print(f"\nApplied: {applied} ({fallbacks} file-level fallbacks) Skipped: {skipped} Errors: {errors}")


if __name__ == "__main__":
    main()
|