#!/usr/bin/env python3
"""digest-link-parser.py — extract explicit links from episodic digests.

Parses the "Links" sections of daily/weekly/monthly digests and applies
them to the memory graph via `poc-memory link-add`.

Usage:
    digest-link-parser.py          # dry run
    digest-link-parser.py --apply  # apply links
"""

import re
import subprocess
import sys
from pathlib import Path

# Directory holding the daily/weekly/monthly digest markdown files.
EPISODIC_DIR = Path.home() / ".claude" / "memory" / "episodic"

# Link lines look like "- source → target (reason)"; the "(reason)" part is
# optional.  ↔ and ← arrows are accepted by the same pattern.
# NOTE(review): '←' lines are parsed but the left-hand side is still treated
# as the source — confirm whether link direction matters to poc-memory.
_LINK_LINE = re.compile(r'^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$')

# A bare ISO date such as "2026-02-14" (no prefix, no extension).
_DATE_ONLY = re.compile(r'^(\d{4}-\d{2}-\d{2})$')

# Phrases that mean "this digest itself" in the raw link text.
_SELF_REF_PHRASES = ('this daily', 'this weekly', 'this monthly')


def normalize_key(raw: str) -> str:
    """Normalize a raw link target to a poc-memory key.

    Strips backticks/whitespace, rewrites "daily/2026-02-04"-style paths to
    "daily-2026-02-04.md" keys, and ensures an ".md" extension on plain file
    references (including the file part of "file#section" keys).

    Returns "" for self-references ("this daily digest", ...); the caller is
    expected to substitute the digest's own key.
    """
    key = raw.strip().strip('`').strip()

    # weekly/2026-W06 → weekly-2026-W06, monthly/2026-02 → monthly-2026-02, …
    key = re.sub(r'^(daily|weekly|monthly)/', r'\1-', key)

    # daily-2026-02-04 → daily-2026-02-04.md
    if re.match(r'^(daily|weekly|monthly)-\d{4}', key) and not key.endswith('.md'):
        key += '.md'

    # Self-references are resolved by the caller.
    # NOTE(review): the literal '2026-02-14' looks like a leftover one-off
    # hack for a specific digest — confirm whether it is still needed.
    if key.startswith('this ') or key == '2026-02-14':
        return ""

    # Ensure the file part of "file#section" keys carries the .md extension.
    if '#' in key:
        base, anchor = key.split('#', 1)
        if not base.endswith('.md'):
            base += '.md'
        key = f"{base}#{anchor}"
    elif not key.endswith('.md') and '/' not in key and not key.startswith('NEW:'):
        key += '.md'

    return key


def _is_self_ref(raw: str) -> bool:
    """True when the raw link text says "this daily/weekly/monthly digest"."""
    lowered = raw.lower()
    return any(phrase in lowered for phrase in _SELF_REF_PHRASES)


def extract_links(filepath: Path) -> list[dict]:
    """Extract links from a digest file's "## Links" section.

    Returns a list of {"source", "target", "reason", "file"} dicts.  Keys are
    normalized via normalize_key(); self-references resolve to the digest's
    own key; bare dates become "daily-YYYY-MM-DD.md"; NEW:-prefixed links
    (target doesn't exist yet) and self-loops are dropped.
    """
    content = filepath.read_text()

    # The digest's own key, e.g. "daily-2026-02-28" → "daily-2026-02-28.md".
    digest_key = filepath.stem + ".md"

    links: list[dict] = []
    in_links = False
    for line in content.split('\n'):
        # Start of the Links section.
        if re.match(r'^##\s+Links', line):
            in_links = True
            continue
        # Any other "## ..." header ends it.  (A "## Links" header can't
        # reach this test — it already hit the `continue` above.)
        if in_links and re.match(r'^##\s+', line):
            in_links = False
            continue
        if not in_links:
            continue
        # Subheaders and bold labels inside the section carry no link.
        if line.startswith('###') or line.startswith('**'):
            continue

        # "- source → target (reason)", with or without backticks/reason.
        match = _LINK_LINE.match(line)
        if not match:
            continue

        raw_source = match.group(1).strip()
        raw_target = match.group(2).strip()
        reason = match.group(3) or ""

        # normalize_key() returns "" for self-references → digest's own key.
        source = normalize_key(raw_source) or digest_key
        target = normalize_key(raw_target) or digest_key

        # "this daily digest" phrasing in the raw text also means self.
        if _is_self_ref(raw_source):
            source = digest_key
        if _is_self_ref(raw_target):
            target = digest_key

        # Bare date references like "2026-02-14" → "daily-2026-02-14.md".
        date_match = _DATE_ONLY.match(source.replace('.md', ''))
        if date_match:
            source = f"daily-{date_match.group(1)}.md"
        date_match = _DATE_ONLY.match(target.replace('.md', ''))
        if date_match:
            target = f"daily-{date_match.group(1)}.md"

        # Skip NEW: links (target doesn't exist yet) and self-loops.
        if source.startswith('NEW:') or target.startswith('NEW:'):
            continue
        if source == target:
            continue

        links.append({
            "source": source,
            "target": target,
            "reason": reason,
            "file": filepath.name,
        })

    return links


def _try_link(source: str, target: str, reason: str) -> subprocess.CompletedProcess:
    """Run `poc-memory link-add source target [reason]` with a 10s timeout.

    The reason is truncated to 200 characters; stdout/stderr are captured
    so the caller can inspect "already exists" / "No entry for" messages.
    """
    cmd = ["poc-memory", "link-add", source, target]
    if reason:
        cmd.append(reason[:200])
    return subprocess.run(cmd, capture_output=True, text=True, timeout=10)


def main():
    do_apply = "--apply" in sys.argv

    # Collect all links from all digests.
    all_links = []
    for pattern in ["daily-*.md", "weekly-*.md", "monthly-*.md"]:
        for f in sorted(EPISODIC_DIR.glob(pattern)):
            links = extract_links(f)
            if links:
                all_links.extend(links)

    # Deduplicate on the (source, target) pair, keeping first occurrence.
    # NOTE(review): A→B and B→A are treated as distinct — confirm intended.
    seen = set()
    unique_links = []
    for link in all_links:
        key = (link["source"], link["target"])
        if key not in seen:
            seen.add(key)
            unique_links.append(link)

    print(f"Found {len(all_links)} total links, {len(unique_links)} unique")

    if not do_apply:
        # Dry run — just show them.
        for i, link in enumerate(unique_links, 1):
            print(f" {i:3d}. {link['source']} → {link['target']}")
            if link['reason']:
                print(f" ({link['reason'][:80]})")
        print(f"\nTo apply: {sys.argv[0]} --apply")
        return

    # Apply with fallback: if a section-level key ("file.md#anchor") fails,
    # retry at file level ("file.md").
    applied = skipped = errors = fallbacks = 0
    for link in unique_links:
        src, tgt = link["source"], link["target"]
        reason = link.get("reason", "")
        try:
            r = _try_link(src, tgt, reason)
            if r.returncode == 0:
                out = r.stdout.strip()
                if "already exists" in out:
                    skipped += 1
                else:
                    print(f" {out}")
                    applied += 1
            else:
                err = r.stderr.strip()
                if "No entry for" in err:
                    # Try stripping section anchors.
                    src_base = src.split('#', 1)[0]
                    tgt_base = tgt.split('#', 1)[0]
                    if src_base == tgt_base:
                        skipped += 1  # Same file, skip
                        continue
                    r2 = _try_link(src_base, tgt_base, reason)
                    if r2.returncode == 0:
                        out = r2.stdout.strip()
                        if "already exists" in out:
                            skipped += 1
                        else:
                            src_anchor = src.split('#')[-1] if '#' in src else ''
                            tgt_anchor = tgt.split('#')[-1] if '#' in tgt else ''
                            print(f" {out} (fallback from #{src_anchor}/{tgt_anchor})")
                            applied += 1
                            fallbacks += 1
                    else:
                        skipped += 1  # File truly doesn't exist
                elif "not found" in err:
                    skipped += 1
                else:
                    print(f" ? {src} → {tgt}: {err}")
                    errors += 1
        except Exception as e:
            # Best-effort: a missing binary / timeout counts as an error,
            # but one bad link must not abort the whole run.
            print(f" ! {src} → {tgt}: {e}")
            errors += 1

    print(f"\nApplied: {applied} ({fallbacks} file-level fallbacks) Skipped: {skipped} Errors: {errors}")


if __name__ == "__main__":
    main()