poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core: - Cap'n Proto append-only storage (nodes + relations) - Graph algorithms: clustering coefficient, community detection, schema fit, small-world metrics, interference detection - BM25 text similarity with Porter stemming - Spaced repetition replay queue - Commands: search, init, health, status, graph, categorize, link-add, link-impact, decay, consolidate-session, etc. Python scripts: - Episodic digest pipeline: daily/weekly/monthly-digest.py - retroactive-digest.py for backfilling - consolidation-agents.py: 3 parallel Sonnet agents - apply-consolidation.py: structured action extraction + apply - digest-link-parser.py: extract ~400 explicit links from digests - content-promotion-agent.py: promote episodic obs to semantic files - bulk-categorize.py: categorize all nodes via single Sonnet call - consolidation-loop.py: multi-round automated consolidation Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
commit
23fac4e5fe
35 changed files with 9388 additions and 0 deletions
220
scripts/digest-link-parser.py
Executable file
220
scripts/digest-link-parser.py
Executable file
|
|
@ -0,0 +1,220 @@
|
|||
#!/usr/bin/env python3
|
||||
"""digest-link-parser.py — extract explicit links from episodic digests.
|
||||
|
||||
Parses the "Links" sections of daily/weekly/monthly digests and
|
||||
applies them to the memory graph via poc-memory link-add.
|
||||
|
||||
Usage:
|
||||
digest-link-parser.py # dry run
|
||||
digest-link-parser.py --apply # apply links
|
||||
"""
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
EPISODIC_DIR = Path.home() / ".claude" / "memory" / "episodic"
|
||||
|
||||
|
||||
def normalize_key(raw: str) -> str:
    """Normalize a raw link target into a poc-memory key.

    Returns "" for self-references ("this ..." phrases and the hardcoded
    run date) so the caller can substitute the digest's own key.
    """
    key = raw.strip().strip('`').strip()

    # "weekly/2026-W06" → "weekly-2026-W06" (same for daily/monthly prefixes).
    key = re.sub(r'^(daily|weekly|monthly)/', r'\1-', key)

    # Digest references always carry an explicit .md extension.
    if re.match(r'^(daily|weekly|monthly)-\d{4}', key) and not key.endswith('.md'):
        key += '.md'

    # "this daily digest" / bare run date are self-references; signal the
    # caller with an empty key so it can fill in the digest's own key.
    if key.startswith('this ') or key == '2026-02-14':
        return ""

    if '#' in key:
        # Section anchor present — make sure the file part ends in .md.
        base, _, anchor = key.partition('#')
        if not base.endswith('.md'):
            base += '.md'
        key = base + '#' + anchor
    elif not key.endswith('.md') and '/' not in key and not key.startswith('NEW:'):
        # Bare file reference — give it the .md extension.
        key += '.md'

    return key
|
||||
|
||||
|
||||
def extract_links(filepath: Path) -> list[dict]:
    """Extract explicit links from a digest file's "## Links" section.

    Each returned dict carries "source", "target", "reason", and "file".
    Self-references resolve to the digest's own key; NEW:-prefixed links
    and self-loops are dropped.
    """
    # The digest's own key, e.g. "daily-2026-02-28" → "daily-2026-02-28.md".
    digest_key = filepath.stem + ".md"

    # "- source → target (reason)" — backticks optional, reason optional,
    # separator may be →, ↔, or ←.
    link_re = re.compile(r'^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$')
    date_re = re.compile(r'^(\d{4}-\d{2}-\d{2})$')
    self_phrases = ('this daily', 'this weekly', 'this monthly')

    links: list[dict] = []
    in_links = False

    for line in filepath.read_text().split('\n'):
        # Section tracking: enter on "## Links", leave on any other "##".
        if re.match(r'^##\s+Links', line):
            in_links = True
            continue
        if in_links and re.match(r'^##\s+', line):
            in_links = False
            continue
        # Ignore everything outside the section plus subheaders within it.
        if not in_links or line.startswith(('###', '**')):
            continue

        m = link_re.match(line)
        if m is None:
            continue

        raw_source = m.group(1).strip()
        raw_target = m.group(2).strip()
        reason = m.group(3) or ""

        # Normalize; an empty result marks a self-reference.
        source = normalize_key(raw_source) or digest_key
        target = normalize_key(raw_target) or digest_key

        # "this daily/weekly/monthly digest" phrasing also means self.
        if any(p in raw_source.lower() for p in self_phrases):
            source = digest_key
        if any(p in raw_target.lower() for p in self_phrases):
            target = digest_key

        # A bare date like "2026-02-14" refers to that day's daily digest.
        hit = date_re.match(source.replace('.md', ''))
        if hit:
            source = f"daily-{hit.group(1)}.md"
        hit = date_re.match(target.replace('.md', ''))
        if hit:
            target = f"daily-{hit.group(1)}.md"

        # NEW:-prefixed targets don't exist yet; self-loops are useless.
        if source.startswith('NEW:') or target.startswith('NEW:'):
            continue
        if source == target:
            continue

        links.append({
            "source": source,
            "target": target,
            "reason": reason,
            "file": filepath.name,
        })

    return links
|
||||
|
||||
|
||||
def _run_link_add(source: str, target: str, reason: str) -> subprocess.CompletedProcess:
    """Invoke `poc-memory link-add SOURCE TARGET [REASON]` with a 10s timeout.

    Hoisted to module level — the original defined this closure inside the
    apply loop, rebuilding the function object on every iteration.
    """
    cmd = ["poc-memory", "link-add", source, target]
    if reason:
        cmd.append(reason[:200])  # keep the CLI argument bounded
    return subprocess.run(cmd, capture_output=True, text=True, timeout=10)


def _collect_unique_links() -> tuple[int, list[dict]]:
    """Gather links from every digest file, deduplicated by (source, target).

    Returns (total link count before dedup, unique links in first-seen order).
    """
    all_links: list[dict] = []
    for pattern in ["daily-*.md", "weekly-*.md", "monthly-*.md"]:
        for f in sorted(EPISODIC_DIR.glob(pattern)):
            all_links.extend(extract_links(f))

    seen: set[tuple[str, str]] = set()
    unique_links: list[dict] = []
    for link in all_links:
        key = (link["source"], link["target"])
        if key not in seen:
            seen.add(key)
            unique_links.append(link)
    return len(all_links), unique_links


def main():
    """Parse digest Links sections and optionally apply them to the graph.

    Dry run by default (prints what would be linked); pass --apply to
    actually create the links via `poc-memory link-add`.
    """
    do_apply = "--apply" in sys.argv

    total, unique_links = _collect_unique_links()
    print(f"Found {total} total links, {len(unique_links)} unique")

    if not do_apply:
        # Dry run — just show them
        for i, link in enumerate(unique_links, 1):
            print(f" {i:3d}. {link['source']} → {link['target']}")
            if link['reason']:
                print(f" ({link['reason'][:80]})")
        print(f"\nTo apply: {sys.argv[0]} --apply")
        return

    # Apply with fallback: if section-level key fails, try file-level
    applied = skipped = errors = fallbacks = 0
    for link in unique_links:
        src, tgt = link["source"], link["target"]
        reason = link.get("reason", "")

        try:
            r = _run_link_add(src, tgt, reason)
            if r.returncode == 0:
                out = r.stdout.strip()
                if "already exists" in out:
                    skipped += 1
                else:
                    print(f" {out}")
                    applied += 1
            else:
                err = r.stderr.strip()
                if "No entry for" in err:
                    # Section anchor may not exist in the graph — retry
                    # with bare file-level keys (anchors stripped).
                    src_base = src.split('#')[0] if '#' in src else src
                    tgt_base = tgt.split('#')[0] if '#' in tgt else tgt
                    if src_base == tgt_base:
                        skipped += 1  # Same file, skip
                        continue
                    r2 = _run_link_add(src_base, tgt_base, reason)
                    if r2.returncode == 0:
                        out = r2.stdout.strip()
                        if "already exists" in out:
                            skipped += 1
                        else:
                            print(f" {out} (fallback from #{src.split('#')[-1] if '#' in src else ''}/{tgt.split('#')[-1] if '#' in tgt else ''})")
                            applied += 1
                            fallbacks += 1
                    else:
                        skipped += 1  # File truly doesn't exist
                elif "not found" in err:
                    skipped += 1
                else:
                    print(f" ? {src} → {tgt}: {err}")
                    errors += 1
        except Exception as e:
            # Best-effort pipeline: record the failure and keep going.
            print(f" ! {src} → {tgt}: {e}")
            errors += 1

    print(f"\nApplied: {applied} ({fallbacks} file-level fallbacks) Skipped: {skipped} Errors: {errors}")


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue