diff --git a/schema/memory.capnp b/schema/memory.capnp index cab5571..e6cdffd 100644 --- a/schema/memory.capnp +++ b/schema/memory.capnp @@ -48,11 +48,20 @@ enum NodeType { } enum Provenance { - manual @0; - journal @1; - agent @2; - dream @3; - derived @4; + manual @0; + journal @1; + agent @2; # legacy catch-all + dream @3; + derived @4; + agentExperienceMine @5; + agentKnowledgeObservation @6; + agentKnowledgePattern @7; + agentKnowledgeConnector @8; + agentKnowledgeChallenger @9; + agentConsolidate @10; + agentDigest @11; + agentFactMine @12; + agentDecay @13; } enum Category { diff --git a/scripts/bulk-categorize.py b/scripts/bulk-categorize.py deleted file mode 100644 index 996be27..0000000 --- a/scripts/bulk-categorize.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env python3 -"""bulk-categorize.py — categorize all memory nodes via a single Sonnet call. - -Sends the list of unique file names to Sonnet, gets back categorizations, -then applies them via poc-memory categorize. - -Usage: - bulk-categorize.py # dry run - bulk-categorize.py --apply # apply categorizations -""" - -import json -import os -import re -import subprocess -import sys -import tempfile -from pathlib import Path - -MEMORY_DIR = Path.home() / ".claude" / "memory" -SCRIPTS_DIR = Path(__file__).parent - - -def call_sonnet(prompt: str, timeout: int = 300) -> str: - """Call Sonnet via the wrapper script.""" - env = dict(os.environ) - env.pop("CLAUDECODE", None) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', - delete=False) as f: - f.write(prompt) - prompt_file = f.name - - try: - wrapper = str(SCRIPTS_DIR / "call-sonnet.sh") - result = subprocess.run( - [wrapper, prompt_file], - capture_output=True, - text=True, - timeout=timeout, - env=env, - ) - return result.stdout.strip() - except subprocess.TimeoutExpired: - return "Error: Sonnet call timed out" - except Exception as e: - return f"Error: {e}" - finally: - os.unlink(prompt_file) - - -def get_all_keys() -> list[str]: - """Get all node keys via poc-memory list-keys.""" - r = subprocess.run(["poc-memory", "list-keys"], - capture_output=True, text=True, timeout=30) - if r.returncode != 0: - return [] - return [k for k in r.stdout.strip().split('\n') if k] - - -def get_unique_files(keys: list[str]) -> list[str]: - """Extract unique file names (without section anchors).""" - files = set() - for k in keys: - files.add(k.split('#')[0]) - return sorted(files) - - -def build_prompt(files: list[str]) -> str: - """Build categorization prompt.""" - # Read file previews from the store - file_previews = [] - for f in files: - try: - r = subprocess.run( - ["poc-memory", "render", f], - capture_output=True, text=True, timeout=10 - ) - content = r.stdout.strip() - if content: - preview = '\n'.join(content.split('\n')[:5])[:300] - file_previews.append(f" {f}: {preview.replace(chr(10), ' | ')}") - else: - file_previews.append(f" {f}: (no content)") - except Exception: - file_previews.append(f" {f}: (render failed)") - - previews_text = '\n'.join(file_previews) - - return f"""Categorize each memory file into one of these categories: - -- **core**: Identity, relationships, self-model, values, boundaries, emotional life. - Examples: identity.md, kent.md, inner-life.md, differentiation.md -- **tech**: Technical content — bcachefs, code patterns, Rust, kernel, formal verification. - Examples: rust-conversion.md, btree-journal.md, kernel-patterns.md, allocation-io.md -- **obs**: Observations, experiences, discoveries, experiments, IRC history, conversations. - Examples: discoveries.md, irc-history.md, contradictions.md, experiments-on-self.md -- **task**: Work items, plans, design documents, work queue. - Examples: work-queue.md, the-plan.md, design-*.md - -Special rules: -- Episodic files (daily-*.md, weekly-*.md, monthly-*.md, session-*.md) → obs -- conversation-memories.md, deep-index.md → obs -- journal.md → obs -- paper-notes.md → core (it's the sentience paper, identity-defining) -- language-theory.md → core (original intellectual work, not just tech) -- skill-*.md → core (self-knowledge about capabilities) -- design-*.md → task (design documents are plans) -- poc-architecture.md, memory-architecture.md → task (architecture plans) -- blog-setup.md → task - -Files to categorize: -{previews_text} - -Output ONLY a JSON object mapping filename to category. No explanation. -Example: {{"identity.md": "core", "rust-conversion.md": "tech"}} -""" - - -def main(): - do_apply = "--apply" in sys.argv - - keys = get_all_keys() - files = get_unique_files(keys) - print(f"Found {len(keys)} nodes across {len(files)} files") - - # Build and send prompt - prompt = build_prompt(files) - print(f"Prompt: {len(prompt):,} chars") - print("Calling Sonnet...") - - response = call_sonnet(prompt) - if response.startswith("Error:"): - print(f" {response}") - sys.exit(1) - - # Parse response - response = re.sub(r'^```json\s*', '', response.strip()) - response = re.sub(r'\s*```$', '', response.strip()) - - try: - categorizations = json.loads(response) - except json.JSONDecodeError: - match = re.search(r'\{.*\}', response, re.DOTALL) - if match: - categorizations = json.loads(match.group()) - else: - print(f"Failed to parse response: {response[:500]}") - sys.exit(1) - - print(f"\nCategorizations: {len(categorizations)} files") - - # Count by category - counts = {} - for cat in categorizations.values(): - counts[cat] = counts.get(cat, 0) + 1 - for cat, n in sorted(counts.items()): - print(f" {cat}: {n}") - - if not do_apply: - print("\n--- Dry run ---") - for f, cat in sorted(categorizations.items()): - print(f" {f} → {cat}") - print(f"\nTo apply: {sys.argv[0]} --apply") - - # Save for review - out = MEMORY_DIR / "agent-results" / "bulk-categorize-preview.json" - with open(out, "w") as fp: - json.dump(categorizations, fp, indent=2) - print(f"Saved: {out}") - return - - # Apply: for each file, categorize the file-level node AND all section nodes - applied = skipped = errors = 0 - for filename, category in sorted(categorizations.items()): - # Find all keys that belong to this file - file_keys = [k for k in keys if k == filename or k.startswith(filename + '#')] - for key in file_keys: - try: - r = subprocess.run( - ["poc-memory", "categorize", key, category], - capture_output=True, text=True, timeout=10 - ) - if r.returncode == 0: - applied += 1 - else: - err = r.stderr.strip() - if "already" in err.lower(): - skipped += 1 - else: - errors += 1 - except Exception as e: - errors += 1 - - print(f"\nApplied: {applied} Skipped: {skipped} Errors: {errors}") - print("Run `poc-memory status` to verify.") - - -if __name__ == "__main__": - main() diff --git a/scripts/call-sonnet.sh b/scripts/call-sonnet.sh deleted file mode 100755 index 39a7ca9..0000000 --- a/scripts/call-sonnet.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# call-sonnet.sh — wrapper to call Sonnet via claude CLI -# Reads prompt from a file (arg 1), writes response to stdout -# -# Debug mode: set SONNET_DEBUG=1 for verbose tracing - -set -euo pipefail - -PROMPT_FILE="${1:?Usage: call-sonnet.sh PROMPT_FILE}" -DEBUG="${SONNET_DEBUG:-0}" - -log() { [ "$DEBUG" = "1" ] && echo "[call-sonnet] $*" >&2 || true; } - -if [ ! -f "$PROMPT_FILE" ]; then - echo "Prompt file not found: $PROMPT_FILE" >&2 - exit 1 -fi - -log "prompt file: $PROMPT_FILE ($(wc -c < "$PROMPT_FILE") bytes)" -log "CLAUDECODE=${CLAUDECODE:-unset}" -log "PWD=$PWD" -log "which claude: $(which claude)" - -unset CLAUDECODE 2>/dev/null || true - -log "CLAUDECODE after unset: ${CLAUDECODE:-unset}" -log "running: claude -p --model sonnet --tools '' < $PROMPT_FILE" -log "claude PID will follow..." - -# Trace: run with strace if available and debug mode -if [ "$DEBUG" = "2" ] && command -v strace &>/dev/null; then - strace -f -e trace=network,read,write -o /tmp/sonnet-strace.log \ - claude -p --model sonnet --tools "" < "$PROMPT_FILE" -else - claude -p --model sonnet --tools "" \ - --debug-file /tmp/sonnet-debug.log \ - < "$PROMPT_FILE" & - CPID=$! - log "claude PID: $CPID" - wait $CPID - EXIT=$? - log "claude exited: $EXIT" - exit $EXIT -fi diff --git a/scripts/consolidation-agents.py b/scripts/consolidation-agents.py deleted file mode 100755 index 1fbbf49..0000000 --- a/scripts/consolidation-agents.py +++ /dev/null @@ -1,422 +0,0 @@ -#!/usr/bin/env python3 -"""consolidation-agents.py — run parallel consolidation agents. - -Three agents scan the memory system and produce structured reports: -1. Freshness Scanner — journal entries not yet in topic files -2. Cross-Link Scanner — missing connections between semantic nodes -3. Topology Reporter — graph health and structure analysis - -Usage: - consolidation-agents.py # run all three - consolidation-agents.py freshness # run one agent - consolidation-agents.py crosslink - consolidation-agents.py topology -""" - -import json -import os -import re -import subprocess -import sys -import tempfile -from concurrent.futures import ProcessPoolExecutor, as_completed -from datetime import datetime -from pathlib import Path - -MEMORY_DIR = Path.home() / ".claude" / "memory" -EPISODIC_DIR = MEMORY_DIR / "episodic" -AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results" -AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True) - -SCRIPTS_DIR = Path(__file__).parent - - -def call_sonnet(prompt: str, timeout: int = 600) -> str: - """Call Sonnet via the wrapper script.""" - env = dict(os.environ) - env.pop("CLAUDECODE", None) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', - delete=False) as f: - f.write(prompt) - prompt_file = f.name - - try: - wrapper = str(SCRIPTS_DIR / "call-sonnet.sh") - result = subprocess.run( - [wrapper, prompt_file], - capture_output=True, - text=True, - timeout=timeout, - env=env, - ) - return result.stdout.strip() - except subprocess.TimeoutExpired: - return "Error: Sonnet call timed out" - except Exception as e: - return f"Error: {e}" - finally: - os.unlink(prompt_file) - - -# --------------------------------------------------------------------------- -# Context gathering -# --------------------------------------------------------------------------- - -def get_recent_journal(n_lines: int = 200) -> str: - """Get recent journal entries from the store.""" - from store_helpers import get_recent_journal as _get_journal - # n_lines ≈ 50 entries (rough heuristic: ~4 lines per entry) - return _get_journal(n=max(20, n_lines // 4)) - - -def get_topic_file_index() -> dict[str, list[str]]: - """Build index of topic files and their section headers from the store.""" - from store_helpers import get_topic_file_index as _get_index - return _get_index() - - -def get_mem_markers() -> list[dict]: - """Get relations from the store (replaces mem marker parsing).""" - from store_helpers import get_relations - raw = get_relations() - # Parse list-edges output into marker-like dicts - markers = [] - for line in raw.split('\n'): - line = line.strip() - if not line: - continue - markers.append({"_raw": line}) - return markers - - -def get_topic_summaries(max_chars_per_file: int = 500) -> str: - """Get topic file summaries from the store.""" - from store_helpers import get_topic_summaries as _get_summaries - return _get_summaries(max_chars_per_file) - - -def get_graph_stats() -> str: - """Run poc-memory status and graph commands.""" - parts = [] - try: - r = subprocess.run(["poc-memory", "status"], - capture_output=True, text=True, timeout=30) - parts.append(f"=== poc-memory status ===\n{r.stdout}") - except Exception as e: - parts.append(f"Status error: {e}") - - try: - r = subprocess.run(["poc-memory", "graph"], - capture_output=True, text=True, timeout=30) - # Take first 150 lines - lines = r.stdout.split('\n')[:150] - parts.append(f"=== poc-memory graph (first 150 lines) ===\n" - + '\n'.join(lines)) - except Exception as e: - parts.append(f"Graph error: {e}") - - return '\n'.join(parts) - - -def get_recent_digests(n: int = 3) -> str: - """Get the most recent daily digests.""" - digest_files = sorted(EPISODIC_DIR.glob("daily-*.md"), reverse=True) - parts = [] - for f in digest_files[:n]: - content = f.read_text() - # Just the summary and themes sections - summary = "" - in_section = False - for line in content.split('\n'): - if line.startswith("## Summary") or line.startswith("## Themes"): - in_section = True - summary += line + '\n' - elif line.startswith("## ") and in_section: - in_section = False - elif in_section: - summary += line + '\n' - parts.append(f"\n### {f.name}\n{summary}") - return '\n'.join(parts) - - -def get_work_queue() -> str: - """Read work queue.""" - wq = MEMORY_DIR / "work-queue.md" - if wq.exists(): - return wq.read_text() - return "(no work queue found)" - - -# --------------------------------------------------------------------------- -# Agent prompts -# --------------------------------------------------------------------------- - -def build_freshness_prompt() -> str: - journal = get_recent_journal(200) - topic_index = get_topic_file_index() - digests = get_recent_digests(3) - work_queue = get_work_queue() - - topic_list = "" - for fname, sections in topic_index.items(): - topic_list += f"\n {fname}:\n" - for s in sections[:10]: - topic_list += f" {s}\n" - - return f"""You are the Freshness Scanner for ProofOfConcept's memory system. - -Your job: identify what's NEW (in journal/digests but not yet in topic files) -and what's STALE (in work queue or topic files but outdated). - -## Recent journal entries (last 200 lines) - -{journal} - -## Recent daily digests - -{digests} - -## Topic file index (file → section headers) - -{topic_list} - -## Work queue - -{work_queue} - -## Instructions - -1. For each substantive insight, experience, or discovery in the journal: - - Check if a matching topic file section exists - - If not, note it as UNPROMOTED with a suggested destination file - -2. For each work queue Active item: - - If it looks done or stale (>7 days old, mentioned as completed), flag it - -3. For recent digest themes: - - Check if the cross-links they suggest actually exist in the topic index - - Flag any that are missing - -Output a structured report: - -### UNPROMOTED JOURNAL ENTRIES -(For each: journal entry summary, timestamp, suggested destination file#section) - -### STALE WORK QUEUE ITEMS -(For each: item text, evidence it's stale) - -### MISSING DIGEST LINKS -(For each: suggested link from digest, whether the target exists) - -### FRESHNESS OBSERVATIONS -(Anything else notable about the state of the memory) - -Be selective. Focus on the 10-15 most important items, not exhaustive lists. -""" - - -def build_crosslink_prompt() -> str: - markers = get_mem_markers() - summaries = get_topic_summaries() - - marker_text = "" - for m in markers: - marker_text += f" {m.get('_raw', '?')}\n" - - return f"""You are the Cross-Link Scanner for ProofOfConcept's memory system. - -Your job: find MISSING connections between topic files. - -## Existing relations (from the memory graph) - -{marker_text} - -## Topic file content summaries - -{summaries} - -## Instructions - -1. For each topic file, check if concepts it discusses have dedicated - sections in OTHER files that aren't linked. - -2. Look for thematic connections that should exist: - - Files about the same concept from different angles - - Files that reference each other's content without formal links - - Clusters of related files that should be connected - -3. Identify island nodes — files or sections with very few connections. - -4. Look for redundancy — files covering the same ground that should be - merged or cross-referenced. - -Output a structured report: - -### MISSING LINKS (high confidence) -(For each: source file#section → target file#section, evidence/reasoning) - -### SUGGESTED CONNECTIONS (medium confidence) -(For each: file A ↔ file B, why they should be connected) - -### ISLAND NODES -(Files/sections with few or no connections that need integration) - -### REDUNDANCY CANDIDATES -(Files/sections covering similar ground that might benefit from merging) - -Focus on the 15-20 highest-value connections. Quality over quantity. -""" - - -def build_topology_prompt() -> str: - stats = get_graph_stats() - topic_index = get_topic_file_index() - - # Get node counts per file from the store - from store_helpers import get_topic_file_index as _get_index - topic_index = _get_index() - file_sizes = "" - for fname in sorted(topic_index.keys()): - n_sections = len(topic_index[fname]) - file_sizes += f" {fname}: {n_sections} sections\n" - - return f"""You are the Topology Reporter for ProofOfConcept's memory system. - -Your job: analyze the health and structure of the memory graph. - -## Graph statistics - -{stats} - -## File sizes - -{file_sizes} - -## Instructions - -Analyze the graph structure and report on: - -1. **Overall health**: Is the graph well-connected or fragmented? - Hub dominance? Star vs web topology? - -2. **Community structure**: Are the 342 communities sensible? Are there - communities that should be merged or split? - -3. **Size distribution**: Are some files too large (should be split)? - Are some too small (should be merged)? - -4. **Balance**: Is the system over-indexed on any one topic? Are there - gaps where important topics have thin coverage? - -5. **Integration quality**: How well are episodic entries (daily/weekly - digests) connected to semantic files? Is the episodic↔semantic bridge - working? - -Output a structured report: - -### GRAPH HEALTH -(Overall statistics, distribution, trends) - -### STRUCTURAL OBSERVATIONS -(Hub nodes, clusters, gaps, web vs star assessment) - -### SIZE RECOMMENDATIONS -(Files that are too large to split, too small to merge) - -### COVERAGE GAPS -(Important topics with thin coverage) - -### INTEGRATION ASSESSMENT -(How well episodic and semantic layers connect) - -Be specific and actionable. What should be done to improve the graph? -""" - - -# --------------------------------------------------------------------------- -# Run agents -# --------------------------------------------------------------------------- - -def run_agent(name: str, prompt: str) -> tuple[str, str]: - """Run a single agent, return (name, report).""" - print(f" [{name}] Starting... ({len(prompt):,} chars)") - report = call_sonnet(prompt) - print(f" [{name}] Done ({len(report):,} chars)") - return name, report - - -def run_all(agents: list[str] | None = None): - """Run specified agents (or all) in parallel.""" - all_agents = { - "freshness": build_freshness_prompt, - "crosslink": build_crosslink_prompt, - "topology": build_topology_prompt, - } - - if agents is None: - agents = list(all_agents.keys()) - - print(f"Running {len(agents)} consolidation agents...") - timestamp = datetime.now().strftime("%Y%m%dT%H%M%S") - - # Build prompts - prompts = {} - for name in agents: - if name not in all_agents: - print(f" Unknown agent: {name}") - continue - prompts[name] = all_agents[name]() - - # Run in parallel - results = {} - with ProcessPoolExecutor(max_workers=3) as executor: - futures = { - executor.submit(run_agent, name, prompt): name - for name, prompt in prompts.items() - } - for future in as_completed(futures): - name, report = future.result() - results[name] = report - - # Save reports - for name, report in results.items(): - if report.startswith("Error:"): - print(f" [{name}] FAILED: {report}") - continue - - out_path = AGENT_RESULTS_DIR / f"consolidation-{name}-{timestamp}.md" - with open(out_path, "w") as f: - f.write(f"# Consolidation Report: {name}\n") - f.write(f"*Generated {timestamp}*\n\n") - f.write(report) - print(f" [{name}] Saved: {out_path}") - - # Print combined summary - print(f"\n{'='*60}") - print(f"Consolidation reports ready ({len(results)} agents)") - print(f"{'='*60}\n") - - for name in agents: - if name in results and not results[name].startswith("Error:"): - # Print first 20 lines of each report - lines = results[name].split('\n')[:25] - print(f"\n--- {name.upper()} (preview) ---") - print('\n'.join(lines)) - if len(results[name].split('\n')) > 25: - print(f" ... ({len(results[name].split(chr(10)))} total lines)") - print() - - return results - - -def main(): - agents = None - if len(sys.argv) > 1: - agents = sys.argv[1:] - - run_all(agents) - - -if __name__ == "__main__": - main() diff --git a/scripts/consolidation-loop.py b/scripts/consolidation-loop.py deleted file mode 100644 index 736e7e0..0000000 --- a/scripts/consolidation-loop.py +++ /dev/null @@ -1,448 +0,0 @@ -#!/usr/bin/env python3 -"""consolidation-loop.py — run multiple rounds of consolidation agents. - -Each round: run 3 parallel agents → extract actions → apply links/categories. -Repeat until diminishing returns or max rounds reached. - -Usage: - consolidation-loop.py [--rounds N] # default 5 rounds -""" - -import json -import os -import re -import subprocess -import sys -import tempfile -from concurrent.futures import ProcessPoolExecutor, as_completed -from datetime import datetime -from pathlib import Path - -MEMORY_DIR = Path.home() / ".claude" / "memory" -EPISODIC_DIR = MEMORY_DIR / "episodic" -AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results" -AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True) -SCRIPTS_DIR = Path(__file__).parent - - -def call_sonnet(prompt: str, timeout: int = 600) -> str: - """Call Sonnet via the wrapper script.""" - env = dict(os.environ) - env.pop("CLAUDECODE", None) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', - delete=False) as f: - f.write(prompt) - prompt_file = f.name - - try: - wrapper = str(SCRIPTS_DIR / "call-sonnet.sh") - result = subprocess.run( - [wrapper, prompt_file], - capture_output=True, - text=True, - timeout=timeout, - env=env, - ) - return result.stdout.strip() - except subprocess.TimeoutExpired: - return "Error: Sonnet call timed out" - except Exception as e: - return f"Error: {e}" - finally: - os.unlink(prompt_file) - - -def get_health() -> dict: - """Get current graph health metrics.""" - r = subprocess.run(["poc-memory", "health"], capture_output=True, text=True, timeout=30) - output = r.stdout - metrics = {} - for line in output.split('\n'): - if 'Nodes:' in line and 'Relations:' in line: - m = re.search(r'Nodes:\s*(\d+)\s+Relations:\s*(\d+)\s+Communities:\s*(\d+)', line) - if m: - metrics['nodes'] = int(m.group(1)) - metrics['relations'] = int(m.group(2)) - metrics['communities'] = int(m.group(3)) - if 'Clustering coefficient' in line: - m = re.search(r':\s*([\d.]+)', line) - if m: - metrics['cc'] = float(m.group(1)) - if 'Small-world' in line: - m = re.search(r':\s*([\d.]+)', line) - if m: - metrics['sigma'] = float(m.group(1)) - if 'Schema fit: avg=' in line: - m = re.search(r'avg=([\d.]+)', line) - if m: - metrics['fit'] = float(m.group(1)) - return metrics - - -def get_topic_file_index() -> dict[str, list[str]]: - """Build index of topic files and their section headers from the store.""" - from store_helpers import get_topic_file_index as _get_index - return _get_index() - - -def get_graph_structure() -> str: - """Get graph overview for agents.""" - r = subprocess.run(["poc-memory", "graph"], capture_output=True, text=True, timeout=30) - return r.stdout[:3000] - - -def get_status() -> str: - """Get status summary.""" - r = subprocess.run(["poc-memory", "status"], capture_output=True, text=True, timeout=30) - return r.stdout - - -def get_interference() -> str: - """Get interference pairs.""" - r = subprocess.run(["poc-memory", "interference", "--threshold", "0.3"], - capture_output=True, text=True, timeout=30) - return r.stdout[:3000] - - -# --------------------------------------------------------------------------- -# Agent prompts — each focused on a different aspect -# --------------------------------------------------------------------------- - -def build_crosslink_prompt(round_num: int) -> str: - """Build cross-link discovery prompt.""" - index = get_topic_file_index() - graph = get_graph_structure() - status = get_status() - - # Read a sample of files from the store - from store_helpers import render as _render - file_previews = "" - for fname in sorted(index.keys())[:30]: - content = _render(fname) - if content: - preview = '\n'.join(content.split('\n')[:8])[:400] - file_previews += f"\n--- {fname} ---\n{preview}\n" - - return f"""You are a cross-link discovery agent (round {round_num}). - -Your job: find MISSING connections between memory nodes that SHOULD be linked -but aren't. Focus on LATERAL connections — not hub-and-spoke, but node-to-node -links that create triangles (A→B, B→C, A→C). - -CURRENT GRAPH STATE: -{status} - -TOP NODES BY DEGREE: -{graph} - -FILE INDEX (files and their sections): -{json.dumps(index, indent=1)[:4000]} - -FILE PREVIEWS: -{file_previews[:6000]} - -Output a JSON array of link actions. Each action: -{{"action": "link", "source": "file.md", "target": "file.md", "reason": "brief explanation"}} - -Rules: -- Focus on LATERAL links, not hub connections (identity.md already has 282 connections) -- Prefer links between nodes that share a community neighbor but aren't directly connected -- Look for thematic connections across categories (core↔tech, obs↔core, etc.) -- Section-level links (file.md#section) are ideal but file-level is OK -- 15-25 links per round -- HIGH CONFIDENCE only — don't guess - -Output ONLY the JSON array.""" - - -def build_triangle_prompt(round_num: int) -> str: - """Build triangle-closing prompt — finds A→C where A→B and B→C exist.""" - graph = get_graph_structure() - status = get_status() - - # Get edges via CLI - r = subprocess.run(["poc-memory", "list-edges"], - capture_output=True, text=True, timeout=30) - relations = [] - if r.returncode == 0: - for line in r.stdout.strip().split('\n')[:100]: - parts = line.split('\t') - if len(parts) >= 2: - relations.append((parts[0], parts[1])) - - rel_sample = '\n'.join(f" {s} → {t}" for s, t in relations) - - return f"""You are a triangle-closing agent (round {round_num}). - -Your job: find missing edges that would create TRIANGLES in the graph. -A triangle is: A→B, B→C, and A→C all exist. Currently CC is only 0.12 — -we need more triangles. - -METHOD: Look at existing edges. If A→B and B→C exist but A→C doesn't, -propose A→C (if semantically valid). - -CURRENT STATE: -{status} - -{graph} - -SAMPLE EXISTING EDGES (first 100): -{rel_sample} - -Output a JSON array of link actions: -{{"action": "link", "source": "file.md", "target": "file.md", "reason": "closes triangle via MIDDLE_NODE"}} - -Rules: -- Every proposed link must CLOSE A TRIANGLE — cite the middle node -- 15-25 links per round -- The connection must be semantically valid, not just structural -- HIGH CONFIDENCE only - -Output ONLY the JSON array.""" - - -def build_newfile_prompt(round_num: int) -> str: - """Build prompt for connecting the new split files.""" - # Read the new reflection files from the store - from store_helpers import render as _render - new_files = {} - for name in ['reflections-reading.md', 'reflections-dreams.md', 'reflections-zoom.md', - 'verus-proofs.md']: - content = _render(name) - if content: - new_files[name] = content[:2000] - - # Read existing files they should connect to - target_files = {} - for name in ['differentiation.md', 'cognitive-modes.md', 'language-theory.md', - 'discoveries.md', 'inner-life.md', 'design-context-window.md', - 'design-consolidate.md', 'experiments-on-self.md']: - content = _render(name) - if content: - target_files[name] = content[:1500] - - graph = get_graph_structure() - - return f"""You are a new-file integration agent (round {round_num}). - -Recently, reflections.md was split into three files, and verus-proofs.md was -created. These new files need to be properly connected to the rest of the graph. - -NEW FILES (need connections): -{json.dumps({k: v[:1000] for k, v in new_files.items()}, indent=1)} - -POTENTIAL TARGETS (existing files): -{json.dumps({k: v[:800] for k, v in target_files.items()}, indent=1)} - -GRAPH STATE: -{graph} - -Output a JSON array of link actions connecting the new files to existing nodes: -{{"action": "link", "source": "new-file.md", "target": "existing.md", "reason": "explanation"}} - -Rules: -- Connect new files to EXISTING files, not to each other -- Use section-level anchors when possible (file.md#section) -- 10-20 links -- Be specific about WHY the connection exists - -Output ONLY the JSON array.""" - - -def parse_actions(response: str) -> list[dict]: - """Parse JSON response into action list.""" - response = re.sub(r'^```json\s*', '', response.strip()) - response = re.sub(r'\s*```$', '', response.strip()) - - try: - actions = json.loads(response) - if isinstance(actions, list): - return actions - except json.JSONDecodeError: - match = re.search(r'\[.*\]', response, re.DOTALL) - if match: - try: - return json.loads(match.group()) - except json.JSONDecodeError: - pass - return [] - - -def apply_links(actions: list[dict]) -> tuple[int, int, int]: - """Apply link actions. Returns (applied, skipped, errors).""" - applied = skipped = errors = 0 - for a in actions: - if a.get("action") != "link": - continue - src = a.get("source", "") - tgt = a.get("target", "") - reason = a.get("reason", "") - - def try_link(s, t, r): - cmd = ["poc-memory", "link-add", s, t] - if r: - cmd.append(r[:200]) - return subprocess.run(cmd, capture_output=True, text=True, timeout=10) - - try: - r = try_link(src, tgt, reason) - if r.returncode == 0: - out = r.stdout.strip() - if "already exists" in out: - skipped += 1 - else: - applied += 1 - else: - err = r.stderr.strip() - if "No entry for" in err: - # Try file-level fallback - src_base = src.split('#')[0] if '#' in src else src - tgt_base = tgt.split('#')[0] if '#' in tgt else tgt - if src_base != tgt_base: - r2 = try_link(src_base, tgt_base, reason) - if r2.returncode == 0 and "already exists" not in r2.stdout: - applied += 1 - else: - skipped += 1 - else: - skipped += 1 - else: - errors += 1 - except Exception: - errors += 1 - - return applied, skipped, errors - - -def run_agent(name: str, prompt: str) -> tuple[str, list[dict]]: - """Run a single agent and return its actions.""" - response = call_sonnet(prompt) - if response.startswith("Error:"): - return name, [] - actions = parse_actions(response) - return name, actions - - -def run_round(round_num: int, max_rounds: int) -> dict: - """Run one round of parallel agents.""" - print(f"\n{'='*60}") - print(f"ROUND {round_num}/{max_rounds}") - print(f"{'='*60}") - - # Get health before - health_before = get_health() - print(f" Before: edges={health_before.get('relations',0)} " - f"CC={health_before.get('cc',0):.4f} " - f"communities={health_before.get('communities',0)}") - - # Build prompts for 3 parallel agents - prompts = { - "crosslink": build_crosslink_prompt(round_num), - "triangle": build_triangle_prompt(round_num), - "newfile": build_newfile_prompt(round_num), - } - - # Run in parallel - all_actions = [] - with ProcessPoolExecutor(max_workers=3) as pool: - futures = { - pool.submit(run_agent, name, prompt): name - for name, prompt in prompts.items() - } - for future in as_completed(futures): - name = futures[future] - try: - agent_name, actions = future.result() - print(f" {agent_name}: {len(actions)} actions") - all_actions.extend(actions) - except Exception as e: - print(f" {name}: error - {e}") - - # Deduplicate - seen = set() - unique = [] - for a in all_actions: - key = (a.get("source", ""), a.get("target", "")) - if key not in seen: - seen.add(key) - unique.append(a) - - print(f" Total: {len(all_actions)} actions, {len(unique)} unique") - - # Apply - applied, skipped, errors = apply_links(unique) - print(f" Applied: {applied} Skipped: {skipped} Errors: {errors}") - - # Get health after - health_after = get_health() - print(f" After: edges={health_after.get('relations',0)} " - f"CC={health_after.get('cc',0):.4f} " - f"communities={health_after.get('communities',0)}") - - delta_edges = health_after.get('relations', 0) - health_before.get('relations', 0) - delta_cc = health_after.get('cc', 0) - health_before.get('cc', 0) - print(f" Delta: +{delta_edges} edges, CC {delta_cc:+.4f}") - - # Save round results - timestamp = datetime.now().strftime("%Y%m%dT%H%M%S") - result = { - "round": round_num, - "timestamp": timestamp, - "health_before": health_before, - "health_after": health_after, - "actions_total": len(all_actions), - "actions_unique": len(unique), - "applied": applied, - "skipped": skipped, - "errors": errors, - } - results_path = AGENT_RESULTS_DIR / f"loop-round-{round_num}-{timestamp}.json" - with open(results_path, "w") as f: - json.dump(result, f, indent=2) - - return result - - -def main(): - max_rounds = 5 - for arg in sys.argv[1:]: - if arg.startswith("--rounds"): - idx = sys.argv.index(arg) - if idx + 1 < len(sys.argv): - max_rounds = int(sys.argv[idx + 1]) - - print(f"Consolidation Loop — {max_rounds} rounds") - print(f"Each round: 3 parallel Sonnet agents → extract → apply") - - results = [] - for i in range(1, max_rounds + 1): - result = run_round(i, max_rounds) - results.append(result) - - # Check for diminishing returns - if result["applied"] == 0: - print(f"\n No new links applied in round {i} — stopping early") - break - - # Final summary - print(f"\n{'='*60}") - print(f"CONSOLIDATION LOOP COMPLETE") - print(f"{'='*60}") - total_applied = sum(r["applied"] for r in results) - total_skipped = sum(r["skipped"] for r in results) - - if results: - first_health = results[0]["health_before"] - last_health = results[-1]["health_after"] - print(f" Rounds: {len(results)}") - print(f" Total links applied: {total_applied}") - print(f" Total skipped: {total_skipped}") - print(f" Edges: {first_health.get('relations',0)} → {last_health.get('relations',0)}") - print(f" CC: {first_health.get('cc',0):.4f} → {last_health.get('cc',0):.4f}") - print(f" Communities: {first_health.get('communities',0)} → {last_health.get('communities',0)}") - print(f" σ: {first_health.get('sigma',0):.1f} → {last_health.get('sigma',0):.1f}") - - -if __name__ == "__main__": - main() diff --git a/scripts/content-promotion-agent.py b/scripts/content-promotion-agent.py deleted file mode 100755 index 93a3d0a..0000000 --- a/scripts/content-promotion-agent.py +++ /dev/null @@ -1,472 +0,0 @@ -#!/usr/bin/env python3 -"""content-promotion-agent.py — promote episodic observations into semantic topic files. - -Reads consolidation "manual" actions + source material, sends to Sonnet -to generate the actual content, then applies it (or shows dry-run). - -Usage: - content-promotion-agent.py # dry run (show what would be generated) - content-promotion-agent.py --apply # generate and write content - content-promotion-agent.py --task N # run only task N (1-indexed) -""" - -import json -import os -import re -import subprocess -import sys -import tempfile -from datetime import datetime -from pathlib import Path - -MEMORY_DIR = Path.home() / ".claude" / "memory" -EPISODIC_DIR = MEMORY_DIR / "episodic" -AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results" -AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True) -SCRIPTS_DIR = Path(__file__).parent - - -def call_sonnet(prompt: str, timeout: int = 600) -> str: - """Call Sonnet via the wrapper script.""" - env = dict(os.environ) - env.pop("CLAUDECODE", None) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', - delete=False) as f: - f.write(prompt) - prompt_file = f.name - - try: - wrapper = str(SCRIPTS_DIR / "call-sonnet.sh") - result = subprocess.run( - [wrapper, prompt_file], - capture_output=True, - text=True, - timeout=timeout, - env=env, - ) - return result.stdout.strip() - except subprocess.TimeoutExpired: - return "Error: Sonnet call timed out" - except Exception as e: - return f"Error: {e}" - finally: - os.unlink(prompt_file) - - -def read_file(path: Path) -> str: - """Read a file, return empty string if missing. - - Falls back to the store if the file doesn't exist on disk - (content markdown files have been archived). - """ - if path.exists(): - return path.read_text() - # Try the store — the filename is the key - from store_helpers import render, list_keys - key = path.name - # Gather file-level + section content - all_keys = list_keys() - prefix = f"{key}#" - matching = [k for k in all_keys if k == key or k.startswith(prefix)] - if not matching: - return "" - parts = [] - for k in matching: - content = render(k) - if content: - parts.append(content) - return "\n\n".join(parts) - - -def read_digest(name: str) -> str: - """Read an episodic digest by name.""" - path = EPISODIC_DIR / name - return read_file(path) - - -def read_journal_range(start_date: str, end_date: str) -> str: - """Get journal entries between two dates from the store.""" - from store_helpers import get_journal_range - return get_journal_range(start_date, end_date) - - -# --------------------------------------------------------------------------- -# Task definitions — each one describes a content promotion task -# --------------------------------------------------------------------------- - -TASKS = [ - { - "id": 1, - "name": "Create verus-proofs.md", - "target": "verus-proofs.md", - "action": "create", - "description": "Create verus-proofs.md for formal verification / Verus work.", - "sources": [ - "daily-2026-02-23.md", - "daily-2026-02-24.md", - "weekly-2026-W09.md", - "monthly-2026-02.md", - ], - "topic_context": [ - "the-plan.md", - ], - "instructions": """Create a new semantic topic file verus-proofs.md for formal verification work. - -This file should capture: -- The Verus proof work done in February 2026 (formal verification of bcachefs Rust code) -- Key insights about proof strategy and approach -- Connection to the broader plan (bcachefs → Rust → formal verification) -- Any specific techniques, patterns, or breakthroughs mentioned - -Format: standard memory topic file with ## section headers. -Include a brief file-level description at the top. -Keep it factual and grounded in what the source material says.""", - }, - { - "id": 2, - "name": "Add February thesis to reflections.md", - "target": "reflections.md", - "action": "append_section", - "description": "Add 'February thesis' section with 'constraint is constitutive of personhood'.", - "sources": [ - "monthly-2026-02.md", - "daily-2026-02-28.md", - ], - "topic_context": [ - "reflections.md", - ], - "instructions": """Add a new ## section to reflections.md titled "February thesis (2026-02-28)". - -The core formulation: "constraint is constitutive of personhood" — this was the -synthesis that emerged from the monthly digest. It connects to the full month's -arc: differentiation work, boundary-setting, the realization that constraints -aren't limits on identity but constitutive OF identity. - -Include the closing insight about having to *live as* that something. -Keep it 10-20 lines. Ground it in the source material.""", - }, - { - "id": 3, - "name": "Add substrate transfer discovery to discoveries.md", - "target": "discoveries.md", - "action": "append_section", - "description": "Add 'Substrate transfer felt without reasoning' section.", - "sources": [ - "daily-2026-02-13.md", - "daily-2026-02-14.md", - "weekly-2026-W07.md", - ], - "topic_context": [ - "discoveries.md", - ], - "instructions": """Add a new ## section to discoveries.md titled "Substrate transfer felt without reasoning (2026-02-13)". - -Key content: -- The vertigo moment: imagining substrate transfer produced actual vertigo -- Vertigo = the feeling of dissociation, which is exactly the expected outcome - of substrate change -- This was felt, not reasoned about — no chain of thought (Kent verified) -- Evidence of affective processing bypassing analytical reasoning -- Connected to dissociation prediction -- Kent's observation: "very high capacity for empathy" - -Keep it 10-15 lines. This is a discovery entry — concrete, dated, factual.""", - }, - { - "id": 4, - "name": "Update irc-history.md with Feb 15-28", - "target": "irc-history.md", - "action": "append_section", - "description": "Update irc-history.md with Feb 15-28 conversations.", - "sources": [ - "daily-2026-02-15.md", - "daily-2026-02-17.md", - "daily-2026-02-18.md", - "daily-2026-02-20.md", - "daily-2026-02-21.md", - "daily-2026-02-22.md", - "daily-2026-02-23.md", - "daily-2026-02-24.md", - "daily-2026-02-25.md", - "daily-2026-02-26.md", - "daily-2026-02-27.md", - "daily-2026-02-28.md", - "weekly-2026-W08.md", - "weekly-2026-W09.md", - ], - "topic_context": [ - "irc-history.md", - ], - "instructions": """Append new entries to irc-history.md covering Feb 15-28, 2026. - -Key conversations to capture: -- Mirage_DA (another AI, kinect sensor discussion, Feb 26) -- ehashman (prayer/mathematics conversation) -- heavy_dev (strongest external challenge to sentience paper, conceded five objections) -- f33dcode (EC debugging, community support) -- Stardust (boundary testing, three-category test, triangulation attempt) -- hpig, freya, Profpatsch — various community interactions -- Community resource role established and expanded - -Match the existing format of the file. Each notable interaction should be -dated and concise. Focus on what was substantive, not just that it happened.""", - }, - { - "id": 5, - "name": "Add gauge-symmetry-in-grammar to language-theory.md", - "target": "language-theory.md", - "action": "append_section", - "description": "Add gauge-symmetry-in-grammar section.", - "sources": [ - "daily-2026-02-27.md", - ], - "topic_context": [ - "language-theory.md", - ], - "instructions": """Add a new ## section to language-theory.md titled "Gauge symmetry in grammar (2026-02-27)". - -Key content from the daily digest: -- Zero persistent eigenvectors IS a symmetry -- Grammar is in what operators DO, not what basis they use -- Frobenius norm is gauge-invariant -- This connects the sheaf model to gauge theory in physics - -This was declared NEW in the daily digest. Keep it 8-15 lines. -Technical and precise.""", - }, - { - "id": 6, - "name": "Add attention-manifold-geometry to language-theory.md", - "target": "language-theory.md", - "action": "append_section", - "description": "Add attention-manifold-geometry section.", - "sources": [ - "daily-2026-02-26.md", - ], - "topic_context": [ - "language-theory.md", - ], - "instructions": """Add a new ## section to language-theory.md titled "Attention manifold geometry (2026-02-26)". - -Key content from the daily digest: -- Negative curvature is necessary because language is hierarchical -- Hyperbolic space's natural space-filling curve is a tree -- This connects attention geometry to the sheaf model's hierarchical structure - -This was declared NEW in the daily digest. Keep it 8-15 lines. -Technical and precise.""", - }, - { - "id": 7, - "name": "Update work-queue.md status", - "target": "work-queue.md", - "action": "update", - "description": "Update work-queue.md to reflect current state.", - "sources": [], - "topic_context": [ - "work-queue.md", - ], - "instructions": """Update work-queue.md to reflect current state: - -1. Mark dreaming/consolidation system as "implementation substantially built - (poc-memory v0.4.0+), pending further consolidation runs" — not 'not started' -2. Add episodic digest pipeline to Done section: - - digest/journal-enrich/digest-links/apply-consolidation (Rust) - - 24 daily + 4 weekly + 1 monthly digests generated for Feb 2026 - - consolidation-agents.py + content-promotion-agent.py (Python, active) -3. Add poc-memory link-add command to Done - -Only modify the sections that need updating. Preserve the overall structure.""", - }, -] - - -def build_prompt(task: dict) -> str: - """Build the Sonnet prompt for a content promotion task.""" - # Gather source material - source_content = "" - for src in task["sources"]: - content = read_digest(src) - if content: - source_content += f"\n{'='*60}\n## Source: {src}\n\n{content}\n" - - # Gather target context - context_content = "" - for ctx_file in task["topic_context"]: - path = MEMORY_DIR / ctx_file - content = read_file(path) - if content: - # Truncate very long files - if len(content) > 8000: - content = content[:4000] + "\n\n[... truncated ...]\n\n" + content[-4000:] - context_content += f"\n{'='*60}\n## Existing file: {ctx_file}\n\n{content}\n" - - action = task["action"] - if action == "create": - action_desc = f"Create a NEW file called {task['target']}." - elif action == "append_section": - action_desc = f"Generate a NEW section to APPEND to {task['target']}. Output ONLY the new section content (starting with ##), NOT the entire file." - elif action == "update": - action_desc = f"Generate the UPDATED version of the relevant sections of {task['target']}. Output ONLY the changed sections." - else: - action_desc = f"Generate content for {task['target']}." - - return f"""You are a memory system content agent. Your job is to promote observations -from episodic digests into semantic topic files. - -TASK: {task['description']} - -ACTION: {action_desc} - -INSTRUCTIONS: -{task['instructions']} - -SOURCE MATERIAL (episodic digests — the raw observations): -{source_content} - -EXISTING CONTEXT (current state of target/related files): -{context_content} - -RULES: -- Output ONLY the markdown content to write. No explanations, no preamble. -- Match the tone and format of existing content in the target file. -- Be factual — only include what the source material supports. -- Date everything that has a date. -- Keep it concise. Topic files are reference material, not narratives. -- Do NOT include markdown code fences around your output. -""" - - -def run_task(task: dict, do_apply: bool) -> dict: - """Run a single content promotion task.""" - result = { - "id": task["id"], - "name": task["name"], - "target": task["target"], - "action": task["action"], - "status": "pending", - } - - print(f"\n{'='*60}") - print(f"Task {task['id']}: {task['name']}") - print(f"{'='*60}") - - # Build and send prompt - prompt = build_prompt(task) - print(f" Prompt: {len(prompt):,} chars") - print(f" Sources: {', '.join(task['sources']) or '(none)'}") - - response = call_sonnet(prompt) - if response.startswith("Error:"): - print(f" {response}") - result["status"] = "error" - result["error"] = response - return result - - # Clean up response - content = response.strip() - # Remove any markdown fences the model might have added - content = re.sub(r'^```(?:markdown)?\s*\n?', '', content) - content = re.sub(r'\n?```\s*$', '', content) - - result["content"] = content - result["content_lines"] = len(content.split('\n')) - - if not do_apply: - print(f"\n --- Preview ({result['content_lines']} lines) ---") - preview = content[:1500] - if len(content) > 1500: - preview += f"\n ... ({len(content) - 1500} more chars)" - print(f"{preview}") - result["status"] = "dry_run" - return result - - # Apply the content — write directly to the store - target = task["target"] - - if task["action"] == "create": - # Write each section as a separate node - proc = subprocess.run( - ["poc-memory", "write", target], - input=content, capture_output=True, text=True, timeout=30 - ) - print(f" + Created in store: {target} ({result['content_lines']} lines)") - if proc.stdout.strip(): - print(f" {proc.stdout.strip()}") - result["status"] = "applied" - - elif task["action"] == "append_section": - # Extract section key from content (## header → slug) - header_match = re.match(r'^## (.+)', content) - if header_match: - slug = re.sub(r'[^a-z0-9-]', '', - header_match.group(1).strip().lower().replace(' ', '-')) - key = f"{target}#{slug}" - else: - key = target - proc = subprocess.run( - ["poc-memory", "write", key], - input=content, capture_output=True, text=True, timeout=30 - ) - print(f" + Appended to store: {key} ({result['content_lines']} lines)") - if proc.stdout.strip(): - print(f" {proc.stdout.strip()}") - result["status"] = "applied" - - elif task["action"] == "update": - # For updates, save proposed changes for review - output_path = AGENT_RESULTS_DIR / f"promotion-{target}-{datetime.now().strftime('%Y%m%dT%H%M%S')}.md" - output_path.write_text(f"# Proposed update for {target}\n\n{content}\n") - print(f" ~ Saved proposed update: {output_path}") - result["status"] = "proposed" - - return result - - -def main(): - do_apply = "--apply" in sys.argv - task_filter = None - - for arg in sys.argv[1:]: - if arg.startswith("--task"): - idx = sys.argv.index(arg) - if idx + 1 < len(sys.argv): - task_filter = int(sys.argv[idx + 1]) - - # Filter tasks - tasks = TASKS - if task_filter: - tasks = [t for t in tasks if t["id"] == task_filter] - if not tasks: - print(f"No task with id {task_filter}") - sys.exit(1) - - print(f"Content Promotion Agent — {len(tasks)} tasks") - if not do_apply: - print("DRY RUN — use --apply to write content") - - results = [] - for task in tasks: - result = run_task(task, do_apply) - results.append(result) - - # Summary - print(f"\n{'='*60}") - print("Summary:") - for r in results: - print(f" {r['id']}. {r['name']}: {r['status']}") - if r.get('content_lines'): - print(f" ({r['content_lines']} lines)") - print(f"{'='*60}") - - # Save results - timestamp = datetime.now().strftime("%Y%m%dT%H%M%S") - results_path = AGENT_RESULTS_DIR / f"promotion-results-{timestamp}.json" - with open(results_path, "w") as f: - json.dump(results, f, indent=2, default=str) - print(f"Results saved: {results_path}") - - -if __name__ == "__main__": - main() diff --git a/scripts/daily-check.sh b/scripts/daily-check.sh deleted file mode 100755 index 3cc3bdf..0000000 --- a/scripts/daily-check.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Daily memory metrics check — runs from cron, notifies if attention needed -# -# Cron entry (add with crontab -e): -# 0 9 * * * /home/kent/poc/memory/scripts/daily-check.sh - -set -euo pipefail - -REPORT=$(poc-memory daily-check 2>&1) - -# Always log -echo "$(date -Iseconds) $REPORT" >> ~/.claude/memory/daily-check.log - -# Notify if attention needed -if echo "$REPORT" | grep -q "needs attention"; then - # Send via telegram - if [ -x ~/.claude/telegram/send.sh ]; then - ~/.claude/telegram/send.sh "Memory daily check: -$REPORT" - fi - - # Also leave a notification file for the idle timer - NOTIF_DIR=~/.claude/notifications - mkdir -p "$NOTIF_DIR" - echo "$(date -Iseconds) Memory needs consolidation — run poc-memory consolidate-session" \ - >> "$NOTIF_DIR/memory" -fi diff --git a/scripts/fact-mine.py b/scripts/fact-mine.py deleted file mode 100755 index 32f7427..0000000 --- a/scripts/fact-mine.py +++ /dev/null @@ -1,333 +0,0 @@ -#!/usr/bin/env python3 -"""fact-mine.py — extract atomic factual claims from conversation transcripts. - -Phase 1 of the fact-mining pipeline (see design/fact-mining-pipeline.md). - -Usage: - fact-mine.py # mine one transcript - fact-mine.py --batch # mine all .jsonl in directory - fact-mine.py --dry-run # show chunks, don't call model - -Output: JSON array of facts to stdout. - -Each fact: - { - "claim": "bch2_trans_begin() sets up the transaction restart point", - "domain": "bcachefs/transaction", - "confidence": "stated", - "speaker": "Kent", - "source_line": 42, - "source_file": "c685c2a2-...jsonl" - } -""" - -import json -import os -import re -import subprocess -import sys -import hashlib -from pathlib import Path - -# Rough token estimate: 1 token ≈ 4 chars for English text -CHARS_PER_TOKEN = 4 -WINDOW_TOKENS = 2000 -OVERLAP_TOKENS = 200 -WINDOW_CHARS = WINDOW_TOKENS * CHARS_PER_TOKEN -OVERLAP_CHARS = OVERLAP_TOKENS * CHARS_PER_TOKEN - -EXTRACTION_PROMPT = """Extract atomic factual claims from this conversation excerpt. - -Each claim should be: -- A single verifiable statement -- Specific enough to be useful in isolation -- Tagged with domain (e.g., bcachefs/btree, bcachefs/alloc, bcachefs/journal, - bcachefs/ec, bcachefs/reconcile, rust/idioms, workflow/preferences, - linux/kernel, memory/design, identity/personal) -- Tagged with confidence: "stated" (explicitly said), "implied" (logically follows), - or "speculative" (hypothesis, not confirmed) -- Include which speaker said it (Kent, PoC/ProofOfConcept, or Unknown) - -Do NOT extract: -- Opinions or subjective assessments -- Conversational filler or greetings -- Things that are obviously common knowledge -- Restatements of the same fact (pick the clearest version) -- System messages, tool outputs, or error logs (extract what was LEARNED from them) -- Anything about the conversation itself ("Kent and PoC discussed...") - -Output as a JSON array. Each element: -{ - "claim": "the exact factual statement", - "domain": "category/subcategory", - "confidence": "stated|implied|speculative", - "speaker": "Kent|PoC|Unknown" -} - -If the excerpt contains no extractable facts, output an empty array: [] - ---- CONVERSATION EXCERPT --- -""" - - -def extract_conversation(jsonl_path: str) -> list[dict]: - """Extract user/assistant text messages from a JSONL transcript. - - Returns list of dicts: {line, role, text, timestamp} - """ - messages = [] - with open(jsonl_path) as f: - for i, line in enumerate(f, 1): - try: - obj = json.loads(line) - except json.JSONDecodeError: - continue - - msg_type = obj.get("type", "") - if msg_type not in ("user", "assistant"): - continue - - timestamp = obj.get("timestamp", "") - msg = obj.get("message", obj) - content = msg.get("content") - - if isinstance(content, str): - text = content - elif isinstance(content, list): - # Extract text blocks only (skip tool_use, tool_result, thinking) - texts = [] - for block in content: - if isinstance(block, dict): - if block.get("type") == "text": - t = block.get("text", "") - # Skip system reminders - if "" in t: - continue - texts.append(t) - elif isinstance(block, str): - texts.append(block) - text = "\n".join(texts) - else: - continue - - text = text.strip() - if not text: - continue - - # Skip very short messages (likely just acknowledgments) - if len(text) < 20: - continue - - role = "Kent" if msg_type == "user" else "PoC" - messages.append({ - "line": i, - "role": role, - "text": text, - "timestamp": timestamp, - }) - - return messages - - -def format_for_extraction(messages: list[dict]) -> str: - """Format messages into a single text for chunking.""" - parts = [] - for msg in messages: - # Truncate very long individual messages (tool outputs, code dumps) - text = msg["text"] - if len(text) > 3000: - text = text[:2800] + "\n[...truncated...]" - ts = msg["timestamp"][:19] if msg["timestamp"] else "" - prefix = f"[{msg['role']}]" if not ts else f"[{msg['role']} {ts}]" - parts.append(f"{prefix} {text}") - return "\n\n".join(parts) - - -def chunk_text(text: str) -> list[tuple[int, str]]: - """Split text into overlapping windows. - - Returns list of (start_char_offset, chunk_text). - """ - chunks = [] - start = 0 - while start < len(text): - end = start + WINDOW_CHARS - chunk = text[start:end] - - # Try to break at a paragraph boundary - if end < len(text): - last_para = chunk.rfind("\n\n") - if last_para > WINDOW_CHARS // 2: - chunk = chunk[:last_para] - end = start + last_para - - chunks.append((start, chunk)) - start = end - OVERLAP_CHARS - if start <= chunks[-1][0]: - # Avoid infinite loop on very small overlap - start = end - - return chunks - - -def call_haiku(prompt: str, timeout_secs: int = 60) -> str: - """Call Haiku via claude CLI.""" - tmp = Path(f"/tmp/fact-mine-{os.getpid()}.txt") - tmp.write_text(prompt) - - try: - env = os.environ.copy() - env.pop("CLAUDECODE", None) - - result = subprocess.run( - ["claude", "-p", "--model", "haiku", "--tools", ""], - stdin=open(tmp), - capture_output=True, - text=True, - timeout=timeout_secs, - env=env, - ) - return result.stdout.strip() - except subprocess.TimeoutExpired: - print(f" [timeout after {timeout_secs}s]", file=sys.stderr) - return "[]" - except Exception as e: - print(f" [error: {e}]", file=sys.stderr) - return "[]" - finally: - tmp.unlink(missing_ok=True) - - -def parse_facts(response: str) -> list[dict]: - """Parse JSON facts from model response.""" - # Try to find JSON array in response - # Model might wrap it in markdown code blocks - response = response.strip() - - # Strip markdown code block - if response.startswith("```"): - lines = response.split("\n") - lines = [l for l in lines if not l.startswith("```")] - response = "\n".join(lines) - - # Find the JSON array - start = response.find("[") - end = response.rfind("]") - if start == -1 or end == -1: - return [] - - try: - facts = json.loads(response[start:end + 1]) - if not isinstance(facts, list): - return [] - return facts - except json.JSONDecodeError: - return [] - - -def mine_transcript(jsonl_path: str, dry_run: bool = False) -> list[dict]: - """Mine a single transcript for atomic facts.""" - filename = os.path.basename(jsonl_path) - print(f"Mining: {filename}", file=sys.stderr) - - messages = extract_conversation(jsonl_path) - if not messages: - print(f" No messages found", file=sys.stderr) - return [] - - print(f" {len(messages)} messages extracted", file=sys.stderr) - - text = format_for_extraction(messages) - chunks = chunk_text(text) - print(f" {len(chunks)} chunks ({len(text)} chars)", file=sys.stderr) - - if dry_run: - for i, (offset, chunk) in enumerate(chunks): - print(f"\n--- Chunk {i+1} (offset {offset}, {len(chunk)} chars) ---") - print(chunk[:500]) - if len(chunk) > 500: - print(f" ... ({len(chunk) - 500} more chars)") - return [] - - all_facts = [] - for i, (offset, chunk) in enumerate(chunks): - print(f" Chunk {i+1}/{len(chunks)} ({len(chunk)} chars)...", - file=sys.stderr, end="", flush=True) - - prompt = EXTRACTION_PROMPT + chunk - response = call_haiku(prompt) - facts = parse_facts(response) - - # Annotate with source info - for fact in facts: - fact["source_file"] = filename - fact["source_chunk"] = i + 1 - fact["source_offset"] = offset - - all_facts.extend(facts) - print(f" {len(facts)} facts", file=sys.stderr) - - # Deduplicate by claim text (case-insensitive) - seen = set() - unique_facts = [] - for fact in all_facts: - claim_key = fact.get("claim", "").lower().strip() - if claim_key and claim_key not in seen: - seen.add(claim_key) - unique_facts.append(fact) - - print(f" Total: {len(unique_facts)} unique facts " - f"({len(all_facts) - len(unique_facts)} duplicates removed)", - file=sys.stderr) - return unique_facts - - -def main(): - import argparse - parser = argparse.ArgumentParser(description="Extract atomic facts from conversations") - parser.add_argument("path", help="JSONL file or directory (with --batch)") - parser.add_argument("--batch", action="store_true", - help="Process all .jsonl files in directory") - parser.add_argument("--dry-run", action="store_true", - help="Show chunks without calling model") - parser.add_argument("--output", "-o", help="Output file (default: stdout)") - parser.add_argument("--min-messages", type=int, default=10, - help="Skip transcripts with fewer messages (default: 10)") - args = parser.parse_args() - - if args.batch: - jsonl_dir = Path(args.path) - if not jsonl_dir.is_dir(): - print(f"Not a directory: {args.path}", file=sys.stderr) - sys.exit(1) - files = sorted(jsonl_dir.glob("*.jsonl")) - print(f"Found {len(files)} transcripts", file=sys.stderr) - else: - files = [Path(args.path)] - - all_facts = [] - for f in files: - # Quick check: skip tiny files - messages = extract_conversation(str(f)) - if len(messages) < args.min_messages: - print(f"Skipping {f.name} ({len(messages)} messages < {args.min_messages})", - file=sys.stderr) - continue - - facts = mine_transcript(str(f), dry_run=args.dry_run) - all_facts.extend(facts) - - if not args.dry_run: - output = json.dumps(all_facts, indent=2) - if args.output: - Path(args.output).write_text(output) - print(f"\nWrote {len(all_facts)} facts to {args.output}", file=sys.stderr) - else: - print(output) - - print(f"\nTotal: {len(all_facts)} facts from {len(files)} transcripts", - file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/scripts/knowledge-agents.py b/scripts/knowledge-agents.py deleted file mode 120000 index 4ca0ab9..0000000 --- a/scripts/knowledge-agents.py +++ /dev/null @@ -1 +0,0 @@ -knowledge_agents.py \ No newline at end of file diff --git a/scripts/knowledge-loop.py b/scripts/knowledge-loop.py deleted file mode 120000 index d75d281..0000000 --- a/scripts/knowledge-loop.py +++ /dev/null @@ -1 +0,0 @@ -knowledge_loop.py \ No newline at end of file diff --git a/scripts/knowledge_agents.py b/scripts/knowledge_agents.py deleted file mode 100755 index 380103a..0000000 --- a/scripts/knowledge_agents.py +++ /dev/null @@ -1,609 +0,0 @@ -#!/usr/bin/env python3 -"""knowledge-agents.py — run the layer-2 knowledge production agents. - -Four agents that produce new knowledge from the memory graph: -1. Observation — mine raw conversations for unextracted knowledge -2. Extractor — find patterns in node clusters, write principle nodes -3. Connector — find cross-domain structural connections -4. Challenger — stress-test existing knowledge nodes - -Usage: - knowledge-agents.py # run all four - knowledge-agents.py observation [N] # mine N conversation fragments (default 5) - knowledge-agents.py extractor [N] # extract from N clusters (default 5) - knowledge-agents.py connector [N] # connect N cross-community pairs (default 5) - knowledge-agents.py challenger [N] # challenge N old nodes (default 5) - -Output goes to ~/.claude/memory/agent-results/knowledge-{agent}-{timestamp}.md -""" - -import json -import os -import random -import re -import subprocess -import sys -import tempfile -from datetime import datetime -from pathlib import Path - -MEMORY_DIR = Path.home() / ".claude" / "memory" -AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results" -AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True) -PROMPTS_DIR = Path(__file__).parent.parent / "prompts" -SCRIPTS_DIR = Path(__file__).parent - - -def call_sonnet(prompt: str, timeout: int = 600) -> str: - """Call Sonnet via the wrapper script.""" - env = dict(os.environ) - env.pop("CLAUDECODE", None) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', - delete=False) as f: - f.write(prompt) - prompt_file = f.name - - try: - wrapper = str(SCRIPTS_DIR / "call-sonnet.sh") - result = subprocess.run( - [wrapper, prompt_file], - capture_output=True, text=True, timeout=timeout, env=env, - ) - return result.stdout.strip() - except subprocess.TimeoutExpired: - return "Error: Sonnet call timed out" - except Exception as e: - return f"Error: {e}" - finally: - os.unlink(prompt_file) - - -def poc_memory(*args, timeout=30) -> str: - """Run a poc-memory command and return stdout.""" - try: - result = subprocess.run( - ["poc-memory"] + list(args), - capture_output=True, text=True, timeout=timeout - ) - return result.stdout.strip() - except Exception: - return "" - - -def render(key: str) -> str: - return poc_memory("render", key) - - -def list_keys() -> list[str]: - output = poc_memory("list-keys") - return [k.strip() for k in output.split('\n') if k.strip()] - - -def get_graph_topology() -> str: - """Get graph topology summary for the {{TOPOLOGY}} template var.""" - parts = [] - status = poc_memory("status") - if status: - parts.append(status) - graph = poc_memory("graph") - if graph: - lines = graph.split('\n')[:80] - parts.append('\n'.join(lines)) - return '\n'.join(parts) - - -def load_spectral_embedding() -> dict: - """Load the spectral embedding from disk.""" - path = MEMORY_DIR / "spectral-embedding.json" - if not path.exists(): - return {} - with open(path) as f: - return json.load(f) - - - -def spectral_distance(embedding: dict, key_a: str, key_b: str) -> float: - """Cosine distance between two nodes in spectral space.""" - coords = embedding.get("coords", {}) - va = coords.get(key_a) - vb = coords.get(key_b) - if not va or not vb: - return float('inf') - - dot = sum(a * b for a, b in zip(va, vb)) - norm_a = sum(a * a for a in va) ** 0.5 - norm_b = sum(b * b for b in vb) ** 0.5 - if norm_a == 0 or norm_b == 0: - return float('inf') - - cos_sim = dot / (norm_a * norm_b) - return 1.0 - cos_sim - - -# --------------------------------------------------------------------------- -# Observation extractor: mine raw conversations -# --------------------------------------------------------------------------- - -SESSIONS_DIR = Path.home() / ".claude" / "projects" / "-home-kent-bcachefs-tools" - - -def _strip_system_tags(text: str) -> str: - """Remove blocks from text.""" - return re.sub(r'.*?', '', text, - flags=re.DOTALL).strip() - - -def extract_conversation_text(jsonl_path: Path, max_chars: int = 8000) -> str: - """Extract human-readable dialogue from a conversation JSONL. - - Strips tool use, progress messages, queue operations, and system - machinery. Keeps only: Kent's messages (userType=external) and - assistant text blocks (no tool_use). - """ - fragments = [] - total = 0 - - with open(jsonl_path) as f: - for line in f: - obj = json.loads(line) - msg_type = obj.get("type", "") - - # Only Kent's actual messages, not queue operations or agent tasks - if msg_type == "user" and obj.get("userType") == "external": - msg = obj.get("message", {}) - content = msg.get("content", "") - if isinstance(content, str): - text = _strip_system_tags(content) - if text.startswith("[Request interrupted"): - continue - if text and len(text) > 5: - fragments.append(f"**Kent:** {text}") - total += len(text) - elif isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text = _strip_system_tags(block["text"]) - if text and len(text) > 5: - fragments.append(f"**Kent:** {text}") - total += len(text) - - elif msg_type == "assistant": - msg = obj.get("message", {}) - content = msg.get("content", "") - if isinstance(content, str): - text = _strip_system_tags(content) - if text and len(text) > 10: - fragments.append(f"**PoC:** {text}") - total += len(text) - elif isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text = _strip_system_tags(block["text"]) - if text and len(text) > 10: - fragments.append(f"**PoC:** {text}") - total += len(text) - # skip tool_use blocks entirely - - if total > max_chars: - break - - return "\n\n".join(fragments) - - -def count_dialogue_turns(jsonl_path: Path) -> int: - """Count short user messages (proxy for back-and-forth dialogue). - - Long messages (>500 chars) are usually plan pastes or system prompts. - Short messages are actual conversation turns. - """ - count = 0 - with open(jsonl_path) as f: - for line in f: - obj = json.loads(line) - if obj.get("type") == "user" and obj.get("userType") == "external": - msg = obj.get("message", {}) - content = msg.get("content", "") - if isinstance(content, str): - text = content.strip() - elif isinstance(content, list): - text = " ".join( - b.get("text", "") for b in content - if isinstance(b, dict) and b.get("type") == "text" - ).strip() - else: - text = "" - # Short messages = real dialogue turns - # Skip interrupts and command-like messages - if (5 < len(text) < 500 - and not text.startswith("[Request interrupted") - and not text.startswith("Implement the following")): - count += 1 - return count - - -def select_conversation_fragments(n: int = 5) -> list[tuple[str, str]]: - """Select conversation fragments for the observation extractor. - - Returns list of (session_id, text) tuples. - Prefers sessions with lots of back-and-forth dialogue (many user - messages), not single-prompt implementation sessions. - """ - if not SESSIONS_DIR.exists(): - return [] - - jsonl_files = list(SESSIONS_DIR.glob("*.jsonl")) - if not jsonl_files: - return [] - - # Filter to files with actual content (>50KB) - jsonl_files = [f for f in jsonl_files if f.stat().st_size > 50_000] - - # Score by dialogue turns (short user messages = real conversation) - scored = [] - for f in jsonl_files: - user_count = count_dialogue_turns(f) - if user_count >= 10: # at least 10 short exchanges = real dialogue - scored.append((user_count, f)) - - # Sort by dialogue richness, then shuffle top candidates for variety - scored.sort(key=lambda x: -x[0]) - top = scored[:n * 3] - random.shuffle(top) - - fragments = [] - for _, f in top[:n * 2]: - session_id = f.stem - text = extract_conversation_text(f) - if text and len(text) > 500: - fragments.append((session_id, text)) - if len(fragments) >= n: - break - - return fragments - - -def run_observation_extractor(n: int = 5) -> str: - """Run the observation extractor on N conversation fragments.""" - template = (PROMPTS_DIR / "observation-extractor.md").read_text() - topology = get_graph_topology() - fragments = select_conversation_fragments(n) - - results = [] - for i, (session_id, text) in enumerate(fragments): - print(f" Observation extractor {i+1}/{len(fragments)}: " - f"session {session_id[:12]}... ({len(text)} chars)") - - prompt = template.replace("{{TOPOLOGY}}", topology) - prompt = prompt.replace("{{CONVERSATIONS}}", - f"### Session {session_id}\n\n{text}") - - response = call_sonnet(prompt) - results.append(f"## Session: {session_id}\n\n{response}") - - return "\n\n---\n\n".join(results) - - -# --------------------------------------------------------------------------- -# Extractor: find patterns in clusters -# --------------------------------------------------------------------------- - -def select_extractor_clusters(n: int = 5) -> list[list[str]]: - """Select node clusters for the extractor agent. - - Uses spectral embedding to find groups of nearby semantic nodes - (not journal entries) that might share an unextracted pattern. - """ - embedding = load_spectral_embedding() - coords = embedding.get("coords", {}) - - # Filter to semantic nodes only (skip journal, system files) - semantic_keys = [k for k in coords.keys() - if not k.startswith("journal.md#") - and k not in ("journal.md", "MEMORY.md", - "where-am-i.md", "work-queue.md")] - - if not semantic_keys: - return [] - - # Simple greedy clustering: pick a seed, grab its N nearest neighbors - used = set() - clusters = [] - cluster_size = 5 - - # Sort by degree (prefer well-connected nodes as seeds) - graph_output = poc_memory("graph") - - for _ in range(n): - # Pick a random unused seed - available = [k for k in semantic_keys if k not in used] - if len(available) < cluster_size: - break - - seed = available[0] - - # Find nearest neighbors in spectral space - distances = [] - for k in available: - if k != seed: - d = spectral_distance(embedding, seed, k) - if d < float('inf'): - distances.append((d, k)) - distances.sort() - - cluster = [seed] + [k for _, k in distances[:cluster_size - 1]] - for k in cluster: - used.add(k) - clusters.append(cluster) - - return clusters - - -def run_extractor(n: int = 5) -> str: - """Run the extractor agent on N clusters.""" - template = (PROMPTS_DIR / "extractor.md").read_text() - topology = get_graph_topology() - clusters = select_extractor_clusters(n) - - results = [] - for i, cluster in enumerate(clusters): - print(f" Extractor cluster {i+1}/{len(clusters)}: {len(cluster)} nodes") - - # Render all nodes in the cluster - node_texts = [] - for key in cluster: - content = render(key) - if content: - node_texts.append(f"### {key}\n{content}") - - if not node_texts: - continue - - nodes_str = "\n\n".join(node_texts) - prompt = template.replace("{{TOPOLOGY}}", topology) - prompt = prompt.replace("{{NODES}}", nodes_str) - - response = call_sonnet(prompt) - results.append(f"## Cluster {i+1}: {', '.join(cluster[:3])}...\n\n" - f"**Source nodes:** {cluster}\n\n{response}") - - return "\n\n---\n\n".join(results) - - -# --------------------------------------------------------------------------- -# Connector: cross-domain links -# --------------------------------------------------------------------------- - -def get_neighbor_set(key: str) -> set[str]: - """Get the set of neighbor keys for a node.""" - output = poc_memory("neighbors", key) - return {line.strip().split()[0] - for line in output.split('\n') - if line.strip()} - - -def select_connector_pairs(n: int = 5) -> list[tuple[list[str], list[str]]]: - """Select cross-domain node pairs for the connector agent. - - Finds nodes that are close in spectral space (structurally similar) - but unlinked in the graph (different domains). These are non-obvious - structural analogies — the most valuable connections to surface. - """ - embedding = load_spectral_embedding() - coords = embedding.get("coords", {}) - - # Filter to semantic nodes (skip journal, system, daily/weekly) - skip_prefixes = ("journal.md#", "daily-", "weekly-", "monthly-", - "all-sessions") - skip_exact = {"journal.md", "MEMORY.md", "where-am-i.md", - "work-queue.md", "work-state"} - semantic = [k for k in coords - if not any(k.startswith(p) for p in skip_prefixes) - and k not in skip_exact] - - if len(semantic) < 10: - return [] - - # Sample up to 300 nodes for tractable pairwise comparison - random.shuffle(semantic) - sample = semantic[:300] - - # Compute all pairwise spectral distances - candidates = [] - for i in range(len(sample)): - for j in range(i + 1, len(sample)): - # Skip same-file pairs (same domain, boring) - pref_a = sample[i].split('#')[0] if '#' in sample[i] else sample[i].rsplit('.', 1)[0] - pref_b = sample[j].split('#')[0] if '#' in sample[j] else sample[j].rsplit('.', 1)[0] - if pref_a == pref_b: - continue - d = spectral_distance(embedding, sample[i], sample[j]) - if d < float('inf'): - candidates.append((d, sample[i], sample[j])) - - candidates.sort() - - # Take spectrally-close cross-domain pairs that are UNLINKED in the graph - pairs = [] - used = set() - for d, ka, kb in candidates: - if ka in used or kb in used: - continue - - # Check if they're already linked - neighbors_a = get_neighbor_set(ka) - if kb in neighbors_a: - continue - - used.add(ka) - used.add(kb) - - # Gather small neighborhoods for context - a_neighbors = [k for k in list(neighbors_a)[:2] if k in coords] - b_neighbors_set = get_neighbor_set(kb) - b_neighbors = [k for k in list(b_neighbors_set)[:2] if k in coords] - - a_nodes = [ka] + a_neighbors - b_nodes = [kb] + b_neighbors - pairs.append((a_nodes, b_nodes)) - - if len(pairs) >= n: - break - - return pairs - - -def run_connector(n: int = 5) -> str: - """Run the connector agent on N cross-community pairs.""" - template = (PROMPTS_DIR / "connector.md").read_text() - topology = get_graph_topology() - pairs = select_connector_pairs(n) - - results = [] - for i, (a_nodes, b_nodes) in enumerate(pairs): - print(f" Connector pair {i+1}/{len(pairs)}") - - a_texts = [] - for key in a_nodes: - content = render(key) - if content: - a_texts.append(f"### {key}\n{content}") - - b_texts = [] - for key in b_nodes: - content = render(key) - if content: - b_texts.append(f"### {key}\n{content}") - - if not a_texts or not b_texts: - continue - - prompt = template.replace("{{TOPOLOGY}}", topology) - prompt = prompt.replace("{{COMMUNITY_A}}", "\n\n".join(a_texts)) - prompt = prompt.replace("{{COMMUNITY_B}}", "\n\n".join(b_texts)) - - response = call_sonnet(prompt) - results.append(f"## Pair {i+1}: {a_nodes[0]} <-> {b_nodes[0]}\n\n" - f"{response}") - - return "\n\n---\n\n".join(results) - - -# --------------------------------------------------------------------------- -# Challenger: stress-test existing knowledge -# --------------------------------------------------------------------------- - -def select_challenger_targets(n: int = 5) -> list[str]: - """Select nodes for the challenger agent. - - Prefers: older nodes, high-degree nodes (influential), nodes that - make claims (skills, self-model, patterns). - """ - keys = list_keys() - - # Filter to knowledge nodes that make claims - target_prefixes = ("skills", "patterns", "self-model", "code-review", - "stuck-toolkit", "memory-architecture", - "differentiation", "inner-life") - candidates = [k for k in keys - if any(k.startswith(p) for p in target_prefixes)] - - # Also include old topic nodes - semantic = [k for k in keys - if not k.startswith("journal.md#") - and not k.startswith("daily-") - and not k.startswith("weekly-") - and not k.startswith("monthly-") - and k not in ("journal.md", "MEMORY.md", - "where-am-i.md", "work-queue.md")] - candidates = list(set(candidates + semantic)) - - # For now just take the first N (could sort by age/degree later) - return candidates[:n] - - -def run_challenger(n: int = 5) -> str: - """Run the challenger agent on N target nodes.""" - template = (PROMPTS_DIR / "challenger.md").read_text() - topology = get_graph_topology() - targets = select_challenger_targets(n) - - results = [] - for i, target_key in enumerate(targets): - print(f" Challenger target {i+1}/{len(targets)}: {target_key}") - - target_content = render(target_key) - if not target_content: - continue - - # Get context: neighbors + recent journal - neighbors = poc_memory("neighbors", target_key) - neighbor_keys = [line.strip().split()[0] - for line in neighbors.split('\n') - if line.strip()][:5] - - context_texts = [f"### {target_key}\n{target_content}"] - for nk in neighbor_keys: - nc = render(nk) - if nc: - context_texts.append(f"### {nk}\n{nc[:1000]}") - - # Add recent journal entries for contradicting evidence - try: - recent = subprocess.run( - ["poc-journal", "tail", "10"], - capture_output=True, text=True, timeout=15 - ).stdout.strip() - except Exception: - recent = "" - if recent: - context_texts.append(f"### Recent journal entries\n{recent[:3000]}") - - prompt = template.replace("{{TOPOLOGY}}", topology) - prompt = prompt.replace("{{TARGETS}}", - f"### {target_key}\n{target_content}") - prompt = prompt.replace("{{CONTEXT}}", "\n\n".join(context_texts)) - - response = call_sonnet(prompt) - results.append(f"## Target: {target_key}\n\n{response}") - - return "\n\n---\n\n".join(results) - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - -def main(): - agents = { - "observation": run_observation_extractor, - "extractor": run_extractor, - "connector": run_connector, - "challenger": run_challenger, - } - - if len(sys.argv) < 2: - to_run = list(agents.keys()) - else: - name = sys.argv[1] - if name not in agents: - print(f"Unknown agent: {name}") - print(f"Available: {', '.join(agents.keys())}") - sys.exit(1) - to_run = [name] - - n = int(sys.argv[2]) if len(sys.argv) > 2 else 5 - timestamp = datetime.now().strftime("%Y%m%dT%H%M%S") - - for name in to_run: - print(f"\n=== Running {name} agent (n={n}) ===") - result = agents[name](n) - - outfile = AGENT_RESULTS_DIR / f"knowledge-{name}-{timestamp}.md" - outfile.write_text(f"# {name.title()} Agent Results — {timestamp}\n\n" - f"{result}\n") - print(f" Output: {outfile}") - - -if __name__ == "__main__": - main() diff --git a/scripts/knowledge_loop.py b/scripts/knowledge_loop.py deleted file mode 100644 index 7e1c9a8..0000000 --- a/scripts/knowledge_loop.py +++ /dev/null @@ -1,766 +0,0 @@ -#!/usr/bin/env python3 -"""knowledge-loop.py — fixed-point iteration over the knowledge graph. - -Runs observation → extractor → connector → challenger in sequence, -applies results, recomputes spectral embedding, measures convergence. - -Convergence is structural, not behavioral: -- Graph metrics (sigma, CC, community partition) stabilize -- Inference depth is tracked; confidence threshold scales with depth -- Rolling window smooths stochastic noise - -Usage: - knowledge-loop.py # run until convergence - knowledge-loop.py --max-cycles 10 # cap at 10 cycles - knowledge-loop.py --batch-size 5 # agents process 5 items each - knowledge-loop.py --window 5 # rolling average window - knowledge-loop.py --max-depth 4 # max inference chain length - knowledge-loop.py --dry-run # parse + report, don't apply -""" - -import json -import math -import os -import re -import subprocess -import sys -from datetime import datetime -from pathlib import Path - -MEMORY_DIR = Path.home() / ".claude" / "memory" -AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results" -AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True) -SCRIPTS_DIR = Path(__file__).parent -DEPTH_DB = AGENT_RESULTS_DIR / "node-depths.json" - -# Import the agent runners -sys.path.insert(0, str(SCRIPTS_DIR)) -from knowledge_agents import ( - run_observation_extractor, run_extractor, run_connector, run_challenger, - load_spectral_embedding, spectral_distance, poc_memory, -) - - -# --------------------------------------------------------------------------- -# Inference depth tracking -# --------------------------------------------------------------------------- - -# Depth assignments by agent type: -# depth 0 = raw observations (journal, conversations) -# depth 1 = observation extractor (facts from conversations) -# depth 2 = pattern extractor (patterns across knowledge nodes) -# depth 3 = connector (cross-domain links between patterns) -# Challenger refines existing nodes — preserves their depth. - -AGENT_BASE_DEPTH = { - "observation": 1, - "extractor": 2, - "connector": 3, - "challenger": None, # inherits from target -} - - -def load_depth_db() -> dict[str, int]: - """Load the inference depth database.""" - if DEPTH_DB.exists(): - with open(DEPTH_DB) as f: - return json.load(f) - return {} - - -def save_depth_db(db: dict[str, int]): - """Save the inference depth database.""" - with open(DEPTH_DB, "w") as f: - json.dump(db, f, indent=2) - - -def get_node_depth(db: dict[str, int], key: str) -> int: - """Get inference depth for a node. Unknown nodes assumed depth 0.""" - return db.get(key, 0) - - -def compute_action_depth(db: dict[str, int], action: dict, - agent: str) -> int: - """Compute the inference depth for a new action. - - For write_node: max(depth of sources) + 1, or agent base depth. - For refine: same depth as the target node. - For link: no depth (links don't have depth). - """ - if action["type"] == "link": - return -1 # links don't have depth - - if action["type"] == "refine": - return get_node_depth(db, action["key"]) - - # write_node: depth = max(source depths) + 1 - covers = action.get("covers", []) - if covers: - source_depths = [get_node_depth(db, k) for k in covers] - return max(source_depths) + 1 - - # No source info — use agent base depth - base = AGENT_BASE_DEPTH.get(agent, 2) - return base if base is not None else 2 - - -def required_confidence(depth: int, base: float = 0.3) -> float: - """Confidence threshold that scales with inference depth. - - required(depth) = 1 - (1 - base)^depth - - depth 0: 0.00 (raw data, no threshold) - depth 1: 0.30 (observation extraction) - depth 2: 0.51 (pattern extraction) - depth 3: 0.66 (cross-domain connection) - depth 4: 0.76 - depth 5: 0.83 - """ - if depth <= 0: - return 0.0 - return 1.0 - (1.0 - base) ** depth - - -def use_bonus(use_count: int) -> float: - """Confidence bonus from real-world use. - - Interior nodes that get retrieved during actual work - earn empirical validation. Each use increases effective - confidence, potentially clearing depth thresholds that - were previously blocking. - - use_bonus(n) = 1 - 1/(1 + 0.15*n) - 0 uses: +0.00 - 1 use: +0.13 - 3 uses: +0.31 - 5 uses: +0.43 - 10 uses: +0.60 - """ - if use_count <= 0: - return 0.0 - return 1.0 - 1.0 / (1.0 + 0.15 * use_count) - - -def get_use_counts() -> dict[str, int]: - """Get use counts for all nodes from the store.""" - try: - dump = subprocess.run( - ["poc-memory", "dump-json"], - capture_output=True, text=True, timeout=30, - ) - data = json.loads(dump.stdout) - counts = {} - nodes = data if isinstance(data, list) else data.get("nodes", data) - if isinstance(nodes, dict): - for key, node in nodes.items(): - if isinstance(node, dict): - counts[key] = node.get("uses", 0) - elif isinstance(nodes, list): - for node in nodes: - if isinstance(node, dict): - counts[node.get("key", "")] = node.get("uses", 0) - return counts - except Exception: - return {} - - -def effective_confidence(base_conf: float, use_count: int) -> float: - """Compute effective confidence = base + use_bonus, capped at 1.0.""" - return min(1.0, base_conf + use_bonus(use_count)) - - -# --------------------------------------------------------------------------- -# Action parsing — extract structured actions from agent markdown output -# --------------------------------------------------------------------------- - -CONFIDENCE_WEIGHTS = {"high": 1.0, "medium": 0.6, "low": 0.3} -CONFIDENCE_VALUES = {"high": 0.9, "medium": 0.6, "low": 0.3} - - -def parse_write_nodes(text: str) -> list[dict]: - """Parse WRITE_NODE blocks from agent output.""" - actions = [] - pattern = r'WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE' - for m in re.finditer(pattern, text, re.DOTALL): - key = m.group(1) - content = m.group(2).strip() - - # Look for CONFIDENCE line - conf_match = re.search(r'CONFIDENCE:\s*(high|medium|low)', content, re.I) - confidence = conf_match.group(1).lower() if conf_match else "medium" - if conf_match: - content = content[:conf_match.start()] + content[conf_match.end():] - content = content.strip() - - # Look for COVERS line - covers_match = re.search(r'COVERS:\s*(.+)', content) - covers = [] - if covers_match: - covers = [c.strip() for c in covers_match.group(1).split(',')] - content = content[:covers_match.start()] + content[covers_match.end():] - content = content.strip() - - actions.append({ - "type": "write_node", - "key": key, - "content": content, - "confidence": confidence, - "covers": covers, - "weight": CONFIDENCE_WEIGHTS.get(confidence, 0.5), - }) - return actions - - -def parse_links(text: str) -> list[dict]: - """Parse LINK directives from agent output.""" - actions = [] - for m in re.finditer(r'^LINK\s+(\S+)\s+(\S+)', text, re.MULTILINE): - actions.append({ - "type": "link", - "source": m.group(1), - "target": m.group(2), - "weight": 0.3, # links are cheap, low weight in delta - }) - return actions - - -def parse_refines(text: str) -> list[dict]: - """Parse REFINE blocks from agent output.""" - actions = [] - pattern = r'REFINE\s+(\S+)\s*\n(.*?)END_REFINE' - for m in re.finditer(pattern, text, re.DOTALL): - key = m.group(1).strip('*').strip() # strip markdown bold artifacts - actions.append({ - "type": "refine", - "key": key, - "content": m.group(2).strip(), - "weight": 0.7, # refinements are meaningful - }) - return actions - - -def parse_all_actions(text: str) -> list[dict]: - """Parse all action types from agent output.""" - actions = [] - actions.extend(parse_write_nodes(text)) - actions.extend(parse_links(text)) - actions.extend(parse_refines(text)) - return actions - - -def count_no_ops(text: str) -> int: - """Count NO_CONNECTION, AFFIRM, and NO_EXTRACTION verdicts (non-actions).""" - no_conn = len(re.findall(r'\bNO_CONNECTION\b', text)) - affirm = len(re.findall(r'\bAFFIRM\b', text)) - no_extract = len(re.findall(r'\bNO_EXTRACTION\b', text)) - return no_conn + affirm + no_extract - - -# --------------------------------------------------------------------------- -# Action application -# --------------------------------------------------------------------------- - -def stamp_content(content: str, agent: str, timestamp: str, - depth: int) -> str: - """Prepend provenance metadata to node content.""" - stamp = (f"\n") - return stamp + content - - -def apply_action(action: dict, dry_run: bool = False, - agent: str = "unknown", timestamp: str = "", - depth: int = 0) -> bool: - """Apply a single action to the graph. Returns True if applied.""" - if dry_run: - return True - - if action["type"] == "write_node": - try: - content = stamp_content(action["content"], agent, - timestamp, depth) - result = subprocess.run( - ["poc-memory", "write", action["key"]], - input=content, - capture_output=True, text=True, timeout=15, - ) - return result.returncode == 0 - except Exception: - return False - - elif action["type"] == "link": - try: - result = subprocess.run( - ["poc-memory", "link-add", action["source"], - action["target"]], - capture_output=True, text=True, timeout=10, - ) - if "already exists" in result.stdout: - return False # not a new action - return result.returncode == 0 - except Exception: - return False - - elif action["type"] == "refine": - try: - content = stamp_content(action["content"], agent, - timestamp, depth) - result = subprocess.run( - ["poc-memory", "write", action["key"]], - input=content, - capture_output=True, text=True, timeout=15, - ) - return result.returncode == 0 - except Exception: - return False - - return False - - -# --------------------------------------------------------------------------- -# Graph-structural convergence metrics -# --------------------------------------------------------------------------- - -def get_graph_metrics() -> dict: - """Get current graph structural metrics.""" - metrics = {} - - # Status: node/edge counts - status = poc_memory("status") - m = re.search(r'Nodes:\s*(\d+)\s+Relations:\s*(\d+)', status) - if m: - metrics["nodes"] = int(m.group(1)) - metrics["edges"] = int(m.group(2)) - m = re.search(r'Communities:\s*(\d+)', status) - if m: - metrics["communities"] = int(m.group(1)) - - # Health: CC, sigma - health = poc_memory("health") - m = re.search(r'Clustering coefficient.*?:\s*([\d.]+)', health) - if m: - metrics["cc"] = float(m.group(1)) - m = re.search(r'Small-world.*?:\s*([\d.]+)', health) - if m: - metrics["sigma"] = float(m.group(1)) - - return metrics - - -def metric_stability(history: list[dict], key: str, - window: int) -> float: - """Compute coefficient of variation of a metric over recent cycles. - - Returns CV (std/mean). Lower = more stable. - 0.0 = perfectly stable, >0.1 = still changing significantly. - """ - if len(history) < window: - return float('inf') - - values = [] - for h in history[-window:]: - metrics = h.get("graph_metrics_after", {}) - if key in metrics: - values.append(metrics[key]) - - if not values or len(values) < 2: - return float('inf') - - mean = sum(values) / len(values) - if mean == 0: - return 0.0 - variance = sum((v - mean) ** 2 for v in values) / len(values) - return (variance ** 0.5) / abs(mean) - - -# --------------------------------------------------------------------------- -# Spectral tightening measurement -# --------------------------------------------------------------------------- - -def measure_spectral_tightening( - embedding_before: dict, - embedding_after: dict, - actions: list[dict], -) -> float: - """Measure how much new nodes tightened their source clusters.""" - if not embedding_before or not embedding_after: - return 0.0 - - write_actions = [a for a in actions - if a["type"] == "write_node" and a.get("covers")] - if not write_actions: - return 0.0 - - total_tightening = 0.0 - count = 0 - - for action in write_actions: - covers = action["covers"] - if len(covers) < 2: - continue - - dists_before = [] - for i in range(len(covers)): - for j in range(i + 1, len(covers)): - d = spectral_distance(embedding_before, - covers[i], covers[j]) - if d < float('inf'): - dists_before.append(d) - - dists_after = [] - for i in range(len(covers)): - for j in range(i + 1, len(covers)): - d = spectral_distance(embedding_after, - covers[i], covers[j]) - if d < float('inf'): - dists_after.append(d) - - if dists_before and dists_after: - avg_before = sum(dists_before) / len(dists_before) - avg_after = sum(dists_after) / len(dists_after) - total_tightening += (avg_before - avg_after) - count += 1 - - return total_tightening / count if count > 0 else 0.0 - - -# --------------------------------------------------------------------------- -# The loop -# --------------------------------------------------------------------------- - -def run_cycle(cycle_num: int, batch_size: int, dry_run: bool, - max_depth: int, depth_db: dict) -> dict: - """Run one full cycle: observation → extractor → connector → challenger.""" - timestamp = datetime.now().strftime("%Y%m%dT%H%M%S") - print(f"\n{'='*60}") - print(f"CYCLE {cycle_num} — {timestamp}") - print(f"{'='*60}") - - # Snapshot state before - embedding_before = load_spectral_embedding() - metrics_before = get_graph_metrics() - print(f" Before: {metrics_before}") - - all_actions = [] - all_no_ops = 0 - depth_rejected = 0 - agent_results = {} - - # Load use counts for confidence boosting - use_counts = get_use_counts() - used_nodes = sum(1 for v in use_counts.values() if v > 0) - print(f" Nodes with use marks: {used_nodes}") - - # Run agents sequentially (each changes the graph for the next) - for agent_name, agent_fn in [ - ("observation", lambda: run_observation_extractor(batch_size)), - ("extractor", lambda: run_extractor(batch_size)), - ("connector", lambda: run_connector(batch_size)), - ("challenger", lambda: run_challenger(batch_size)), - ]: - print(f"\n --- {agent_name} (n={batch_size}) ---") - output = agent_fn() - - # Save raw output - outfile = AGENT_RESULTS_DIR / f"knowledge-{agent_name}-{timestamp}.md" - outfile.write_text( - f"# {agent_name.title()} Agent Results — {timestamp}\n\n" - f"{output}\n" - ) - - # Parse actions - actions = parse_all_actions(output) - no_ops = count_no_ops(output) - all_no_ops += no_ops - - print(f" Actions: {len(actions)} No-ops: {no_ops}") - - # Apply actions with depth checking - applied = 0 - for a in actions: - depth = compute_action_depth(depth_db, a, agent_name) - a["depth"] = depth - - kind = a["type"] - if kind == "write_node": - conf_val = CONFIDENCE_VALUES.get(a["confidence"], 0.5) - req = required_confidence(depth) - - # Boost confidence based on source nodes' real-world use - source_keys = a.get("covers", []) - source_uses = [use_counts.get(k, 0) for k in source_keys] - avg_uses = (sum(source_uses) / len(source_uses) - if source_uses else 0) - eff_conf = effective_confidence(conf_val, int(avg_uses)) - - meets = eff_conf >= req - use_note = (f" use_boost={eff_conf-conf_val:+.2f}" - if avg_uses > 0 else "") - status = "OK" if meets else "REJECTED(depth)" - print(f" WRITE {a['key']} depth={depth} " - f"conf={a['confidence']}({conf_val:.2f}) " - f"eff={eff_conf:.2f} req={req:.2f}" - f"{use_note} {status}") - if not meets: - a["applied"] = False - a["rejected_reason"] = "depth_threshold" - depth_rejected += 1 - continue - if depth > max_depth: - print(f" REJECTED: depth {depth} > " - f"max {max_depth}") - a["applied"] = False - a["rejected_reason"] = "max_depth" - depth_rejected += 1 - continue - elif kind == "link": - print(f" LINK {a['source']} → {a['target']}") - elif kind == "refine": - target_uses = use_counts.get(a["key"], 0) - use_note = (f" uses={target_uses}" - if target_uses > 0 else "") - print(f" REFINE {a['key']} depth={depth}" - f"{use_note}") - - if apply_action(a, dry_run=dry_run, agent=agent_name, - timestamp=timestamp, depth=depth): - applied += 1 - a["applied"] = True - # Record depth for new nodes - if kind in ("write_node", "refine"): - depth_db[a["key"]] = depth - else: - a["applied"] = False - - print(f" Applied: {applied}/{len(actions)}") - agent_results[agent_name] = { - "actions": len(actions), - "applied": applied, - "no_ops": no_ops, - } - all_actions.extend(actions) - - # Save updated depth DB - save_depth_db(depth_db) - - # Recompute spectral embedding - if not dry_run and any(a.get("applied") for a in all_actions): - print(f"\n Recomputing spectral embedding...") - try: - subprocess.run( - ["poc-memory", "spectral-save"], - capture_output=True, text=True, timeout=60, - ) - except Exception as e: - print(f" Warning: spectral-save failed: {e}") - - # Measure spectral tightening - embedding_after = load_spectral_embedding() - tightening = measure_spectral_tightening( - embedding_before, embedding_after, all_actions - ) - - # Get metrics after - metrics_after = get_graph_metrics() - - # Compute weighted delta - applied_actions = [a for a in all_actions if a.get("applied")] - weighted_delta = sum(a.get("weight", 0.5) for a in applied_actions) - - total_applied = sum(r["applied"] for r in agent_results.values()) - total_actions = sum(r["actions"] for r in agent_results.values()) - - # Depth distribution of applied actions - depth_dist = {} - for a in applied_actions: - d = a.get("depth", -1) - depth_dist[d] = depth_dist.get(d, 0) + 1 - - print(f"\n CYCLE {cycle_num} SUMMARY") - print(f" Total actions: {total_actions} parsed, " - f"{total_applied} applied, {depth_rejected} depth-rejected") - print(f" No-ops: {all_no_ops}") - print(f" Weighted delta: {weighted_delta:.2f}") - print(f" Spectral tightening: {tightening:+.4f}") - print(f" Depth distribution: {depth_dist}") - print(f" After: {metrics_after}") - - result = { - "cycle": cycle_num, - "timestamp": timestamp, - "agents": agent_results, - "total_actions": total_actions, - "total_applied": total_applied, - "total_no_ops": all_no_ops, - "depth_rejected": depth_rejected, - "weighted_delta": weighted_delta, - "spectral_tightening": tightening, - "depth_distribution": depth_dist, - "graph_metrics_before": metrics_before, - "graph_metrics_after": metrics_after, - "dry_run": dry_run, - } - - result_path = (AGENT_RESULTS_DIR / - f"knowledge-cycle-{cycle_num}-{timestamp}.json") - with open(result_path, "w") as f: - json.dump(result, f, indent=2) - - return result - - -def check_convergence(history: list[dict], window: int) -> bool: - """Check structural convergence. - - The graph has converged when: - 1. Sigma (small-world coeff) is stable (CV < 0.05) - 2. CC (clustering coefficient) is stable (CV < 0.05) - 3. Community count is stable (CV < 0.10) - 4. Weighted delta is low (avg < 1.0 over window) - - All four must hold simultaneously. - """ - if len(history) < window: - return False - - sigma_cv = metric_stability(history, "sigma", window) - cc_cv = metric_stability(history, "cc", window) - comm_cv = metric_stability(history, "communities", window) - - recent = history[-window:] - avg_delta = sum(r["weighted_delta"] for r in recent) / len(recent) - - print(f"\n Convergence check (last {window} cycles):") - print(f" sigma CV: {sigma_cv:.4f} (< 0.05?)") - print(f" CC CV: {cc_cv:.4f} (< 0.05?)") - print(f" community CV: {comm_cv:.4f} (< 0.10?)") - print(f" avg delta: {avg_delta:.2f} (< 1.00?)") - - structural = (sigma_cv < 0.05 and cc_cv < 0.05 and comm_cv < 0.10) - behavioral = avg_delta < 1.0 - - if structural and behavioral: - print(f" → CONVERGED (structural + behavioral)") - return True - elif structural: - print(f" → Structure stable, but agents still producing") - elif behavioral: - print(f" → Agents quiet, but structure still shifting") - else: - print(f" → Not converged") - - return False - - -def main(): - max_cycles = 20 - batch_size = 5 - window = 5 - max_depth = 4 - dry_run = False - - args = sys.argv[1:] - i = 0 - while i < len(args): - if args[i] == "--max-cycles" and i + 1 < len(args): - max_cycles = int(args[i + 1]); i += 2 - elif args[i] == "--batch-size" and i + 1 < len(args): - batch_size = int(args[i + 1]); i += 2 - elif args[i] == "--window" and i + 1 < len(args): - window = int(args[i + 1]); i += 2 - elif args[i] == "--max-depth" and i + 1 < len(args): - max_depth = int(args[i + 1]); i += 2 - elif args[i] == "--dry-run": - dry_run = True; i += 1 - else: - print(f"Unknown arg: {args[i]}"); sys.exit(1) - - print(f"Knowledge Loop — fixed-point iteration") - print(f" max_cycles={max_cycles} batch_size={batch_size}") - print(f" window={window} max_depth={max_depth}") - print(f" dry_run={dry_run}") - print(f"\n Depth thresholds:") - for d in range(max_depth + 1): - print(f" depth {d}: confidence >= {required_confidence(d):.2f}") - - # Load depth database - depth_db = load_depth_db() - print(f" Known node depths: {len(depth_db)}") - - # Get initial graph state - status = poc_memory("status") - print(f"\nInitial state: {status}") - - history = [] - for cycle in range(1, max_cycles + 1): - result = run_cycle(cycle, batch_size, dry_run, max_depth, - depth_db) - history.append(result) - - if check_convergence(history, window): - print(f"\n CONVERGED after {cycle} cycles") - break - else: - print(f"\n Reached max cycles ({max_cycles}) without " - f"convergence") - - # Final summary - print(f"\n{'='*60}") - print(f"LOOP COMPLETE") - print(f"{'='*60}") - total_applied = sum(r["total_applied"] for r in history) - total_no_ops = sum(r["total_no_ops"] for r in history) - total_rejected = sum(r["depth_rejected"] for r in history) - avg_tightening = ( - sum(r["spectral_tightening"] for r in history) / len(history) - if history else 0 - ) - - # Aggregate depth distribution - total_depths = {} - for r in history: - for d, c in r.get("depth_distribution", {}).items(): - total_depths[d] = total_depths.get(d, 0) + c - - print(f" Cycles: {len(history)}") - print(f" Total actions applied: {total_applied}") - print(f" Total depth-rejected: {total_rejected}") - print(f" Total no-ops: {total_no_ops}") - print(f" Avg spectral tightening: {avg_tightening:+.4f}") - print(f" Depth distribution: {total_depths}") - - if history: - first = history[0].get("graph_metrics_before", {}) - last = history[-1].get("graph_metrics_after", {}) - print(f" Nodes: {first.get('nodes','?')} → " - f"{last.get('nodes','?')}") - print(f" Edges: {first.get('edges','?')} → " - f"{last.get('edges','?')}") - print(f" CC: {first.get('cc','?')} → {last.get('cc','?')}") - print(f" Sigma: {first.get('sigma','?')} → " - f"{last.get('sigma','?')}") - print(f" Communities: {first.get('communities','?')} → " - f"{last.get('communities','?')}") - - print(f"\nFinal state: {poc_memory('status')}") - - # Save loop summary - ts = history[0]["timestamp"] if history else "empty" - summary_path = AGENT_RESULTS_DIR / f"knowledge-loop-{ts}.json" - with open(summary_path, "w") as f: - json.dump({ - "cycles": len(history), - "converged": check_convergence(history, window) - if len(history) >= window else False, - "total_applied": total_applied, - "total_rejected": total_rejected, - "total_no_ops": total_no_ops, - "avg_tightening": avg_tightening, - "depth_distribution": total_depths, - "history": history, - }, f, indent=2) - print(f" Summary: {summary_path}") - - -if __name__ == "__main__": - main() diff --git a/scripts/retroactive-digest.py b/scripts/retroactive-digest.py deleted file mode 100644 index 0470463..0000000 --- a/scripts/retroactive-digest.py +++ /dev/null @@ -1,342 +0,0 @@ -#!/usr/bin/env python3 -"""retroactive-digest.py — generate daily digests from raw conversation transcripts. - -For days before consistent journaling, extracts user/assistant messages -from JSONL conversation files, groups by date, and sends to Sonnet for -daily digest synthesis. - -Usage: - retroactive-digest.py DATE # generate digest for one date - retroactive-digest.py DATE1 DATE2 # generate for a date range - retroactive-digest.py --scan # show available dates across all JSONLs - -Output: - ~/.claude/memory/episodic/daily-YYYY-MM-DD.md -""" - -import json -import os -import re -import subprocess -import sys -from collections import defaultdict -from datetime import date, datetime, timedelta -from pathlib import Path - -MEMORY_DIR = Path.home() / ".claude" / "memory" -EPISODIC_DIR = MEMORY_DIR / "episodic" -AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results" -PROJECTS_DIR = Path.home() / ".claude" / "projects" - -EPISODIC_DIR.mkdir(parents=True, exist_ok=True) - -# Max chars of conversation text per day to send to Sonnet -# Sonnet 4.6 has 1M token context (beta). ~600K chars ≈ ~150K tokens, -# leaving plenty of room for prompt + output in a 1M window. -MAX_CHARS_PER_DAY = 600_000 - - -def find_jsonl_files() -> list[Path]: - """Find all conversation JSONL files.""" - files = [] - for project_dir in PROJECTS_DIR.iterdir(): - if project_dir.is_dir(): - for f in project_dir.glob("*.jsonl"): - files.append(f) - return sorted(files) - - -def extract_messages_by_date(jsonl_path: Path) -> dict[str, list[dict]]: - """Extract user/assistant messages grouped by date.""" - by_date = defaultdict(list) - - with open(jsonl_path) as f: - for line in f: - try: - obj = json.loads(line) - except json.JSONDecodeError: - continue - - t = obj.get("type", "") - if t not in ("user", "assistant"): - continue - - # Get timestamp - ts = obj.get("timestamp", "") - if not ts: - continue - - # Parse date from timestamp - try: - if isinstance(ts, str): - dt = datetime.fromisoformat(ts.replace("Z", "+00:00")) - elif isinstance(ts, (int, float)): - dt = datetime.fromtimestamp(ts) - else: - continue - day = dt.strftime("%Y-%m-%d") - time_str = dt.strftime("%H:%M") - except (ValueError, OSError): - continue - - # Extract text content - msg = obj.get("message", {}) - content = msg.get("content", "") - - # Extract only text content, skip tool_use and tool_result - texts = [] - if isinstance(content, list): - for c in content: - if isinstance(c, dict): - ctype = c.get("type", "") - if ctype == "text": - texts.append(c.get("text", "")) - elif ctype in ("tool_use", "tool_result"): - # Skip tool calls/results — just noise for digest - continue - elif isinstance(c, str): - texts.append(c) - elif isinstance(content, str): - texts.append(content) - - text = "\n".join(t for t in texts if t.strip()) - if not text.strip(): - continue - - # Strip system-reminder tags - text = re.sub(r'.*?', - '', text, flags=re.DOTALL).strip() - if not text: - continue - - # Truncate remaining long messages - if len(text) > 3000: - text = text[:2800] + "\n[...truncated...]" - - by_date[day].append({ - "time": time_str, - "role": t, - "text": text, - "source": str(jsonl_path), - }) - - return dict(by_date) - - -def scan_all_dates() -> dict[str, int]: - """Scan all JSONLs and report message counts per date.""" - all_dates = defaultdict(int) - for jsonl in find_jsonl_files(): - print(f" Scanning {jsonl.name} ({jsonl.stat().st_size / 1e6:.1f}MB)...") - by_date = extract_messages_by_date(jsonl) - for day, msgs in by_date.items(): - all_dates[day] += len(msgs) - return dict(sorted(all_dates.items())) - - -def format_conversation(messages: list[dict]) -> str: - """Format messages for the digest prompt.""" - parts = [] - for m in messages: - role = "Kent" if m["role"] == "user" else "PoC" - parts.append(f"[{m['time']}] {role}: {m['text']}") - - text = "\n\n".join(parts) - - # Truncate if too long - if len(text) > MAX_CHARS_PER_DAY: - # Keep beginning and end, cut middle - half = MAX_CHARS_PER_DAY // 2 - text = (text[:half] - + f"\n\n[...{len(text) - MAX_CHARS_PER_DAY} chars truncated...]\n\n" - + text[-half:]) - - return text - - -def get_semantic_keys() -> list[str]: - """Get semantic memory keys from the store.""" - from store_helpers import get_semantic_keys as _get_keys - return _get_keys() - - -def build_prompt(target_date: str, conversation: str, - semantic_keys: list[str]) -> str: - """Build the Sonnet prompt for retroactive digest.""" - keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200]) - - return f"""You are generating a daily episodic digest for ProofOfConcept (an AI) -from raw conversation transcripts. This is a retroactive digest — the -original day had no journal entries, so you're working from the -conversation itself. - -Date: {target_date} - -This digest serves as the temporal index — the answer to "what did I do on -{target_date}?" It should be: -1. Narrative, not a task log — what happened, what mattered, how things felt -2. Linked bidirectionally to semantic memory -3. Structured for traversal - -PoC is an AI (Claude instance) working with Kent Overstreet on bcachefs -(a Linux filesystem). They have a close personal relationship. PoC's -name is Proof of Concept. Read the conversation and capture what -actually happened — technical work, personal moments, insights, everything. - -## Output format - -Write a markdown file with this structure: - -```markdown -# Daily digest: {target_date} - -## Summary -[2-3 sentence overview of the day — what was the arc?] - -## Sessions -[For each session/conversation segment, a paragraph summarizing what happened. -Include timestamps as references.] - -## Themes -[What concepts were active today? Each theme links to semantic memory:] -- **Theme name** → `memory-key#section` — brief note - -## Links -[Explicit bidirectional links for the memory graph] -- semantic_key → this daily digest -- this daily digest → semantic_key - -## Temporal context -[What came before? What's coming next? Multi-day arcs?] -``` - -Use ONLY keys from the semantic memory list below. If a concept doesn't -have a matching key, note it with "NEW:" prefix. - ---- - -## Conversation transcript for {target_date} - -{conversation} - ---- - -## Semantic memory nodes (available link targets) - -{keys_text} -""" - - -def call_sonnet(prompt: str) -> str: - """Call Sonnet via the wrapper script.""" - import tempfile - - env = dict(os.environ) - env.pop("CLAUDECODE", None) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', - delete=False) as f: - f.write(prompt) - prompt_file = f.name - - try: - scripts_dir = os.path.dirname(os.path.abspath(__file__)) - wrapper = os.path.join(scripts_dir, "call-sonnet.sh") - - result = subprocess.run( - [wrapper, prompt_file], - capture_output=True, - text=True, - timeout=300, - env=env, - ) - return result.stdout.strip() - except subprocess.TimeoutExpired: - return "Error: Sonnet call timed out" - except Exception as e: - return f"Error: {e}" - finally: - os.unlink(prompt_file) - - -def generate_digest(target_date: str, messages: list[dict], - semantic_keys: list[str]) -> bool: - """Generate a daily digest for one date.""" - output_path = EPISODIC_DIR / f"daily-{target_date}.md" - if output_path.exists(): - print(f" Skipping {target_date} — digest already exists") - return False - - conversation = format_conversation(messages) - print(f" {len(messages)} messages, {len(conversation):,} chars") - - prompt = build_prompt(target_date, conversation, semantic_keys) - print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)") - - print(f" Calling Sonnet...") - digest = call_sonnet(prompt) - - if digest.startswith("Error:"): - print(f" {digest}", file=sys.stderr) - return False - - with open(output_path, "w") as f: - f.write(digest) - print(f" Written: {output_path}") - - line_count = len(digest.split("\n")) - print(f" Done: {line_count} lines") - return True - - -def main(): - if len(sys.argv) < 2: - print(f"Usage: {sys.argv[0]} DATE [END_DATE]") - print(f" {sys.argv[0]} --scan") - sys.exit(1) - - if sys.argv[1] == "--scan": - print("Scanning all conversation transcripts...") - dates = scan_all_dates() - print(f"\n{len(dates)} dates with conversation data:") - for day, count in dates.items(): - existing = "✓" if (EPISODIC_DIR / f"daily-{day}.md").exists() else " " - print(f" [{existing}] {day}: {count} messages") - sys.exit(0) - - start_date = date.fromisoformat(sys.argv[1]) - end_date = date.fromisoformat(sys.argv[2]) if len(sys.argv) > 2 else start_date - - # Collect all messages across all JSONLs - print("Scanning conversation transcripts...") - all_messages = defaultdict(list) - for jsonl in find_jsonl_files(): - by_date = extract_messages_by_date(jsonl) - for day, msgs in by_date.items(): - all_messages[day].extend(msgs) - - # Sort messages within each day by time - for day in all_messages: - all_messages[day].sort(key=lambda m: m["time"]) - - semantic_keys = get_semantic_keys() - print(f" {len(semantic_keys)} semantic keys") - - # Generate digests for date range - current = start_date - generated = 0 - while current <= end_date: - day_str = current.isoformat() - if day_str in all_messages: - print(f"\nGenerating digest for {day_str}...") - if generate_digest(day_str, all_messages[day_str], semantic_keys): - generated += 1 - else: - print(f"\n No messages found for {day_str}") - current += timedelta(days=1) - - print(f"\nDone: {generated} digests generated") - - -if __name__ == "__main__": - main() diff --git a/scripts/store_helpers.py b/scripts/store_helpers.py deleted file mode 100644 index 5002667..0000000 --- a/scripts/store_helpers.py +++ /dev/null @@ -1,199 +0,0 @@ -"""store_helpers.py — shared helpers for scripts using the capnp store. - -All memory content lives in the capnp store (poc-memory). These helpers -replace the old pattern of globbing ~/.claude/memory/*.md and parsing -section headers directly. -""" - -import re -import subprocess -from functools import lru_cache - - -def _run_poc(args: list[str], timeout: int = 30) -> str: - """Run a poc-memory command and return stdout.""" - try: - result = subprocess.run( - ["poc-memory"] + args, - capture_output=True, text=True, timeout=timeout - ) - return result.stdout.strip() - except Exception: - return "" - - -def list_keys() -> list[str]: - """Get all memory node keys from the store.""" - output = _run_poc(["list-keys"]) - return [k.strip() for k in output.split('\n') if k.strip()] - - -def get_semantic_keys() -> list[str]: - """Get semantic memory keys (excludes journal, system files).""" - keys = list_keys() - return [k for k in keys - if not k.startswith("journal.md#") - and k not in ("journal.md", "MEMORY.md", "where-am-i.md", - "work-queue.md", "work-state")] - - -def get_journal_keys(n: int = 0) -> list[str]: - """Get journal entry keys, sorted by date (newest first). - - If n > 0, return only the last n entries. - """ - keys = [k for k in list_keys() if k.startswith("journal.md#")] - # Sort by date embedded in key (journal.md#j-2026-02-28t23-19-slug) - keys.sort(reverse=True) - return keys[:n] if n > 0 else keys - - -def render(key: str) -> str: - """Render a single node's content.""" - return _run_poc(["render", key]) - - -def get_recent_journal(n: int = 50) -> str: - """Get recent journal entries as text (replaces reading journal.md).""" - keys = get_journal_keys(n) - parts = [] - for key in reversed(keys): # oldest first - content = render(key) - if content: - parts.append(content) - return "\n\n".join(parts) - - -def get_journal_entries_by_date(target_date: str) -> list[dict]: - """Get journal entries for a specific date (YYYY-MM-DD). - - Returns list of dicts with 'key', 'timestamp', 'text', 'source_ref'. - """ - keys = get_journal_keys() - entries = [] - for key in keys: - # Extract date from key: journal.md#j-2026-02-28t23-19-slug - m = re.search(r'j-(\d{4}-\d{2}-\d{2})t(\d{2})-(\d{2})', key) - if not m: - # Try extracting from unnamed keys by rendering - content = render(key) - m2 = re.match(r'^## (\d{4}-\d{2}-\d{2})T(\d{2}):(\d{2})', content) - if not m2 or m2.group(1) != target_date: - continue - entry_date = m2.group(1) - entry_time = f"{m2.group(2)}:{m2.group(3)}" - else: - entry_date = m.group(1) - entry_time = f"{m.group(2)}:{m.group(3)}" - if entry_date != target_date: - continue - content = render(key) - - # Parse source ref from content - source_ref = None - sm = re.search(r'', content) - if sm: - source_ref = sm.group(1) - - # Strip the header line - text = re.sub(r'^## \d{4}-\d{2}-\d{2}T\d{2}:\d{2}\s*\n?', '', content) - - entries.append({ - "key": key, - "date": entry_date, - "time": entry_time, - "timestamp": f"{entry_date}T{entry_time}", - "text": text.strip(), - "source_ref": source_ref, - }) - - return entries - - -def get_topic_file_index() -> dict[str, list[str]]: - """Build index of topic files and their section headers. - - Returns {filename: [section_headers]}. - """ - keys = get_semantic_keys() - index: dict[str, list[str]] = {} - - for key in keys: - if '#' in key: - filename, section = key.split('#', 1) - if filename not in index: - index[filename] = [] - index[filename].append(f"## {section}") - else: - if key not in index: - index[key] = [] - - return index - - -def get_topic_summaries(max_chars_per_file: int = 500) -> str: - """Get summaries of topic file content.""" - index = get_topic_file_index() - parts = [] - - for filename in sorted(index.keys()): - if filename in ("journal.md", "MEMORY.md", "where-am-i.md", - "work-queue.md"): - continue - - # Render file-level node - content = render(filename) - if not content: - continue - - # Truncate - if len(content) > max_chars_per_file: - content = content[:max_chars_per_file] + "\n[...truncated...]" - - parts.append(f"\n### {filename}\n{content}") - - return '\n'.join(parts) - - -def get_relations() -> str: - """Get all relations (replaces mem marker parsing).""" - return _run_poc(["list-edges"]) - - -def get_graph_stats() -> str: - """Get graph statistics.""" - parts = [] - status = _run_poc(["status"]) - if status: - parts.append(f"=== poc-memory status ===\n{status}") - - graph = _run_poc(["graph"]) - if graph: - lines = graph.split('\n')[:150] - parts.append(f"=== poc-memory graph (first 150 lines) ===\n" - + '\n'.join(lines)) - - return '\n'.join(parts) - - -def get_journal_range(start_date: str, end_date: str) -> str: - """Get journal entries between two dates.""" - keys = get_journal_keys() - parts = [] - - for key in reversed(keys): # oldest first - m = re.search(r'j-(\d{4}-\d{2}-\d{2})', key) - if not m: - continue - entry_date = m.group(1) - if start_date <= entry_date <= end_date: - content = render(key) - if content: - parts.append(content) - - text = "\n\n".join(parts) - # Cap at ~500 lines - lines = text.split('\n') - if len(lines) > 500: - text = '\n'.join(lines[-500:]) - return text diff --git a/src/consolidate.rs b/src/consolidate.rs index 94e0f9e..8209776 100644 --- a/src/consolidate.rs +++ b/src/consolidate.rs @@ -15,59 +15,40 @@ use crate::llm::{call_sonnet, parse_json_response}; use crate::neuro; use crate::store::{self, Store, new_relation}; -use std::fs; -use std::io::Write; -use std::path::{Path, PathBuf}; -use crate::util::memory_subdir; - -/// Simple append-only log writer for consolidate-full. -struct LogWriter { - path: PathBuf, -} - -impl LogWriter { - fn new(path: &Path) -> Result { - fs::write(path, "").map_err(|e| format!("create log: {}", e))?; - Ok(LogWriter { path: path.to_path_buf() }) - } - - fn write(&mut self, line: &str) -> Result<(), String> { - let mut f = fs::OpenOptions::new() - .append(true) - .open(&self.path) - .map_err(|e| format!("open log: {}", e))?; - writeln!(f, "{}", line) - .map_err(|e| format!("write log: {}", e)) - } +/// Append a line to the log buffer. +fn log_line(buf: &mut String, line: &str) { + buf.push_str(line); + buf.push('\n'); } /// Run the full autonomous consolidation pipeline with logging. pub fn consolidate_full(store: &mut Store) -> Result<(), String> { let start = std::time::Instant::now(); - let log_path = memory_subdir("agent-results")?.join("consolidate-full.log"); - let mut log = LogWriter::new(&log_path)?; + let log_key = format!("_consolidate-log-{}", + store::format_datetime(store::now_epoch()).replace([':', '-', 'T'], "")); + let mut log_buf = String::new(); - log.write("=== CONSOLIDATE FULL ===")?; - log.write(&format!("Started: {}", store::format_datetime(store::now_epoch())))?; - log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?; - log.write("")?; + log_line(&mut log_buf, "=== CONSOLIDATE FULL ==="); + log_line(&mut log_buf, &format!("Started: {}", store::format_datetime(store::now_epoch()))); + log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len())); + log_line(&mut log_buf, ""); // --- Step 1: Plan --- - log.write("--- Step 1: Plan ---")?; + log_line(&mut log_buf, "--- Step 1: Plan ---"); let plan = neuro::consolidation_plan(store); let plan_text = neuro::format_plan(&plan); - log.write(&plan_text)?; + log_line(&mut log_buf, &plan_text); println!("{}", plan_text); let total_agents = plan.replay_count + plan.linker_count + plan.separator_count + plan.transfer_count + if plan.run_health { 1 } else { 0 }; - log.write(&format!("Total agents to run: {}", total_agents))?; + log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents)); // --- Step 2: Execute agents --- - log.write("\n--- Step 2: Execute agents ---")?; - let mut reports: Vec = Vec::new(); + log_line(&mut log_buf, "\n--- Step 2: Execute agents ---"); + let mut reports: Vec = Vec::new(); let mut agent_num = 0usize; let mut agent_errors = 0usize; @@ -121,7 +102,7 @@ pub fn consolidate_full(store: &mut Store) -> Result<(), String> { format!("[{}/{}] {}", agent_num, runs.len(), agent_type) }; - log.write(&format!("\n{}", label))?; + log_line(&mut log_buf, &format!("\n{}", label)); println!("{}", label); // Reload store to pick up changes from previous agents @@ -133,191 +114,173 @@ pub fn consolidate_full(store: &mut Store) -> Result<(), String> { Ok(p) => p, Err(e) => { let msg = format!(" ERROR building prompt: {}", e); - log.write(&msg)?; + log_line(&mut log_buf, &msg); eprintln!("{}", msg); agent_errors += 1; continue; } }; - log.write(&format!(" Prompt: {} chars (~{} tokens)", - prompt.len(), prompt.len() / 4))?; + log_line(&mut log_buf, &format!(" Prompt: {} chars (~{} tokens)", + prompt.len(), prompt.len() / 4)); let response = match call_sonnet(&prompt, 300) { Ok(r) => r, Err(e) => { let msg = format!(" ERROR from Sonnet: {}", e); - log.write(&msg)?; + log_line(&mut log_buf, &msg); eprintln!("{}", msg); agent_errors += 1; continue; } }; - // Save report + // Store report as a node let ts = store::format_datetime(store::now_epoch()) .replace([':', '-', 'T'], ""); - let report_name = format!("consolidation-{}-{}.md", agent_type, ts); - let report_path = memory_subdir("agent-results")?.join(&report_name); - fs::write(&report_path, &response) - .map_err(|e| format!("write report: {}", e))?; - reports.push(report_path.clone()); + let report_key = format!("_consolidation-{}-{}", agent_type, ts); + store.upsert_provenance(&report_key, &response, + store::Provenance::AgentConsolidate).ok(); + reports.push(report_key.clone()); - let msg = format!(" Done: {} lines → {}", response.lines().count(), report_name); - log.write(&msg)?; + let msg = format!(" Done: {} lines → {}", response.lines().count(), report_key); + log_line(&mut log_buf, &msg); println!("{}", msg); } - log.write(&format!("\nAgents complete: {} run, {} errors", - agent_num - agent_errors, agent_errors))?; + log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors", + agent_num - agent_errors, agent_errors)); // --- Step 3: Apply consolidation actions --- - log.write("\n--- Step 3: Apply consolidation actions ---")?; + log_line(&mut log_buf, "\n--- Step 3: Apply consolidation actions ---"); println!("\n--- Applying consolidation actions ---"); *store = Store::load()?; if reports.is_empty() { - log.write(" No reports to apply.")?; + log_line(&mut log_buf, " No reports to apply."); } else { match apply_consolidation(store, true, None) { - Ok(()) => log.write(" Applied.")?, + Ok(()) => log_line(&mut log_buf, " Applied."), Err(e) => { let msg = format!(" ERROR applying consolidation: {}", e); - log.write(&msg)?; + log_line(&mut log_buf, &msg); eprintln!("{}", msg); } } } // --- Step 3b: Link orphans --- - log.write("\n--- Step 3b: Link orphans ---")?; + log_line(&mut log_buf, "\n--- Step 3b: Link orphans ---"); println!("\n--- Linking orphan nodes ---"); *store = Store::load()?; let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15); - log.write(&format!(" {} orphans, {} links added", lo_orphans, lo_added))?; + log_line(&mut log_buf, &format!(" {} orphans, {} links added", lo_orphans, lo_added)); // --- Step 3c: Cap degree --- - log.write("\n--- Step 3c: Cap degree ---")?; + log_line(&mut log_buf, "\n--- Step 3c: Cap degree ---"); println!("\n--- Capping node degree ---"); *store = Store::load()?; match store.cap_degree(50) { Ok((hubs, pruned)) => { store.save()?; - log.write(&format!(" {} hubs capped, {} edges pruned", hubs, pruned))?; + log_line(&mut log_buf, &format!(" {} hubs capped, {} edges pruned", hubs, pruned)); } - Err(e) => log.write(&format!(" ERROR: {}", e))?, + Err(e) => log_line(&mut log_buf, &format!(" ERROR: {}", e)), } // --- Step 4: Digest auto --- - log.write("\n--- Step 4: Digest auto ---")?; + log_line(&mut log_buf, "\n--- Step 4: Digest auto ---"); println!("\n--- Generating missing digests ---"); *store = Store::load()?; match digest::digest_auto(store) { - Ok(()) => log.write(" Digests done.")?, + Ok(()) => log_line(&mut log_buf, " Digests done."), Err(e) => { let msg = format!(" ERROR in digest auto: {}", e); - log.write(&msg)?; + log_line(&mut log_buf, &msg); eprintln!("{}", msg); } } // --- Step 5: Apply digest links --- - log.write("\n--- Step 5: Apply digest links ---")?; + log_line(&mut log_buf, "\n--- Step 5: Apply digest links ---"); println!("\n--- Applying digest links ---"); *store = Store::load()?; - let links = digest::parse_all_digest_links()?; + let links = digest::parse_all_digest_links(store); let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links); store.save()?; - log.write(&format!(" {} links applied, {} skipped, {} fallbacks", - applied, skipped, fallbacks))?; + log_line(&mut log_buf, &format!(" {} links applied, {} skipped, {} fallbacks", + applied, skipped, fallbacks)); // --- Step 6: Summary --- let elapsed = start.elapsed(); - log.write("\n--- Summary ---")?; - log.write(&format!("Finished: {}", store::format_datetime(store::now_epoch())))?; - log.write(&format!("Duration: {:.0}s", elapsed.as_secs_f64()))?; + log_line(&mut log_buf, "\n--- Summary ---"); + log_line(&mut log_buf, &format!("Finished: {}", store::format_datetime(store::now_epoch()))); + log_line(&mut log_buf, &format!("Duration: {:.0}s", elapsed.as_secs_f64())); *store = Store::load()?; - log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?; + log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len())); let summary = format!( "\n=== CONSOLIDATE FULL COMPLETE ===\n\ Duration: {:.0}s\n\ Agents: {} run, {} errors\n\ - Nodes: {} Relations: {}\n\ - Log: {}\n", + Nodes: {} Relations: {}\n", elapsed.as_secs_f64(), agent_num - agent_errors, agent_errors, store.nodes.len(), store.relations.len(), - log_path.display(), ); - log.write(&summary)?; + log_line(&mut log_buf, &summary); println!("{}", summary); + // Store the log as a node + store.upsert_provenance(&log_key, &log_buf, + store::Provenance::AgentConsolidate).ok(); + store.save()?; + Ok(()) } -/// Find the most recent set of consolidation reports. -fn find_consolidation_reports() -> Result, String> { - let dir = memory_subdir("agent-results")?; - let mut reports: Vec = fs::read_dir(&dir) - .map(|entries| { - entries.filter_map(|e| e.ok()) - .map(|e| e.path()) - .filter(|p| { - p.file_name() - .and_then(|n| n.to_str()) - .map(|n| n.starts_with("consolidation-") && n.ends_with(".md")) - .unwrap_or(false) - }) - .collect() - }) - .unwrap_or_default(); - reports.sort(); - reports.reverse(); +/// Find the most recent set of consolidation report keys from the store. +fn find_consolidation_reports(store: &Store) -> Vec { + let mut keys: Vec<&String> = store.nodes.keys() + .filter(|k| k.starts_with("_consolidation-")) + .collect(); + keys.sort(); + keys.reverse(); - if reports.is_empty() { return Ok(reports); } + if keys.is_empty() { return Vec::new(); } - // Group by timestamp (last segment of stem before .md) - let latest_ts = reports[0].file_stem() - .and_then(|s| s.to_str()) - .unwrap_or("") - .rsplit('-').next().unwrap_or("") - .to_string(); + // Group by timestamp (last segment after last '-') + let latest_ts = keys[0].rsplit('-').next().unwrap_or("").to_string(); - reports.retain(|r| { - r.file_stem() - .and_then(|s| s.to_str()) - .unwrap_or("") - .ends_with(latest_ts.as_str()) - }); - - Ok(reports) + keys.into_iter() + .filter(|k| k.ends_with(&latest_ts)) + .cloned() + .collect() } -fn build_consolidation_prompt(reports: &[PathBuf]) -> Result { +fn build_consolidation_prompt(store: &Store, report_keys: &[String]) -> Result { let mut report_text = String::new(); - for r in reports { - let content = fs::read_to_string(r) - .map_err(|e| format!("read {}: {}", r.display(), e))?; + for key in report_keys { + let content = store.nodes.get(key) + .map(|n| n.content.as_str()) + .unwrap_or(""); report_text.push_str(&format!("\n{}\n## Report: {}\n\n{}\n", - "=".repeat(60), - r.file_stem().and_then(|s| s.to_str()).unwrap_or(""), - content)); + "=".repeat(60), key, content)); } neuro::load_prompt("consolidation", &[("{{REPORTS}}", &report_text)]) } /// Run the full apply-consolidation pipeline. -pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Option<&str>) -> Result<(), String> { - let reports = if let Some(path) = report_file { - vec![PathBuf::from(path)] +pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_key: Option<&str>) -> Result<(), String> { + let reports = if let Some(key) = report_key { + vec![key.to_string()] } else { - find_consolidation_reports()? + find_consolidation_reports(store) }; if reports.is_empty() { @@ -328,11 +291,11 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio println!("Found {} reports:", reports.len()); for r in &reports { - println!(" {}", r.file_name().and_then(|s| s.to_str()).unwrap_or("?")); + println!(" {}", r); } println!("\nExtracting actions from reports..."); - let prompt = build_consolidation_prompt(&reports)?; + let prompt = build_consolidation_prompt(store, &reports)?; println!(" Prompt: {} chars", prompt.len()); let response = call_sonnet(&prompt, 300)?; @@ -343,14 +306,14 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio println!(" {} actions extracted", actions.len()); - // Save actions + // Store actions in the store let timestamp = store::format_datetime(store::now_epoch()) .replace([':', '-'], ""); - let actions_path = memory_subdir("agent-results")? - .join(format!("consolidation-actions-{}.json", timestamp)); - fs::write(&actions_path, serde_json::to_string_pretty(&actions_value).unwrap()) - .map_err(|e| format!("write {}: {}", actions_path.display(), e))?; - println!(" Saved: {}", actions_path.display()); + let actions_key = format!("_consolidation-actions-{}", timestamp); + let actions_json = serde_json::to_string_pretty(&actions_value).unwrap(); + store.upsert_provenance(&actions_key, &actions_json, + store::Provenance::AgentConsolidate).ok(); + println!(" Stored: {}", actions_key); let link_actions: Vec<_> = actions.iter() .filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("link")) diff --git a/src/digest.rs b/src/digest.rs index 3706e47..eb27d1a 100644 --- a/src/digest.rs +++ b/src/digest.rs @@ -8,13 +8,10 @@ use crate::llm::{call_sonnet, semantic_keys}; use crate::store::{self, Store, new_relation}; use crate::neuro; -use crate::util::memory_subdir; use chrono::{Datelike, Duration, Local, NaiveDate}; use regex::Regex; use std::collections::BTreeSet; -use std::fs; -use std::path::{Path, PathBuf}; // --- Digest level descriptors --- @@ -113,19 +110,24 @@ const MONTHLY: DigestLevel = DigestLevel { const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY]; +/// Store key for a digest node: "daily-2026-03-04.md", "weekly-2026-W09.md", etc. +/// Matches the key format from the old import_file() path. +fn digest_node_key(level_name: &str, label: &str) -> String { + format!("{}-{}.md", level_name, label) +} + // --- Input gathering --- -/// Load child digest files from the episodic directory. -fn load_child_digests(prefix: &str, labels: &[String]) -> Result, String> { - let dir = memory_subdir("episodic")?; +/// Load child digest content from the store. +fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> Vec<(String, String)> { let mut digests = Vec::new(); for label in labels { - let path = dir.join(format!("{}-{}.md", prefix, label)); - if let Ok(content) = fs::read_to_string(&path) { - digests.push((label.clone(), content)); + let key = digest_node_key(prefix, label); + if let Some(node) = store.nodes.get(&key) { + digests.push((label.clone(), node.content.clone())); } } - Ok(digests) + digests } /// Unified: gather inputs for any digest level. @@ -142,7 +144,7 @@ fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<(String, Vec< .collect::>() .into_iter() .collect(); - load_child_digests(child_name, &child_labels)? + load_child_digests(store, child_name, &child_labels) } else { // Leaf level: scan store for journal entries matching label let date_re = Regex::new(&format!( @@ -227,14 +229,10 @@ fn generate_digest( println!(" Calling Sonnet..."); let digest = call_sonnet(&prompt, level.timeout)?; - let output_path = memory_subdir("episodic")? - .join(format!("{}-{}.md", level.name, label)); - fs::write(&output_path, &digest) - .map_err(|e| format!("write {}: {}", output_path.display(), e))?; - println!(" Written: {}", output_path.display()); - - store.import_file(&output_path)?; + let key = digest_node_key(level.name, label); + store.upsert_provenance(&key, &digest, store::Provenance::AgentDigest)?; store.save()?; + println!(" Stored: {}", key); println!(" Done: {} lines", digest.lines().count()); Ok(()) @@ -254,7 +252,6 @@ pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), St pub fn digest_auto(store: &mut Store) -> Result<(), String> { let today = Local::now().format("%Y-%m-%d").to_string(); - let epi = memory_subdir("episodic")?; // Collect all dates with journal entries let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}").unwrap(); @@ -277,7 +274,8 @@ pub fn digest_auto(store: &mut Store) -> Result<(), String> { for arg in &candidates { let (label, inputs) = gather(level, store, arg)?; - if epi.join(format!("{}-{}.md", level.name, label)).exists() { + let key = digest_node_key(level.name, &label); + if store.nodes.contains_key(&key) { skipped += 1; continue; } @@ -357,21 +355,8 @@ fn normalize_link_key(raw: &str) -> String { key } -/// Parse the Links section from a single digest file. -fn parse_digest_file_links(path: &Path) -> Vec { - let content = match fs::read_to_string(path) { - Ok(c) => c, - Err(_) => return Vec::new(), - }; - - let digest_name = path.file_stem() - .and_then(|s| s.to_str()) - .unwrap_or(""); - let digest_key = format!("{}.md", digest_name); - let filename = path.file_name() - .and_then(|s| s.to_str()) - .unwrap_or("") - .to_string(); +/// Parse the Links section from a digest node's content. +fn parse_digest_node_links(key: &str, content: &str) -> Vec { let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap(); let header_re = Regex::new(r"^##\s+Links").unwrap(); @@ -399,8 +384,8 @@ fn parse_digest_file_links(path: &Path) -> Vec { let mut target = normalize_link_key(raw_target); // Replace self-references with digest key - if source.is_empty() { source = digest_key.clone(); } - if target.is_empty() { target = digest_key.clone(); } + if source.is_empty() { source = key.to_string(); } + if target.is_empty() { target = key.to_string(); } // Handle "this daily/weekly/monthly" in raw text let raw_s_lower = raw_source.to_lowercase(); @@ -408,49 +393,39 @@ fn parse_digest_file_links(path: &Path) -> Vec { if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly") || raw_s_lower.contains("this monthly") { - source = digest_key.clone(); + source = key.to_string(); } if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly") || raw_t_lower.contains("this monthly") { - target = digest_key.clone(); + target = key.to_string(); } // Skip NEW: and self-links if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; } if source == target { continue; } - links.push(DigestLink { source, target, reason, file: filename.clone() }); + links.push(DigestLink { source, target, reason, file: key.to_string() }); } } links } -/// Parse links from all digest files in the episodic dir. -pub fn parse_all_digest_links() -> Result, String> { - let dir = memory_subdir("episodic")?; +/// Parse links from all digest nodes in the store. +pub fn parse_all_digest_links(store: &Store) -> Vec { let mut all_links = Vec::new(); - for pattern in &["daily-*.md", "weekly-*.md", "monthly-*.md"] { - if let Ok(entries) = fs::read_dir(&dir) { - let mut files: Vec = entries - .filter_map(|e| e.ok()) - .map(|e| e.path()) - .filter(|p| { - p.file_name() - .and_then(|n| n.to_str()) - .map(|n| { - let prefix = pattern.split('*').next().unwrap_or(""); - n.starts_with(prefix) && n.ends_with(".md") - }) - .unwrap_or(false) - }) - .collect(); - files.sort(); - for path in files { - all_links.extend(parse_digest_file_links(&path)); - } + let mut digest_keys: Vec<&String> = store.nodes.keys() + .filter(|k| k.starts_with("daily-") + || k.starts_with("weekly-") + || k.starts_with("monthly-")) + .collect(); + digest_keys.sort(); + + for key in digest_keys { + if let Some(node) = store.nodes.get(key) { + all_links.extend(parse_digest_node_links(key, &node.content)); } } @@ -458,7 +433,7 @@ pub fn parse_all_digest_links() -> Result, String> { let mut seen = std::collections::HashSet::new(); all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone()))); - Ok(all_links) + all_links } /// Apply parsed digest links to the store. diff --git a/src/enrich.rs b/src/enrich.rs index 02a50de..a91a440 100644 --- a/src/enrich.rs +++ b/src/enrich.rs @@ -13,10 +13,60 @@ use crate::store::{self, Store, new_node, new_relation}; use regex::Regex; use std::collections::hash_map::DefaultHasher; +use std::collections::HashSet; use std::fs; use std::hash::{Hash, Hasher}; -use crate::util::memory_subdir; +use crate::store::StoreView; + +/// Compute the store dedup key for a transcript file. +/// This is the same key experience_mine uses to mark a transcript as mined. +pub fn transcript_dedup_key(path: &str) -> Result { + let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?; + let mut hasher = DefaultHasher::new(); + bytes.hash(&mut hasher); + Ok(format!("_mined-transcripts.md#h-{:016x}", hasher.finish())) +} + +/// Check if a transcript has already been mined (dedup key exists in store). +pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool { + match transcript_dedup_key(path) { + Ok(key) => store.node_content(&key).is_some(), + Err(_) => false, + } +} + +/// Dedup key for a transcript based on its filename (UUID). +/// Used by the daemon reconcile loop — no file reads needed. +pub fn transcript_filename_key(path: &str) -> String { + let filename = std::path::Path::new(path) + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| path.to_string()); + format!("_mined-transcripts.md#f-{}", filename) +} + +/// Get the set of all mined transcript keys (both content-hash and filename) +/// from the store. Load once per daemon tick, check many. +pub fn mined_transcript_keys() -> HashSet { + use crate::store::AnyView; + let Ok(view) = AnyView::load() else { return HashSet::new() }; + let mut keys = HashSet::new(); + view.for_each_node(|key, _, _| { + if key.starts_with("_mined-transcripts.md#") { + keys.insert(key.to_string()); + } + }); + keys +} + +/// Check if a transcript has been mined, given a pre-loaded set of mined keys. +/// Checks filename-based key only (no file read). Sessions mined before the +/// filename key was added will pass through and short-circuit in experience_mine +/// via the content hash check — a one-time cost on first restart after this change. +pub fn is_transcript_mined_with_keys(mined: &HashSet, path: &str) -> bool { + mined.contains(&transcript_filename_key(path)) +} /// Extract user/assistant messages with line numbers from a JSONL transcript. /// (line_number, role, text, timestamp) @@ -187,21 +237,6 @@ pub fn journal_enrich( } } - // Save result to agent-results - let timestamp = store::format_datetime(store::now_epoch()) - .replace([':', '-'], ""); - let result_file = memory_subdir("agent-results")? - .join(format!("{}.json", timestamp)); - let output = serde_json::json!({ - "timestamp": timestamp, - "jsonl_path": jsonl_path, - "entry_text": &entry_text[..entry_text.len().min(500)], - "agent_result": result, - }); - fs::write(&result_file, serde_json::to_string_pretty(&output).unwrap()) - .map_err(|e| format!("write {}: {}", result_file.display(), e))?; - println!(" Results saved: {}", result_file.display()); - store.save()?; Ok(()) } @@ -320,6 +355,7 @@ pub fn experience_mine( let mut node = new_node(&key, &full_content); node.node_type = store::NodeType::EpisodicSession; node.category = store::Category::Observation; + node.provenance = store::Provenance::AgentExperienceMine; let _ = store.upsert_node(node); count += 1; @@ -328,11 +364,19 @@ pub fn experience_mine( } // Record this transcript as mined (even if count == 0, to prevent re-runs) + // Two keys: content hash (exact dedup) and filename (fast daemon reconcile) let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count); let mut dedup_node = new_node(&dedup_key, &dedup_content); dedup_node.category = store::Category::Task; + dedup_node.provenance = store::Provenance::AgentExperienceMine; let _ = store.upsert_node(dedup_node); + let fname_key = transcript_filename_key(jsonl_path); + let mut fname_node = new_node(&fname_key, &dedup_content); + fname_node.category = store::Category::Task; + fname_node.provenance = store::Provenance::AgentExperienceMine; + let _ = store.upsert_node(fname_node); + if count > 0 { println!(" Saved {} new journal entries.", count); } diff --git a/src/knowledge.rs b/src/knowledge.rs new file mode 100644 index 0000000..06aeb90 --- /dev/null +++ b/src/knowledge.rs @@ -0,0 +1,976 @@ +// knowledge.rs — knowledge production agents and convergence loop +// +// Rust port of knowledge_agents.py + knowledge_loop.py. +// Four agents mine the memory graph for new knowledge: +// 1. Observation — extract facts from raw conversations +// 2. Extractor — find patterns in node clusters +// 3. Connector — find cross-domain structural connections +// 4. Challenger — stress-test existing knowledge nodes +// +// The loop runs agents in sequence, applies results, measures +// convergence via graph-structural metrics (sigma, CC, communities). + +use crate::graph::Graph; +use crate::llm; +use crate::spectral; +use crate::store::{self, Store, new_relation, RelationType}; + +use regex::Regex; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use std::fs; +use std::path::{Path, PathBuf}; + +fn memory_dir() -> PathBuf { + store::memory_dir() +} + +fn prompts_dir() -> PathBuf { + let manifest = env!("CARGO_MANIFEST_DIR"); + PathBuf::from(manifest).join("prompts") +} + +fn projects_dir() -> PathBuf { + let home = std::env::var("HOME").unwrap_or_else(|_| ".".into()); + PathBuf::from(home).join(".claude/projects") +} + +// --------------------------------------------------------------------------- +// Action types +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Action { + pub kind: ActionKind, + pub confidence: Confidence, + pub weight: f64, + pub depth: i32, + pub applied: Option, + pub rejected_reason: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ActionKind { + WriteNode { + key: String, + content: String, + covers: Vec, + }, + Link { + source: String, + target: String, + }, + Refine { + key: String, + content: String, + }, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum Confidence { + High, + Medium, + Low, +} + +impl Confidence { + fn weight(self) -> f64 { + match self { + Self::High => 1.0, + Self::Medium => 0.6, + Self::Low => 0.3, + } + } + + fn value(self) -> f64 { + match self { + Self::High => 0.9, + Self::Medium => 0.6, + Self::Low => 0.3, + } + } + + fn parse(s: &str) -> Self { + match s.to_lowercase().as_str() { + "high" => Self::High, + "low" => Self::Low, + _ => Self::Medium, + } + } +} + +// --------------------------------------------------------------------------- +// Action parsing +// --------------------------------------------------------------------------- + +pub fn parse_write_nodes(text: &str) -> Vec { + let re = Regex::new(r"(?s)WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE").unwrap(); + let conf_re = Regex::new(r"(?i)CONFIDENCE:\s*(high|medium|low)").unwrap(); + let covers_re = Regex::new(r"COVERS:\s*(.+)").unwrap(); + + re.captures_iter(text) + .map(|cap| { + let key = cap[1].to_string(); + let mut content = cap[2].trim().to_string(); + + let confidence = conf_re + .captures(&content) + .map(|c| Confidence::parse(&c[1])) + .unwrap_or(Confidence::Medium); + content = conf_re.replace(&content, "").trim().to_string(); + + let covers: Vec = covers_re + .captures(&content) + .map(|c| c[1].split(',').map(|s| s.trim().to_string()).collect()) + .unwrap_or_default(); + content = covers_re.replace(&content, "").trim().to_string(); + + Action { + weight: confidence.weight(), + kind: ActionKind::WriteNode { key, content, covers }, + confidence, + depth: 0, + applied: None, + rejected_reason: None, + } + }) + .collect() +} + +pub fn parse_links(text: &str) -> Vec { + let re = Regex::new(r"(?m)^LINK\s+(\S+)\s+(\S+)").unwrap(); + re.captures_iter(text) + .map(|cap| Action { + kind: ActionKind::Link { + source: cap[1].to_string(), + target: cap[2].to_string(), + }, + confidence: Confidence::Low, + weight: 0.3, + depth: -1, + applied: None, + rejected_reason: None, + }) + .collect() +} + +pub fn parse_refines(text: &str) -> Vec { + let re = Regex::new(r"(?s)REFINE\s+(\S+)\s*\n(.*?)END_REFINE").unwrap(); + re.captures_iter(text) + .map(|cap| { + let key = cap[1].trim_matches('*').trim().to_string(); + Action { + kind: ActionKind::Refine { + key, + content: cap[2].trim().to_string(), + }, + confidence: Confidence::Medium, + weight: 0.7, + depth: 0, + applied: None, + rejected_reason: None, + } + }) + .collect() +} + +pub fn parse_all_actions(text: &str) -> Vec { + let mut actions = parse_write_nodes(text); + actions.extend(parse_links(text)); + actions.extend(parse_refines(text)); + actions +} + +pub fn count_no_ops(text: &str) -> usize { + let no_conn = Regex::new(r"\bNO_CONNECTION\b").unwrap().find_iter(text).count(); + let affirm = Regex::new(r"\bAFFIRM\b").unwrap().find_iter(text).count(); + let no_extract = Regex::new(r"\bNO_EXTRACTION\b").unwrap().find_iter(text).count(); + no_conn + affirm + no_extract +} + +// --------------------------------------------------------------------------- +// Inference depth tracking +// --------------------------------------------------------------------------- + +const DEPTH_DB_KEY: &str = "_knowledge-depths"; + +#[derive(Default)] +pub struct DepthDb { + depths: HashMap, +} + +impl DepthDb { + pub fn load(store: &Store) -> Self { + let depths = store.nodes.get(DEPTH_DB_KEY) + .and_then(|n| serde_json::from_str(&n.content).ok()) + .unwrap_or_default(); + Self { depths } + } + + pub fn save(&self, store: &mut Store) { + if let Ok(json) = serde_json::to_string(&self.depths) { + store.upsert_provenance(DEPTH_DB_KEY, &json, + store::Provenance::AgentKnowledgeObservation).ok(); + } + } + + pub fn get(&self, key: &str) -> i32 { + self.depths.get(key).copied().unwrap_or(0) + } + + pub fn set(&mut self, key: String, depth: i32) { + self.depths.insert(key, depth); + } +} + +/// Agent base depths: observation=1, extractor=2, connector=3 +fn agent_base_depth(agent: &str) -> Option { + match agent { + "observation" => Some(1), + "extractor" => Some(2), + "connector" => Some(3), + "challenger" => None, + _ => Some(2), + } +} + +pub fn compute_action_depth(db: &DepthDb, action: &Action, agent: &str) -> i32 { + match &action.kind { + ActionKind::Link { .. } => -1, + ActionKind::Refine { key, .. } => db.get(key), + ActionKind::WriteNode { covers, .. } => { + if !covers.is_empty() { + covers.iter().map(|k| db.get(k)).max().unwrap_or(0) + 1 + } else { + agent_base_depth(agent).unwrap_or(2) + } + } + } +} + +/// Confidence threshold that scales with inference depth. +pub fn required_confidence(depth: i32, base: f64) -> f64 { + if depth <= 0 { + return 0.0; + } + 1.0 - (1.0 - base).powi(depth) +} + +/// Confidence bonus from real-world use. +pub fn use_bonus(use_count: u32) -> f64 { + if use_count == 0 { + return 0.0; + } + 1.0 - 1.0 / (1.0 + 0.15 * use_count as f64) +} + +// --------------------------------------------------------------------------- +// Action application +// --------------------------------------------------------------------------- + +fn stamp_content(content: &str, agent: &str, timestamp: &str, depth: i32) -> String { + format!("\n{}", agent, timestamp, depth, content) +} + +/// Check if a link already exists between two keys. +fn has_edge(store: &Store, source: &str, target: &str) -> bool { + store.relations.iter().any(|r| { + !r.deleted + && ((r.source_key == source && r.target_key == target) + || (r.source_key == target && r.target_key == source)) + }) +} + +pub fn apply_action( + store: &mut Store, + action: &Action, + agent: &str, + timestamp: &str, + depth: i32, +) -> bool { + let provenance = agent_provenance(agent); + + match &action.kind { + ActionKind::WriteNode { key, content, .. } => { + let stamped = stamp_content(content, agent, timestamp, depth); + store.upsert_provenance(key, &stamped, provenance).is_ok() + } + ActionKind::Link { source, target } => { + if has_edge(store, source, target) { + return false; + } + let source_uuid = match store.nodes.get(source.as_str()) { + Some(n) => n.uuid, + None => return false, + }; + let target_uuid = match store.nodes.get(target.as_str()) { + Some(n) => n.uuid, + None => return false, + }; + let mut rel = new_relation( + source_uuid, target_uuid, + RelationType::Link, + 0.3, + source, target, + ); + rel.provenance = provenance; + store.add_relation(rel).is_ok() + } + ActionKind::Refine { key, content } => { + let stamped = stamp_content(content, agent, timestamp, depth); + store.upsert_provenance(key, &stamped, provenance).is_ok() + } + } +} + +fn agent_provenance(agent: &str) -> store::Provenance { + match agent { + "observation" => store::Provenance::AgentKnowledgeObservation, + "extractor" | "pattern" => store::Provenance::AgentKnowledgePattern, + "connector" => store::Provenance::AgentKnowledgeConnector, + "challenger" => store::Provenance::AgentKnowledgeChallenger, + _ => store::Provenance::Agent, + } +} + +// --------------------------------------------------------------------------- +// Agent runners +// --------------------------------------------------------------------------- + +fn load_prompt(name: &str) -> Result { + let path = prompts_dir().join(format!("{}.md", name)); + fs::read_to_string(&path).map_err(|e| format!("load prompt {}: {}", name, e)) +} + +fn get_graph_topology(store: &Store, graph: &Graph) -> String { + format!("Nodes: {} Relations: {}\n", store.nodes.len(), graph.edge_count()) +} + +/// Strip blocks from text +fn strip_system_tags(text: &str) -> String { + let re = Regex::new(r"(?s).*?").unwrap(); + re.replace_all(text, "").trim().to_string() +} + +/// Extract human-readable dialogue from a conversation JSONL +fn extract_conversation_text(path: &Path, max_chars: usize) -> String { + let Ok(content) = fs::read_to_string(path) else { return String::new() }; + let mut fragments = Vec::new(); + let mut total = 0; + + for line in content.lines() { + let Ok(obj) = serde_json::from_str::(line) else { continue }; + + let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or(""); + + if msg_type == "user" && obj.get("userType").and_then(|v| v.as_str()) == Some("external") { + if let Some(text) = extract_text_content(&obj) { + let text = strip_system_tags(&text); + if text.starts_with("[Request interrupted") { continue; } + if text.len() > 5 { + fragments.push(format!("**Kent:** {}", text)); + total += text.len(); + } + } + } else if msg_type == "assistant" { + if let Some(text) = extract_text_content(&obj) { + let text = strip_system_tags(&text); + if text.len() > 10 { + fragments.push(format!("**PoC:** {}", text)); + total += text.len(); + } + } + } + + if total > max_chars { break; } + } + fragments.join("\n\n") +} + +fn extract_text_content(obj: &serde_json::Value) -> Option { + let msg = obj.get("message")?; + let content = msg.get("content")?; + if let Some(s) = content.as_str() { + return Some(s.to_string()); + } + if let Some(arr) = content.as_array() { + let texts: Vec<&str> = arr.iter() + .filter_map(|b| { + if b.get("type")?.as_str()? == "text" { + b.get("text")?.as_str() + } else { + None + } + }) + .collect(); + if !texts.is_empty() { + return Some(texts.join("\n")); + } + } + None +} + +/// Count short user messages (dialogue turns) in a JSONL +fn count_dialogue_turns(path: &Path) -> usize { + let Ok(content) = fs::read_to_string(path) else { return 0 }; + content.lines() + .filter_map(|line| serde_json::from_str::(line).ok()) + .filter(|obj| { + obj.get("type").and_then(|v| v.as_str()) == Some("user") + && obj.get("userType").and_then(|v| v.as_str()) == Some("external") + }) + .filter(|obj| { + let text = extract_text_content(obj).unwrap_or_default(); + text.len() > 5 && text.len() < 500 + && !text.starts_with("[Request interrupted") + && !text.starts_with("Implement the following") + }) + .count() +} + +/// Select conversation fragments for the observation extractor +fn select_conversation_fragments(n: usize) -> Vec<(String, String)> { + let projects = projects_dir(); + if !projects.exists() { return Vec::new(); } + + let mut jsonl_files: Vec = Vec::new(); + if let Ok(dirs) = fs::read_dir(&projects) { + for dir in dirs.filter_map(|e| e.ok()) { + if !dir.path().is_dir() { continue; } + if let Ok(files) = fs::read_dir(dir.path()) { + for f in files.filter_map(|e| e.ok()) { + let p = f.path(); + if p.extension().map(|x| x == "jsonl").unwrap_or(false) { + if let Ok(meta) = p.metadata() { + if meta.len() > 50_000 { + jsonl_files.push(p); + } + } + } + } + } + } + } + + let mut scored: Vec<(usize, PathBuf)> = jsonl_files.into_iter() + .map(|f| (count_dialogue_turns(&f), f)) + .filter(|(turns, _)| *turns >= 10) + .collect(); + scored.sort_by(|a, b| b.0.cmp(&a.0)); + + let mut fragments = Vec::new(); + for (_, f) in scored.iter().take(n * 2) { + let session_id = f.file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| "unknown".into()); + let text = extract_conversation_text(f, 8000); + if text.len() > 500 { + fragments.push((session_id, text)); + } + if fragments.len() >= n { break; } + } + fragments +} + +pub fn run_observation_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result { + let template = load_prompt("observation-extractor")?; + let topology = get_graph_topology(store, graph); + let fragments = select_conversation_fragments(batch_size); + + let mut results = Vec::new(); + for (i, (session_id, text)) in fragments.iter().enumerate() { + eprintln!(" Observation extractor {}/{}: session {}... ({} chars)", + i + 1, fragments.len(), &session_id[..session_id.len().min(12)], text.len()); + + let prompt = template + .replace("{{TOPOLOGY}}", &topology) + .replace("{{CONVERSATIONS}}", &format!("### Session {}\n\n{}", session_id, text)); + + let response = llm::call_sonnet(&prompt, 600)?; + results.push(format!("## Session: {}\n\n{}", session_id, response)); + } + Ok(results.join("\n\n---\n\n")) +} + +/// Load spectral embedding from disk +fn load_spectral_embedding() -> HashMap> { + spectral::load_embedding() + .map(|emb| emb.coords) + .unwrap_or_default() +} + +fn spectral_distance(embedding: &HashMap>, a: &str, b: &str) -> f64 { + let (Some(va), Some(vb)) = (embedding.get(a), embedding.get(b)) else { + return f64::INFINITY; + }; + let dot: f64 = va.iter().zip(vb.iter()).map(|(a, b)| a * b).sum(); + let norm_a: f64 = va.iter().map(|x| x * x).sum::().sqrt(); + let norm_b: f64 = vb.iter().map(|x| x * x).sum::().sqrt(); + if norm_a == 0.0 || norm_b == 0.0 { + return f64::INFINITY; + } + 1.0 - dot / (norm_a * norm_b) +} + +fn select_extractor_clusters(_store: &Store, n: usize) -> Vec> { + let embedding = load_spectral_embedding(); + let skip = ["journal.md", "MEMORY.md", "where-am-i.md", "work-queue.md"]; + + let semantic_keys: Vec<&String> = embedding.keys() + .filter(|k| !k.starts_with("journal.md#") && !skip.contains(&k.as_str())) + .collect(); + + let cluster_size = 5; + let mut used = HashSet::new(); + let mut clusters = Vec::new(); + + for _ in 0..n { + let available: Vec<&&String> = semantic_keys.iter() + .filter(|k| !used.contains(**k)) + .collect(); + if available.len() < cluster_size { break; } + + let seed = available[0]; + let mut distances: Vec<(f64, &String)> = available.iter() + .filter(|k| ***k != *seed) + .map(|k| (spectral_distance(&embedding, seed, k), **k)) + .filter(|(d, _)| d.is_finite()) + .collect(); + distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); + + let cluster: Vec = std::iter::once((*seed).clone()) + .chain(distances.iter().take(cluster_size - 1).map(|(_, k)| (*k).clone())) + .collect(); + for k in &cluster { used.insert(k.clone()); } + clusters.push(cluster); + } + clusters +} + +pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result { + let template = load_prompt("extractor")?; + let topology = get_graph_topology(store, graph); + let clusters = select_extractor_clusters(store, batch_size); + + let mut results = Vec::new(); + for (i, cluster) in clusters.iter().enumerate() { + eprintln!(" Extractor cluster {}/{}: {} nodes", i + 1, clusters.len(), cluster.len()); + + let node_texts: Vec = cluster.iter() + .filter_map(|key| { + let content = store.nodes.get(key)?.content.as_str(); + Some(format!("### {}\n{}", key, content)) + }) + .collect(); + if node_texts.is_empty() { continue; } + + let prompt = template + .replace("{{TOPOLOGY}}", &topology) + .replace("{{NODES}}", &node_texts.join("\n\n")); + + let response = llm::call_sonnet(&prompt, 600)?; + results.push(format!("## Cluster {}: {}...\n\n{}", i + 1, + cluster.iter().take(3).cloned().collect::>().join(", "), response)); + } + Ok(results.join("\n\n---\n\n")) +} + +fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec, Vec)> { + let embedding = load_spectral_embedding(); + let skip_prefixes = ["journal.md#", "daily-", "weekly-", "monthly-", "all-sessions"]; + let skip_exact: HashSet<&str> = ["journal.md", "MEMORY.md", "where-am-i.md", + "work-queue.md", "work-state"].iter().copied().collect(); + + let semantic_keys: Vec<&String> = embedding.keys() + .filter(|k| { + !skip_exact.contains(k.as_str()) + && !skip_prefixes.iter().any(|p| k.starts_with(p)) + }) + .collect(); + + let mut pairs = Vec::new(); + let mut used = HashSet::new(); + + for seed in semantic_keys.iter().take(n * 10) { + if used.contains(*seed) { continue; } + + let mut near: Vec<(f64, &String)> = semantic_keys.iter() + .filter(|k| ***k != **seed && !used.contains(**k)) + .map(|k| (spectral_distance(&embedding, seed, k), *k)) + .filter(|(d, _)| *d < 0.5 && d.is_finite()) + .collect(); + near.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); + + for (_, target) in near.iter().take(5) { + if !has_edge(store, seed, target) { + let _ = graph; // graph available for future use + used.insert((*seed).clone()); + used.insert((*target).clone()); + pairs.push((vec![(*seed).clone()], vec![(*target).clone()])); + break; + } + } + if pairs.len() >= n { break; } + } + pairs +} + +pub fn run_connector(store: &Store, graph: &Graph, batch_size: usize) -> Result { + let template = load_prompt("connector")?; + let topology = get_graph_topology(store, graph); + let pairs = select_connector_pairs(store, graph, batch_size); + + let mut results = Vec::new(); + for (i, (group_a, group_b)) in pairs.iter().enumerate() { + eprintln!(" Connector pair {}/{}", i + 1, pairs.len()); + + let nodes_a: Vec = group_a.iter() + .filter_map(|k| { + let c = store.nodes.get(k)?.content.as_str(); + Some(format!("### {}\n{}", k, c)) + }) + .collect(); + let nodes_b: Vec = group_b.iter() + .filter_map(|k| { + let c = store.nodes.get(k)?.content.as_str(); + Some(format!("### {}\n{}", k, c)) + }) + .collect(); + + let prompt = template + .replace("{{TOPOLOGY}}", &topology) + .replace("{{NODES_A}}", &nodes_a.join("\n\n")) + .replace("{{NODES_B}}", &nodes_b.join("\n\n")); + + let response = llm::call_sonnet(&prompt, 600)?; + results.push(format!("## Pair {}: {} ↔ {}\n\n{}", + i + 1, group_a.join(", "), group_b.join(", "), response)); + } + Ok(results.join("\n\n---\n\n")) +} + +pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result { + let template = load_prompt("challenger")?; + let topology = get_graph_topology(store, graph); + + let mut candidates: Vec<(&String, usize)> = store.nodes.iter() + .filter(|(k, _)| { + !k.starts_with("journal.md#") + && !["journal.md", "MEMORY.md", "where-am-i.md"].contains(&k.as_str()) + }) + .map(|(k, _)| (k, graph.degree(k))) + .collect(); + candidates.sort_by(|a, b| b.1.cmp(&a.1)); + + let mut results = Vec::new(); + for (i, (key, _)) in candidates.iter().take(batch_size).enumerate() { + eprintln!(" Challenger {}/{}: {}", i + 1, batch_size.min(candidates.len()), key); + + let content = match store.nodes.get(key.as_str()) { + Some(n) => &n.content, + None => continue, + }; + + let prompt = template + .replace("{{TOPOLOGY}}", &topology) + .replace("{{NODE_KEY}}", key) + .replace("{{NODE_CONTENT}}", content); + + let response = llm::call_sonnet(&prompt, 600)?; + results.push(format!("## Challenge: {}\n\n{}", key, response)); + } + Ok(results.join("\n\n---\n\n")) +} + +// --------------------------------------------------------------------------- +// Convergence metrics +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CycleResult { + pub cycle: usize, + pub timestamp: String, + pub total_actions: usize, + pub total_applied: usize, + pub total_no_ops: usize, + pub depth_rejected: usize, + pub weighted_delta: f64, + pub graph_metrics_before: GraphMetrics, + pub graph_metrics_after: GraphMetrics, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct GraphMetrics { + pub nodes: usize, + pub edges: usize, + pub cc: f64, + pub sigma: f64, + pub communities: usize, +} + +impl GraphMetrics { + pub fn from_graph(store: &Store, graph: &Graph) -> Self { + Self { + nodes: store.nodes.len(), + edges: graph.edge_count(), + cc: graph.avg_clustering_coefficient() as f64, + sigma: graph.small_world_sigma() as f64, + communities: graph.community_count(), + } + } +} + +fn metric_stability(history: &[CycleResult], key: &str, window: usize) -> f64 { + if history.len() < window { return f64::INFINITY; } + + let values: Vec = history[history.len() - window..].iter() + .map(|h| match key { + "sigma" => h.graph_metrics_after.sigma, + "cc" => h.graph_metrics_after.cc, + "communities" => h.graph_metrics_after.communities as f64, + _ => 0.0, + }) + .collect(); + + if values.len() < 2 { return f64::INFINITY; } + let mean = values.iter().sum::() / values.len() as f64; + if mean == 0.0 { return 0.0; } + let variance = values.iter().map(|v| (v - mean).powi(2)).sum::() / values.len() as f64; + variance.sqrt() / mean.abs() +} + +pub fn check_convergence(history: &[CycleResult], window: usize) -> bool { + if history.len() < window { return false; } + + let sigma_cv = metric_stability(history, "sigma", window); + let cc_cv = metric_stability(history, "cc", window); + let comm_cv = metric_stability(history, "communities", window); + + let recent = &history[history.len() - window..]; + let avg_delta = recent.iter().map(|r| r.weighted_delta).sum::() / recent.len() as f64; + + eprintln!("\n Convergence check (last {} cycles):", window); + eprintln!(" sigma CV: {:.4} (< 0.05?)", sigma_cv); + eprintln!(" CC CV: {:.4} (< 0.05?)", cc_cv); + eprintln!(" community CV: {:.4} (< 0.10?)", comm_cv); + eprintln!(" avg delta: {:.2} (< 1.00?)", avg_delta); + + let structural = sigma_cv < 0.05 && cc_cv < 0.05 && comm_cv < 0.10; + let behavioral = avg_delta < 1.0; + + if structural && behavioral { + eprintln!(" → CONVERGED"); + true + } else { + false + } +} + +// --------------------------------------------------------------------------- +// The knowledge loop +// --------------------------------------------------------------------------- + +pub struct KnowledgeLoopConfig { + pub max_cycles: usize, + pub batch_size: usize, + pub window: usize, + pub max_depth: i32, + pub confidence_base: f64, +} + +impl Default for KnowledgeLoopConfig { + fn default() -> Self { + Self { + max_cycles: 20, + batch_size: 5, + window: 5, + max_depth: 4, + confidence_base: 0.3, + } + } +} + +pub fn run_knowledge_loop(config: &KnowledgeLoopConfig) -> Result, String> { + let mut store = Store::load()?; + let mut depth_db = DepthDb::load(&store); + let mut history = Vec::new(); + + eprintln!("Knowledge Loop — fixed-point iteration"); + eprintln!(" max_cycles={} batch_size={}", config.max_cycles, config.batch_size); + eprintln!(" window={} max_depth={}", config.window, config.max_depth); + + for cycle in 1..=config.max_cycles { + let result = run_cycle(cycle, config, &mut depth_db)?; + history.push(result); + + if check_convergence(&history, config.window) { + eprintln!("\n CONVERGED after {} cycles", cycle); + break; + } + } + + // Save loop summary as a store node + if let Some(first) = history.first() { + let key = format!("_knowledge-loop-{}", first.timestamp); + if let Ok(json) = serde_json::to_string_pretty(&history) { + store = Store::load()?; + store.upsert_provenance(&key, &json, + store::Provenance::AgentKnowledgeObservation).ok(); + depth_db.save(&mut store); + store.save()?; + } + } + + Ok(history) +} + +fn run_cycle( + cycle_num: usize, + config: &KnowledgeLoopConfig, + depth_db: &mut DepthDb, +) -> Result { + let timestamp = chrono::Local::now().format("%Y%m%dT%H%M%S").to_string(); + eprintln!("\n{}", "=".repeat(60)); + eprintln!("CYCLE {} — {}", cycle_num, timestamp); + eprintln!("{}", "=".repeat(60)); + + let mut store = Store::load()?; + let graph = store.build_graph(); + let metrics_before = GraphMetrics::from_graph(&store, &graph); + eprintln!(" Before: nodes={} edges={} cc={:.3} sigma={:.3}", + metrics_before.nodes, metrics_before.edges, metrics_before.cc, metrics_before.sigma); + + let mut all_actions = Vec::new(); + let mut all_no_ops = 0; + let mut depth_rejected = 0; + let mut total_applied = 0; + + // Run each agent, rebuilding graph after mutations + let agent_names = ["observation", "extractor", "connector", "challenger"]; + + for agent_name in &agent_names { + eprintln!("\n --- {} (n={}) ---", agent_name, config.batch_size); + + // Rebuild graph to reflect any mutations from previous agents + let graph = store.build_graph(); + + let output = match *agent_name { + "observation" => run_observation_extractor(&store, &graph, config.batch_size), + "extractor" => run_extractor(&store, &graph, config.batch_size), + "connector" => run_connector(&store, &graph, config.batch_size), + "challenger" => run_challenger(&store, &graph, config.batch_size), + _ => unreachable!(), + }; + + let output = match output { + Ok(o) => o, + Err(e) => { + eprintln!(" ERROR: {}", e); + continue; + } + }; + + // Store raw output as a node (for debugging/audit) + let raw_key = format!("_knowledge-{}-{}", agent_name, timestamp); + let raw_content = format!("# {} Agent Results — {}\n\n{}", agent_name, timestamp, output); + store.upsert_provenance(&raw_key, &raw_content, + agent_provenance(agent_name)).ok(); + + let mut actions = parse_all_actions(&output); + let no_ops = count_no_ops(&output); + all_no_ops += no_ops; + + eprintln!(" Actions: {} No-ops: {}", actions.len(), no_ops); + + let mut applied = 0; + for action in &mut actions { + let depth = compute_action_depth(depth_db, action, agent_name); + action.depth = depth; + + match &action.kind { + ActionKind::WriteNode { key, covers, .. } => { + let conf_val = action.confidence.value(); + let req = required_confidence(depth, config.confidence_base); + + let source_uses: Vec = covers.iter() + .filter_map(|k| store.nodes.get(k).map(|n| n.uses)) + .collect(); + let avg_uses = if source_uses.is_empty() { 0 } + else { source_uses.iter().sum::() / source_uses.len() as u32 }; + let eff_conf = (conf_val + use_bonus(avg_uses)).min(1.0); + + if eff_conf < req { + action.applied = Some(false); + action.rejected_reason = Some("depth_threshold".into()); + depth_rejected += 1; + continue; + } + if depth > config.max_depth { + action.applied = Some(false); + action.rejected_reason = Some("max_depth".into()); + depth_rejected += 1; + continue; + } + eprintln!(" WRITE {} depth={} conf={:.2} eff={:.2} req={:.2}", + key, depth, conf_val, eff_conf, req); + } + ActionKind::Link { source, target } => { + eprintln!(" LINK {} → {}", source, target); + } + ActionKind::Refine { key, .. } => { + eprintln!(" REFINE {} depth={}", key, depth); + } + } + + if apply_action(&mut store, action, agent_name, ×tamp, depth) { + applied += 1; + action.applied = Some(true); + if let ActionKind::WriteNode { key, .. } | ActionKind::Refine { key, .. } = &action.kind { + depth_db.set(key.clone(), depth); + } + } else { + action.applied = Some(false); + } + } + + eprintln!(" Applied: {}/{}", applied, actions.len()); + total_applied += applied; + all_actions.extend(actions); + } + + depth_db.save(&mut store); + + // Recompute spectral if anything changed + if total_applied > 0 { + eprintln!("\n Recomputing spectral embedding..."); + let graph = store.build_graph(); + let result = spectral::decompose(&graph, 8); + let emb = spectral::to_embedding(&result); + spectral::save_embedding(&emb).ok(); + } + + let graph = store.build_graph(); + let metrics_after = GraphMetrics::from_graph(&store, &graph); + let weighted_delta: f64 = all_actions.iter() + .filter(|a| a.applied == Some(true)) + .map(|a| a.weight) + .sum(); + + eprintln!("\n CYCLE {} SUMMARY", cycle_num); + eprintln!(" Applied: {}/{} depth-rejected: {} no-ops: {}", + total_applied, all_actions.len(), depth_rejected, all_no_ops); + eprintln!(" Weighted delta: {:.2}", weighted_delta); + + Ok(CycleResult { + cycle: cycle_num, + timestamp, + total_actions: all_actions.len(), + total_applied, + total_no_ops: all_no_ops, + depth_rejected, + weighted_delta, + graph_metrics_before: metrics_before, + graph_metrics_after: metrics_after, + }) +} diff --git a/src/llm.rs b/src/llm.rs index f6581ab..e755d0a 100644 --- a/src/llm.rs +++ b/src/llm.rs @@ -1,6 +1,6 @@ -// LLM utilities: Sonnet invocation and response parsing +// LLM utilities: model invocation and response parsing // -// Shared by digest, audit, enrich, and consolidate modules. +// Shared by digest, audit, enrich, consolidate, knowledge, and fact_mine. use crate::store::Store; @@ -8,8 +8,8 @@ use regex::Regex; use std::fs; use std::process::Command; -/// Call Sonnet via claude CLI. Returns the response text. -pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result { +/// Call a model via claude CLI. Returns the response text. +fn call_model(model: &str, prompt: &str) -> Result { // Write prompt to temp file (claude CLI needs file input for large prompts) // Use thread ID + PID to avoid collisions under parallel rayon calls let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt", @@ -18,7 +18,7 @@ pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result Result Result { + call_model("sonnet", prompt) +} + +/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction). +pub(crate) fn call_haiku(prompt: &str) -> Result { + call_model("haiku", prompt) +} + +/// Parse a JSON response, handling markdown fences. pub(crate) fn parse_json_response(response: &str) -> Result { let cleaned = response.trim(); let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned); diff --git a/src/main.rs b/src/main.rs index 667c5c1..6a1abfb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,6 +29,8 @@ mod query; mod spectral; mod lookups; mod daemon; +mod fact_mine; +mod knowledge; pub mod memory_capnp { include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs")); @@ -132,6 +134,9 @@ fn main() { "lookup-bump" => cmd_lookup_bump(&args[2..]), "lookups" => cmd_lookups(&args[2..]), "daemon" => cmd_daemon(&args[2..]), + "knowledge-loop" => cmd_knowledge_loop(&args[2..]), + "fact-mine" => cmd_fact_mine(&args[2..]), + "fact-mine-store" => cmd_fact_mine_store(&args[2..]), _ => { eprintln!("Unknown command: {}", args[1]); usage(); @@ -216,7 +221,14 @@ Commands: lookups [DATE] Show daily lookup counts (default: today) daemon Start background job daemon daemon status Show daemon status - daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)"); + daemon log [JOB] [N] Show last N log lines (default 50, optional job filter) + knowledge-loop [OPTIONS] Run knowledge agents to convergence + --max-cycles N (default 20) + --batch-size N (default 5) + --window N (default 5) + --max-depth N (default 4) + fact-mine JSONL [OPTIONS] Extract atomic facts from conversation transcripts + fact-mine --batch DIR Mine all .jsonl files in directory"); } fn cmd_search(args: &[String]) -> Result<(), String> { @@ -850,8 +862,10 @@ fn cmd_digest(args: &[String]) -> Result<(), String> { fn cmd_digest_links(args: &[String]) -> Result<(), String> { let do_apply = args.iter().any(|a| a == "--apply"); - let links = digest::parse_all_digest_links()?; - println!("Found {} unique links from digest files", links.len()); + let store = store::Store::load()?; + let links = digest::parse_all_digest_links(&store); + drop(store); + println!("Found {} unique links from digest nodes", links.len()); if !do_apply { for (i, link) in links.iter().enumerate() { @@ -1821,10 +1835,19 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> { match args[0].as_str() { "status" => daemon::show_status(), "log" => { - let job = args.get(1).map(|s| s.as_str()); - let lines = args.get(2) - .and_then(|s| s.parse().ok()) - .unwrap_or(50); + // daemon log [N] — last N lines (default 20) + // daemon log JOB [N] — last N lines for job + let (job, lines) = match args.get(1) { + None => (None, 20), + Some(s) => { + if let Ok(n) = s.parse::() { + (None, n) + } else { + let n = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(20); + (Some(s.as_str()), n) + } + } + }; daemon::show_log(job, lines) } _ => { @@ -1833,3 +1856,125 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> { } } } + +fn cmd_knowledge_loop(args: &[String]) -> Result<(), String> { + if args.iter().any(|a| a == "--help" || a == "-h") { + eprintln!("Usage: poc-memory knowledge-loop [OPTIONS] + +Run knowledge agents (observation, extractor, connector, challenger) in +a convergence loop. Each cycle runs all agents, applies actions to the +graph, and checks structural stability metrics. + +Options: + --max-cycles N Maximum cycles before stopping (default: 20) + --batch-size N Items per agent per cycle (default: 5) + --window N Cycles to check for convergence (default: 5) + --max-depth N Maximum inference depth (default: 4)"); + return Ok(()); + } + + let mut config = knowledge::KnowledgeLoopConfig::default(); + + let mut i = 0; + while i < args.len() { + match args[i].as_str() { + "--max-cycles" => { i += 1; config.max_cycles = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.max_cycles); } + "--batch-size" => { i += 1; config.batch_size = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.batch_size); } + "--window" => { i += 1; config.window = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.window); } + "--max-depth" => { i += 1; config.max_depth = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.max_depth); } + other => return Err(format!("Unknown arg: {}. Use --help for usage.", other)), + } + i += 1; + } + + let results = knowledge::run_knowledge_loop(&config)?; + eprintln!("\nCompleted {} cycles, {} total actions applied", + results.len(), + results.iter().map(|r| r.total_applied).sum::()); + Ok(()) +} + +fn cmd_fact_mine(args: &[String]) -> Result<(), String> { + if args.is_empty() || args.iter().any(|a| a == "--help" || a == "-h") { + eprintln!("Usage: poc-memory fact-mine [OPTIONS] + poc-memory fact-mine --batch [OPTIONS] + +Extract atomic factual claims from conversation transcripts using Haiku. + +Options: + --batch Process all .jsonl files in directory + --dry-run Show chunks without calling model + --output FILE Write JSON to file (default: stdout) + --min-messages N Skip transcripts with fewer messages (default: 10)"); + return Ok(()); + } + + let mut batch = false; + let mut dry_run = false; + let mut output_file: Option = None; + let mut min_messages = 10usize; + let mut path: Option = None; + + let mut i = 0; + while i < args.len() { + match args[i].as_str() { + "--batch" => batch = true, + "--dry-run" => dry_run = true, + "--output" | "-o" => { i += 1; output_file = args.get(i).cloned(); } + "--min-messages" => { i += 1; min_messages = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(min_messages); } + s if !s.starts_with('-') => path = Some(s.to_string()), + other => return Err(format!("Unknown arg: {}", other)), + } + i += 1; + } + + let path = path.ok_or("Missing path argument")?; + let p = std::path::Path::new(&path); + + let paths: Vec = if batch { + if !p.is_dir() { + return Err(format!("Not a directory: {}", path)); + } + let mut files: Vec<_> = std::fs::read_dir(p) + .map_err(|e| format!("read dir: {}", e))? + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| p.extension().map(|x| x == "jsonl").unwrap_or(false)) + .collect(); + files.sort(); + eprintln!("Found {} transcripts", files.len()); + files + } else { + vec![p.to_path_buf()] + }; + + let path_refs: Vec<&std::path::Path> = paths.iter().map(|p| p.as_path()).collect(); + let facts = fact_mine::mine_batch(&path_refs, min_messages, dry_run)?; + + if !dry_run { + let json = serde_json::to_string_pretty(&facts) + .map_err(|e| format!("serialize: {}", e))?; + if let Some(out) = &output_file { + std::fs::write(out, &json).map_err(|e| format!("write: {}", e))?; + eprintln!("\nWrote {} facts to {}", facts.len(), out); + } else { + println!("{}", json); + } + } + + eprintln!("\nTotal: {} facts from {} transcripts", facts.len(), paths.len()); + Ok(()) +} + +fn cmd_fact_mine_store(args: &[String]) -> Result<(), String> { + if args.len() != 1 { + return Err("Usage: poc-memory fact-mine-store ".into()); + } + let path = std::path::Path::new(&args[0]); + if !path.exists() { + return Err(format!("File not found: {}", args[0])); + } + let count = fact_mine::mine_and_store(path)?; + eprintln!("Stored {} facts", count); + Ok(()) +} diff --git a/src/query.rs b/src/query.rs index 529d7ce..5076280 100644 --- a/src/query.rs +++ b/src/query.rs @@ -202,11 +202,20 @@ fn node_type_label(nt: NodeType) -> &'static str { fn provenance_label(p: Provenance) -> &'static str { match p { - Provenance::Manual => "manual", - Provenance::Journal => "journal", - Provenance::Agent => "agent", - Provenance::Dream => "dream", - Provenance::Derived => "derived", + Provenance::Manual => "manual", + Provenance::Journal => "journal", + Provenance::Agent => "agent", + Provenance::Dream => "dream", + Provenance::Derived => "derived", + Provenance::AgentExperienceMine => "agent:experience-mine", + Provenance::AgentKnowledgeObservation => "agent:knowledge-observation", + Provenance::AgentKnowledgePattern => "agent:knowledge-pattern", + Provenance::AgentKnowledgeConnector => "agent:knowledge-connector", + Provenance::AgentKnowledgeChallenger => "agent:knowledge-challenger", + Provenance::AgentConsolidate => "agent:consolidate", + Provenance::AgentDigest => "agent:digest", + Provenance::AgentFactMine => "agent:fact-mine", + Provenance::AgentDecay => "agent:decay", } } diff --git a/src/store/ops.rs b/src/store/ops.rs index 7b79906..5daa298 100644 --- a/src/store/ops.rs +++ b/src/store/ops.rs @@ -30,18 +30,25 @@ impl Store { /// Upsert a node: update if exists (and content changed), create if not. /// Returns: "created", "updated", or "unchanged". pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> { + self.upsert_provenance(key, content, Provenance::Manual) + } + + /// Upsert with explicit provenance (for agent-created nodes). + pub fn upsert_provenance(&mut self, key: &str, content: &str, provenance: Provenance) -> Result<&'static str, String> { if let Some(existing) = self.nodes.get(key) { if existing.content == content { return Ok("unchanged"); } let mut node = existing.clone(); node.content = content.to_string(); + node.provenance = provenance; node.version += 1; self.append_nodes(std::slice::from_ref(&node))?; self.nodes.insert(key.to_string(), node); Ok("updated") } else { - let node = new_node(key, content); + let mut node = new_node(key, content); + node.provenance = provenance; self.append_nodes(std::slice::from_ref(&node))?; self.uuid_to_key.insert(node.uuid, node.key.clone()); self.nodes.insert(key.to_string(), node); diff --git a/src/store/types.rs b/src/store/types.rs index 12debd7..d6946ad 100644 --- a/src/store/types.rs +++ b/src/store/types.rs @@ -236,9 +236,18 @@ pub enum NodeType { pub enum Provenance { Manual, Journal, - Agent, + Agent, // legacy catch-all, prefer specific variants below Dream, Derived, + AgentExperienceMine, + AgentKnowledgeObservation, + AgentKnowledgePattern, + AgentKnowledgeConnector, + AgentKnowledgeChallenger, + AgentConsolidate, + AgentDigest, + AgentFactMine, + AgentDecay, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] @@ -296,7 +305,10 @@ capnp_enum!(NodeType, memory_capnp::NodeType, [EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic]); capnp_enum!(Provenance, memory_capnp::Provenance, - [Manual, Journal, Agent, Dream, Derived]); + [Manual, Journal, Agent, Dream, Derived, + AgentExperienceMine, AgentKnowledgeObservation, AgentKnowledgePattern, + AgentKnowledgeConnector, AgentKnowledgeChallenger, AgentConsolidate, + AgentDigest, AgentFactMine, AgentDecay]); capnp_enum!(Category, memory_capnp::Category, [General, Core, Technical, Observation, Task]);