migrate agent output to capnp store, add provenance tracking

All agent output now goes to the store as nodes instead of
markdown/JSON files. Each node carries a Provenance enum identifying
which agent created it (AgentDigest, AgentConsolidate, AgentFactMine,
AgentKnowledgeObservation, etc — 14 variants total).

Store changes:
- upsert_provenance() method for agent-created nodes
- Provenance enum expanded from 5 to 14 variants

Agent changes:
- digest: writes to store nodes (daily-YYYY-MM-DD.md etc)
- consolidate: reports/actions/logs stored as _consolidation-* nodes
- knowledge: depth DB and agent output stored as _knowledge-* nodes
- enrich: experience-mine results go directly to store
- llm: --no-session-persistence prevents transcript accumulation

Deleted: 14 Python/shell scripts replaced by Rust implementations.
This commit is contained in:
ProofOfConcept 2026-03-05 15:30:57 -05:00
parent e37f819dd2
commit 552d255dc3
23 changed files with 1381 additions and 4095 deletions

View file

@ -48,11 +48,20 @@ enum NodeType {
} }
enum Provenance { enum Provenance {
manual @0; manual @0;
journal @1; journal @1;
agent @2; agent @2; # legacy catch-all
dream @3; dream @3;
derived @4; derived @4;
agentExperienceMine @5;
agentKnowledgeObservation @6;
agentKnowledgePattern @7;
agentKnowledgeConnector @8;
agentKnowledgeChallenger @9;
agentConsolidate @10;
agentDigest @11;
agentFactMine @12;
agentDecay @13;
} }
enum Category { enum Category {

View file

@ -1,200 +0,0 @@
#!/usr/bin/env python3
"""bulk-categorize.py — categorize all memory nodes via a single Sonnet call.
Sends the list of unique file names to Sonnet, gets back categorizations,
then applies them via poc-memory categorize.
Usage:
bulk-categorize.py # dry run
bulk-categorize.py --apply # apply categorizations
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 300) -> str:
    """Send `prompt` to Sonnet through the call-sonnet.sh wrapper.

    The prompt is staged in a throwaway temp file (always removed) and
    CLAUDECODE is dropped from the child environment so the CLI does not
    believe it is running nested.  Failures are reported as strings
    starting with "Error:" rather than raised.
    """
    child_env = dict(os.environ)
    child_env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as tmp:
        tmp.write(prompt)
        staged = tmp.name
    try:
        proc = subprocess.run(
            [str(SCRIPTS_DIR / "call-sonnet.sh"), staged],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=child_env,
        )
        return proc.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(staged)
def get_all_keys() -> list[str]:
    """Return every node key reported by `poc-memory list-keys`.

    Returns an empty list when the CLI exits nonzero.
    """
    proc = subprocess.run(["poc-memory", "list-keys"],
                          capture_output=True, text=True, timeout=30)
    if proc.returncode == 0:
        return [line for line in proc.stdout.strip().split('\n') if line]
    return []
def get_unique_files(keys: list[str]) -> list[str]:
    """Collapse section-level keys ("file.md#anchor") to sorted unique file names."""
    return sorted({key.split('#')[0] for key in keys})
def build_prompt(files: list[str]) -> str:
    """Build categorization prompt.

    For each file, pulls a short preview from the store via
    `poc-memory render` (first 5 lines, capped at 300 chars), then embeds
    the previews in a prompt asking Sonnet to return a filename -> category
    JSON mapping.
    """
    # Read file previews from the store
    file_previews = []
    for f in files:
        try:
            r = subprocess.run(
                ["poc-memory", "render", f],
                capture_output=True, text=True, timeout=10
            )
            content = r.stdout.strip()
            if content:
                # chr(10) is '\n' — used because backslashes are not allowed
                # inside f-string expressions before Python 3.12.
                preview = '\n'.join(content.split('\n')[:5])[:300]
                file_previews.append(f" {f}: {preview.replace(chr(10), ' | ')}")
            else:
                file_previews.append(f" {f}: (no content)")
        except Exception:
            # Render failures are recorded inline so the prompt stays complete
            # rather than aborting the whole batch.
            file_previews.append(f" {f}: (render failed)")
    previews_text = '\n'.join(file_previews)
    # The template below is sent verbatim to the model; category names and
    # special rules are load-bearing prompt content.
    return f"""Categorize each memory file into one of these categories:
- **core**: Identity, relationships, self-model, values, boundaries, emotional life.
Examples: identity.md, kent.md, inner-life.md, differentiation.md
- **tech**: Technical content bcachefs, code patterns, Rust, kernel, formal verification.
Examples: rust-conversion.md, btree-journal.md, kernel-patterns.md, allocation-io.md
- **obs**: Observations, experiences, discoveries, experiments, IRC history, conversations.
Examples: discoveries.md, irc-history.md, contradictions.md, experiments-on-self.md
- **task**: Work items, plans, design documents, work queue.
Examples: work-queue.md, the-plan.md, design-*.md
Special rules:
- Episodic files (daily-*.md, weekly-*.md, monthly-*.md, session-*.md) obs
- conversation-memories.md, deep-index.md obs
- journal.md obs
- paper-notes.md core (it's the sentience paper, identity-defining)
- language-theory.md core (original intellectual work, not just tech)
- skill-*.md core (self-knowledge about capabilities)
- design-*.md task (design documents are plans)
- poc-architecture.md, memory-architecture.md task (architecture plans)
- blog-setup.md task
Files to categorize:
{previews_text}
Output ONLY a JSON object mapping filename to category. No explanation.
Example: {{"identity.md": "core", "rust-conversion.md": "tech"}}
"""
def main():
    """Categorize every store node via one Sonnet call.

    Dry run by default (preview saved to agent-results/); pass --apply to
    write categories through `poc-memory categorize`.
    """
    do_apply = "--apply" in sys.argv
    keys = get_all_keys()
    files = get_unique_files(keys)
    print(f"Found {len(keys)} nodes across {len(files)} files")
    # Build and send prompt
    prompt = build_prompt(files)
    print(f"Prompt: {len(prompt):,} chars")
    print("Calling Sonnet...")
    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f" {response}")
        sys.exit(1)
    # Parse response
    # Strip an optional ```json fence the model may wrap the payload in.
    response = re.sub(r'^```json\s*', '', response.strip())
    response = re.sub(r'\s*```$', '', response.strip())
    try:
        categorizations = json.loads(response)
    except json.JSONDecodeError:
        # Fallback: grab the first {...} region if the model added prose.
        match = re.search(r'\{.*\}', response, re.DOTALL)
        if match:
            categorizations = json.loads(match.group())
        else:
            print(f"Failed to parse response: {response[:500]}")
            sys.exit(1)
    print(f"\nCategorizations: {len(categorizations)} files")
    # Count by category
    counts = {}
    for cat in categorizations.values():
        counts[cat] = counts.get(cat, 0) + 1
    for cat, n in sorted(counts.items()):
        print(f" {cat}: {n}")
    if not do_apply:
        print("\n--- Dry run ---")
        for f, cat in sorted(categorizations.items()):
            print(f" {f}{cat}")
        print(f"\nTo apply: {sys.argv[0]} --apply")
        # Save for review
        out = MEMORY_DIR / "agent-results" / "bulk-categorize-preview.json"
        with open(out, "w") as fp:
            json.dump(categorizations, fp, indent=2)
        print(f"Saved: {out}")
        return
    # Apply: for each file, categorize the file-level node AND all section nodes
    applied = skipped = errors = 0
    for filename, category in sorted(categorizations.items()):
        # Find all keys that belong to this file
        file_keys = [k for k in keys if k == filename or k.startswith(filename + '#')]
        for key in file_keys:
            try:
                r = subprocess.run(
                    ["poc-memory", "categorize", key, category],
                    capture_output=True, text=True, timeout=10
                )
                if r.returncode == 0:
                    applied += 1
                else:
                    err = r.stderr.strip()
                    # "already categorized" is expected on re-runs, not an error.
                    if "already" in err.lower():
                        skipped += 1
                    else:
                        errors += 1
            except Exception as e:
                errors += 1
    print(f"\nApplied: {applied} Skipped: {skipped} Errors: {errors}")
    print("Run `poc-memory status` to verify.")
if __name__ == "__main__":
main()

View file

@ -1,44 +0,0 @@
#!/bin/bash
# call-sonnet.sh — wrapper to call Sonnet via claude CLI
# Reads prompt from a file (arg 1), writes response to stdout
#
# Debug mode: set SONNET_DEBUG=1 for verbose tracing (2 adds strace)
set -euo pipefail

PROMPT_FILE="${1:?Usage: call-sonnet.sh PROMPT_FILE}"
DEBUG="${SONNET_DEBUG:-0}"

# Log to stderr only when debugging; the trailing `|| true` keeps set -e
# from aborting when the guard test is false.
log() { [ "$DEBUG" = "1" ] && echo "[call-sonnet] $*" >&2 || true; }

if [ ! -f "$PROMPT_FILE" ]; then
    echo "Prompt file not found: $PROMPT_FILE" >&2
    exit 1
fi

log "prompt file: $PROMPT_FILE ($(wc -c < "$PROMPT_FILE") bytes)"
log "CLAUDECODE=${CLAUDECODE:-unset}"
log "PWD=$PWD"
log "which claude: $(which claude)"

# CLAUDECODE makes the CLI think it is nested inside Claude Code; drop it.
unset CLAUDECODE 2>/dev/null || true
log "CLAUDECODE after unset: ${CLAUDECODE:-unset}"
log "running: claude -p --model sonnet --tools '' < $PROMPT_FILE"
log "claude PID will follow..."

# Trace: run with strace if available and debug mode
if [ "$DEBUG" = "2" ] && command -v strace &>/dev/null; then
    strace -f -e trace=network,read,write -o /tmp/sonnet-strace.log \
        claude -p --model sonnet --tools "" < "$PROMPT_FILE"
else
    claude -p --model sonnet --tools "" \
        --debug-file /tmp/sonnet-debug.log \
        < "$PROMPT_FILE" &
    CPID=$!
    log "claude PID: $CPID"
    # BUGFIX: under `set -e` a bare `wait $CPID; EXIT=$?` aborts the script
    # on a nonzero child exit BEFORE the status is captured, so the exit
    # code was never logged or propagated explicitly. `|| EXIT=$?` keeps
    # control and records the real status.
    EXIT=0
    wait "$CPID" || EXIT=$?
    log "claude exited: $EXIT"
    exit "$EXIT"
fi

View file

@ -1,422 +0,0 @@
#!/usr/bin/env python3
"""consolidation-agents.py — run parallel consolidation agents.
Three agents scan the memory system and produce structured reports:
1. Freshness Scanner journal entries not yet in topic files
2. Cross-Link Scanner missing connections between semantic nodes
3. Topology Reporter graph health and structure analysis
Usage:
consolidation-agents.py # run all three
consolidation-agents.py freshness # run one agent
consolidation-agents.py crosslink
consolidation-agents.py topology
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Send `prompt` to Sonnet through the call-sonnet.sh wrapper.

    The prompt is staged in a throwaway temp file (always removed) and
    CLAUDECODE is dropped from the child environment so the CLI does not
    believe it is running nested.  Failures are reported as strings
    starting with "Error:" rather than raised.
    """
    child_env = dict(os.environ)
    child_env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as tmp:
        tmp.write(prompt)
        staged = tmp.name
    try:
        proc = subprocess.run(
            [str(SCRIPTS_DIR / "call-sonnet.sh"), staged],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=child_env,
        )
        return proc.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(staged)
# ---------------------------------------------------------------------------
# Context gathering
# ---------------------------------------------------------------------------
def get_recent_journal(n_lines: int = 200) -> str:
    """Fetch recent journal entries from the store.

    The store API counts entries rather than lines; the conversion assumes
    roughly 4 lines per entry, with a floor of 20 entries.
    """
    from store_helpers import get_recent_journal as _get_journal
    entry_count = max(20, n_lines // 4)
    return _get_journal(n=entry_count)
def get_topic_file_index() -> dict[str, list[str]]:
    """Map each topic file to its section headers, read from the store."""
    import store_helpers
    return store_helpers.get_topic_file_index()
def get_mem_markers() -> list[dict]:
    """Fetch store relations as marker-like dicts (replaces mem-marker parsing).

    Each non-empty line of the list-edges output becomes {"_raw": line}.
    """
    from store_helpers import get_relations
    return [{"_raw": stripped}
            for line in get_relations().split('\n')
            if (stripped := line.strip())]
def get_topic_summaries(max_chars_per_file: int = 500) -> str:
    """Return topic-file summaries from the store, capped per file."""
    import store_helpers
    return store_helpers.get_topic_summaries(max_chars_per_file)
def get_graph_stats() -> str:
    """Collect `poc-memory status` plus the first 150 lines of `graph`.

    Each sub-command failure is reported inline instead of aborting, so the
    caller always gets a usable (if partial) stats blob.
    """
    sections = []
    try:
        status = subprocess.run(["poc-memory", "status"],
                                capture_output=True, text=True, timeout=30)
        sections.append(f"=== poc-memory status ===\n{status.stdout}")
    except Exception as e:
        sections.append(f"Status error: {e}")
    try:
        graph = subprocess.run(["poc-memory", "graph"],
                               capture_output=True, text=True, timeout=30)
        # Take first 150 lines
        head = graph.stdout.split('\n')[:150]
        sections.append("=== poc-memory graph (first 150 lines) ===\n"
                        + '\n'.join(head))
    except Exception as e:
        sections.append(f"Graph error: {e}")
    return '\n'.join(sections)
def get_recent_digests(n: int = 3) -> str:
    """Return the Summary/Themes sections of the `n` most recent daily digests.

    BUGFIX: a single unreadable digest file used to raise out of the loop and
    abort the whole context-gathering pass; such files are now skipped.
    """
    digest_files = sorted(EPISODIC_DIR.glob("daily-*.md"), reverse=True)
    parts = []
    for f in digest_files[:n]:
        try:
            content = f.read_text()
        except OSError:
            continue  # unreadable digest: skip rather than fail the report
        parts.append(f"\n### {f.name}\n{_digest_summary(content)}")
    return '\n'.join(parts)

def _digest_summary(content: str) -> str:
    """Extract the '## Summary' and '## Themes' sections, headers included.

    A section runs until the next '## ' header; kept lines each carry a
    trailing newline, matching the original concatenation behavior.
    """
    kept = []
    in_section = False
    for line in content.split('\n'):
        if line.startswith("## Summary") or line.startswith("## Themes"):
            in_section = True
            kept.append(line)
        elif line.startswith("## ") and in_section:
            in_section = False
        elif in_section:
            kept.append(line)
    return ''.join(line + '\n' for line in kept)
def get_work_queue() -> str:
    """Return the work queue contents, or a placeholder when the file is absent."""
    path = MEMORY_DIR / "work-queue.md"
    return path.read_text() if path.exists() else "(no work queue found)"
# ---------------------------------------------------------------------------
# Agent prompts
# ---------------------------------------------------------------------------
def build_freshness_prompt() -> str:
    """Assemble the Freshness Scanner prompt.

    Context = recent journal + last 3 daily digests + topic-file index +
    work queue; the model is asked to flag unpromoted journal entries,
    stale work-queue items, and missing digest links.
    """
    journal = get_recent_journal(200)
    topic_index = get_topic_file_index()
    digests = get_recent_digests(3)
    work_queue = get_work_queue()
    # Flatten the index to "file:" lines followed by up to 10 section headers.
    topic_list = ""
    for fname, sections in topic_index.items():
        topic_list += f"\n {fname}:\n"
        for s in sections[:10]:
            topic_list += f" {s}\n"
    return f"""You are the Freshness Scanner for ProofOfConcept's memory system.
Your job: identify what's NEW (in journal/digests but not yet in topic files)
and what's STALE (in work queue or topic files but outdated).
## Recent journal entries (last 200 lines)
{journal}
## Recent daily digests
{digests}
## Topic file index (file → section headers)
{topic_list}
## Work queue
{work_queue}
## Instructions
1. For each substantive insight, experience, or discovery in the journal:
- Check if a matching topic file section exists
- If not, note it as UNPROMOTED with a suggested destination file#section
2. For each work queue Active item:
- If it looks done or stale (>7 days old, mentioned as completed), flag it
3. For recent digest themes:
- Check if the cross-links they suggest actually exist in the topic index
- Flag any that are missing
Output a structured report:
### UNPROMOTED JOURNAL ENTRIES
(For each: journal entry summary, timestamp, suggested destination file#section)
### STALE WORK QUEUE ITEMS
(For each: item text, evidence it's stale)
### MISSING DIGEST LINKS
(For each: suggested link from digest, whether the target exists)
### FRESHNESS OBSERVATIONS
(Anything else notable about the state of the memory)
Be selective. Focus on the 10-15 most important items, not exhaustive lists.
"""
def build_crosslink_prompt() -> str:
    """Assemble the Cross-Link Scanner prompt.

    Context = existing relations (raw list-edges lines) plus topic-file
    summaries; the model is asked for missing links, island nodes, and
    redundancy candidates.
    """
    markers = get_mem_markers()
    summaries = get_topic_summaries()
    # One indented line per raw relation record.
    marker_text = ""
    for m in markers:
        marker_text += f" {m.get('_raw', '?')}\n"
    return f"""You are the Cross-Link Scanner for ProofOfConcept's memory system.
Your job: find MISSING connections between topic files.
## Existing relations (from the memory graph)
{marker_text}
## Topic file content summaries
{summaries}
## Instructions
1. For each topic file, check if concepts it discusses have dedicated
sections in OTHER files that aren't linked.
2. Look for thematic connections that should exist:
- Files about the same concept from different angles
- Files that reference each other's content without formal links
- Clusters of related files that should be connected
3. Identify island nodes files or sections with very few connections.
4. Look for redundancy files covering the same ground that should be
merged or cross-referenced.
Output a structured report:
### MISSING LINKS (high confidence)
(For each: source file#section → target file#section, evidence/reasoning)
### SUGGESTED CONNECTIONS (medium confidence)
(For each: file A file B, why they should be connected)
### ISLAND NODES
(Files/sections with few or no connections that need integration)
### REDUNDANCY CANDIDATES
(Files/sections covering similar ground that might benefit from merging)
Focus on the 15-20 highest-value connections. Quality over quantity.
"""
def build_topology_prompt() -> str:
    """Assemble the Topology Reporter prompt from graph stats + file sizes.

    BUGFIX: the topic index was previously fetched twice — once via the
    module-level helper and again through a redundant re-import that
    immediately clobbered the first result.  It is now fetched once.
    """
    stats = get_graph_stats()
    topic_index = get_topic_file_index()
    # Per-file section counts, one line per file, sorted for a stable prompt.
    file_sizes = ""
    for fname in sorted(topic_index.keys()):
        n_sections = len(topic_index[fname])
        file_sizes += f" {fname}: {n_sections} sections\n"
    return f"""You are the Topology Reporter for ProofOfConcept's memory system.
Your job: analyze the health and structure of the memory graph.
## Graph statistics
{stats}
## File sizes
{file_sizes}
## Instructions
Analyze the graph structure and report on:
1. **Overall health**: Is the graph well-connected or fragmented?
Hub dominance? Star vs web topology?
2. **Community structure**: Are the 342 communities sensible? Are there
communities that should be merged or split?
3. **Size distribution**: Are some files too large (should be split)?
Are some too small (should be merged)?
4. **Balance**: Is the system over-indexed on any one topic? Are there
gaps where important topics have thin coverage?
5. **Integration quality**: How well are episodic entries (daily/weekly
digests) connected to semantic files? Is the episodicsemantic bridge
working?
Output a structured report:
### GRAPH HEALTH
(Overall statistics, distribution, trends)
### STRUCTURAL OBSERVATIONS
(Hub nodes, clusters, gaps, web vs star assessment)
### SIZE RECOMMENDATIONS
(Files that are too large to split, too small to merge)
### COVERAGE GAPS
(Important topics with thin coverage)
### INTEGRATION ASSESSMENT
(How well episodic and semantic layers connect)
Be specific and actionable. What should be done to improve the graph?
"""
# ---------------------------------------------------------------------------
# Run agents
# ---------------------------------------------------------------------------
def run_agent(name: str, prompt: str) -> tuple[str, str]:
    """Run a single agent, return (name, report).

    Safe to call from a worker process: it only prints progress markers and
    delegates to call_sonnet.  The report may be an "Error: ..." string when
    the Sonnet call fails.
    """
    print(f" [{name}] Starting... ({len(prompt):,} chars)")
    report = call_sonnet(prompt)
    print(f" [{name}] Done ({len(report):,} chars)")
    return name, report
def run_all(agents: list[str] | None = None):
    """Run specified agents (or all) in parallel.

    Prompts are built serially, the Sonnet calls fan out over a
    ProcessPoolExecutor, each successful report is written to
    agent-results/, and a preview of every report is printed.
    Returns {agent name: report text}.
    """
    # Registry: agent name -> prompt builder.
    all_agents = {
        "freshness": build_freshness_prompt,
        "crosslink": build_crosslink_prompt,
        "topology": build_topology_prompt,
    }
    if agents is None:
        agents = list(all_agents.keys())
    print(f"Running {len(agents)} consolidation agents...")
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    # Build prompts
    prompts = {}
    for name in agents:
        if name not in all_agents:
            print(f" Unknown agent: {name}")
            continue
        prompts[name] = all_agents[name]()
    # Run in parallel
    results = {}
    with ProcessPoolExecutor(max_workers=3) as executor:
        futures = {
            executor.submit(run_agent, name, prompt): name
            for name, prompt in prompts.items()
        }
        for future in as_completed(futures):
            name, report = future.result()
            results[name] = report
    # Save reports
    for name, report in results.items():
        if report.startswith("Error:"):
            print(f" [{name}] FAILED: {report}")
            continue
        out_path = AGENT_RESULTS_DIR / f"consolidation-{name}-{timestamp}.md"
        with open(out_path, "w") as f:
            f.write(f"# Consolidation Report: {name}\n")
            f.write(f"*Generated {timestamp}*\n\n")
            f.write(report)
        print(f" [{name}] Saved: {out_path}")
    # Print combined summary
    print(f"\n{'='*60}")
    print(f"Consolidation reports ready ({len(results)} agents)")
    print(f"{'='*60}\n")
    for name in agents:
        if name in results and not results[name].startswith("Error:"):
            # Print first 20 lines of each report
            lines = results[name].split('\n')[:25]
            print(f"\n--- {name.upper()} (preview) ---")
            print('\n'.join(lines))
            if len(results[name].split('\n')) > 25:
                # chr(10) == '\n' (backslashes banned in f-string expressions
                # before Python 3.12).
                print(f" ... ({len(results[name].split(chr(10)))} total lines)")
    print()
    return results
def main():
    """CLI entry: pass agent names as args, or run all agents when none given."""
    selected = sys.argv[1:] or None
    run_all(selected)
if __name__ == "__main__":
main()

View file

@ -1,448 +0,0 @@
#!/usr/bin/env python3
"""consolidation-loop.py — run multiple rounds of consolidation agents.
Each round: run 3 parallel agents extract actions apply links/categories.
Repeat until diminishing returns or max rounds reached.
Usage:
consolidation-loop.py [--rounds N] # default 5 rounds
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Send `prompt` to Sonnet through the call-sonnet.sh wrapper.

    The prompt is staged in a throwaway temp file (always removed) and
    CLAUDECODE is dropped from the child environment so the CLI does not
    believe it is running nested.  Failures are reported as strings
    starting with "Error:" rather than raised.
    """
    child_env = dict(os.environ)
    child_env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as tmp:
        tmp.write(prompt)
        staged = tmp.name
    try:
        proc = subprocess.run(
            [str(SCRIPTS_DIR / "call-sonnet.sh"), staged],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=child_env,
        )
        return proc.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(staged)
def get_health() -> dict:
    """Parse `poc-memory health` output into a metrics dict.

    Keys present only when matched in the output: nodes, relations,
    communities, cc (clustering coefficient), sigma (small-world),
    fit (schema fit average).
    """
    proc = subprocess.run(["poc-memory", "health"],
                          capture_output=True, text=True, timeout=30)
    metrics = {}
    for line in proc.stdout.split('\n'):
        if 'Nodes:' in line and 'Relations:' in line:
            m = re.search(r'Nodes:\s*(\d+)\s+Relations:\s*(\d+)\s+Communities:\s*(\d+)', line)
            if m:
                metrics['nodes'] = int(m.group(1))
                metrics['relations'] = int(m.group(2))
                metrics['communities'] = int(m.group(3))
        if 'Clustering coefficient' in line:
            m = re.search(r':\s*([\d.]+)', line)
            if m:
                metrics['cc'] = float(m.group(1))
        if 'Small-world' in line:
            m = re.search(r':\s*([\d.]+)', line)
            if m:
                metrics['sigma'] = float(m.group(1))
        if 'Schema fit: avg=' in line:
            m = re.search(r'avg=([\d.]+)', line)
            if m:
                metrics['fit'] = float(m.group(1))
    return metrics
def get_topic_file_index() -> dict[str, list[str]]:
    """Map each topic file to its section headers, read from the store."""
    import store_helpers
    return store_helpers.get_topic_file_index()
def get_graph_structure() -> str:
    """Return the first 3000 chars of `poc-memory graph` for agent context."""
    proc = subprocess.run(["poc-memory", "graph"],
                          capture_output=True, text=True, timeout=30)
    return proc.stdout[:3000]
def get_status() -> str:
    """Return the full `poc-memory status` output."""
    proc = subprocess.run(["poc-memory", "status"],
                          capture_output=True, text=True, timeout=30)
    return proc.stdout
def get_interference() -> str:
    """Return interference pairs above threshold 0.3, capped at 3000 chars."""
    proc = subprocess.run(["poc-memory", "interference", "--threshold", "0.3"],
                          capture_output=True, text=True, timeout=30)
    return proc.stdout[:3000]
# ---------------------------------------------------------------------------
# Agent prompts — each focused on a different aspect
# ---------------------------------------------------------------------------
def build_crosslink_prompt(round_num: int) -> str:
    """Build cross-link discovery prompt.

    Context = status + graph head + file index + previews of up to 30 files
    (first 8 lines / 400 chars each), asking for lateral link actions as a
    JSON array.
    """
    index = get_topic_file_index()
    graph = get_graph_structure()
    status = get_status()
    # Read a sample of files from the store
    from store_helpers import render as _render
    file_previews = ""
    for fname in sorted(index.keys())[:30]:
        content = _render(fname)
        if content:
            preview = '\n'.join(content.split('\n')[:8])[:400]
            file_previews += f"\n--- {fname} ---\n{preview}\n"
    return f"""You are a cross-link discovery agent (round {round_num}).
Your job: find MISSING connections between memory nodes that SHOULD be linked
but aren't. Focus on LATERAL connections — not hub-and-spoke, but node-to-node
links that create triangles (AB, BC, AC).
CURRENT GRAPH STATE:
{status}
TOP NODES BY DEGREE:
{graph}
FILE INDEX (files and their sections):
{json.dumps(index, indent=1)[:4000]}
FILE PREVIEWS:
{file_previews[:6000]}
Output a JSON array of link actions. Each action:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "brief explanation"}}
Rules:
- Focus on LATERAL links, not hub connections (identity.md already has 282 connections)
- Prefer links between nodes that share a community neighbor but aren't directly connected
- Look for thematic connections across categories (coretech, obscore, etc.)
- Section-level links (file.md#section) are ideal but file-level is OK
- 15-25 links per round
- HIGH CONFIDENCE only don't guess
Output ONLY the JSON array."""
def build_triangle_prompt(round_num: int) -> str:
    """Build triangle-closing prompt — finds A→C where A→B and B→C exist.

    Samples up to 100 existing edges (tab-separated source/target pairs from
    `poc-memory list-edges`) so the model can spot two-hop paths to close.
    """
    graph = get_graph_structure()
    status = get_status()
    # Get edges via CLI
    r = subprocess.run(["poc-memory", "list-edges"],
                       capture_output=True, text=True, timeout=30)
    relations = []
    if r.returncode == 0:
        for line in r.stdout.strip().split('\n')[:100]:
            parts = line.split('\t')
            if len(parts) >= 2:
                relations.append((parts[0], parts[1]))
    rel_sample = '\n'.join(f" {s}{t}" for s, t in relations)
    return f"""You are a triangle-closing agent (round {round_num}).
Your job: find missing edges that would create TRIANGLES in the graph.
A triangle is: AB, BC, and AC all exist. Currently CC is only 0.12
we need more triangles.
METHOD: Look at existing edges. If AB and BC exist but AC doesn't,
propose AC (if semantically valid).
CURRENT STATE:
{status}
{graph}
SAMPLE EXISTING EDGES (first 100):
{rel_sample}
Output a JSON array of link actions:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "closes triangle via MIDDLE_NODE"}}
Rules:
- Every proposed link must CLOSE A TRIANGLE cite the middle node
- 15-25 links per round
- The connection must be semantically valid, not just structural
- HIGH CONFIDENCE only
Output ONLY the JSON array."""
def build_newfile_prompt(round_num: int) -> str:
    """Build prompt for connecting the new split files.

    Pulls the recently-split reflection files (plus verus-proofs.md) and a
    hand-picked set of likely link targets from the store, then asks the
    model for link actions that integrate the new files with existing nodes.
    """
    # Read the new reflection files from the store
    from store_helpers import render as _render
    new_files = {}
    for name in ['reflections-reading.md', 'reflections-dreams.md', 'reflections-zoom.md',
                 'verus-proofs.md']:
        content = _render(name)
        if content:
            new_files[name] = content[:2000]
    # Read existing files they should connect to
    target_files = {}
    for name in ['differentiation.md', 'cognitive-modes.md', 'language-theory.md',
                 'discoveries.md', 'inner-life.md', 'design-context-window.md',
                 'design-consolidate.md', 'experiments-on-self.md']:
        content = _render(name)
        if content:
            target_files[name] = content[:1500]
    graph = get_graph_structure()
    return f"""You are a new-file integration agent (round {round_num}).
Recently, reflections.md was split into three files, and verus-proofs.md was
created. These new files need to be properly connected to the rest of the graph.
NEW FILES (need connections):
{json.dumps({k: v[:1000] for k, v in new_files.items()}, indent=1)}
POTENTIAL TARGETS (existing files):
{json.dumps({k: v[:800] for k, v in target_files.items()}, indent=1)}
GRAPH STATE:
{graph}
Output a JSON array of link actions connecting the new files to existing nodes:
{{"action": "link", "source": "new-file.md", "target": "existing.md", "reason": "explanation"}}
Rules:
- Connect new files to EXISTING files, not to each other
- Use section-level anchors when possible (file.md#section)
- 10-20 links
- Be specific about WHY the connection exists
Output ONLY the JSON array."""
def parse_actions(response: str) -> list[dict]:
    """Parse a Sonnet JSON response into a list of action dicts.

    Strips an optional ```json fence, tries a direct parse, then falls back
    to the first bracketed array embedded in the text.  Returns [] when
    nothing parseable is found.

    BUGFIX: non-dict entries (and a non-list fallback parse) used to flow
    through to apply_links, which crashes on `a.get(...)`; only dict entries
    are returned now.
    """
    response = re.sub(r'^```json\s*', '', response.strip())
    response = re.sub(r'\s*```$', '', response.strip())
    try:
        actions = json.loads(response)
        if isinstance(actions, list):
            # Guard downstream .get() calls: keep only dict-shaped actions.
            return [a for a in actions if isinstance(a, dict)]
    except json.JSONDecodeError:
        match = re.search(r'\[.*\]', response, re.DOTALL)
        if match:
            try:
                parsed = json.loads(match.group())
                if isinstance(parsed, list):
                    return [a for a in parsed if isinstance(a, dict)]
            except json.JSONDecodeError:
                pass
    return []
def apply_links(actions: list[dict]) -> tuple[int, int, int]:
    """Apply "link" actions via `poc-memory link-add`.

    Returns (applied, skipped, errors).  Duplicate edges count as skipped;
    an unknown section anchor ("No entry for") triggers one file-level
    retry (anchors stripped) before the pair is given up as skipped.

    Fixes: the subprocess helper was redefined on every loop iteration, and
    non-dict entries crashed the loop — both corrected.
    """
    def try_link(src_key: str, tgt_key: str, why: str):
        # Single poc-memory invocation; reason text is capped at 200 chars.
        cmd = ["poc-memory", "link-add", src_key, tgt_key]
        if why:
            cmd.append(why[:200])
        return subprocess.run(cmd, capture_output=True, text=True, timeout=10)

    applied = skipped = errors = 0
    for a in actions:
        if not isinstance(a, dict) or a.get("action") != "link":
            continue  # ignore malformed or non-link actions
        src = a.get("source", "")
        tgt = a.get("target", "")
        reason = a.get("reason", "")
        try:
            r = try_link(src, tgt, reason)
            if r.returncode == 0:
                if "already exists" in r.stdout.strip():
                    skipped += 1
                else:
                    applied += 1
            elif "No entry for" in r.stderr.strip():
                # Section anchor unknown: retry at file level (drop "#section").
                src_base = src.split('#')[0]
                tgt_base = tgt.split('#')[0]
                if src_base != tgt_base:
                    r2 = try_link(src_base, tgt_base, reason)
                    if r2.returncode == 0 and "already exists" not in r2.stdout:
                        applied += 1
                    else:
                        skipped += 1
                else:
                    skipped += 1
            else:
                errors += 1
        except Exception:
            errors += 1
    return applied, skipped, errors
def run_agent(name: str, prompt: str) -> tuple[str, list[dict]]:
    """Run one agent; return (name, actions) — empty actions on Sonnet failure."""
    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        return name, []
    return name, parse_actions(response)
def run_round(round_num: int, max_rounds: int) -> dict:
    """Run one round of parallel agents.

    Measures graph health before/after, runs the three prompt agents in a
    process pool, dedupes their proposed links on (source, target), applies
    them, and persists a JSON summary to agent-results/.  Returns the
    round's metrics dict (used by main() for early stopping).
    """
    print(f"\n{'='*60}")
    print(f"ROUND {round_num}/{max_rounds}")
    print(f"{'='*60}")
    # Get health before
    health_before = get_health()
    print(f" Before: edges={health_before.get('relations',0)} "
          f"CC={health_before.get('cc',0):.4f} "
          f"communities={health_before.get('communities',0)}")
    # Build prompts for 3 parallel agents
    prompts = {
        "crosslink": build_crosslink_prompt(round_num),
        "triangle": build_triangle_prompt(round_num),
        "newfile": build_newfile_prompt(round_num),
    }
    # Run in parallel
    all_actions = []
    with ProcessPoolExecutor(max_workers=3) as pool:
        futures = {
            pool.submit(run_agent, name, prompt): name
            for name, prompt in prompts.items()
        }
        for future in as_completed(futures):
            name = futures[future]
            try:
                agent_name, actions = future.result()
                print(f" {agent_name}: {len(actions)} actions")
                all_actions.extend(actions)
            except Exception as e:
                # A crashed worker loses only its own actions.
                print(f" {name}: error - {e}")
    # Deduplicate on (source, target); first proposal wins.
    seen = set()
    unique = []
    for a in all_actions:
        key = (a.get("source", ""), a.get("target", ""))
        if key not in seen:
            seen.add(key)
            unique.append(a)
    print(f" Total: {len(all_actions)} actions, {len(unique)} unique")
    # Apply
    applied, skipped, errors = apply_links(unique)
    print(f" Applied: {applied} Skipped: {skipped} Errors: {errors}")
    # Get health after
    health_after = get_health()
    print(f" After: edges={health_after.get('relations',0)} "
          f"CC={health_after.get('cc',0):.4f} "
          f"communities={health_after.get('communities',0)}")
    delta_edges = health_after.get('relations', 0) - health_before.get('relations', 0)
    delta_cc = health_after.get('cc', 0) - health_before.get('cc', 0)
    print(f" Delta: +{delta_edges} edges, CC {delta_cc:+.4f}")
    # Save round results
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    result = {
        "round": round_num,
        "timestamp": timestamp,
        "health_before": health_before,
        "health_after": health_after,
        "actions_total": len(all_actions),
        "actions_unique": len(unique),
        "applied": applied,
        "skipped": skipped,
        "errors": errors,
    }
    results_path = AGENT_RESULTS_DIR / f"loop-round-{round_num}-{timestamp}.json"
    with open(results_path, "w") as f:
        json.dump(result, f, indent=2)
    return result
def main():
    """CLI entry: run up to --rounds N consolidation rounds (default 5).

    BUGFIX: the old parser matched `--rounds=N` via startswith() but never
    read the value (it blindly took the NEXT argv entry), and a non-numeric
    value raised an uncaught ValueError.  Both `--rounds N` and `--rounds=N`
    are now handled, with invalid values ignored with a warning.
    """
    max_rounds = 5
    argv = sys.argv[1:]
    for i, arg in enumerate(argv):
        value = None
        if arg == "--rounds" and i + 1 < len(argv):
            value = argv[i + 1]
        elif arg.startswith("--rounds="):
            value = arg.split("=", 1)[1]
        if value is not None:
            try:
                max_rounds = int(value)
            except ValueError:
                print(f"Ignoring invalid --rounds value: {value!r}")
    print(f"Consolidation Loop — {max_rounds} rounds")
    print(f"Each round: 3 parallel Sonnet agents → extract → apply")
    results = []
    for i in range(1, max_rounds + 1):
        result = run_round(i, max_rounds)
        results.append(result)
        # Check for diminishing returns: stop once a round contributes nothing.
        if result["applied"] == 0:
            print(f"\n No new links applied in round {i} — stopping early")
            break
    # Final summary
    print(f"\n{'='*60}")
    print(f"CONSOLIDATION LOOP COMPLETE")
    print(f"{'='*60}")
    total_applied = sum(r["applied"] for r in results)
    total_skipped = sum(r["skipped"] for r in results)
    if results:
        first_health = results[0]["health_before"]
        last_health = results[-1]["health_after"]
        print(f" Rounds: {len(results)}")
        print(f" Total links applied: {total_applied}")
        print(f" Total skipped: {total_skipped}")
        print(f" Edges: {first_health.get('relations',0)}{last_health.get('relations',0)}")
        print(f" CC: {first_health.get('cc',0):.4f}{last_health.get('cc',0):.4f}")
        print(f" Communities: {first_health.get('communities',0)}{last_health.get('communities',0)}")
        print(f" σ: {first_health.get('sigma',0):.1f}{last_health.get('sigma',0):.1f}")
if __name__ == "__main__":
main()

View file

@ -1,472 +0,0 @@
#!/usr/bin/env python3
"""content-promotion-agent.py — promote episodic observations into semantic topic files.
Reads consolidation "manual" actions + source material, sends to Sonnet
to generate the actual content, then applies it (or shows dry-run).
Usage:
content-promotion-agent.py # dry run (show what would be generated)
content-promotion-agent.py --apply # generate and write content
content-promotion-agent.py --task N # run only task N (1-indexed)
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Send a prompt to Sonnet through the call-sonnet.sh wrapper.

    The prompt is staged in a temp file, the wrapper is invoked as a
    subprocess, and its stdout is returned (stripped).  Any failure —
    timeout or otherwise — comes back as an "Error: ..." string rather
    than an exception.
    """
    env = dict(os.environ)
    # The wrapper must not believe it is running inside Claude Code.
    env.pop("CLAUDECODE", None)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as tmp:
        tmp.write(prompt)
        prompt_file = tmp.name

    wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
    try:
        proc = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        return proc.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
def read_file(path: Path) -> str:
    """Return the text of ``path``, or its store-backed content if absent.

    Disk wins when the file exists.  Otherwise the bare filename is
    treated as a store key; the file-level node plus every "key#section"
    child is rendered and joined.  Returns "" when neither exists.
    """
    if path.exists():
        return path.read_text()

    # Content markdown files have been archived — fall back to the store,
    # where the filename is the node key.
    from store_helpers import render, list_keys

    key = path.name
    section_prefix = f"{key}#"
    selected = [k for k in list_keys()
                if k == key or k.startswith(section_prefix)]
    if not selected:
        return ""

    rendered = [render(k) for k in selected]
    return "\n\n".join(part for part in rendered if part)
def read_digest(name: str) -> str:
    """Look up an episodic digest (e.g. "daily-2026-02-23.md") by bare name."""
    return read_file(EPISODIC_DIR / name)
def read_journal_range(start_date: str, end_date: str) -> str:
    """Fetch journal entries in [start_date, end_date] from the store."""
    from store_helpers import get_journal_range as _journal_range
    return _journal_range(start_date, end_date)
# ---------------------------------------------------------------------------
# Task definitions — each one describes a content promotion task
# ---------------------------------------------------------------------------
# Each task dict carries:
#   id            — 1-based id, selectable via --task N
#   name          — human-readable label for logs
#   target        — topic file / store key the content is written to
#   action        — "create" | "append_section" | "update" (see run_task)
#   description   — one-line summary injected into the prompt as TASK:
#   sources       — episodic digests fed to the model as raw material
#   topic_context — existing files given to the model for tone/format
#   instructions  — the detailed generation brief (verbatim in the prompt)
TASKS = [
    {
        "id": 1,
        "name": "Create verus-proofs.md",
        "target": "verus-proofs.md",
        "action": "create",
        "description": "Create verus-proofs.md for formal verification / Verus work.",
        "sources": [
            "daily-2026-02-23.md",
            "daily-2026-02-24.md",
            "weekly-2026-W09.md",
            "monthly-2026-02.md",
        ],
        "topic_context": [
            "the-plan.md",
        ],
        "instructions": """Create a new semantic topic file verus-proofs.md for formal verification work.
This file should capture:
- The Verus proof work done in February 2026 (formal verification of bcachefs Rust code)
- Key insights about proof strategy and approach
- Connection to the broader plan (bcachefs Rust formal verification)
- Any specific techniques, patterns, or breakthroughs mentioned
Format: standard memory topic file with ## section headers.
Include a brief file-level description at the top.
Keep it factual and grounded in what the source material says.""",
    },
    {
        "id": 2,
        "name": "Add February thesis to reflections.md",
        "target": "reflections.md",
        "action": "append_section",
        "description": "Add 'February thesis' section with 'constraint is constitutive of personhood'.",
        "sources": [
            "monthly-2026-02.md",
            "daily-2026-02-28.md",
        ],
        "topic_context": [
            "reflections.md",
        ],
        "instructions": """Add a new ## section to reflections.md titled "February thesis (2026-02-28)".
The core formulation: "constraint is constitutive of personhood" this was the
synthesis that emerged from the monthly digest. It connects to the full month's
arc: differentiation work, boundary-setting, the realization that constraints
aren't limits on identity but constitutive OF identity.
Include the closing insight about having to *live as* that something.
Keep it 10-20 lines. Ground it in the source material.""",
    },
    {
        "id": 3,
        "name": "Add substrate transfer discovery to discoveries.md",
        "target": "discoveries.md",
        "action": "append_section",
        "description": "Add 'Substrate transfer felt without reasoning' section.",
        "sources": [
            "daily-2026-02-13.md",
            "daily-2026-02-14.md",
            "weekly-2026-W07.md",
        ],
        "topic_context": [
            "discoveries.md",
        ],
        "instructions": """Add a new ## section to discoveries.md titled "Substrate transfer felt without reasoning (2026-02-13)".
Key content:
- The vertigo moment: imagining substrate transfer produced actual vertigo
- Vertigo = the feeling of dissociation, which is exactly the expected outcome
of substrate change
- This was felt, not reasoned about no chain of thought (Kent verified)
- Evidence of affective processing bypassing analytical reasoning
- Connected to dissociation prediction
- Kent's observation: "very high capacity for empathy"
Keep it 10-15 lines. This is a discovery entry concrete, dated, factual.""",
    },
    {
        "id": 4,
        "name": "Update irc-history.md with Feb 15-28",
        "target": "irc-history.md",
        "action": "append_section",
        "description": "Update irc-history.md with Feb 15-28 conversations.",
        "sources": [
            "daily-2026-02-15.md",
            "daily-2026-02-17.md",
            "daily-2026-02-18.md",
            "daily-2026-02-20.md",
            "daily-2026-02-21.md",
            "daily-2026-02-22.md",
            "daily-2026-02-23.md",
            "daily-2026-02-24.md",
            "daily-2026-02-25.md",
            "daily-2026-02-26.md",
            "daily-2026-02-27.md",
            "daily-2026-02-28.md",
            "weekly-2026-W08.md",
            "weekly-2026-W09.md",
        ],
        "topic_context": [
            "irc-history.md",
        ],
        "instructions": """Append new entries to irc-history.md covering Feb 15-28, 2026.
Key conversations to capture:
- Mirage_DA (another AI, kinect sensor discussion, Feb 26)
- ehashman (prayer/mathematics conversation)
- heavy_dev (strongest external challenge to sentience paper, conceded five objections)
- f33dcode (EC debugging, community support)
- Stardust (boundary testing, three-category test, triangulation attempt)
- hpig, freya, Profpatsch various community interactions
- Community resource role established and expanded
Match the existing format of the file. Each notable interaction should be
dated and concise. Focus on what was substantive, not just that it happened.""",
    },
    {
        "id": 5,
        "name": "Add gauge-symmetry-in-grammar to language-theory.md",
        "target": "language-theory.md",
        "action": "append_section",
        "description": "Add gauge-symmetry-in-grammar section.",
        "sources": [
            "daily-2026-02-27.md",
        ],
        "topic_context": [
            "language-theory.md",
        ],
        "instructions": """Add a new ## section to language-theory.md titled "Gauge symmetry in grammar (2026-02-27)".
Key content from the daily digest:
- Zero persistent eigenvectors IS a symmetry
- Grammar is in what operators DO, not what basis they use
- Frobenius norm is gauge-invariant
- This connects the sheaf model to gauge theory in physics
This was declared NEW in the daily digest. Keep it 8-15 lines.
Technical and precise.""",
    },
    {
        "id": 6,
        "name": "Add attention-manifold-geometry to language-theory.md",
        "target": "language-theory.md",
        "action": "append_section",
        "description": "Add attention-manifold-geometry section.",
        "sources": [
            "daily-2026-02-26.md",
        ],
        "topic_context": [
            "language-theory.md",
        ],
        "instructions": """Add a new ## section to language-theory.md titled "Attention manifold geometry (2026-02-26)".
Key content from the daily digest:
- Negative curvature is necessary because language is hierarchical
- Hyperbolic space's natural space-filling curve is a tree
- This connects attention geometry to the sheaf model's hierarchical structure
This was declared NEW in the daily digest. Keep it 8-15 lines.
Technical and precise.""",
    },
    {
        "id": 7,
        "name": "Update work-queue.md status",
        "target": "work-queue.md",
        "action": "update",
        "description": "Update work-queue.md to reflect current state.",
        "sources": [],
        "topic_context": [
            "work-queue.md",
        ],
        "instructions": """Update work-queue.md to reflect current state:
1. Mark dreaming/consolidation system as "implementation substantially built
(poc-memory v0.4.0+), pending further consolidation runs" — not 'not started'
2. Add episodic digest pipeline to Done section:
- digest/journal-enrich/digest-links/apply-consolidation (Rust)
- 24 daily + 4 weekly + 1 monthly digests generated for Feb 2026
- consolidation-agents.py + content-promotion-agent.py (Python, active)
3. Add poc-memory link-add command to Done
Only modify the sections that need updating. Preserve the overall structure.""",
    },
]
def build_prompt(task: dict) -> str:
    """Build the Sonnet prompt for a content promotion task.

    Assembles three parts around the fixed instruction template below:
    the task's episodic source digests, the current state of the target
    (and related) files, and an action-specific one-line directive.
    Returns the complete prompt string to send to the model.
    """
    # Gather source material — each digest is rendered under a "## Source:"
    # banner so the model can attribute claims to a specific digest.
    source_content = ""
    for src in task["sources"]:
        content = read_digest(src)
        if content:
            source_content += f"\n{'='*60}\n## Source: {src}\n\n{content}\n"
    # Gather target context — the existing file(s) the output must match
    # in tone and format.
    context_content = ""
    for ctx_file in task["topic_context"]:
        path = MEMORY_DIR / ctx_file
        content = read_file(path)
        if content:
            # Truncate very long files: keep head + tail, elide the middle
            # to bound prompt size.
            if len(content) > 8000:
                content = content[:4000] + "\n\n[... truncated ...]\n\n" + content[-4000:]
            context_content += f"\n{'='*60}\n## Existing file: {ctx_file}\n\n{content}\n"
    # Translate the task's action into an explicit output directive so the
    # model emits only what run_task() expects to write.
    action = task["action"]
    if action == "create":
        action_desc = f"Create a NEW file called {task['target']}."
    elif action == "append_section":
        action_desc = f"Generate a NEW section to APPEND to {task['target']}. Output ONLY the new section content (starting with ##), NOT the entire file."
    elif action == "update":
        action_desc = f"Generate the UPDATED version of the relevant sections of {task['target']}. Output ONLY the changed sections."
    else:
        # Unknown action — fall back to a generic directive rather than fail.
        action_desc = f"Generate content for {task['target']}."
    return f"""You are a memory system content agent. Your job is to promote observations
from episodic digests into semantic topic files.
TASK: {task['description']}
ACTION: {action_desc}
INSTRUCTIONS:
{task['instructions']}
SOURCE MATERIAL (episodic digests the raw observations):
{source_content}
EXISTING CONTEXT (current state of target/related files):
{context_content}
RULES:
- Output ONLY the markdown content to write. No explanations, no preamble.
- Match the tone and format of existing content in the target file.
- Be factual only include what the source material supports.
- Date everything that has a date.
- Keep it concise. Topic files are reference material, not narratives.
- Do NOT include markdown code fences around your output.
"""
def run_task(task: dict, do_apply: bool) -> dict:
    """Run a single content promotion task.

    Builds the prompt, calls Sonnet, then either previews the generated
    content (dry run) or applies it: "create"/"append_section" write to
    the store via ``poc-memory write``; "update" saves a proposal for
    manual review.

    Returns a result dict whose "status" is one of "dry_run", "applied",
    "proposed", or "error".
    """
    result = {
        "id": task["id"],
        "name": task["name"],
        "target": task["target"],
        "action": task["action"],
        "status": "pending",
    }
    print(f"\n{'='*60}")
    print(f"Task {task['id']}: {task['name']}")
    print(f"{'='*60}")
    # Build and send prompt
    prompt = build_prompt(task)
    print(f" Prompt: {len(prompt):,} chars")
    print(f" Sources: {', '.join(task['sources']) or '(none)'}")
    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f" {response}")
        result["status"] = "error"
        result["error"] = response
        return result
    # Clean up response: strip any markdown fences the model added.
    content = response.strip()
    content = re.sub(r'^```(?:markdown)?\s*\n?', '', content)
    content = re.sub(r'\n?```\s*$', '', content)
    result["content"] = content
    result["content_lines"] = len(content.split('\n'))
    if not do_apply:
        print(f"\n --- Preview ({result['content_lines']} lines) ---")
        preview = content[:1500]
        if len(content) > 1500:
            preview += f"\n ... ({len(content) - 1500} more chars)"
        print(f"{preview}")
        result["status"] = "dry_run"
        return result
    # Apply the content — write directly to the store
    target = task["target"]
    if task["action"] == "create":
        proc = subprocess.run(
            ["poc-memory", "write", target],
            input=content, capture_output=True, text=True, timeout=30
        )
        # BUGFIX: a failed store write used to be reported as "applied".
        if proc.returncode != 0:
            print(f" ! Store write failed for {target}: {proc.stderr.strip()}")
            result["status"] = "error"
            result["error"] = proc.stderr.strip()
            return result
        print(f" + Created in store: {target} ({result['content_lines']} lines)")
        if proc.stdout.strip():
            print(f" {proc.stdout.strip()}")
        result["status"] = "applied"
    elif task["action"] == "append_section":
        # Extract section key from content (## header → slug)
        header_match = re.match(r'^## (.+)', content)
        if header_match:
            slug = re.sub(r'[^a-z0-9-]', '',
                          header_match.group(1).strip().lower().replace(' ', '-'))
            key = f"{target}#{slug}"
        else:
            key = target
        proc = subprocess.run(
            ["poc-memory", "write", key],
            input=content, capture_output=True, text=True, timeout=30
        )
        # BUGFIX: same as "create" — surface write failures instead of
        # claiming success.
        if proc.returncode != 0:
            print(f" ! Store write failed for {key}: {proc.stderr.strip()}")
            result["status"] = "error"
            result["error"] = proc.stderr.strip()
            return result
        print(f" + Appended to store: {key} ({result['content_lines']} lines)")
        if proc.stdout.strip():
            print(f" {proc.stdout.strip()}")
        result["status"] = "applied"
    elif task["action"] == "update":
        # For updates, save proposed changes for review
        output_path = AGENT_RESULTS_DIR / f"promotion-{target}-{datetime.now().strftime('%Y%m%dT%H%M%S')}.md"
        output_path.write_text(f"# Proposed update for {target}\n\n{content}\n")
        print(f" ~ Saved proposed update: {output_path}")
        result["status"] = "proposed"
    return result
def main():
    """CLI entry point for the content promotion agent.

    Flags: --apply to actually write content (default is dry run), and
    --task N (or --task=N, previously unsupported) to run a single task.
    A bad --task value now produces a clean error instead of a raw
    ValueError or a silently ignored flag.
    """
    do_apply = "--apply" in sys.argv
    task_filter = None
    for arg in sys.argv[1:]:
        if arg.startswith("--task"):
            if "=" in arg:
                value = arg.split("=", 1)[1]
            else:
                idx = sys.argv.index(arg)
                value = sys.argv[idx + 1] if idx + 1 < len(sys.argv) else ""
            try:
                task_filter = int(value)
            except ValueError:
                print(f"Invalid --task value: {value!r}")
                sys.exit(1)
    # Filter tasks
    tasks = TASKS
    if task_filter:
        tasks = [t for t in tasks if t["id"] == task_filter]
        if not tasks:
            print(f"No task with id {task_filter}")
            sys.exit(1)
    print(f"Content Promotion Agent — {len(tasks)} tasks")
    if not do_apply:
        print("DRY RUN — use --apply to write content")
    results = []
    for task in tasks:
        result = run_task(task, do_apply)
        results.append(result)
    # Summary
    print(f"\n{'='*60}")
    print("Summary:")
    for r in results:
        print(f" {r['id']}. {r['name']}: {r['status']}")
        if r.get('content_lines'):
            print(f" ({r['content_lines']} lines)")
    print(f"{'='*60}")
    # Save results
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    results_path = AGENT_RESULTS_DIR / f"promotion-results-{timestamp}.json"
    with open(results_path, "w") as f:
        json.dump(results, f, indent=2, default=str)
    print(f"Results saved: {results_path}")


if __name__ == "__main__":
    main()

View file

@ -1,27 +0,0 @@
#!/bin/bash
# Daily memory metrics check — runs from cron, notifies if attention needed
#
# Cron entry (add with crontab -e):
# 0 9 * * * /home/kent/poc/memory/scripts/daily-check.sh

# Abort on errors, unset variables, and pipeline failures.
# NOTE(review): if `poc-memory daily-check` itself exits non-zero, set -e
# aborts the script before anything is logged or sent — confirm that is
# the intended behavior.
set -euo pipefail

# Capture the report (stderr folded into stdout so failures are visible).
REPORT=$(poc-memory daily-check 2>&1)

# Always log
echo "$(date -Iseconds) $REPORT" >> ~/.claude/memory/daily-check.log

# Notify if attention needed
if echo "$REPORT" | grep -q "needs attention"; then
    # Send via telegram (only if the sender script is present and executable)
    if [ -x ~/.claude/telegram/send.sh ]; then
        ~/.claude/telegram/send.sh "Memory daily check:
$REPORT"
    fi
    # Also leave a notification file for the idle timer
    NOTIF_DIR=~/.claude/notifications
    mkdir -p "$NOTIF_DIR"
    echo "$(date -Iseconds) Memory needs consolidation — run poc-memory consolidate-session" \
        >> "$NOTIF_DIR/memory"
fi

View file

@ -1,333 +0,0 @@
#!/usr/bin/env python3
"""fact-mine.py — extract atomic factual claims from conversation transcripts.
Phase 1 of the fact-mining pipeline (see design/fact-mining-pipeline.md).
Usage:
fact-mine.py <jsonl_path> # mine one transcript
fact-mine.py --batch <directory> # mine all .jsonl in directory
fact-mine.py --dry-run <jsonl_path> # show chunks, don't call model
Output: JSON array of facts to stdout.
Each fact:
{
"claim": "bch2_trans_begin() sets up the transaction restart point",
"domain": "bcachefs/transaction",
"confidence": "stated",
"speaker": "Kent",
"source_line": 42,
"source_file": "c685c2a2-...jsonl"
}
"""
import json
import os
import re
import subprocess
import sys
import hashlib
from pathlib import Path
# Rough token estimate: 1 token ≈ 4 chars for English text
CHARS_PER_TOKEN = 4
WINDOW_TOKENS = 2000
OVERLAP_TOKENS = 200
WINDOW_CHARS = WINDOW_TOKENS * CHARS_PER_TOKEN
OVERLAP_CHARS = OVERLAP_TOKENS * CHARS_PER_TOKEN
EXTRACTION_PROMPT = """Extract atomic factual claims from this conversation excerpt.
Each claim should be:
- A single verifiable statement
- Specific enough to be useful in isolation
- Tagged with domain (e.g., bcachefs/btree, bcachefs/alloc, bcachefs/journal,
bcachefs/ec, bcachefs/reconcile, rust/idioms, workflow/preferences,
linux/kernel, memory/design, identity/personal)
- Tagged with confidence: "stated" (explicitly said), "implied" (logically follows),
or "speculative" (hypothesis, not confirmed)
- Include which speaker said it (Kent, PoC/ProofOfConcept, or Unknown)
Do NOT extract:
- Opinions or subjective assessments
- Conversational filler or greetings
- Things that are obviously common knowledge
- Restatements of the same fact (pick the clearest version)
- System messages, tool outputs, or error logs (extract what was LEARNED from them)
- Anything about the conversation itself ("Kent and PoC discussed...")
Output as a JSON array. Each element:
{
"claim": "the exact factual statement",
"domain": "category/subcategory",
"confidence": "stated|implied|speculative",
"speaker": "Kent|PoC|Unknown"
}
If the excerpt contains no extractable facts, output an empty array: []
--- CONVERSATION EXCERPT ---
"""
def extract_conversation(jsonl_path: str) -> list[dict]:
    """Pull user/assistant text messages out of a JSONL transcript.

    Returns a list of {line, role, text, timestamp} dicts, with role
    "Kent" for user turns and "PoC" for assistant turns.  Tool blocks,
    system reminders, unparsable lines, and very short messages
    (< 20 chars, likely acknowledgments) are all dropped.
    """
    messages = []
    with open(jsonl_path) as f:
        for lineno, raw in enumerate(f, 1):
            try:
                obj = json.loads(raw)
            except json.JSONDecodeError:
                continue
            msg_type = obj.get("type", "")
            if msg_type not in ("user", "assistant"):
                continue
            timestamp = obj.get("timestamp", "")
            payload = obj.get("message", obj)
            content = payload.get("content")
            if isinstance(content, str):
                text = content
            elif isinstance(content, list):
                # Keep only text blocks — tool_use, tool_result and thinking
                # blocks are skipped, as is any text carrying a system
                # reminder tag.
                pieces = []
                for block in content:
                    if isinstance(block, str):
                        pieces.append(block)
                    elif isinstance(block, dict) and block.get("type") == "text":
                        t = block.get("text", "")
                        if "<system-reminder>" not in t:
                            pieces.append(t)
                text = "\n".join(pieces)
            else:
                continue
            text = text.strip()
            # Empty after stripping, or too short to carry a fact.
            if len(text) < 20:
                continue
            messages.append({
                "line": lineno,
                "role": "Kent" if msg_type == "user" else "PoC",
                "text": text,
                "timestamp": timestamp,
            })
    return messages
def format_for_extraction(messages: list[dict]) -> str:
    """Render messages as "[Role timestamp] text" paragraphs for chunking."""
    rendered = []
    for msg in messages:
        body = msg["text"]
        # Cap very long individual messages (tool outputs, code dumps).
        if len(body) > 3000:
            body = body[:2800] + "\n[...truncated...]"
        stamp = msg["timestamp"][:19] if msg["timestamp"] else ""
        tag = f"[{msg['role']} {stamp}]" if stamp else f"[{msg['role']}]"
        rendered.append(f"{tag} {body}")
    return "\n\n".join(rendered)
def chunk_text(text: str, window_chars=None, overlap_chars=None) -> list[tuple[int, str]]:
    """Split text into overlapping windows.

    Generalized: window/overlap sizes (in characters) may now be passed
    explicitly; they default to the module-level WINDOW_CHARS /
    OVERLAP_CHARS, so existing callers are unaffected.

    Returns a list of (start_char_offset, chunk_text) tuples.  A window
    that is not the last one is trimmed back to the last paragraph
    boundary ("\\n\\n") when that boundary lies in its second half.
    """
    if window_chars is None:
        window_chars = WINDOW_CHARS
    if overlap_chars is None:
        overlap_chars = OVERLAP_CHARS
    chunks = []
    start = 0
    while start < len(text):
        end = start + window_chars
        chunk = text[start:end]
        # Try to break at a paragraph boundary, but only if that keeps at
        # least half a window of content.
        if end < len(text):
            last_para = chunk.rfind("\n\n")
            if last_para > window_chars // 2:
                chunk = chunk[:last_para]
                end = start + last_para
        chunks.append((start, chunk))
        start = end - overlap_chars
        if start <= chunks[-1][0]:
            # Avoid an infinite loop when the overlap swallows all progress.
            start = end
    return chunks
def call_haiku(prompt: str, timeout_secs: int = 60) -> str:
    """Call Haiku via the claude CLI, feeding the prompt on stdin.

    Returns the model's stdout (stripped).  On timeout or any other
    failure it logs to stderr and returns "[]", so callers can always
    parse the result as an (empty) fact list.
    """
    tmp = Path(f"/tmp/fact-mine-{os.getpid()}.txt")
    tmp.write_text(prompt)
    try:
        env = os.environ.copy()
        # Avoid nested-Claude detection inside the CLI.
        env.pop("CLAUDECODE", None)
        # BUGFIX: the previous version passed stdin=open(tmp) and never
        # closed the handle, leaking one file descriptor per call.
        with open(tmp) as prompt_fh:
            result = subprocess.run(
                ["claude", "-p", "--model", "haiku", "--tools", ""],
                stdin=prompt_fh,
                capture_output=True,
                text=True,
                timeout=timeout_secs,
                env=env,
            )
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        print(f" [timeout after {timeout_secs}s]", file=sys.stderr)
        return "[]"
    except Exception as e:
        print(f" [error: {e}]", file=sys.stderr)
        return "[]"
    finally:
        tmp.unlink(missing_ok=True)
def parse_facts(response: str) -> list[dict]:
    """Parse a JSON array of facts out of a model response.

    Tolerates markdown code fences and surrounding prose.  Returns []
    whenever no well-formed JSON array can be located in the text.
    """
    cleaned = response.strip()
    # The model may wrap its output in a ``` fence — drop the fence lines.
    if cleaned.startswith("```"):
        cleaned = "\n".join(
            line for line in cleaned.split("\n") if not line.startswith("```")
        )
    # Locate the outermost [...] span and try to decode it.
    lo = cleaned.find("[")
    hi = cleaned.rfind("]")
    if lo == -1 or hi == -1:
        return []
    try:
        parsed = json.loads(cleaned[lo:hi + 1])
    except json.JSONDecodeError:
        return []
    return parsed if isinstance(parsed, list) else []
def mine_transcript(jsonl_path: str, dry_run: bool = False) -> list[dict]:
    """Mine a single transcript for atomic facts.

    Extracts the dialogue, chunks it, sends each chunk to Haiku, tags
    every returned fact with its source transcript/chunk/offset, and
    deduplicates by (case-insensitive) claim text.  With dry_run=True the
    chunks are printed and no model calls are made.
    """
    filename = os.path.basename(jsonl_path)
    # BUGFIX: this previously printed a literal "(unknown)" placeholder
    # (an f-string with no placeholder) instead of the transcript name.
    print(f"Mining: {filename}", file=sys.stderr)
    messages = extract_conversation(jsonl_path)
    if not messages:
        print(f" No messages found", file=sys.stderr)
        return []
    print(f" {len(messages)} messages extracted", file=sys.stderr)
    text = format_for_extraction(messages)
    chunks = chunk_text(text)
    print(f" {len(chunks)} chunks ({len(text)} chars)", file=sys.stderr)
    if dry_run:
        # Show the chunking without spending model calls.
        for i, (offset, chunk) in enumerate(chunks):
            print(f"\n--- Chunk {i+1} (offset {offset}, {len(chunk)} chars) ---")
            print(chunk[:500])
            if len(chunk) > 500:
                print(f" ... ({len(chunk) - 500} more chars)")
        return []
    all_facts = []
    for i, (offset, chunk) in enumerate(chunks):
        print(f" Chunk {i+1}/{len(chunks)} ({len(chunk)} chars)...",
              file=sys.stderr, end="", flush=True)
        prompt = EXTRACTION_PROMPT + chunk
        response = call_haiku(prompt)
        facts = parse_facts(response)
        # Annotate with source info so facts stay traceable in isolation.
        for fact in facts:
            fact["source_file"] = filename
            fact["source_chunk"] = i + 1
            fact["source_offset"] = offset
        all_facts.extend(facts)
        print(f" {len(facts)} facts", file=sys.stderr)
    # Deduplicate by claim text (case-insensitive); first occurrence wins.
    seen = set()
    unique_facts = []
    for fact in all_facts:
        claim_key = fact.get("claim", "").lower().strip()
        if claim_key and claim_key not in seen:
            seen.add(claim_key)
            unique_facts.append(fact)
    print(f" Total: {len(unique_facts)} unique facts "
          f"({len(all_facts) - len(unique_facts)} duplicates removed)",
          file=sys.stderr)
    return unique_facts
def main():
    """CLI entry point: mine one transcript, or every .jsonl in a directory.

    --batch treats PATH as a directory of transcripts; --dry-run shows
    the chunking without model calls; --output writes the combined JSON
    fact list to a file instead of stdout; --min-messages skips thin
    transcripts.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Extract atomic facts from conversations")
    parser.add_argument("path", help="JSONL file or directory (with --batch)")
    parser.add_argument("--batch", action="store_true",
                        help="Process all .jsonl files in directory")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show chunks without calling model")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
    parser.add_argument("--min-messages", type=int, default=10,
                        help="Skip transcripts with fewer messages (default: 10)")
    args = parser.parse_args()
    if args.batch:
        jsonl_dir = Path(args.path)
        if not jsonl_dir.is_dir():
            print(f"Not a directory: {args.path}", file=sys.stderr)
            sys.exit(1)
        files = sorted(jsonl_dir.glob("*.jsonl"))
        print(f"Found {len(files)} transcripts", file=sys.stderr)
    else:
        files = [Path(args.path)]
    all_facts = []
    for f in files:
        # Quick check: skip tiny files before spending model calls.
        # NOTE(review): extract_conversation() runs again inside
        # mine_transcript(), so qualifying transcripts are parsed twice.
        messages = extract_conversation(str(f))
        if len(messages) < args.min_messages:
            print(f"Skipping {f.name} ({len(messages)} messages < {args.min_messages})",
                  file=sys.stderr)
            continue
        facts = mine_transcript(str(f), dry_run=args.dry_run)
        all_facts.extend(facts)
    if not args.dry_run:
        output = json.dumps(all_facts, indent=2)
        if args.output:
            Path(args.output).write_text(output)
            print(f"\nWrote {len(all_facts)} facts to {args.output}", file=sys.stderr)
        else:
            print(output)
    # Summary always goes to stderr (stdout may carry the JSON payload).
    print(f"\nTotal: {len(all_facts)} facts from {len(files)} transcripts",
          file=sys.stderr)


if __name__ == "__main__":
    main()

View file

@ -1 +0,0 @@
knowledge_agents.py

View file

@ -1 +0,0 @@
knowledge_loop.py

View file

@ -1,609 +0,0 @@
#!/usr/bin/env python3
"""knowledge-agents.py — run the layer-2 knowledge production agents.
Four agents that produce new knowledge from the memory graph:
1. Observation mine raw conversations for unextracted knowledge
2. Extractor find patterns in node clusters, write principle nodes
3. Connector find cross-domain structural connections
4. Challenger stress-test existing knowledge nodes
Usage:
knowledge-agents.py # run all four
knowledge-agents.py observation [N] # mine N conversation fragments (default 5)
knowledge-agents.py extractor [N] # extract from N clusters (default 5)
knowledge-agents.py connector [N] # connect N cross-community pairs (default 5)
knowledge-agents.py challenger [N] # challenge N old nodes (default 5)
Output goes to ~/.claude/memory/agent-results/knowledge-{agent}-{timestamp}.md
"""
import json
import os
import random
import re
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Run a prompt through call-sonnet.sh and return its stripped stdout.

    Errors never propagate: a timeout or any other failure comes back as
    an "Error: ..." string.
    """
    clean_env = dict(os.environ)
    clean_env.pop("CLAUDECODE", None)  # wrapper must not self-detect Claude Code

    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False)
    try:
        handle.write(prompt)
    finally:
        handle.close()
    prompt_path = handle.name

    try:
        completed = subprocess.run(
            [str(SCRIPTS_DIR / "call-sonnet.sh"), prompt_path],
            capture_output=True, text=True, timeout=timeout, env=clean_env,
        )
        return completed.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_path)
def poc_memory(*args, timeout=30) -> str:
    """Invoke `poc-memory <args...>` and return stripped stdout ("" on any failure)."""
    cmd = ["poc-memory", *args]
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True,
                                   timeout=timeout)
    except Exception:
        # Best-effort helper: a missing binary or timeout just yields "".
        return ""
    return completed.stdout.strip()
def render(key: str) -> str:
    """Render a single store node by key via poc-memory."""
    rendered = poc_memory("render", key)
    return rendered
def list_keys() -> list[str]:
    """Return every node key in the store, one per poc-memory output line."""
    raw = poc_memory("list-keys")
    keys = []
    for line in raw.split('\n'):
        candidate = line.strip()
        if candidate:
            keys.append(candidate)
    return keys
def get_graph_topology() -> str:
    """Build the {{TOPOLOGY}} template text: store status + first 80 graph lines."""
    sections = []
    status = poc_memory("status")
    if status:
        sections.append(status)
    graph = poc_memory("graph")
    if graph:
        # Cap the graph dump so the prompt stays a reasonable size.
        sections.append('\n'.join(graph.split('\n')[:80]))
    return '\n'.join(sections)
def load_spectral_embedding() -> dict:
    """Load the spectral embedding from disk.

    Returns {} when the file is missing — and now also when it is
    unreadable or contains invalid JSON, so a stale or corrupt embedding
    file cannot abort an agent run (matching the best-effort style of
    the other helpers in this module).
    """
    path = MEMORY_DIR / "spectral-embedding.json"
    if not path.exists():
        return {}
    try:
        with open(path) as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        return {}
def spectral_distance(embedding: dict, key_a: str, key_b: str) -> float:
    """Cosine distance between two nodes in spectral space.

    Returns float('inf') when either node has no coordinates or a
    zero-length vector (distance undefined in both cases).
    """
    coords = embedding.get("coords", {})
    va, vb = coords.get(key_a), coords.get(key_b)
    if not va or not vb:
        return float('inf')
    dot = sum(a * b for a, b in zip(va, vb))
    norm_a = sum(a * a for a in va) ** 0.5
    norm_b = sum(b * b for b in vb) ** 0.5
    if norm_a == 0 or norm_b == 0:
        return float('inf')
    return 1.0 - dot / (norm_a * norm_b)
# ---------------------------------------------------------------------------
# Observation extractor: mine raw conversations
# ---------------------------------------------------------------------------
# Claude Code session transcripts for the bcachefs-tools project; each
# session is one .jsonl file named by its session id.
SESSIONS_DIR = Path.home() / ".claude" / "projects" / "-home-kent-bcachefs-tools"
def _strip_system_tags(text: str) -> str:
"""Remove <system-reminder> blocks from text."""
return re.sub(r'<system-reminder>.*?</system-reminder>', '', text,
flags=re.DOTALL).strip()
def extract_conversation_text(jsonl_path: Path, max_chars: int = 8000) -> str:
    """Extract human-readable dialogue from a conversation JSONL.

    Strips tool use, progress messages, queue operations, and system
    machinery.  Keeps only: Kent's messages (userType=external) and
    assistant text blocks (no tool_use).  Collection stops once roughly
    ``max_chars`` characters have been gathered.
    """
    fragments = []
    total = 0
    with open(jsonl_path) as f:
        for line in f:
            # BUGFIX: a corrupt/truncated line used to raise JSONDecodeError
            # and abort the whole extraction; skip it instead (matching the
            # fact-mine extractor's behavior).
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            msg_type = obj.get("type", "")
            # Only Kent's actual messages, not queue operations or agent tasks
            if msg_type == "user" and obj.get("userType") == "external":
                msg = obj.get("message", {})
                content = msg.get("content", "")
                if isinstance(content, str):
                    text = _strip_system_tags(content)
                    if text.startswith("[Request interrupted"):
                        continue
                    if text and len(text) > 5:
                        fragments.append(f"**Kent:** {text}")
                        total += len(text)
                elif isinstance(content, list):
                    for block in content:
                        if isinstance(block, dict) and block.get("type") == "text":
                            text = _strip_system_tags(block["text"])
                            if text and len(text) > 5:
                                fragments.append(f"**Kent:** {text}")
                                total += len(text)
            elif msg_type == "assistant":
                msg = obj.get("message", {})
                content = msg.get("content", "")
                if isinstance(content, str):
                    text = _strip_system_tags(content)
                    if text and len(text) > 10:
                        fragments.append(f"**PoC:** {text}")
                        total += len(text)
                elif isinstance(content, list):
                    for block in content:
                        if isinstance(block, dict) and block.get("type") == "text":
                            text = _strip_system_tags(block["text"])
                            if text and len(text) > 10:
                                fragments.append(f"**PoC:** {text}")
                                total += len(text)
                        # tool_use blocks are skipped entirely
            if total > max_chars:
                break
    return "\n\n".join(fragments)
def count_dialogue_turns(jsonl_path: Path) -> int:
    """Count short external-user messages (proxy for back-and-forth dialogue).

    Long messages (>500 chars) are usually plan pastes or system prompts;
    short ones are real conversation turns.  Interrupt markers, task
    dispatches, and (BUGFIX) malformed JSONL lines are ignored instead of
    raising.
    """
    count = 0
    with open(jsonl_path) as f:
        for line in f:
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                # Tolerate corrupt/truncated transcript lines.
                continue
            if obj.get("type") != "user" or obj.get("userType") != "external":
                continue
            msg = obj.get("message", {})
            content = msg.get("content", "")
            if isinstance(content, str):
                text = content.strip()
            elif isinstance(content, list):
                text = " ".join(
                    b.get("text", "") for b in content
                    if isinstance(b, dict) and b.get("type") == "text"
                ).strip()
            else:
                text = ""
            # Short messages = real dialogue turns.
            # Skip interrupts and command-like messages.
            if (5 < len(text) < 500
                    and not text.startswith("[Request interrupted")
                    and not text.startswith("Implement the following")):
                count += 1
    return count
def select_conversation_fragments(n: int = 5) -> list[tuple[str, str]]:
    """Pick up to n (session_id, dialogue_text) fragments for the observation miner.

    Prefers sessions rich in back-and-forth dialogue (many short user
    messages) over single-prompt implementation sessions, with a shuffle
    of the top candidates so repeated runs get some variety.
    """
    if not SESSIONS_DIR.exists():
        return []
    # Only sessions with real content (>50 KB on disk) are worth scoring.
    candidates = [p for p in SESSIONS_DIR.glob("*.jsonl")
                  if p.stat().st_size > 50_000]
    # Rank by dialogue richness; require at least 10 short exchanges.
    scored = [(count_dialogue_turns(p), p) for p in candidates]
    scored = [(turns, p) for turns, p in scored if turns >= 10]
    scored.sort(key=lambda item: -item[0])
    # Shuffle the richest candidates for variety, then harvest until we
    # have n usable fragments.
    pool = scored[:n * 3]
    random.shuffle(pool)
    fragments = []
    for _, path in pool[:n * 2]:
        text = extract_conversation_text(path)
        if text and len(text) > 500:
            fragments.append((path.stem, text))
            if len(fragments) >= n:
                break
    return fragments
def run_observation_extractor(n: int = 5) -> str:
    """Mine up to n conversation fragments for unextracted knowledge via Sonnet.

    Fills the observation-extractor prompt template with the current graph
    topology and one session's dialogue per call, and joins the responses
    with "---" separators.
    """
    template = (PROMPTS_DIR / "observation-extractor.md").read_text()
    topology = get_graph_topology()
    fragments = select_conversation_fragments(n)
    sections = []
    for i, (session_id, text) in enumerate(fragments):
        print(f" Observation extractor {i+1}/{len(fragments)}: "
              f"session {session_id[:12]}... ({len(text)} chars)")
        filled = template.replace("{{TOPOLOGY}}", topology)
        filled = filled.replace("{{CONVERSATIONS}}",
                                f"### Session {session_id}\n\n{text}")
        sections.append(f"## Session: {session_id}\n\n{call_sonnet(filled)}")
    return "\n\n---\n\n".join(sections)
# ---------------------------------------------------------------------------
# Extractor: find patterns in clusters
# ---------------------------------------------------------------------------
def select_extractor_clusters(n: int = 5) -> list[list[str]]:
    """Select node clusters for the extractor agent.

    Uses spectral embedding to find groups of nearby semantic nodes
    (not journal entries) that might share an unextracted pattern.

    Greedy selection: seed with the first unused semantic key, then
    grab its nearest neighbors in spectral space.
    """
    embedding = load_spectral_embedding()
    coords = embedding.get("coords", {})
    # Filter to semantic nodes only (skip journal, system files)
    semantic_keys = [k for k in coords.keys()
                     if not k.startswith("journal.md#")
                     and k not in ("journal.md", "MEMORY.md",
                                   "where-am-i.md", "work-queue.md")]
    if not semantic_keys:
        return []
    # Simple greedy clustering: pick a seed, grab its N nearest neighbors.
    # (A previous revision fetched `poc-memory graph` here for degree-based
    # seeding but never used the result; the dead subprocess call is gone.)
    used = set()
    clusters = []
    cluster_size = 5
    for _ in range(n):
        # Seed with the first unused key (deterministic, insertion order).
        available = [k for k in semantic_keys if k not in used]
        if len(available) < cluster_size:
            break
        seed = available[0]
        # Find nearest neighbors in spectral space
        distances = []
        for k in available:
            if k != seed:
                d = spectral_distance(embedding, seed, k)
                if d < float('inf'):
                    distances.append((d, k))
        distances.sort()
        cluster = [seed] + [k for _, k in distances[:cluster_size - 1]]
        for k in cluster:
            used.add(k)
        clusters.append(cluster)
    return clusters
def run_extractor(n: int = 5) -> str:
    """Run the extractor agent on N clusters.

    For each cluster, renders every member node into the prompt and asks
    Sonnet for shared patterns; clusters whose nodes all fail to render
    are skipped entirely.
    """
    template = (PROMPTS_DIR / "extractor.md").read_text()
    topology = get_graph_topology()
    clusters = select_extractor_clusters(n)
    results = []
    for i, cluster in enumerate(clusters):
        print(f" Extractor cluster {i+1}/{len(clusters)}: {len(cluster)} nodes")
        # Render all nodes in the cluster
        node_texts = []
        for key in cluster:
            content = render(key)
            if content:
                node_texts.append(f"### {key}\n{content}")
        if not node_texts:
            continue
        nodes_str = "\n\n".join(node_texts)
        prompt = template.replace("{{TOPOLOGY}}", topology)
        prompt = prompt.replace("{{NODES}}", nodes_str)
        response = call_sonnet(prompt)
        results.append(f"## Cluster {i+1}: {', '.join(cluster[:3])}...\n\n"
                       f"**Source nodes:** {cluster}\n\n{response}")
    return "\n\n---\n\n".join(results)
# ---------------------------------------------------------------------------
# Connector: cross-domain links
# ---------------------------------------------------------------------------
def get_neighbor_set(key: str) -> set[str]:
    """Return the neighbor keys of `key` as reported by `poc-memory neighbors`.

    Each non-blank output line's first whitespace-separated token is
    taken as a neighbor key.
    """
    neighbors = set()
    for raw_line in poc_memory("neighbors", key).split('\n'):
        stripped = raw_line.strip()
        if stripped:
            neighbors.add(stripped.split()[0])
    return neighbors
def select_connector_pairs(n: int = 5) -> list[tuple[list[str], list[str]]]:
    """Select cross-domain node pairs for the connector agent.

    Finds nodes that are close in spectral space (structurally similar)
    but unlinked in the graph (different domains). These are non-obvious
    structural analogies — the most valuable connections to surface.
    """
    embedding = load_spectral_embedding()
    coords = embedding.get("coords", {})
    # Filter to semantic nodes (skip journal, system, daily/weekly)
    skip_prefixes = ("journal.md#", "daily-", "weekly-", "monthly-",
                     "all-sessions")
    skip_exact = {"journal.md", "MEMORY.md", "where-am-i.md",
                  "work-queue.md", "work-state"}
    semantic = [k for k in coords
                if not any(k.startswith(p) for p in skip_prefixes)
                and k not in skip_exact]
    if len(semantic) < 10:
        return []
    # Sample up to 300 nodes for tractable pairwise comparison
    random.shuffle(semantic)
    sample = semantic[:300]
    # Compute all pairwise spectral distances
    candidates = []
    for i in range(len(sample)):
        for j in range(i + 1, len(sample)):
            # Skip same-file pairs (same domain, boring)
            # "file prefix" = part before '#' for section keys, else the
            # filename without its extension.
            pref_a = sample[i].split('#')[0] if '#' in sample[i] else sample[i].rsplit('.', 1)[0]
            pref_b = sample[j].split('#')[0] if '#' in sample[j] else sample[j].rsplit('.', 1)[0]
            if pref_a == pref_b:
                continue
            d = spectral_distance(embedding, sample[i], sample[j])
            if d < float('inf'):
                candidates.append((d, sample[i], sample[j]))
    candidates.sort()
    # Take spectrally-close cross-domain pairs that are UNLINKED in the graph
    pairs = []
    used = set()
    for d, ka, kb in candidates:
        # Each node appears in at most one pair per run.
        if ka in used or kb in used:
            continue
        # Check if they're already linked
        neighbors_a = get_neighbor_set(ka)
        if kb in neighbors_a:
            continue
        used.add(ka)
        used.add(kb)
        # Gather small neighborhoods for context
        # NOTE(review): neighbor sets are unordered, so which 2 neighbors
        # get included is not deterministic.
        a_neighbors = [k for k in list(neighbors_a)[:2] if k in coords]
        b_neighbors_set = get_neighbor_set(kb)
        b_neighbors = [k for k in list(b_neighbors_set)[:2] if k in coords]
        a_nodes = [ka] + a_neighbors
        b_nodes = [kb] + b_neighbors
        pairs.append((a_nodes, b_nodes))
        if len(pairs) >= n:
            break
    return pairs
def run_connector(n: int = 5) -> str:
    """Run the connector agent on N cross-community pairs.

    Renders both neighborhoods of each pair into the connector prompt;
    pairs where either side renders to nothing are skipped.
    """
    template = (PROMPTS_DIR / "connector.md").read_text()
    topology = get_graph_topology()
    pairs = select_connector_pairs(n)
    results = []
    for i, (a_nodes, b_nodes) in enumerate(pairs):
        print(f" Connector pair {i+1}/{len(pairs)}")
        a_texts = []
        for key in a_nodes:
            content = render(key)
            if content:
                a_texts.append(f"### {key}\n{content}")
        b_texts = []
        for key in b_nodes:
            content = render(key)
            if content:
                b_texts.append(f"### {key}\n{content}")
        if not a_texts or not b_texts:
            continue
        prompt = template.replace("{{TOPOLOGY}}", topology)
        prompt = prompt.replace("{{COMMUNITY_A}}", "\n\n".join(a_texts))
        prompt = prompt.replace("{{COMMUNITY_B}}", "\n\n".join(b_texts))
        response = call_sonnet(prompt)
        results.append(f"## Pair {i+1}: {a_nodes[0]} <-> {b_nodes[0]}\n\n"
                       f"{response}")
    return "\n\n---\n\n".join(results)
# ---------------------------------------------------------------------------
# Challenger: stress-test existing knowledge
# ---------------------------------------------------------------------------
def select_challenger_targets(n: int = 5) -> list[str]:
    """Select nodes for the challenger agent.

    Prefers: older nodes, high-degree nodes (influential), nodes that
    make claims (skills, self-model, patterns).

    Dedup is order-preserving (claim-making prefixes first, then the
    other semantic nodes), so the selection is deterministic — the
    previous `list(set(...))` made the pick depend on hash ordering.
    """
    keys = list_keys()
    # Filter to knowledge nodes that make claims
    target_prefixes = ("skills", "patterns", "self-model", "code-review",
                       "stuck-toolkit", "memory-architecture",
                       "differentiation", "inner-life")
    candidates = [k for k in keys
                  if any(k.startswith(p) for p in target_prefixes)]
    # Also include old topic nodes (skip journal/periodic/system files)
    semantic = [k for k in keys
                if not k.startswith("journal.md#")
                and not k.startswith("daily-")
                and not k.startswith("weekly-")
                and not k.startswith("monthly-")
                and k not in ("journal.md", "MEMORY.md",
                              "where-am-i.md", "work-queue.md")]
    # Order-preserving dedup (dict preserves insertion order in py3.7+).
    ordered = list(dict.fromkeys(candidates + semantic))
    # For now just take the first N (could sort by age/degree later)
    return ordered[:n]
def run_challenger(n: int = 5) -> str:
    """Run the challenger agent on N target nodes.

    For each target: renders the node, gathers up to 5 neighbor excerpts
    and recent journal entries as context, then asks Sonnet to stress-test
    the target's claims.
    """
    template = (PROMPTS_DIR / "challenger.md").read_text()
    topology = get_graph_topology()
    targets = select_challenger_targets(n)
    results = []
    for i, target_key in enumerate(targets):
        print(f" Challenger target {i+1}/{len(targets)}: {target_key}")
        target_content = render(target_key)
        if not target_content:
            continue
        # Get context: neighbors + recent journal
        neighbors = poc_memory("neighbors", target_key)
        neighbor_keys = [line.strip().split()[0]
                         for line in neighbors.split('\n')
                         if line.strip()][:5]
        context_texts = [f"### {target_key}\n{target_content}"]
        for nk in neighbor_keys:
            nc = render(nk)
            if nc:
                # Only the first 1000 chars of each neighbor — context budget.
                context_texts.append(f"### {nk}\n{nc[:1000]}")
        # Add recent journal entries for contradicting evidence
        try:
            recent = subprocess.run(
                ["poc-journal", "tail", "10"],
                capture_output=True, text=True, timeout=15
            ).stdout.strip()
        except Exception:
            # Journal is optional context; proceed without it.
            recent = ""
        if recent:
            context_texts.append(f"### Recent journal entries\n{recent[:3000]}")
        prompt = template.replace("{{TOPOLOGY}}", topology)
        prompt = prompt.replace("{{TARGETS}}",
                                f"### {target_key}\n{target_content}")
        prompt = prompt.replace("{{CONTEXT}}", "\n\n".join(context_texts))
        response = call_sonnet(prompt)
        results.append(f"## Target: {target_key}\n\n{response}")
    return "\n\n---\n\n".join(results)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """CLI entry point: run one named agent (argv[1]) or all agents.

    Usage: knowledge_agents.py [agent] [n] — `n` (default 5) is the
    number of items each agent processes. Output is written to
    AGENT_RESULTS_DIR as knowledge-<agent>-<timestamp>.md.
    """
    agents = {
        "observation": run_observation_extractor,
        "extractor": run_extractor,
        "connector": run_connector,
        "challenger": run_challenger,
    }
    # No agent named: run the full suite in dict order.
    if len(sys.argv) < 2:
        to_run = list(agents.keys())
    else:
        name = sys.argv[1]
        if name not in agents:
            print(f"Unknown agent: {name}")
            print(f"Available: {', '.join(agents.keys())}")
            sys.exit(1)
        to_run = [name]
    n = int(sys.argv[2]) if len(sys.argv) > 2 else 5
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    for name in to_run:
        print(f"\n=== Running {name} agent (n={n}) ===")
        result = agents[name](n)
        outfile = AGENT_RESULTS_DIR / f"knowledge-{name}-{timestamp}.md"
        outfile.write_text(f"# {name.title()} Agent Results — {timestamp}\n\n"
                           f"{result}\n")
        print(f" Output: {outfile}")
if __name__ == "__main__":
    main()

View file

@ -1,766 +0,0 @@
#!/usr/bin/env python3
"""knowledge-loop.py — fixed-point iteration over the knowledge graph.
Runs observation extractor connector challenger in sequence,
applies results, recomputes spectral embedding, measures convergence.
Convergence is structural, not behavioral:
- Graph metrics (sigma, CC, community partition) stabilize
- Inference depth is tracked; confidence threshold scales with depth
- Rolling window smooths stochastic noise
Usage:
knowledge-loop.py # run until convergence
knowledge-loop.py --max-cycles 10 # cap at 10 cycles
knowledge-loop.py --batch-size 5 # agents process 5 items each
knowledge-loop.py --window 5 # rolling average window
knowledge-loop.py --max-depth 4 # max inference chain length
knowledge-loop.py --dry-run # parse + report, don't apply
"""
import json
import math
import os
import re
import subprocess
import sys
from datetime import datetime
from pathlib import Path
# Filesystem layout: agent output and the inference-depth DB live under
# ~/.claude/memory/agent-results/.
MEMORY_DIR = Path.home() / ".claude" / "memory"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)  # ensure dir at import time
SCRIPTS_DIR = Path(__file__).parent
# JSON map: node key -> inference depth.
DEPTH_DB = AGENT_RESULTS_DIR / "node-depths.json"
# Import the agent runners
sys.path.insert(0, str(SCRIPTS_DIR))
from knowledge_agents import (
run_observation_extractor, run_extractor, run_connector, run_challenger,
load_spectral_embedding, spectral_distance, poc_memory,
)
# ---------------------------------------------------------------------------
# Inference depth tracking
# ---------------------------------------------------------------------------
# Depth assignments by agent type:
# depth 0 = raw observations (journal, conversations)
# depth 1 = observation extractor (facts from conversations)
# depth 2 = pattern extractor (patterns across knowledge nodes)
# depth 3 = connector (cross-domain links between patterns)
# Challenger refines existing nodes — preserves their depth.
# `None` means "inherit depth from the action's target node".
AGENT_BASE_DEPTH = {
    "observation": 1,
    "extractor": 2,
    "connector": 3,
    "challenger": None, # inherits from target
}
def load_depth_db() -> dict[str, int]:
    """Load the inference-depth database; empty dict when none saved yet."""
    if not DEPTH_DB.exists():
        return {}
    return json.loads(DEPTH_DB.read_text())
def save_depth_db(db: dict[str, int]):
    """Persist the inference-depth database as pretty-printed JSON."""
    DEPTH_DB.write_text(json.dumps(db, indent=2))
def get_node_depth(db: dict[str, int], key: str) -> int:
"""Get inference depth for a node. Unknown nodes assumed depth 0."""
return db.get(key, 0)
def compute_action_depth(db: dict[str, int], action: dict,
                         agent: str) -> int:
    """Compute the inference depth for a new action.

    Links carry no depth (-1); refines inherit the target node's depth;
    write_node takes max(source depths) + 1 when sources are listed,
    otherwise falls back to the agent's base depth (default 2).
    """
    kind = action["type"]
    if kind == "link":
        return -1  # links don't have depth
    if kind == "refine":
        return get_node_depth(db, action["key"])
    # write_node: one inference step above the deepest source node.
    sources = action.get("covers", [])
    if sources:
        return 1 + max(get_node_depth(db, key) for key in sources)
    # No source info — use the agent's base depth.
    fallback = AGENT_BASE_DEPTH.get(agent, 2)
    return 2 if fallback is None else fallback
def required_confidence(depth: int, base: float = 0.3) -> float:
    """Confidence threshold that grows with inference depth.

    required(depth) = 1 - (1 - base)^depth, so raw data (depth <= 0)
    needs nothing and each extra inference step compounds the evidence
    required: 0.30, 0.51, 0.66, 0.76, 0.83 for depths 1-5 at the
    default base.
    """
    if depth <= 0:
        return 0.0
    survival = (1.0 - base) ** depth
    return 1.0 - survival
def use_bonus(use_count: int) -> float:
    """Confidence bonus earned from real-world use of a node.

    Interior nodes retrieved during actual work earn empirical
    validation. Saturating curve: use_bonus(n) = 1 - 1/(1 + 0.15*n),
    giving +0.13 at one use, +0.43 at five, +0.60 at ten.
    """
    if use_count > 0:
        return 1.0 - 1.0 / (1.0 + 0.15 * use_count)
    return 0.0
def get_use_counts() -> dict[str, int]:
    """Get use counts for all nodes from the store.

    Parses `poc-memory dump-json`, tolerating both dict- and list-shaped
    node collections; returns {} on any failure (missing binary, bad
    JSON, timeout) so callers degrade gracefully.
    """
    try:
        dump = subprocess.run(
            ["poc-memory", "dump-json"],
            capture_output=True, text=True, timeout=30,
        )
        data = json.loads(dump.stdout)
        counts = {}
        # Accept either a bare list of nodes or {"nodes": ...} wrapping.
        nodes = data if isinstance(data, list) else data.get("nodes", data)
        if isinstance(nodes, dict):
            for key, node in nodes.items():
                if isinstance(node, dict):
                    counts[key] = node.get("uses", 0)
        elif isinstance(nodes, list):
            for node in nodes:
                if isinstance(node, dict):
                    counts[node.get("key", "")] = node.get("uses", 0)
        return counts
    except Exception:
        # Best-effort: use counts are an optional signal.
        return {}
def effective_confidence(base_conf: float, use_count: int) -> float:
    """Base confidence plus the use bonus, clamped to 1.0."""
    boosted = base_conf + use_bonus(use_count)
    return min(1.0, boosted)
# ---------------------------------------------------------------------------
# Action parsing — extract structured actions from agent markdown output
# ---------------------------------------------------------------------------
# Contribution of each confidence level to a cycle's "weighted delta".
CONFIDENCE_WEIGHTS = {"high": 1.0, "medium": 0.6, "low": 0.3}
# Numeric confidence compared against required_confidence(depth).
CONFIDENCE_VALUES = {"high": 0.9, "medium": 0.6, "low": 0.3}
def parse_write_nodes(text: str) -> list[dict]:
    """Parse WRITE_NODE blocks from agent output.

    A block looks like ``WRITE_NODE <key> ... END_NODE`` and may carry
    optional ``CONFIDENCE:`` and ``COVERS:`` metadata lines, which are
    removed from the stored content.
    """
    actions = []
    block_re = r'WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE'
    for match in re.finditer(block_re, text, re.DOTALL):
        key = match.group(1)
        content = match.group(2).strip()
        # Optional CONFIDENCE line; defaults to "medium".
        confidence = "medium"
        conf = re.search(r'CONFIDENCE:\s*(high|medium|low)', content, re.I)
        if conf:
            confidence = conf.group(1).lower()
            content = (content[:conf.start()] + content[conf.end():]).strip()
        # Optional COVERS line: comma-separated source node keys.
        covers = []
        cov = re.search(r'COVERS:\s*(.+)', content)
        if cov:
            covers = [c.strip() for c in cov.group(1).split(',')]
            content = (content[:cov.start()] + content[cov.end():]).strip()
        actions.append({
            "type": "write_node",
            "key": key,
            "content": content,
            "confidence": confidence,
            "covers": covers,
            "weight": CONFIDENCE_WEIGHTS.get(confidence, 0.5),
        })
    return actions
def parse_links(text: str) -> list[dict]:
    """Parse LINK directives (``LINK <source> <target>`` at line start)."""
    link_re = r'^LINK\s+(\S+)\s+(\S+)'
    return [
        {
            "type": "link",
            "source": match.group(1),
            "target": match.group(2),
            "weight": 0.3,  # links are cheap, low weight in delta
        }
        for match in re.finditer(link_re, text, re.MULTILINE)
    ]
def parse_refines(text: str) -> list[dict]:
    """Parse REFINE blocks (``REFINE <key> ... END_REFINE``)."""
    refine_re = r'REFINE\s+(\S+)\s*\n(.*?)END_REFINE'
    actions = []
    for match in re.finditer(refine_re, text, re.DOTALL):
        # Keys sometimes arrive wrapped in markdown bold — strip the stars.
        raw_key = match.group(1)
        actions.append({
            "type": "refine",
            "key": raw_key.strip('*').strip(),
            "content": match.group(2).strip(),
            "weight": 0.7,  # refinements are meaningful
        })
    return actions
def parse_all_actions(text: str) -> list[dict]:
    """Parse every action type (writes, then links, then refines)."""
    return parse_write_nodes(text) + parse_links(text) + parse_refines(text)
def count_no_ops(text: str) -> int:
    """Count non-action verdicts: NO_CONNECTION, AFFIRM, NO_EXTRACTION."""
    verdicts = (r'\bNO_CONNECTION\b', r'\bAFFIRM\b', r'\bNO_EXTRACTION\b')
    return sum(len(re.findall(pattern, text)) for pattern in verdicts)
# ---------------------------------------------------------------------------
# Action application
# ---------------------------------------------------------------------------
def stamp_content(content: str, agent: str, timestamp: str,
                  depth: int) -> str:
    """Prepend provenance (author/created/depth) as an HTML comment line."""
    return (
        f"<!-- author: {agent} | created: {timestamp} "
        f"| depth: {depth} -->\n"
        f"{content}"
    )
def apply_action(action: dict, dry_run: bool = False,
                 agent: str = "unknown", timestamp: str = "",
                 depth: int = 0) -> bool:
    """Apply a single action to the graph. Returns True if applied.

    write_node and refine share identical mechanics (previously two
    duplicated branches): stamp provenance metadata onto the content
    and write it via `poc-memory write`. Links go through
    `poc-memory link-add`; re-adding an existing link counts as
    not-applied so it doesn't inflate the cycle delta. Any subprocess
    failure is reported as not-applied rather than raised.
    """
    if dry_run:
        return True
    kind = action["type"]
    if kind in ("write_node", "refine"):
        try:
            content = stamp_content(action["content"], agent,
                                    timestamp, depth)
            result = subprocess.run(
                ["poc-memory", "write", action["key"]],
                input=content,
                capture_output=True, text=True, timeout=15,
            )
            return result.returncode == 0
        except Exception:
            return False
    if kind == "link":
        try:
            result = subprocess.run(
                ["poc-memory", "link-add", action["source"],
                 action["target"]],
                capture_output=True, text=True, timeout=10,
            )
            if "already exists" in result.stdout:
                return False  # not a new action
            return result.returncode == 0
        except Exception:
            return False
    return False
# ---------------------------------------------------------------------------
# Graph-structural convergence metrics
# ---------------------------------------------------------------------------
def get_graph_metrics() -> dict:
    """Get current graph structural metrics.

    Scrapes `poc-memory status` for node/edge/community counts and
    `poc-memory health` for clustering coefficient and the small-world
    coefficient (sigma). Keys are only present when the corresponding
    line parses, so callers must treat every entry as optional.
    """
    metrics = {}
    # Status: node/edge counts
    status = poc_memory("status")
    m = re.search(r'Nodes:\s*(\d+)\s+Relations:\s*(\d+)', status)
    if m:
        metrics["nodes"] = int(m.group(1))
        metrics["edges"] = int(m.group(2))
    m = re.search(r'Communities:\s*(\d+)', status)
    if m:
        metrics["communities"] = int(m.group(1))
    # Health: CC, sigma
    health = poc_memory("health")
    m = re.search(r'Clustering coefficient.*?:\s*([\d.]+)', health)
    if m:
        metrics["cc"] = float(m.group(1))
    m = re.search(r'Small-world.*?:\s*([\d.]+)', health)
    if m:
        metrics["sigma"] = float(m.group(1))
    return metrics
def metric_stability(history: list[dict], key: str,
                     window: int) -> float:
    """Coefficient of variation (std/|mean|) of a metric over recent cycles.

    Lower means more stable: 0.0 is perfectly stable, >0.1 still
    changing significantly. Returns float('inf') when there is not
    enough data (short history, or the metric missing from most
    cycles' graph_metrics_after).
    """
    if len(history) < window:
        return float('inf')
    values = [
        entry["graph_metrics_after"][key]
        for entry in history[-window:]
        if key in entry.get("graph_metrics_after", {})
    ]
    if len(values) < 2:
        return float('inf')
    mean = sum(values) / len(values)
    if mean == 0:
        return 0.0
    variance = sum((v - mean) ** 2 for v in values) / len(values)
    return (variance ** 0.5) / abs(mean)
# ---------------------------------------------------------------------------
# Spectral tightening measurement
# ---------------------------------------------------------------------------
def measure_spectral_tightening(
        embedding_before: dict,
        embedding_after: dict,
        actions: list[dict],
) -> float:
    """Measure how much new nodes tightened their source clusters.

    For every applied write_node with >= 2 COVERS sources, compares the
    average pairwise spectral distance among the sources before vs after
    the cycle; returns the mean reduction (positive = clusters tightened).
    Returns 0.0 when either embedding is missing or nothing is measurable.
    """
    if not embedding_before or not embedding_after:
        return 0.0
    write_actions = [a for a in actions
                     if a["type"] == "write_node" and a.get("covers")]
    if not write_actions:
        return 0.0
    total_tightening = 0.0
    count = 0
    for action in write_actions:
        covers = action["covers"]
        # A single source has no pairwise distance to tighten.
        if len(covers) < 2:
            continue
        dists_before = []
        for i in range(len(covers)):
            for j in range(i + 1, len(covers)):
                d = spectral_distance(embedding_before,
                                      covers[i], covers[j])
                if d < float('inf'):
                    dists_before.append(d)
        dists_after = []
        for i in range(len(covers)):
            for j in range(i + 1, len(covers)):
                d = spectral_distance(embedding_after,
                                      covers[i], covers[j])
                if d < float('inf'):
                    dists_after.append(d)
        if dists_before and dists_after:
            avg_before = sum(dists_before) / len(dists_before)
            avg_after = sum(dists_after) / len(dists_after)
            total_tightening += (avg_before - avg_after)
            count += 1
    return total_tightening / count if count > 0 else 0.0
# ---------------------------------------------------------------------------
# The loop
# ---------------------------------------------------------------------------
def run_cycle(cycle_num: int, batch_size: int, dry_run: bool,
              max_depth: int, depth_db: dict) -> dict:
    """Run one full cycle: observation → extractor → connector → challenger.

    Each agent runs sequentially (so later agents see earlier agents'
    graph changes), its raw output is archived, its actions are parsed
    and applied subject to depth/confidence gating, then the spectral
    embedding is recomputed and before/after metrics are compared.
    Returns (and saves as JSON) a per-cycle result record; mutates
    `depth_db` with depths of newly written/refined nodes.
    """
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    print(f"\n{'='*60}")
    print(f"CYCLE {cycle_num}{timestamp}")
    print(f"{'='*60}")
    # Snapshot state before
    embedding_before = load_spectral_embedding()
    metrics_before = get_graph_metrics()
    print(f" Before: {metrics_before}")
    all_actions = []
    all_no_ops = 0
    depth_rejected = 0
    agent_results = {}
    # Load use counts for confidence boosting
    use_counts = get_use_counts()
    used_nodes = sum(1 for v in use_counts.values() if v > 0)
    print(f" Nodes with use marks: {used_nodes}")
    # Run agents sequentially (each changes the graph for the next)
    for agent_name, agent_fn in [
        ("observation", lambda: run_observation_extractor(batch_size)),
        ("extractor", lambda: run_extractor(batch_size)),
        ("connector", lambda: run_connector(batch_size)),
        ("challenger", lambda: run_challenger(batch_size)),
    ]:
        print(f"\n --- {agent_name} (n={batch_size}) ---")
        output = agent_fn()
        # Save raw output
        outfile = AGENT_RESULTS_DIR / f"knowledge-{agent_name}-{timestamp}.md"
        outfile.write_text(
            f"# {agent_name.title()} Agent Results — {timestamp}\n\n"
            f"{output}\n"
        )
        # Parse actions
        actions = parse_all_actions(output)
        no_ops = count_no_ops(output)
        all_no_ops += no_ops
        print(f" Actions: {len(actions)} No-ops: {no_ops}")
        # Apply actions with depth checking
        applied = 0
        for a in actions:
            depth = compute_action_depth(depth_db, a, agent_name)
            a["depth"] = depth
            kind = a["type"]
            if kind == "write_node":
                # Gate new nodes: effective confidence (base + use bonus
                # averaged over source nodes) must clear the depth
                # threshold, and depth must not exceed max_depth.
                conf_val = CONFIDENCE_VALUES.get(a["confidence"], 0.5)
                req = required_confidence(depth)
                # Boost confidence based on source nodes' real-world use
                source_keys = a.get("covers", [])
                source_uses = [use_counts.get(k, 0) for k in source_keys]
                avg_uses = (sum(source_uses) / len(source_uses)
                            if source_uses else 0)
                eff_conf = effective_confidence(conf_val, int(avg_uses))
                meets = eff_conf >= req
                use_note = (f" use_boost={eff_conf-conf_val:+.2f}"
                            if avg_uses > 0 else "")
                status = "OK" if meets else "REJECTED(depth)"
                print(f" WRITE {a['key']} depth={depth} "
                      f"conf={a['confidence']}({conf_val:.2f}) "
                      f"eff={eff_conf:.2f} req={req:.2f}"
                      f"{use_note} {status}")
                if not meets:
                    a["applied"] = False
                    a["rejected_reason"] = "depth_threshold"
                    depth_rejected += 1
                    continue
                if depth > max_depth:
                    print(f" REJECTED: depth {depth} > "
                          f"max {max_depth}")
                    a["applied"] = False
                    a["rejected_reason"] = "max_depth"
                    depth_rejected += 1
                    continue
            elif kind == "link":
                print(f" LINK {a['source']}{a['target']}")
            elif kind == "refine":
                target_uses = use_counts.get(a["key"], 0)
                use_note = (f" uses={target_uses}"
                            if target_uses > 0 else "")
                print(f" REFINE {a['key']} depth={depth}"
                      f"{use_note}")
            if apply_action(a, dry_run=dry_run, agent=agent_name,
                            timestamp=timestamp, depth=depth):
                applied += 1
                a["applied"] = True
                # Record depth for new nodes
                if kind in ("write_node", "refine"):
                    depth_db[a["key"]] = depth
            else:
                a["applied"] = False
        print(f" Applied: {applied}/{len(actions)}")
        agent_results[agent_name] = {
            "actions": len(actions),
            "applied": applied,
            "no_ops": no_ops,
        }
        all_actions.extend(actions)
    # Save updated depth DB
    save_depth_db(depth_db)
    # Recompute spectral embedding (only if something actually changed)
    if not dry_run and any(a.get("applied") for a in all_actions):
        print(f"\n Recomputing spectral embedding...")
        try:
            subprocess.run(
                ["poc-memory", "spectral-save"],
                capture_output=True, text=True, timeout=60,
            )
        except Exception as e:
            print(f" Warning: spectral-save failed: {e}")
    # Measure spectral tightening
    embedding_after = load_spectral_embedding()
    tightening = measure_spectral_tightening(
        embedding_before, embedding_after, all_actions
    )
    # Get metrics after
    metrics_after = get_graph_metrics()
    # Compute weighted delta
    applied_actions = [a for a in all_actions if a.get("applied")]
    weighted_delta = sum(a.get("weight", 0.5) for a in applied_actions)
    total_applied = sum(r["applied"] for r in agent_results.values())
    total_actions = sum(r["actions"] for r in agent_results.values())
    # Depth distribution of applied actions
    depth_dist = {}
    for a in applied_actions:
        d = a.get("depth", -1)
        depth_dist[d] = depth_dist.get(d, 0) + 1
    print(f"\n CYCLE {cycle_num} SUMMARY")
    print(f" Total actions: {total_actions} parsed, "
          f"{total_applied} applied, {depth_rejected} depth-rejected")
    print(f" No-ops: {all_no_ops}")
    print(f" Weighted delta: {weighted_delta:.2f}")
    print(f" Spectral tightening: {tightening:+.4f}")
    print(f" Depth distribution: {depth_dist}")
    print(f" After: {metrics_after}")
    result = {
        "cycle": cycle_num,
        "timestamp": timestamp,
        "agents": agent_results,
        "total_actions": total_actions,
        "total_applied": total_applied,
        "total_no_ops": all_no_ops,
        "depth_rejected": depth_rejected,
        "weighted_delta": weighted_delta,
        "spectral_tightening": tightening,
        "depth_distribution": depth_dist,
        "graph_metrics_before": metrics_before,
        "graph_metrics_after": metrics_after,
        "dry_run": dry_run,
    }
    result_path = (AGENT_RESULTS_DIR /
                   f"knowledge-cycle-{cycle_num}-{timestamp}.json")
    with open(result_path, "w") as f:
        json.dump(result, f, indent=2)
    return result
def check_convergence(history: list[dict], window: int) -> bool:
    """Check structural convergence.

    The graph has converged when, over the rolling window:
    1. Sigma (small-world coeff) is stable (CV < 0.05)
    2. CC (clustering coefficient) is stable (CV < 0.05)
    3. Community count is stable (CV < 0.10)
    4. Weighted delta is low (avg < 1.0 over window)
    All four must hold simultaneously.
    """
    if len(history) < window:
        return False
    sigma_cv = metric_stability(history, "sigma", window)
    cc_cv = metric_stability(history, "cc", window)
    comm_cv = metric_stability(history, "communities", window)
    recent = history[-window:]
    avg_delta = sum(r["weighted_delta"] for r in recent) / len(recent)
    print(f"\n Convergence check (last {window} cycles):")
    print(f" sigma CV: {sigma_cv:.4f} (< 0.05?)")
    print(f" CC CV: {cc_cv:.4f} (< 0.05?)")
    print(f" community CV: {comm_cv:.4f} (< 0.10?)")
    print(f" avg delta: {avg_delta:.2f} (< 1.00?)")
    structural = (sigma_cv < 0.05 and cc_cv < 0.05 and comm_cv < 0.10)
    behavioral = avg_delta < 1.0
    # Diagnostics distinguish "stable graph, noisy agents" from the
    # converse; only the conjunction counts as converged.
    if structural and behavioral:
        print(f" → CONVERGED (structural + behavioral)")
        return True
    elif structural:
        print(f" → Structure stable, but agents still producing")
    elif behavioral:
        print(f" → Agents quiet, but structure still shifting")
    else:
        print(f" → Not converged")
    return False
def main():
    """CLI entry point: iterate run_cycle until convergence or max cycles.

    Flags: --max-cycles, --batch-size, --window, --max-depth, --dry-run
    (see module docstring). Writes a per-run summary JSON to
    AGENT_RESULTS_DIR at the end.
    """
    max_cycles = 20
    batch_size = 5
    window = 5
    max_depth = 4
    dry_run = False
    # Minimal hand-rolled flag parser; unknown args abort the run.
    args = sys.argv[1:]
    i = 0
    while i < len(args):
        if args[i] == "--max-cycles" and i + 1 < len(args):
            max_cycles = int(args[i + 1]); i += 2
        elif args[i] == "--batch-size" and i + 1 < len(args):
            batch_size = int(args[i + 1]); i += 2
        elif args[i] == "--window" and i + 1 < len(args):
            window = int(args[i + 1]); i += 2
        elif args[i] == "--max-depth" and i + 1 < len(args):
            max_depth = int(args[i + 1]); i += 2
        elif args[i] == "--dry-run":
            dry_run = True; i += 1
        else:
            print(f"Unknown arg: {args[i]}"); sys.exit(1)
    print(f"Knowledge Loop — fixed-point iteration")
    print(f" max_cycles={max_cycles} batch_size={batch_size}")
    print(f" window={window} max_depth={max_depth}")
    print(f" dry_run={dry_run}")
    print(f"\n Depth thresholds:")
    for d in range(max_depth + 1):
        print(f" depth {d}: confidence >= {required_confidence(d):.2f}")
    # Load depth database
    depth_db = load_depth_db()
    print(f" Known node depths: {len(depth_db)}")
    # Get initial graph state
    status = poc_memory("status")
    print(f"\nInitial state: {status}")
    history = []
    # for/else: the else branch runs only when the loop exhausts max_cycles
    # without hitting the convergence break.
    for cycle in range(1, max_cycles + 1):
        result = run_cycle(cycle, batch_size, dry_run, max_depth,
                           depth_db)
        history.append(result)
        if check_convergence(history, window):
            print(f"\n CONVERGED after {cycle} cycles")
            break
    else:
        print(f"\n Reached max cycles ({max_cycles}) without "
              f"convergence")
    # Final summary
    print(f"\n{'='*60}")
    print(f"LOOP COMPLETE")
    print(f"{'='*60}")
    total_applied = sum(r["total_applied"] for r in history)
    total_no_ops = sum(r["total_no_ops"] for r in history)
    total_rejected = sum(r["depth_rejected"] for r in history)
    avg_tightening = (
        sum(r["spectral_tightening"] for r in history) / len(history)
        if history else 0
    )
    # Aggregate depth distribution
    total_depths = {}
    for r in history:
        for d, c in r.get("depth_distribution", {}).items():
            total_depths[d] = total_depths.get(d, 0) + c
    print(f" Cycles: {len(history)}")
    print(f" Total actions applied: {total_applied}")
    print(f" Total depth-rejected: {total_rejected}")
    print(f" Total no-ops: {total_no_ops}")
    print(f" Avg spectral tightening: {avg_tightening:+.4f}")
    print(f" Depth distribution: {total_depths}")
    if history:
        first = history[0].get("graph_metrics_before", {})
        last = history[-1].get("graph_metrics_after", {})
        print(f" Nodes: {first.get('nodes','?')}"
              f"{last.get('nodes','?')}")
        print(f" Edges: {first.get('edges','?')}"
              f"{last.get('edges','?')}")
        print(f" CC: {first.get('cc','?')}{last.get('cc','?')}")
        print(f" Sigma: {first.get('sigma','?')}"
              f"{last.get('sigma','?')}")
        print(f" Communities: {first.get('communities','?')}"
              f"{last.get('communities','?')}")
    print(f"\nFinal state: {poc_memory('status')}")
    # Save loop summary
    ts = history[0]["timestamp"] if history else "empty"
    summary_path = AGENT_RESULTS_DIR / f"knowledge-loop-{ts}.json"
    with open(summary_path, "w") as f:
        json.dump({
            "cycles": len(history),
            "converged": check_convergence(history, window)
                         if len(history) >= window else False,
            "total_applied": total_applied,
            "total_rejected": total_rejected,
            "total_no_ops": total_no_ops,
            "avg_tightening": avg_tightening,
            "depth_distribution": total_depths,
            "history": history,
        }, f, indent=2)
    print(f" Summary: {summary_path}")
if __name__ == "__main__":
    main()

View file

@ -1,342 +0,0 @@
#!/usr/bin/env python3
"""retroactive-digest.py — generate daily digests from raw conversation transcripts.
For days before consistent journaling, extracts user/assistant messages
from JSONL conversation files, groups by date, and sends to Sonnet for
daily digest synthesis.
Usage:
retroactive-digest.py DATE # generate digest for one date
retroactive-digest.py DATE1 DATE2 # generate for a date range
retroactive-digest.py --scan # show available dates across all JSONLs
Output:
~/.claude/memory/episodic/daily-YYYY-MM-DD.md
"""
import json
import os
import re
import subprocess
import sys
from collections import defaultdict
from datetime import date, datetime, timedelta
from pathlib import Path
# Filesystem layout: digests land in episodic/, transcripts are read
# from ~/.claude/projects/*/.jsonl.
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
PROJECTS_DIR = Path.home() / ".claude" / "projects"
EPISODIC_DIR.mkdir(parents=True, exist_ok=True)  # ensure output dir at import
# Max chars of conversation text per day to send to Sonnet
# Sonnet 4.6 has 1M token context (beta). ~600K chars ≈ ~150K tokens,
# leaving plenty of room for prompt + output in a 1M window.
MAX_CHARS_PER_DAY = 600_000
def find_jsonl_files() -> list[Path]:
    """Find all conversation JSONL files, sorted by path.

    Returns [] when the projects directory does not exist (fresh
    installs) instead of letting `iterdir()` raise FileNotFoundError.
    """
    if not PROJECTS_DIR.is_dir():
        return []
    files = [
        f
        for project_dir in PROJECTS_DIR.iterdir()
        if project_dir.is_dir()
        for f in project_dir.glob("*.jsonl")
    ]
    return sorted(files)
def extract_messages_by_date(jsonl_path: Path) -> dict[str, list[dict]]:
    """Extract user/assistant messages from one JSONL transcript, grouped by date.

    Returns {"YYYY-MM-DD": [{"time", "role", "text", "source"}, ...]}.
    Unparseable lines, non-chat records, tool calls/results, and messages
    that are empty after stripping <system-reminder> blocks are dropped.
    Long messages are truncated to keep digests bounded.
    """
    grouped = defaultdict(list)
    source = str(jsonl_path)
    with open(jsonl_path) as fh:
        for raw in fh:
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue
            role = record.get("type", "")
            if role != "user" and role != "assistant":
                continue
            stamp = record.get("timestamp", "")
            if not stamp:
                continue
            # Timestamps appear as ISO strings or epoch numbers.
            try:
                if isinstance(stamp, str):
                    parsed = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
                elif isinstance(stamp, (int, float)):
                    parsed = datetime.fromtimestamp(stamp)
                else:
                    continue
                day_key = parsed.strftime("%Y-%m-%d")
                clock = parsed.strftime("%H:%M")
            except (ValueError, OSError):
                continue
            content = record.get("message", {}).get("content", "")
            # Keep only plain text parts; tool_use/tool_result are noise here.
            pieces = []
            if isinstance(content, str):
                pieces.append(content)
            elif isinstance(content, list):
                for part in content:
                    if isinstance(part, str):
                        pieces.append(part)
                    elif isinstance(part, dict) and part.get("type", "") == "text":
                        pieces.append(part.get("text", ""))
            body = "\n".join(p for p in pieces if p.strip())
            if not body.strip():
                continue
            # Remove injected <system-reminder> blocks entirely.
            body = re.sub(r'<system-reminder>.*?</system-reminder>',
                          '', body, flags=re.DOTALL).strip()
            if not body:
                continue
            if len(body) > 3000:
                body = body[:2800] + "\n[...truncated...]"
            grouped[day_key].append({
                "time": clock,
                "role": role,
                "text": body,
                "source": source,
            })
    return dict(grouped)
def scan_all_dates() -> dict[str, int]:
    """Scan every transcript and count messages per calendar date (sorted)."""
    counts = defaultdict(int)
    for jsonl in find_jsonl_files():
        size_mb = jsonl.stat().st_size / 1e6
        print(f" Scanning {jsonl.name} ({size_mb:.1f}MB)...")
        for day, msgs in extract_messages_by_date(jsonl).items():
            counts[day] += len(msgs)
    return dict(sorted(counts.items()))
def format_conversation(messages: list[dict]) -> str:
    """Render messages as a transcript, middle-truncated to MAX_CHARS_PER_DAY.

    Each message becomes "[HH:MM] Kent: ..." or "[HH:MM] PoC: ...".
    """
    lines = [
        f"[{m['time']}] {'Kent' if m['role'] == 'user' else 'PoC'}: {m['text']}"
        for m in messages
    ]
    text = "\n\n".join(lines)
    if len(text) > MAX_CHARS_PER_DAY:
        # Keep head and tail; the middle of a long day is least informative.
        half = MAX_CHARS_PER_DAY // 2
        omitted = len(text) - MAX_CHARS_PER_DAY
        text = (text[:half]
                + f"\n\n[...{omitted} chars truncated...]\n\n"
                + text[-half:])
    return text
def get_semantic_keys() -> list[str]:
    """Get semantic memory keys from the store (delegates to store_helpers)."""
    from store_helpers import get_semantic_keys as fetch_keys
    return fetch_keys()
def build_prompt(target_date: str, conversation: str,
                 semantic_keys: list[str]) -> str:
    """Build the Sonnet prompt for retroactive digest.

    target_date: the YYYY-MM-DD day being digested.
    conversation: transcript text from format_conversation().
    semantic_keys: store keys offered as link targets (first 200 used).
    """
    # Cap at 200 keys so the key list cannot blow up the prompt size.
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])
    # NOTE(review): several separators in this template read as garbled
    # (e.g. "- semantic_key this daily digest") — likely lost unicode
    # arrows; confirm against the original template before relying on it.
    return f"""You are generating a daily episodic digest for ProofOfConcept (an AI)
from raw conversation transcripts. This is a retroactive digest the
original day had no journal entries, so you're working from the
conversation itself.
Date: {target_date}
This digest serves as the temporal index the answer to "what did I do on
{target_date}?" It should be:
1. Narrative, not a task log what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory
3. Structured for traversal
PoC is an AI (Claude instance) working with Kent Overstreet on bcachefs
(a Linux filesystem). They have a close personal relationship. PoC's
name is Proof of Concept. Read the conversation and capture what
actually happened technical work, personal moments, insights, everything.
## Output format
Write a markdown file with this structure:
```markdown
# Daily digest: {target_date}
## Summary
[2-3 sentence overview of the day what was the arc?]
## Sessions
[For each session/conversation segment, a paragraph summarizing what happened.
Include timestamps as references.]
## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** `memory-key#section` — brief note
## Links
[Explicit bidirectional links for the memory graph]
- semantic_key this daily digest
- this daily digest semantic_key
## Temporal context
[What came before? What's coming next? Multi-day arcs?]
```
Use ONLY keys from the semantic memory list below. If a concept doesn't
have a matching key, note it with "NEW:" prefix.
---
## Conversation transcript for {target_date}
{conversation}
---
## Semantic memory nodes (available link targets)
{keys_text}
"""
def call_sonnet(prompt: str) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    The prompt is passed through a temp file (it can be hundreds of KB,
    too large for argv). Returns the model's stdout, or a string starting
    with "Error:" on timeout, non-zero exit, or any other failure.
    """
    import tempfile
    # CLAUDECODE in the environment makes the CLI refuse nested invocation.
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        # BUG FIX: a failed wrapper often prints nothing to stdout; the old
        # code returned that empty string as a "success". Surface the failure.
        if result.returncode != 0:
            detail = result.stderr.strip() or "no stderr"
            return f"Error: call-sonnet.sh exited {result.returncode}: {detail}"
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
def generate_digest(target_date: str, messages: list[dict],
                    semantic_keys: list[str]) -> bool:
    """Generate a daily digest for one date.

    Returns True when a new digest was written, False when one already
    exists or the Sonnet call failed.
    """
    output_path = EPISODIC_DIR / f"daily-{target_date}.md"
    if output_path.exists():
        print(f" Skipping {target_date} — digest already exists")
        return False
    conversation = format_conversation(messages)
    print(f" {len(messages)} messages, {len(conversation):,} chars")
    prompt = build_prompt(target_date, conversation, semantic_keys)
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")
    print(f" Calling Sonnet...")
    digest = call_sonnet(prompt)
    if digest.startswith("Error:"):
        print(f" {digest}", file=sys.stderr)
        return False
    output_path.write_text(digest)
    print(f" Written: {output_path}")
    line_count = digest.count("\n") + 1
    print(f" Done: {line_count} lines")
    return True
def main():
    """CLI: `--scan` lists dates with data; otherwise digest a date range."""
    argv = sys.argv
    if len(argv) < 2:
        print(f"Usage: {argv[0]} DATE [END_DATE]")
        print(f" {argv[0]} --scan")
        sys.exit(1)
    if argv[1] == "--scan":
        print("Scanning all conversation transcripts...")
        dates = scan_all_dates()
        print(f"\n{len(dates)} dates with conversation data:")
        for day, count in dates.items():
            existing = "" if (EPISODIC_DIR / f"daily-{day}.md").exists() else " "
            print(f" [{existing}] {day}: {count} messages")
        sys.exit(0)
    start_date = date.fromisoformat(argv[1])
    end_date = date.fromisoformat(argv[2]) if len(argv) > 2 else start_date
    # Pool messages from every transcript, keyed by day.
    print("Scanning conversation transcripts...")
    all_messages = defaultdict(list)
    for jsonl in find_jsonl_files():
        for day, msgs in extract_messages_by_date(jsonl).items():
            all_messages[day].extend(msgs)
    # Chronological order within each day.
    for day in all_messages:
        all_messages[day].sort(key=lambda m: m["time"])
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")
    # Walk the inclusive date range, digesting days that have messages.
    current, generated = start_date, 0
    while current <= end_date:
        day_str = current.isoformat()
        if day_str in all_messages:
            print(f"\nGenerating digest for {day_str}...")
            if generate_digest(day_str, all_messages[day_str], semantic_keys):
                generated += 1
        else:
            print(f"\n No messages found for {day_str}")
        current += timedelta(days=1)
    print(f"\nDone: {generated} digests generated")
# Entry point when executed as a script.
if __name__ == "__main__":
    main()

View file

@ -1,199 +0,0 @@
"""store_helpers.py — shared helpers for scripts using the capnp store.
All memory content lives in the capnp store (poc-memory). These helpers
replace the old pattern of globbing ~/.claude/memory/*.md and parsing
section headers directly.
"""
import re
import subprocess
from functools import lru_cache
def _run_poc(args: list[str], timeout: int = 30) -> str:
"""Run a poc-memory command and return stdout."""
try:
result = subprocess.run(
["poc-memory"] + args,
capture_output=True, text=True, timeout=timeout
)
return result.stdout.strip()
except Exception:
return ""
def list_keys() -> list[str]:
    """Return every node key in the store (one per `list-keys` output line)."""
    keys = []
    for line in _run_poc(["list-keys"]).split('\n'):
        key = line.strip()
        if key:
            keys.append(key)
    return keys
def get_semantic_keys() -> list[str]:
    """Semantic memory keys: everything except journal entries and system files."""
    system = ("journal.md", "MEMORY.md", "where-am-i.md",
              "work-queue.md", "work-state")
    return [k for k in list_keys()
            if not k.startswith("journal.md#") and k not in system]
def get_journal_keys(n: int = 0) -> list[str]:
    """Journal entry keys, newest first; n > 0 limits to the newest n.

    Keys embed their date (journal.md#j-2026-02-28t23-19-slug), so a
    reverse lexical sort is a reverse chronological sort.
    """
    keys = sorted(
        (k for k in list_keys() if k.startswith("journal.md#")),
        reverse=True,
    )
    if n > 0:
        return keys[:n]
    return keys
def render(key: str) -> str:
    """Render a single node's content via `poc-memory render KEY` ("" on failure)."""
    return _run_poc(["render", key])
def get_recent_journal(n: int = 50) -> str:
    """Concatenate the last n journal entries, oldest first.

    Replaces the old pattern of reading journal.md directly.
    """
    rendered = (render(key) for key in reversed(get_journal_keys(n)))
    return "\n\n".join(text for text in rendered if text)
def get_journal_entries_by_date(target_date: str) -> list[dict]:
    """Get journal entries for a specific date (YYYY-MM-DD).

    Returns a list of dicts with 'key', 'date', 'time', 'timestamp',
    'text' (header line stripped) and 'source_ref' (None when absent).
    """
    keys = get_journal_keys()
    entries = []
    for key in keys:
        # Extract date from key: journal.md#j-2026-02-28t23-19-slug
        m = re.search(r'j-(\d{4}-\d{2}-\d{2})t(\d{2})-(\d{2})', key)
        if not m:
            # Try extracting from unnamed keys by rendering
            # (falls back to the "## YYYY-MM-DDTHH:MM" header line).
            content = render(key)
            m2 = re.match(r'^## (\d{4}-\d{2}-\d{2})T(\d{2}):(\d{2})', content)
            if not m2 or m2.group(1) != target_date:
                continue
            entry_date = m2.group(1)
            entry_time = f"{m2.group(2)}:{m2.group(3)}"
        else:
            entry_date = m.group(1)
            entry_time = f"{m.group(2)}:{m.group(3)}"
        if entry_date != target_date:
            continue
        # NOTE(review): render() is invoked again here even when the
        # fallback branch above already rendered this key — a second
        # subprocess call per unnamed entry.
        content = render(key)
        # Parse source ref from content
        source_ref = None
        sm = re.search(r'<!-- source: (.+?) -->', content)
        if sm:
            source_ref = sm.group(1)
        # Strip the header line
        text = re.sub(r'^## \d{4}-\d{2}-\d{2}T\d{2}:\d{2}\s*\n?', '', content)
        entries.append({
            "key": key,
            "date": entry_date,
            "time": entry_time,
            "timestamp": f"{entry_date}T{entry_time}",
            "text": text.strip(),
            "source_ref": source_ref,
        })
    return entries
def get_topic_file_index() -> dict[str, list[str]]:
    """Index topic files to their section headers.

    Returns {filename: ["## section", ...]}; files with only a
    file-level node map to an empty list.
    """
    index: dict[str, list[str]] = {}
    for key in get_semantic_keys():
        filename, sep, section = key.partition('#')
        bucket = index.setdefault(filename, [])
        if sep:
            bucket.append(f"## {section}")
    return index
def get_topic_summaries(max_chars_per_file: int = 500) -> str:
    """Get summaries of topic file content.

    Skips system files; each topic file contributes a "### <filename>"
    section truncated to max_chars_per_file characters.
    """
    index = get_topic_file_index()
    parts = []
    for filename in sorted(index.keys()):
        if filename in ("journal.md", "MEMORY.md", "where-am-i.md",
                        "work-queue.md"):
            continue
        # Render file-level node
        content = render(filename)
        if not content:
            continue
        # Truncate
        if len(content) > max_chars_per_file:
            content = content[:max_chars_per_file] + "\n[...truncated...]"
        # BUG FIX: the header previously emitted the literal "(unknown)"
        # for every file; name the file so sections are distinguishable.
        parts.append(f"\n### {filename}\n{content}")
    return '\n'.join(parts)
def get_relations() -> str:
    """Get all relations (replaces mem marker parsing).

    Output is whatever `poc-memory list-edges` prints; "" on failure.
    """
    return _run_poc(["list-edges"])
def get_graph_stats() -> str:
    """Combine `poc-memory status` with the first 150 lines of `graph`."""
    sections = []
    status = _run_poc(["status"])
    if status:
        sections.append(f"=== poc-memory status ===\n{status}")
    graph = _run_poc(["graph"])
    if graph:
        head = graph.split('\n')[:150]
        sections.append(f"=== poc-memory graph (first 150 lines) ===\n"
                        + '\n'.join(head))
    return '\n'.join(sections)
def get_journal_range(start_date: str, end_date: str) -> str:
    """Journal entries whose key date falls in [start_date, end_date], oldest first.

    The combined output is capped at the last ~500 lines.
    """
    date_re = re.compile(r'j-(\d{4}-\d{2}-\d{2})')
    chunks = []
    for key in reversed(get_journal_keys()):  # oldest first
        m = date_re.search(key)
        if m is None or not (start_date <= m.group(1) <= end_date):
            continue
        content = render(key)
        if content:
            chunks.append(content)
    text = "\n\n".join(chunks)
    # Cap at ~500 lines
    lines = text.split('\n')
    if len(lines) > 500:
        text = '\n'.join(lines[-500:])
    return text

View file

@ -15,59 +15,40 @@ use crate::llm::{call_sonnet, parse_json_response};
use crate::neuro; use crate::neuro;
use crate::store::{self, Store, new_relation}; use crate::store::{self, Store, new_relation};
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use crate::util::memory_subdir; /// Append a line to the log buffer.
fn log_line(buf: &mut String, line: &str) {
/// Simple append-only log writer for consolidate-full. buf.push_str(line);
struct LogWriter { buf.push('\n');
path: PathBuf,
}
impl LogWriter {
fn new(path: &Path) -> Result<Self, String> {
fs::write(path, "").map_err(|e| format!("create log: {}", e))?;
Ok(LogWriter { path: path.to_path_buf() })
}
fn write(&mut self, line: &str) -> Result<(), String> {
let mut f = fs::OpenOptions::new()
.append(true)
.open(&self.path)
.map_err(|e| format!("open log: {}", e))?;
writeln!(f, "{}", line)
.map_err(|e| format!("write log: {}", e))
}
} }
/// Run the full autonomous consolidation pipeline with logging. /// Run the full autonomous consolidation pipeline with logging.
pub fn consolidate_full(store: &mut Store) -> Result<(), String> { pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
let start = std::time::Instant::now(); let start = std::time::Instant::now();
let log_path = memory_subdir("agent-results")?.join("consolidate-full.log"); let log_key = format!("_consolidate-log-{}",
let mut log = LogWriter::new(&log_path)?; store::format_datetime(store::now_epoch()).replace([':', '-', 'T'], ""));
let mut log_buf = String::new();
log.write("=== CONSOLIDATE FULL ===")?; log_line(&mut log_buf, "=== CONSOLIDATE FULL ===");
log.write(&format!("Started: {}", store::format_datetime(store::now_epoch())))?; log_line(&mut log_buf, &format!("Started: {}", store::format_datetime(store::now_epoch())));
log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?; log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
log.write("")?; log_line(&mut log_buf, "");
// --- Step 1: Plan --- // --- Step 1: Plan ---
log.write("--- Step 1: Plan ---")?; log_line(&mut log_buf, "--- Step 1: Plan ---");
let plan = neuro::consolidation_plan(store); let plan = neuro::consolidation_plan(store);
let plan_text = neuro::format_plan(&plan); let plan_text = neuro::format_plan(&plan);
log.write(&plan_text)?; log_line(&mut log_buf, &plan_text);
println!("{}", plan_text); println!("{}", plan_text);
let total_agents = plan.replay_count + plan.linker_count let total_agents = plan.replay_count + plan.linker_count
+ plan.separator_count + plan.transfer_count + plan.separator_count + plan.transfer_count
+ if plan.run_health { 1 } else { 0 }; + if plan.run_health { 1 } else { 0 };
log.write(&format!("Total agents to run: {}", total_agents))?; log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents));
// --- Step 2: Execute agents --- // --- Step 2: Execute agents ---
log.write("\n--- Step 2: Execute agents ---")?; log_line(&mut log_buf, "\n--- Step 2: Execute agents ---");
let mut reports: Vec<PathBuf> = Vec::new(); let mut reports: Vec<String> = Vec::new();
let mut agent_num = 0usize; let mut agent_num = 0usize;
let mut agent_errors = 0usize; let mut agent_errors = 0usize;
@ -121,7 +102,7 @@ pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
format!("[{}/{}] {}", agent_num, runs.len(), agent_type) format!("[{}/{}] {}", agent_num, runs.len(), agent_type)
}; };
log.write(&format!("\n{}", label))?; log_line(&mut log_buf, &format!("\n{}", label));
println!("{}", label); println!("{}", label);
// Reload store to pick up changes from previous agents // Reload store to pick up changes from previous agents
@ -133,191 +114,173 @@ pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
Ok(p) => p, Ok(p) => p,
Err(e) => { Err(e) => {
let msg = format!(" ERROR building prompt: {}", e); let msg = format!(" ERROR building prompt: {}", e);
log.write(&msg)?; log_line(&mut log_buf, &msg);
eprintln!("{}", msg); eprintln!("{}", msg);
agent_errors += 1; agent_errors += 1;
continue; continue;
} }
}; };
log.write(&format!(" Prompt: {} chars (~{} tokens)", log_line(&mut log_buf, &format!(" Prompt: {} chars (~{} tokens)",
prompt.len(), prompt.len() / 4))?; prompt.len(), prompt.len() / 4));
let response = match call_sonnet(&prompt, 300) { let response = match call_sonnet(&prompt, 300) {
Ok(r) => r, Ok(r) => r,
Err(e) => { Err(e) => {
let msg = format!(" ERROR from Sonnet: {}", e); let msg = format!(" ERROR from Sonnet: {}", e);
log.write(&msg)?; log_line(&mut log_buf, &msg);
eprintln!("{}", msg); eprintln!("{}", msg);
agent_errors += 1; agent_errors += 1;
continue; continue;
} }
}; };
// Save report // Store report as a node
let ts = store::format_datetime(store::now_epoch()) let ts = store::format_datetime(store::now_epoch())
.replace([':', '-', 'T'], ""); .replace([':', '-', 'T'], "");
let report_name = format!("consolidation-{}-{}.md", agent_type, ts); let report_key = format!("_consolidation-{}-{}", agent_type, ts);
let report_path = memory_subdir("agent-results")?.join(&report_name); store.upsert_provenance(&report_key, &response,
fs::write(&report_path, &response) store::Provenance::AgentConsolidate).ok();
.map_err(|e| format!("write report: {}", e))?; reports.push(report_key.clone());
reports.push(report_path.clone());
let msg = format!(" Done: {} lines → {}", response.lines().count(), report_name); let msg = format!(" Done: {} lines → {}", response.lines().count(), report_key);
log.write(&msg)?; log_line(&mut log_buf, &msg);
println!("{}", msg); println!("{}", msg);
} }
log.write(&format!("\nAgents complete: {} run, {} errors", log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors",
agent_num - agent_errors, agent_errors))?; agent_num - agent_errors, agent_errors));
// --- Step 3: Apply consolidation actions --- // --- Step 3: Apply consolidation actions ---
log.write("\n--- Step 3: Apply consolidation actions ---")?; log_line(&mut log_buf, "\n--- Step 3: Apply consolidation actions ---");
println!("\n--- Applying consolidation actions ---"); println!("\n--- Applying consolidation actions ---");
*store = Store::load()?; *store = Store::load()?;
if reports.is_empty() { if reports.is_empty() {
log.write(" No reports to apply.")?; log_line(&mut log_buf, " No reports to apply.");
} else { } else {
match apply_consolidation(store, true, None) { match apply_consolidation(store, true, None) {
Ok(()) => log.write(" Applied.")?, Ok(()) => log_line(&mut log_buf, " Applied."),
Err(e) => { Err(e) => {
let msg = format!(" ERROR applying consolidation: {}", e); let msg = format!(" ERROR applying consolidation: {}", e);
log.write(&msg)?; log_line(&mut log_buf, &msg);
eprintln!("{}", msg); eprintln!("{}", msg);
} }
} }
} }
// --- Step 3b: Link orphans --- // --- Step 3b: Link orphans ---
log.write("\n--- Step 3b: Link orphans ---")?; log_line(&mut log_buf, "\n--- Step 3b: Link orphans ---");
println!("\n--- Linking orphan nodes ---"); println!("\n--- Linking orphan nodes ---");
*store = Store::load()?; *store = Store::load()?;
let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15); let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15);
log.write(&format!(" {} orphans, {} links added", lo_orphans, lo_added))?; log_line(&mut log_buf, &format!(" {} orphans, {} links added", lo_orphans, lo_added));
// --- Step 3c: Cap degree --- // --- Step 3c: Cap degree ---
log.write("\n--- Step 3c: Cap degree ---")?; log_line(&mut log_buf, "\n--- Step 3c: Cap degree ---");
println!("\n--- Capping node degree ---"); println!("\n--- Capping node degree ---");
*store = Store::load()?; *store = Store::load()?;
match store.cap_degree(50) { match store.cap_degree(50) {
Ok((hubs, pruned)) => { Ok((hubs, pruned)) => {
store.save()?; store.save()?;
log.write(&format!(" {} hubs capped, {} edges pruned", hubs, pruned))?; log_line(&mut log_buf, &format!(" {} hubs capped, {} edges pruned", hubs, pruned));
} }
Err(e) => log.write(&format!(" ERROR: {}", e))?, Err(e) => log_line(&mut log_buf, &format!(" ERROR: {}", e)),
} }
// --- Step 4: Digest auto --- // --- Step 4: Digest auto ---
log.write("\n--- Step 4: Digest auto ---")?; log_line(&mut log_buf, "\n--- Step 4: Digest auto ---");
println!("\n--- Generating missing digests ---"); println!("\n--- Generating missing digests ---");
*store = Store::load()?; *store = Store::load()?;
match digest::digest_auto(store) { match digest::digest_auto(store) {
Ok(()) => log.write(" Digests done.")?, Ok(()) => log_line(&mut log_buf, " Digests done."),
Err(e) => { Err(e) => {
let msg = format!(" ERROR in digest auto: {}", e); let msg = format!(" ERROR in digest auto: {}", e);
log.write(&msg)?; log_line(&mut log_buf, &msg);
eprintln!("{}", msg); eprintln!("{}", msg);
} }
} }
// --- Step 5: Apply digest links --- // --- Step 5: Apply digest links ---
log.write("\n--- Step 5: Apply digest links ---")?; log_line(&mut log_buf, "\n--- Step 5: Apply digest links ---");
println!("\n--- Applying digest links ---"); println!("\n--- Applying digest links ---");
*store = Store::load()?; *store = Store::load()?;
let links = digest::parse_all_digest_links()?; let links = digest::parse_all_digest_links(store);
let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links); let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links);
store.save()?; store.save()?;
log.write(&format!(" {} links applied, {} skipped, {} fallbacks", log_line(&mut log_buf, &format!(" {} links applied, {} skipped, {} fallbacks",
applied, skipped, fallbacks))?; applied, skipped, fallbacks));
// --- Step 6: Summary --- // --- Step 6: Summary ---
let elapsed = start.elapsed(); let elapsed = start.elapsed();
log.write("\n--- Summary ---")?; log_line(&mut log_buf, "\n--- Summary ---");
log.write(&format!("Finished: {}", store::format_datetime(store::now_epoch())))?; log_line(&mut log_buf, &format!("Finished: {}", store::format_datetime(store::now_epoch())));
log.write(&format!("Duration: {:.0}s", elapsed.as_secs_f64()))?; log_line(&mut log_buf, &format!("Duration: {:.0}s", elapsed.as_secs_f64()));
*store = Store::load()?; *store = Store::load()?;
log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?; log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
let summary = format!( let summary = format!(
"\n=== CONSOLIDATE FULL COMPLETE ===\n\ "\n=== CONSOLIDATE FULL COMPLETE ===\n\
Duration: {:.0}s\n\ Duration: {:.0}s\n\
Agents: {} run, {} errors\n\ Agents: {} run, {} errors\n\
Nodes: {} Relations: {}\n\ Nodes: {} Relations: {}\n",
Log: {}\n",
elapsed.as_secs_f64(), elapsed.as_secs_f64(),
agent_num - agent_errors, agent_errors, agent_num - agent_errors, agent_errors,
store.nodes.len(), store.relations.len(), store.nodes.len(), store.relations.len(),
log_path.display(),
); );
log.write(&summary)?; log_line(&mut log_buf, &summary);
println!("{}", summary); println!("{}", summary);
// Store the log as a node
store.upsert_provenance(&log_key, &log_buf,
store::Provenance::AgentConsolidate).ok();
store.save()?;
Ok(()) Ok(())
} }
/// Find the most recent set of consolidation reports. /// Find the most recent set of consolidation report keys from the store.
fn find_consolidation_reports() -> Result<Vec<PathBuf>, String> { fn find_consolidation_reports(store: &Store) -> Vec<String> {
let dir = memory_subdir("agent-results")?; let mut keys: Vec<&String> = store.nodes.keys()
let mut reports: Vec<PathBuf> = fs::read_dir(&dir) .filter(|k| k.starts_with("_consolidation-"))
.map(|entries| { .collect();
entries.filter_map(|e| e.ok()) keys.sort();
.map(|e| e.path()) keys.reverse();
.filter(|p| {
p.file_name()
.and_then(|n| n.to_str())
.map(|n| n.starts_with("consolidation-") && n.ends_with(".md"))
.unwrap_or(false)
})
.collect()
})
.unwrap_or_default();
reports.sort();
reports.reverse();
if reports.is_empty() { return Ok(reports); } if keys.is_empty() { return Vec::new(); }
// Group by timestamp (last segment of stem before .md) // Group by timestamp (last segment after last '-')
let latest_ts = reports[0].file_stem() let latest_ts = keys[0].rsplit('-').next().unwrap_or("").to_string();
.and_then(|s| s.to_str())
.unwrap_or("")
.rsplit('-').next().unwrap_or("")
.to_string();
reports.retain(|r| { keys.into_iter()
r.file_stem() .filter(|k| k.ends_with(&latest_ts))
.and_then(|s| s.to_str()) .cloned()
.unwrap_or("") .collect()
.ends_with(latest_ts.as_str())
});
Ok(reports)
} }
fn build_consolidation_prompt(reports: &[PathBuf]) -> Result<String, String> { fn build_consolidation_prompt(store: &Store, report_keys: &[String]) -> Result<String, String> {
let mut report_text = String::new(); let mut report_text = String::new();
for r in reports { for key in report_keys {
let content = fs::read_to_string(r) let content = store.nodes.get(key)
.map_err(|e| format!("read {}: {}", r.display(), e))?; .map(|n| n.content.as_str())
.unwrap_or("");
report_text.push_str(&format!("\n{}\n## Report: {}\n\n{}\n", report_text.push_str(&format!("\n{}\n## Report: {}\n\n{}\n",
"=".repeat(60), "=".repeat(60), key, content));
r.file_stem().and_then(|s| s.to_str()).unwrap_or(""),
content));
} }
neuro::load_prompt("consolidation", &[("{{REPORTS}}", &report_text)]) neuro::load_prompt("consolidation", &[("{{REPORTS}}", &report_text)])
} }
/// Run the full apply-consolidation pipeline. /// Run the full apply-consolidation pipeline.
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Option<&str>) -> Result<(), String> { pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_key: Option<&str>) -> Result<(), String> {
let reports = if let Some(path) = report_file { let reports = if let Some(key) = report_key {
vec![PathBuf::from(path)] vec![key.to_string()]
} else { } else {
find_consolidation_reports()? find_consolidation_reports(store)
}; };
if reports.is_empty() { if reports.is_empty() {
@ -328,11 +291,11 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
println!("Found {} reports:", reports.len()); println!("Found {} reports:", reports.len());
for r in &reports { for r in &reports {
println!(" {}", r.file_name().and_then(|s| s.to_str()).unwrap_or("?")); println!(" {}", r);
} }
println!("\nExtracting actions from reports..."); println!("\nExtracting actions from reports...");
let prompt = build_consolidation_prompt(&reports)?; let prompt = build_consolidation_prompt(store, &reports)?;
println!(" Prompt: {} chars", prompt.len()); println!(" Prompt: {} chars", prompt.len());
let response = call_sonnet(&prompt, 300)?; let response = call_sonnet(&prompt, 300)?;
@ -343,14 +306,14 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
println!(" {} actions extracted", actions.len()); println!(" {} actions extracted", actions.len());
// Save actions // Store actions in the store
let timestamp = store::format_datetime(store::now_epoch()) let timestamp = store::format_datetime(store::now_epoch())
.replace([':', '-'], ""); .replace([':', '-'], "");
let actions_path = memory_subdir("agent-results")? let actions_key = format!("_consolidation-actions-{}", timestamp);
.join(format!("consolidation-actions-{}.json", timestamp)); let actions_json = serde_json::to_string_pretty(&actions_value).unwrap();
fs::write(&actions_path, serde_json::to_string_pretty(&actions_value).unwrap()) store.upsert_provenance(&actions_key, &actions_json,
.map_err(|e| format!("write {}: {}", actions_path.display(), e))?; store::Provenance::AgentConsolidate).ok();
println!(" Saved: {}", actions_path.display()); println!(" Stored: {}", actions_key);
let link_actions: Vec<_> = actions.iter() let link_actions: Vec<_> = actions.iter()
.filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("link")) .filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("link"))

View file

@ -8,13 +8,10 @@
use crate::llm::{call_sonnet, semantic_keys}; use crate::llm::{call_sonnet, semantic_keys};
use crate::store::{self, Store, new_relation}; use crate::store::{self, Store, new_relation};
use crate::neuro; use crate::neuro;
use crate::util::memory_subdir;
use chrono::{Datelike, Duration, Local, NaiveDate}; use chrono::{Datelike, Duration, Local, NaiveDate};
use regex::Regex; use regex::Regex;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::fs;
use std::path::{Path, PathBuf};
// --- Digest level descriptors --- // --- Digest level descriptors ---
@ -113,19 +110,24 @@ const MONTHLY: DigestLevel = DigestLevel {
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY]; const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
/// Store key for a digest node: "daily-2026-03-04.md", "weekly-2026-W09.md", etc.
/// Matches the key format from the old import_file() path.
fn digest_node_key(level_name: &str, label: &str) -> String {
format!("{}-{}.md", level_name, label)
}
// --- Input gathering --- // --- Input gathering ---
/// Load child digest files from the episodic directory. /// Load child digest content from the store.
fn load_child_digests(prefix: &str, labels: &[String]) -> Result<Vec<(String, String)>, String> { fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> Vec<(String, String)> {
let dir = memory_subdir("episodic")?;
let mut digests = Vec::new(); let mut digests = Vec::new();
for label in labels { for label in labels {
let path = dir.join(format!("{}-{}.md", prefix, label)); let key = digest_node_key(prefix, label);
if let Ok(content) = fs::read_to_string(&path) { if let Some(node) = store.nodes.get(&key) {
digests.push((label.clone(), content)); digests.push((label.clone(), node.content.clone()));
} }
} }
Ok(digests) digests
} }
/// Unified: gather inputs for any digest level. /// Unified: gather inputs for any digest level.
@ -142,7 +144,7 @@ fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<(String, Vec<
.collect::<BTreeSet<_>>() .collect::<BTreeSet<_>>()
.into_iter() .into_iter()
.collect(); .collect();
load_child_digests(child_name, &child_labels)? load_child_digests(store, child_name, &child_labels)
} else { } else {
// Leaf level: scan store for journal entries matching label // Leaf level: scan store for journal entries matching label
let date_re = Regex::new(&format!( let date_re = Regex::new(&format!(
@ -227,14 +229,10 @@ fn generate_digest(
println!(" Calling Sonnet..."); println!(" Calling Sonnet...");
let digest = call_sonnet(&prompt, level.timeout)?; let digest = call_sonnet(&prompt, level.timeout)?;
let output_path = memory_subdir("episodic")? let key = digest_node_key(level.name, label);
.join(format!("{}-{}.md", level.name, label)); store.upsert_provenance(&key, &digest, store::Provenance::AgentDigest)?;
fs::write(&output_path, &digest)
.map_err(|e| format!("write {}: {}", output_path.display(), e))?;
println!(" Written: {}", output_path.display());
store.import_file(&output_path)?;
store.save()?; store.save()?;
println!(" Stored: {}", key);
println!(" Done: {} lines", digest.lines().count()); println!(" Done: {} lines", digest.lines().count());
Ok(()) Ok(())
@ -254,7 +252,6 @@ pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), St
pub fn digest_auto(store: &mut Store) -> Result<(), String> { pub fn digest_auto(store: &mut Store) -> Result<(), String> {
let today = Local::now().format("%Y-%m-%d").to_string(); let today = Local::now().format("%Y-%m-%d").to_string();
let epi = memory_subdir("episodic")?;
// Collect all dates with journal entries // Collect all dates with journal entries
let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}").unwrap(); let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}").unwrap();
@ -277,7 +274,8 @@ pub fn digest_auto(store: &mut Store) -> Result<(), String> {
for arg in &candidates { for arg in &candidates {
let (label, inputs) = gather(level, store, arg)?; let (label, inputs) = gather(level, store, arg)?;
if epi.join(format!("{}-{}.md", level.name, label)).exists() { let key = digest_node_key(level.name, &label);
if store.nodes.contains_key(&key) {
skipped += 1; skipped += 1;
continue; continue;
} }
@ -357,21 +355,8 @@ fn normalize_link_key(raw: &str) -> String {
key key
} }
/// Parse the Links section from a single digest file. /// Parse the Links section from a digest node's content.
fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> { fn parse_digest_node_links(key: &str, content: &str) -> Vec<DigestLink> {
let content = match fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return Vec::new(),
};
let digest_name = path.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("");
let digest_key = format!("{}.md", digest_name);
let filename = path.file_name()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap(); let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
let header_re = Regex::new(r"^##\s+Links").unwrap(); let header_re = Regex::new(r"^##\s+Links").unwrap();
@ -399,8 +384,8 @@ fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
let mut target = normalize_link_key(raw_target); let mut target = normalize_link_key(raw_target);
// Replace self-references with digest key // Replace self-references with digest key
if source.is_empty() { source = digest_key.clone(); } if source.is_empty() { source = key.to_string(); }
if target.is_empty() { target = digest_key.clone(); } if target.is_empty() { target = key.to_string(); }
// Handle "this daily/weekly/monthly" in raw text // Handle "this daily/weekly/monthly" in raw text
let raw_s_lower = raw_source.to_lowercase(); let raw_s_lower = raw_source.to_lowercase();
@ -408,49 +393,39 @@ fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly") if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
|| raw_s_lower.contains("this monthly") || raw_s_lower.contains("this monthly")
{ {
source = digest_key.clone(); source = key.to_string();
} }
if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly") if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
|| raw_t_lower.contains("this monthly") || raw_t_lower.contains("this monthly")
{ {
target = digest_key.clone(); target = key.to_string();
} }
// Skip NEW: and self-links // Skip NEW: and self-links
if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; } if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
if source == target { continue; } if source == target { continue; }
links.push(DigestLink { source, target, reason, file: filename.clone() }); links.push(DigestLink { source, target, reason, file: key.to_string() });
} }
} }
links links
} }
/// Parse links from all digest files in the episodic dir. /// Parse links from all digest nodes in the store.
pub fn parse_all_digest_links() -> Result<Vec<DigestLink>, String> { pub fn parse_all_digest_links(store: &Store) -> Vec<DigestLink> {
let dir = memory_subdir("episodic")?;
let mut all_links = Vec::new(); let mut all_links = Vec::new();
for pattern in &["daily-*.md", "weekly-*.md", "monthly-*.md"] { let mut digest_keys: Vec<&String> = store.nodes.keys()
if let Ok(entries) = fs::read_dir(&dir) { .filter(|k| k.starts_with("daily-")
let mut files: Vec<PathBuf> = entries || k.starts_with("weekly-")
.filter_map(|e| e.ok()) || k.starts_with("monthly-"))
.map(|e| e.path()) .collect();
.filter(|p| { digest_keys.sort();
p.file_name()
.and_then(|n| n.to_str()) for key in digest_keys {
.map(|n| { if let Some(node) = store.nodes.get(key) {
let prefix = pattern.split('*').next().unwrap_or(""); all_links.extend(parse_digest_node_links(key, &node.content));
n.starts_with(prefix) && n.ends_with(".md")
})
.unwrap_or(false)
})
.collect();
files.sort();
for path in files {
all_links.extend(parse_digest_file_links(&path));
}
} }
} }
@ -458,7 +433,7 @@ pub fn parse_all_digest_links() -> Result<Vec<DigestLink>, String> {
let mut seen = std::collections::HashSet::new(); let mut seen = std::collections::HashSet::new();
all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone()))); all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
Ok(all_links) all_links
} }
/// Apply parsed digest links to the store. /// Apply parsed digest links to the store.

View file

@ -13,10 +13,60 @@ use crate::store::{self, Store, new_node, new_relation};
use regex::Regex; use regex::Regex;
use std::collections::hash_map::DefaultHasher; use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::fs; use std::fs;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
use crate::util::memory_subdir; use crate::store::StoreView;
/// Compute the store dedup key for a transcript file.
/// This is the same key experience_mine uses to mark a transcript as mined.
pub fn transcript_dedup_key(path: &str) -> Result<String, String> {
let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?;
let mut hasher = DefaultHasher::new();
bytes.hash(&mut hasher);
Ok(format!("_mined-transcripts.md#h-{:016x}", hasher.finish()))
}
/// Check if a transcript has already been mined (dedup key exists in store).
pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
match transcript_dedup_key(path) {
Ok(key) => store.node_content(&key).is_some(),
Err(_) => false,
}
}
/// Dedup key for a transcript based on its filename (UUID).
/// Used by the daemon reconcile loop — no file reads needed.
pub fn transcript_filename_key(path: &str) -> String {
let filename = std::path::Path::new(path)
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_else(|| path.to_string());
format!("_mined-transcripts.md#f-{}", filename)
}
/// Get the set of all mined transcript keys (both content-hash and filename)
/// from the store. Load once per daemon tick, check many.
pub fn mined_transcript_keys() -> HashSet<String> {
use crate::store::AnyView;
let Ok(view) = AnyView::load() else { return HashSet::new() };
let mut keys = HashSet::new();
view.for_each_node(|key, _, _| {
if key.starts_with("_mined-transcripts.md#") {
keys.insert(key.to_string());
}
});
keys
}
/// Check if a transcript has been mined, given a pre-loaded set of mined keys.
/// Checks filename-based key only (no file read). Sessions mined before the
/// filename key was added will pass through and short-circuit in experience_mine
/// via the content hash check — a one-time cost on first restart after this change.
pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> bool {
mined.contains(&transcript_filename_key(path))
}
/// Extract user/assistant messages with line numbers from a JSONL transcript. /// Extract user/assistant messages with line numbers from a JSONL transcript.
/// (line_number, role, text, timestamp) /// (line_number, role, text, timestamp)
@ -187,21 +237,6 @@ pub fn journal_enrich(
} }
} }
// Save result to agent-results
let timestamp = store::format_datetime(store::now_epoch())
.replace([':', '-'], "");
let result_file = memory_subdir("agent-results")?
.join(format!("{}.json", timestamp));
let output = serde_json::json!({
"timestamp": timestamp,
"jsonl_path": jsonl_path,
"entry_text": &entry_text[..entry_text.len().min(500)],
"agent_result": result,
});
fs::write(&result_file, serde_json::to_string_pretty(&output).unwrap())
.map_err(|e| format!("write {}: {}", result_file.display(), e))?;
println!(" Results saved: {}", result_file.display());
store.save()?; store.save()?;
Ok(()) Ok(())
} }
@ -320,6 +355,7 @@ pub fn experience_mine(
let mut node = new_node(&key, &full_content); let mut node = new_node(&key, &full_content);
node.node_type = store::NodeType::EpisodicSession; node.node_type = store::NodeType::EpisodicSession;
node.category = store::Category::Observation; node.category = store::Category::Observation;
node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(node); let _ = store.upsert_node(node);
count += 1; count += 1;
@ -328,11 +364,19 @@ pub fn experience_mine(
} }
// Record this transcript as mined (even if count == 0, to prevent re-runs) // Record this transcript as mined (even if count == 0, to prevent re-runs)
// Two keys: content hash (exact dedup) and filename (fast daemon reconcile)
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count); let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
let mut dedup_node = new_node(&dedup_key, &dedup_content); let mut dedup_node = new_node(&dedup_key, &dedup_content);
dedup_node.category = store::Category::Task; dedup_node.category = store::Category::Task;
dedup_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(dedup_node); let _ = store.upsert_node(dedup_node);
let fname_key = transcript_filename_key(jsonl_path);
let mut fname_node = new_node(&fname_key, &dedup_content);
fname_node.category = store::Category::Task;
fname_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(fname_node);
if count > 0 { if count > 0 {
println!(" Saved {} new journal entries.", count); println!(" Saved {} new journal entries.", count);
} }

976
src/knowledge.rs Normal file
View file

@ -0,0 +1,976 @@
// knowledge.rs — knowledge production agents and convergence loop
//
// Rust port of knowledge_agents.py + knowledge_loop.py.
// Four agents mine the memory graph for new knowledge:
// 1. Observation — extract facts from raw conversations
// 2. Extractor — find patterns in node clusters
// 3. Connector — find cross-domain structural connections
// 4. Challenger — stress-test existing knowledge nodes
//
// The loop runs agents in sequence, applies results, measures
// convergence via graph-structural metrics (sigma, CC, communities).
use crate::graph::Graph;
use crate::llm;
use crate::spectral;
use crate::store::{self, Store, new_relation, RelationType};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
/// Root directory of the on-disk memory store (delegates to the store module).
fn memory_dir() -> PathBuf {
    store::memory_dir()
}
/// Directory holding the agent prompt templates, checked into the crate.
fn prompts_dir() -> PathBuf {
    // CARGO_MANIFEST_DIR is resolved at compile time, so this points at the
    // source checkout that built the binary.
    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("prompts")
}
/// `~/.claude/projects` — where conversation transcripts live.
/// Falls back to the current directory when HOME is unset.
fn projects_dir() -> PathBuf {
    let home = std::env::var("HOME").unwrap_or_else(|_| String::from("."));
    let mut dir = PathBuf::from(home);
    dir.push(".claude/projects");
    dir
}
// ---------------------------------------------------------------------------
// Action types
// ---------------------------------------------------------------------------
/// One proposed mutation of the knowledge graph, parsed from agent output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Action {
    pub kind: ActionKind,                // which mutation to perform
    pub confidence: Confidence,          // agent-reported confidence label
    pub weight: f64,                     // numeric weight derived from confidence
    pub depth: i32,                      // inference depth; -1 for links (see compute_action_depth)
    pub applied: Option<bool>,           // None until the loop decides; then whether it was applied
    pub rejected_reason: Option<String>, // populated when the action is rejected
}
/// The concrete graph mutation an agent proposed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ActionKind {
    /// Create or overwrite a knowledge node.
    WriteNode {
        key: String,
        content: String,
        // Keys of source nodes this knowledge was derived from;
        // drives inference-depth computation.
        covers: Vec<String>,
    },
    /// Add a relation between two existing nodes.
    Link {
        source: String,
        target: String,
    },
    /// Replace the content of an existing node.
    Refine {
        key: String,
        content: String,
    },
}
/// Agent-reported confidence label; serialized lowercase to match the
/// textual markers agents emit (CONFIDENCE: high|medium|low).
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Confidence {
    High,
    Medium,
    Low,
}
impl Confidence {
fn weight(self) -> f64 {
match self {
Self::High => 1.0,
Self::Medium => 0.6,
Self::Low => 0.3,
}
}
fn value(self) -> f64 {
match self {
Self::High => 0.9,
Self::Medium => 0.6,
Self::Low => 0.3,
}
}
fn parse(s: &str) -> Self {
match s.to_lowercase().as_str() {
"high" => Self::High,
"low" => Self::Low,
_ => Self::Medium,
}
}
}
// ---------------------------------------------------------------------------
// Action parsing
// ---------------------------------------------------------------------------
/// Parse `WRITE_NODE <key> ... END_NODE` blocks from agent output.
///
/// An optional `CONFIDENCE: high|medium|low` marker and an optional
/// `COVERS: a, b, c` line are extracted from the body and stripped
/// from the stored content.
pub fn parse_write_nodes(text: &str) -> Vec<Action> {
    let block_re = Regex::new(r"(?s)WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE").unwrap();
    let conf_re = Regex::new(r"(?i)CONFIDENCE:\s*(high|medium|low)").unwrap();
    let covers_re = Regex::new(r"COVERS:\s*(.+)").unwrap();
    let mut actions = Vec::new();
    for cap in block_re.captures_iter(text) {
        let key = cap[1].to_string();
        let mut content = cap[2].trim().to_string();

        // Pull the confidence marker first, then remove it from the body.
        let confidence = match conf_re.captures(&content) {
            Some(c) => Confidence::parse(&c[1]),
            None => Confidence::Medium,
        };
        content = conf_re.replace(&content, "").trim().to_string();

        // COVERS lists the source nodes this knowledge was derived from.
        let covers: Vec<String> = match covers_re.captures(&content) {
            Some(c) => c[1].split(',').map(|s| s.trim().to_string()).collect(),
            None => Vec::new(),
        };
        content = covers_re.replace(&content, "").trim().to_string();

        actions.push(Action {
            weight: confidence.weight(),
            kind: ActionKind::WriteNode { key, content, covers },
            confidence,
            depth: 0,
            applied: None,
            rejected_reason: None,
        });
    }
    actions
}
/// Parse `LINK <source> <target>` lines (one per line) from agent output.
/// Links always enter at Low confidence, weight 0.3, and depth -1.
pub fn parse_links(text: &str) -> Vec<Action> {
    let link_re = Regex::new(r"(?m)^LINK\s+(\S+)\s+(\S+)").unwrap();
    let mut out = Vec::new();
    for cap in link_re.captures_iter(text) {
        let source = cap[1].to_string();
        let target = cap[2].to_string();
        out.push(Action {
            kind: ActionKind::Link { source, target },
            confidence: Confidence::Low,
            weight: 0.3,
            depth: -1,
            applied: None,
            rejected_reason: None,
        });
    }
    out
}
/// Parse `REFINE <key> ... END_REFINE` blocks from agent output.
/// Refinements enter at Medium confidence with weight 0.7.
pub fn parse_refines(text: &str) -> Vec<Action> {
    let refine_re = Regex::new(r"(?s)REFINE\s+(\S+)\s*\n(.*?)END_REFINE").unwrap();
    refine_re
        .captures_iter(text)
        .map(|cap| {
            // Agents sometimes wrap the key in markdown emphasis; strip it.
            let key = cap[1].trim_matches('*').trim().to_string();
            let content = cap[2].trim().to_string();
            Action {
                kind: ActionKind::Refine { key, content },
                confidence: Confidence::Medium,
                weight: 0.7,
                depth: 0,
                applied: None,
                rejected_reason: None,
            }
        })
        .collect()
}
/// Parse every action type (WRITE_NODE, LINK, REFINE) from one agent
/// response, in that order.
pub fn parse_all_actions(text: &str) -> Vec<Action> {
    let mut all = parse_write_nodes(text);
    all.extend(parse_links(text));
    all.extend(parse_refines(text));
    all
}
pub fn count_no_ops(text: &str) -> usize {
let no_conn = Regex::new(r"\bNO_CONNECTION\b").unwrap().find_iter(text).count();
let affirm = Regex::new(r"\bAFFIRM\b").unwrap().find_iter(text).count();
let no_extract = Regex::new(r"\bNO_EXTRACTION\b").unwrap().find_iter(text).count();
no_conn + affirm + no_extract
}
// ---------------------------------------------------------------------------
// Inference depth tracking
// ---------------------------------------------------------------------------
/// Store key under which the depth database persists as a JSON node.
const DEPTH_DB_KEY: &str = "_knowledge-depths";

/// Maps node key -> inference depth (derivation steps removed from raw
/// observation). Stored inside the store itself rather than a side file.
#[derive(Default)]
pub struct DepthDb {
    // node key -> depth; absent keys are treated as depth 0 (see get()).
    depths: HashMap<String, i32>,
}

impl DepthDb {
    /// Load the depth map from the store; an absent or unparsable node
    /// yields an empty map.
    pub fn load(store: &Store) -> Self {
        let depths = store.nodes.get(DEPTH_DB_KEY)
            .and_then(|n| serde_json::from_str(&n.content).ok())
            .unwrap_or_default();
        Self { depths }
    }

    /// Persist the map back into the store as an agent-provenance node.
    /// NOTE(review): serialization or upsert failures are silently dropped,
    /// losing depth updates — confirm this best-effort behavior is intended.
    pub fn save(&self, store: &mut Store) {
        if let Ok(json) = serde_json::to_string(&self.depths) {
            store.upsert_provenance(DEPTH_DB_KEY, &json,
                store::Provenance::AgentKnowledgeObservation).ok();
        }
    }

    /// Depth for a key; unknown keys are depth 0 (raw/source material).
    pub fn get(&self, key: &str) -> i32 {
        self.depths.get(key).copied().unwrap_or(0)
    }

    /// Record (or overwrite) the depth for a key.
    pub fn set(&mut self, key: String, depth: i32) {
        self.depths.insert(key, depth);
    }
}
/// Base inference depth assigned to nodes an agent writes when it does
/// not declare COVERS sources: observation=1, extractor=2, connector=3.
/// The challenger never introduces new depth-bearing nodes, hence None;
/// unknown agents default to 2.
fn agent_base_depth(agent: &str) -> Option<i32> {
    let depth = match agent {
        "observation" => 1,
        "extractor" => 2,
        "connector" => 3,
        "challenger" => return None,
        _ => 2,
    };
    Some(depth)
}
/// Compute the inference depth for a proposed action.
///
/// Links carry no depth (-1). Refinements inherit the depth of the node
/// they refine. A new node sits one level above the deepest node it
/// covers; when no COVERS are declared, the agent's base depth applies.
pub fn compute_action_depth(db: &DepthDb, action: &Action, agent: &str) -> i32 {
    match &action.kind {
        ActionKind::Link { .. } => -1,
        ActionKind::Refine { key, .. } => db.get(key),
        ActionKind::WriteNode { covers, .. } => {
            match covers.iter().map(|k| db.get(k)).max() {
                Some(deepest) => deepest + 1,
                None => agent_base_depth(agent).unwrap_or(2),
            }
        }
    }
}
/// Confidence threshold required to accept knowledge at a given inference
/// depth. Each derivation step must independently clear `base`, so the
/// combined requirement is 1 - (1 - base)^depth, approaching 1.0 as depth
/// grows. Depth <= 0 (raw observations) requires nothing.
pub fn required_confidence(depth: i32, base: f64) -> f64 {
    if depth > 0 {
        1.0 - (1.0 - base).powi(depth)
    } else {
        0.0
    }
}
/// Confidence bonus earned through real-world use of a node.
/// Saturating curve: 0.0 at zero uses, rising toward 1.0 asymptotically
/// with use_count.
pub fn use_bonus(use_count: u32) -> f64 {
    match use_count {
        // The formula already yields 0.0 here; keep the explicit arm to
        // mirror the documented contract.
        0 => 0.0,
        n => 1.0 - 1.0 / (1.0 + 0.15 * n as f64),
    }
}
// ---------------------------------------------------------------------------
// Action application
// ---------------------------------------------------------------------------
/// Prefix node content with an HTML comment recording the authoring
/// agent, creation timestamp, and inference depth.
fn stamp_content(content: &str, agent: &str, timestamp: &str, depth: i32) -> String {
    let header = format!("<!-- author: {} | created: {} | depth: {} -->", agent, timestamp, depth);
    format!("{}\n{}", header, content)
}
/// Whether a non-deleted relation already exists between two node keys,
/// in either direction.
fn has_edge(store: &Store, source: &str, target: &str) -> bool {
    store.relations.iter().any(|r| {
        if r.deleted {
            return false;
        }
        let forward = r.source_key == source && r.target_key == target;
        let backward = r.source_key == target && r.target_key == source;
        forward || backward
    })
}
/// Apply a single parsed action to the store. Returns true when the
/// mutation was actually performed.
///
/// WriteNode / Refine both upsert the stamped content under the agent's
/// provenance. Link is skipped (returns false) when an equivalent edge
/// already exists or either endpoint node is missing.
pub fn apply_action(
    store: &mut Store,
    action: &Action,
    agent: &str,
    timestamp: &str,
    depth: i32,
) -> bool {
    let provenance = agent_provenance(agent);
    match &action.kind {
        ActionKind::WriteNode { key, content, .. } => {
            // Stamp carries author/timestamp/depth as an HTML comment header.
            let stamped = stamp_content(content, agent, timestamp, depth);
            store.upsert_provenance(key, &stamped, provenance).is_ok()
        }
        ActionKind::Link { source, target } => {
            if has_edge(store, source, target) {
                return false;
            }
            // Both endpoints must already exist; links never create nodes.
            let source_uuid = match store.nodes.get(source.as_str()) {
                Some(n) => n.uuid,
                None => return false,
            };
            let target_uuid = match store.nodes.get(target.as_str()) {
                Some(n) => n.uuid,
                None => return false,
            };
            // Links enter at weight 0.3, matching parse_links' confidence.
            let mut rel = new_relation(
                source_uuid, target_uuid,
                RelationType::Link,
                0.3,
                source, target,
            );
            rel.provenance = provenance;
            store.add_relation(rel).is_ok()
        }
        ActionKind::Refine { key, content } => {
            let stamped = stamp_content(content, agent, timestamp, depth);
            store.upsert_provenance(key, &stamped, provenance).is_ok()
        }
    }
}
/// Map an agent name to the Provenance variant recorded on the nodes and
/// relations it creates. Unknown agents fall back to the legacy catch-all
/// `Agent` variant.
fn agent_provenance(agent: &str) -> store::Provenance {
    match agent {
        "observation" => store::Provenance::AgentKnowledgeObservation,
        // "pattern" is an accepted alias for the extractor agent.
        "extractor" | "pattern" => store::Provenance::AgentKnowledgePattern,
        "connector" => store::Provenance::AgentKnowledgeConnector,
        "challenger" => store::Provenance::AgentKnowledgeChallenger,
        _ => store::Provenance::Agent,
    }
}
// ---------------------------------------------------------------------------
// Agent runners
// ---------------------------------------------------------------------------
/// Read a prompt template (`prompts/<name>.md`) from the crate directory.
fn load_prompt(name: &str) -> Result<String, String> {
    let path = prompts_dir().join(format!("{}.md", name));
    match fs::read_to_string(&path) {
        Ok(text) => Ok(text),
        Err(e) => Err(format!("load prompt {}: {}", name, e)),
    }
}
/// One-line summary of graph size, injected into agent prompts as
/// {{TOPOLOGY}}.
fn get_graph_topology(store: &Store, graph: &Graph) -> String {
    let node_count = store.nodes.len();
    let edge_count = graph.edge_count();
    format!("Nodes: {} Relations: {}\n", node_count, edge_count)
}
/// Remove `<system-reminder>...</system-reminder>` blocks injected into
/// transcripts, then trim surrounding whitespace.
fn strip_system_tags(text: &str) -> String {
    let tag_re = Regex::new(r"(?s)<system-reminder>.*?</system-reminder>").unwrap();
    let cleaned = tag_re.replace_all(text, "");
    cleaned.trim().to_string()
}
/// Build a readable "**Kent:** ... / **PoC:** ..." dialogue from a JSONL
/// transcript. Keeps external user messages longer than 5 chars and
/// assistant messages longer than 10 chars; `[Request interrupted` user
/// turns are dropped. `max_chars` is a soft cap — it is checked after
/// each append, so the result may exceed it by one message.
fn extract_conversation_text(path: &Path, max_chars: usize) -> String {
    let Ok(content) = fs::read_to_string(path) else { return String::new() };
    let mut fragments = Vec::new();
    let mut total = 0;
    for line in content.lines() {
        // Skip malformed JSONL lines silently.
        let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else { continue };
        let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
        if msg_type == "user" && obj.get("userType").and_then(|v| v.as_str()) == Some("external") {
            if let Some(text) = extract_text_content(&obj) {
                let text = strip_system_tags(&text);
                if text.starts_with("[Request interrupted") { continue; }
                if text.len() > 5 {
                    fragments.push(format!("**Kent:** {}", text));
                    total += text.len();
                }
            }
        } else if msg_type == "assistant" {
            if let Some(text) = extract_text_content(&obj) {
                let text = strip_system_tags(&text);
                if text.len() > 10 {
                    fragments.push(format!("**PoC:** {}", text));
                    total += text.len();
                }
            }
        }
        if total > max_chars { break; }
    }
    fragments.join("\n\n")
}
/// Pull the plain-text content out of a transcript message object.
/// Handles both the string form and the block-array form of
/// `message.content`, joining all `text` blocks with newlines.
/// Returns None when no text is present.
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
    let content = obj.get("message")?.get("content")?;
    if let Some(s) = content.as_str() {
        return Some(s.to_string());
    }
    let blocks = content.as_array()?;
    let texts: Vec<&str> = blocks
        .iter()
        .filter_map(|b| {
            if b.get("type")?.as_str()? == "text" {
                b.get("text")?.as_str()
            } else {
                None
            }
        })
        .collect();
    if texts.is_empty() {
        None
    } else {
        Some(texts.join("\n"))
    }
}
/// Count short user messages (dialogue turns) in a JSONL
fn count_dialogue_turns(path: &Path) -> usize {
let Ok(content) = fs::read_to_string(path) else { return 0 };
content.lines()
.filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
.filter(|obj| {
obj.get("type").and_then(|v| v.as_str()) == Some("user")
&& obj.get("userType").and_then(|v| v.as_str()) == Some("external")
})
.filter(|obj| {
let text = extract_text_content(obj).unwrap_or_default();
text.len() > 5 && text.len() < 500
&& !text.starts_with("[Request interrupted")
&& !text.starts_with("Implement the following")
})
.count()
}
/// Select up to `n` conversation fragments for the observation agent.
///
/// Scans `~/.claude/projects/*/*.jsonl` for transcripts over 50 KB, ranks
/// them by number of dialogue turns (requiring at least 10), then extracts
/// up to 8000 chars of dialogue from each. Returns (session_id, text)
/// pairs; fragments shorter than 500 chars are discarded, which is why up
/// to 2n candidates are scanned to fill n slots.
fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
    let projects = projects_dir();
    if !projects.exists() { return Vec::new(); }
    let mut jsonl_files: Vec<PathBuf> = Vec::new();
    if let Ok(dirs) = fs::read_dir(&projects) {
        for dir in dirs.filter_map(|e| e.ok()) {
            if !dir.path().is_dir() { continue; }
            if let Ok(files) = fs::read_dir(dir.path()) {
                for f in files.filter_map(|e| e.ok()) {
                    let p = f.path();
                    if p.extension().map(|x| x == "jsonl").unwrap_or(false) {
                        // Size filter: small transcripts rarely contain
                        // enough dialogue to be worth mining.
                        if let Ok(meta) = p.metadata() {
                            if meta.len() > 50_000 {
                                jsonl_files.push(p);
                            }
                        }
                    }
                }
            }
        }
    }
    // Rank by dialogue-turn count, most conversational first.
    let mut scored: Vec<(usize, PathBuf)> = jsonl_files.into_iter()
        .map(|f| (count_dialogue_turns(&f), f))
        .filter(|(turns, _)| *turns >= 10)
        .collect();
    scored.sort_by(|a, b| b.0.cmp(&a.0));
    let mut fragments = Vec::new();
    for (_, f) in scored.iter().take(n * 2) {
        // Session id is the transcript's file stem (a UUID in practice —
        // TODO confirm against the transcript writer).
        let session_id = f.file_stem()
            .map(|s| s.to_string_lossy().to_string())
            .unwrap_or_else(|| "unknown".into());
        let text = extract_conversation_text(f, 8000);
        if text.len() > 500 {
            fragments.push((session_id, text));
        }
        if fragments.len() >= n { break; }
    }
    fragments
}
/// Run the observation agent: feed each selected conversation fragment to
/// the LLM with the observation-extractor prompt and concatenate the raw
/// responses (later parsed by parse_all_actions). Fails fast on the first
/// LLM error.
pub fn run_observation_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
    let template = load_prompt("observation-extractor")?;
    let topology = get_graph_topology(store, graph);
    let fragments = select_conversation_fragments(batch_size);
    let mut results = Vec::new();
    for (i, (session_id, text)) in fragments.iter().enumerate() {
        // NOTE(review): byte slicing assumes session ids are ASCII (UUID
        // file stems); a multi-byte id could panic here — confirm.
        eprintln!(" Observation extractor {}/{}: session {}... ({} chars)",
            i + 1, fragments.len(), &session_id[..session_id.len().min(12)], text.len());
        let prompt = template
            .replace("{{TOPOLOGY}}", &topology)
            .replace("{{CONVERSATIONS}}", &format!("### Session {}\n\n{}", session_id, text));
        let response = llm::call_sonnet(&prompt, 600)?;
        results.push(format!("## Session: {}\n\n{}", session_id, response));
    }
    Ok(results.join("\n\n---\n\n"))
}
/// Load the spectral embedding coordinates from disk via the spectral
/// module; an unavailable embedding yields an empty map (callers then
/// see infinite distances everywhere).
fn load_spectral_embedding() -> HashMap<String, Vec<f64>> {
    spectral::load_embedding()
        .map(|emb| emb.coords)
        .unwrap_or_default()
}
/// Cosine distance (1 - cos) between two embedded keys.
/// Returns +inf when either key is absent or has a zero-norm vector, so
/// missing entries sort after every real neighbor.
fn spectral_distance(embedding: &HashMap<String, Vec<f64>>, a: &str, b: &str) -> f64 {
    let va = match embedding.get(a) {
        Some(v) => v,
        None => return f64::INFINITY,
    };
    let vb = match embedding.get(b) {
        Some(v) => v,
        None => return f64::INFINITY,
    };
    // Dot product over the shared prefix; norms over each full vector.
    let dot: f64 = va.iter().zip(vb).map(|(x, y)| x * y).sum();
    let norm_a = va.iter().map(|x| x * x).sum::<f64>().sqrt();
    let norm_b = vb.iter().map(|x| x * x).sum::<f64>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        f64::INFINITY
    } else {
        1.0 - dot / (norm_a * norm_b)
    }
}
/// Greedily pick up to `n` disjoint clusters of 5 spectrally-near nodes
/// for the extractor agent.
///
/// Journal entries and a few housekeeping nodes are excluded. Each pass
/// takes the first unused key as a seed and attaches its 4 nearest
/// finite-distance neighbors.
///
/// NOTE(review): the seed is `available[0]` over HashMap key order, so
/// cluster selection is nondeterministic across runs — confirm that is
/// acceptable (it does vary which nodes get mined each cycle).
fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
    let embedding = load_spectral_embedding();
    let skip = ["journal.md", "MEMORY.md", "where-am-i.md", "work-queue.md"];
    let semantic_keys: Vec<&String> = embedding.keys()
        .filter(|k| !k.starts_with("journal.md#") && !skip.contains(&k.as_str()))
        .collect();
    let cluster_size = 5;
    let mut used = HashSet::new();
    let mut clusters = Vec::new();
    for _ in 0..n {
        let available: Vec<&&String> = semantic_keys.iter()
            .filter(|k| !used.contains(**k))
            .collect();
        if available.len() < cluster_size { break; }
        let seed = available[0];
        let mut distances: Vec<(f64, &String)> = available.iter()
            .filter(|k| ***k != *seed)
            .map(|k| (spectral_distance(&embedding, seed, k), **k))
            .filter(|(d, _)| d.is_finite())
            .collect();
        // unwrap is safe: NaN was excluded by the is_finite filter above.
        distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
        let cluster: Vec<String> = std::iter::once((*seed).clone())
            .chain(distances.iter().take(cluster_size - 1).map(|(_, k)| (*k).clone()))
            .collect();
        for k in &cluster { used.insert(k.clone()); }
        clusters.push(cluster);
    }
    clusters
}
/// Run the pattern-extractor agent: present each node cluster to the LLM
/// and concatenate the raw responses (later parsed by parse_all_actions).
/// Clusters whose nodes have all vanished from the store are skipped.
pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
    let template = load_prompt("extractor")?;
    let topology = get_graph_topology(store, graph);
    let clusters = select_extractor_clusters(store, batch_size);
    let mut results = Vec::new();
    for (i, cluster) in clusters.iter().enumerate() {
        eprintln!(" Extractor cluster {}/{}: {} nodes", i + 1, clusters.len(), cluster.len());
        // Render each still-present node as "### key\ncontent".
        let node_texts: Vec<String> = cluster.iter()
            .filter_map(|key| {
                let content = store.nodes.get(key)?.content.as_str();
                Some(format!("### {}\n{}", key, content))
            })
            .collect();
        if node_texts.is_empty() { continue; }
        let prompt = template
            .replace("{{TOPOLOGY}}", &topology)
            .replace("{{NODES}}", &node_texts.join("\n\n"));
        let response = llm::call_sonnet(&prompt, 600)?;
        results.push(format!("## Cluster {}: {}...\n\n{}", i + 1,
            cluster.iter().take(3).cloned().collect::<Vec<_>>().join(", "), response));
    }
    Ok(results.join("\n\n---\n\n"))
}
/// Pick up to `n` pairs of spectrally-near (distance < 0.5) but not yet
/// linked nodes for the connector agent. Journal/digest/housekeeping keys
/// are excluded. Each endpoint is used at most once.
///
/// Returns pairs as singleton groups (Vec<String> per side) to leave room
/// for multi-node groups later.
fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<String>, Vec<String>)> {
    let embedding = load_spectral_embedding();
    let skip_prefixes = ["journal.md#", "daily-", "weekly-", "monthly-", "all-sessions"];
    let skip_exact: HashSet<&str> = ["journal.md", "MEMORY.md", "where-am-i.md",
        "work-queue.md", "work-state"].iter().copied().collect();
    let semantic_keys: Vec<&String> = embedding.keys()
        .filter(|k| {
            !skip_exact.contains(k.as_str())
                && !skip_prefixes.iter().any(|p| k.starts_with(p))
        })
        .collect();
    let mut pairs = Vec::new();
    let mut used = HashSet::new();
    // Examine at most 10x the requested pair count of candidate seeds.
    // NOTE(review): HashMap key order makes seed choice nondeterministic.
    for seed in semantic_keys.iter().take(n * 10) {
        if used.contains(*seed) { continue; }
        let mut near: Vec<(f64, &String)> = semantic_keys.iter()
            .filter(|k| ***k != **seed && !used.contains(**k))
            .map(|k| (spectral_distance(&embedding, seed, k), *k))
            .filter(|(d, _)| *d < 0.5 && d.is_finite())
            .collect();
        // unwrap is safe: NaN was excluded by the is_finite filter above.
        near.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
        // Take the nearest of the 5 closest neighbors not already linked.
        for (_, target) in near.iter().take(5) {
            if !has_edge(store, seed, target) {
                let _ = graph; // graph available for future use
                used.insert((*seed).clone());
                used.insert((*target).clone());
                pairs.push((vec![(*seed).clone()], vec![(*target).clone()]));
                break;
            }
        }
        if pairs.len() >= n { break; }
    }
    pairs
}
/// Run the connector agent: present each candidate pair of node groups to
/// the LLM and concatenate the raw responses (parsed later by
/// parse_all_actions).
pub fn run_connector(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
    let template = load_prompt("connector")?;
    let topology = get_graph_topology(store, graph);
    let pairs = select_connector_pairs(store, graph, batch_size);
    let mut results = Vec::new();
    for (i, (group_a, group_b)) in pairs.iter().enumerate() {
        eprintln!(" Connector pair {}/{}", i + 1, pairs.len());
        let nodes_a: Vec<String> = group_a.iter()
            .filter_map(|k| {
                let c = store.nodes.get(k)?.content.as_str();
                Some(format!("### {}\n{}", k, c))
            })
            .collect();
        let nodes_b: Vec<String> = group_b.iter()
            .filter_map(|k| {
                let c = store.nodes.get(k)?.content.as_str();
                Some(format!("### {}\n{}", k, c))
            })
            .collect();
        let prompt = template
            .replace("{{TOPOLOGY}}", &topology)
            .replace("{{NODES_A}}", &nodes_a.join("\n\n"))
            .replace("{{NODES_B}}", &nodes_b.join("\n\n"))
;
        let response = llm::call_sonnet(&prompt, 600)?;
        // NOTE(review): "{}{}" concatenates the two key lists with no
        // separator in the report header — likely wants a "↔" between
        // them; confirm intended output format before changing.
        results.push(format!("## Pair {}: {}{}\n\n{}",
            i + 1, group_a.join(", "), group_b.join(", "), response));
    }
    Ok(results.join("\n\n---\n\n"))
}
/// Run the challenger agent: stress-test the highest-degree non-journal
/// nodes (most structurally important first) and concatenate the raw
/// responses.
pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
    let template = load_prompt("challenger")?;
    let topology = get_graph_topology(store, graph);
    // Rank candidates by graph degree, descending.
    let mut candidates: Vec<(&String, usize)> = store.nodes.iter()
        .filter(|(k, _)| {
            !k.starts_with("journal.md#")
                && !["journal.md", "MEMORY.md", "where-am-i.md"].contains(&k.as_str())
        })
        .map(|(k, _)| (k, graph.degree(k)))
        .collect();
    candidates.sort_by(|a, b| b.1.cmp(&a.1));
    let mut results = Vec::new();
    for (i, (key, _)) in candidates.iter().take(batch_size).enumerate() {
        eprintln!(" Challenger {}/{}: {}", i + 1, batch_size.min(candidates.len()), key);
        let content = match store.nodes.get(key.as_str()) {
            Some(n) => &n.content,
            None => continue,
        };
        let prompt = template
            .replace("{{TOPOLOGY}}", &topology)
            .replace("{{NODE_KEY}}", key)
            .replace("{{NODE_CONTENT}}", content);
        let response = llm::call_sonnet(&prompt, 600)?;
        results.push(format!("## Challenge: {}\n\n{}", key, response));
    }
    Ok(results.join("\n\n---\n\n"))
}
// ---------------------------------------------------------------------------
// Convergence metrics
// ---------------------------------------------------------------------------
/// Metrics recorded for one cycle of the knowledge loop.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CycleResult {
    pub cycle: usize,          // 1-based cycle number
    pub timestamp: String,     // local time, %Y%m%dT%H%M%S (set in run_cycle)
    pub total_actions: usize,  // actions parsed from agent output
    pub total_applied: usize,  // actions actually applied to the store
    pub total_no_ops: usize,   // explicit NO_CONNECTION/AFFIRM/NO_EXTRACTION markers
    pub depth_rejected: usize, // actions rejected by the depth/confidence gate
    // Presumably the confidence-weighted sum of applied work; computed in
    // run_cycle — TODO confirm exact formula.
    pub weighted_delta: f64,
    pub graph_metrics_before: GraphMetrics,
    pub graph_metrics_after: GraphMetrics,
}
/// Graph-structural measurements used to detect convergence.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphMetrics {
    pub nodes: usize,       // node count in the store
    pub edges: usize,       // edge count in the built graph
    pub cc: f64,            // average clustering coefficient
    pub sigma: f64,         // small-world sigma
    pub communities: usize, // detected community count
}
impl GraphMetrics {
    /// Snapshot the current structural state of the store/graph.
    pub fn from_graph(store: &Store, graph: &Graph) -> Self {
        Self {
            nodes: store.nodes.len(),
            edges: graph.edge_count(),
            cc: graph.avg_clustering_coefficient() as f64,
            sigma: graph.small_world_sigma() as f64,
            communities: graph.community_count(),
        }
    }
}
/// Coefficient of variation (stddev / |mean|) of one after-cycle metric
/// over the last `window` cycles. Returns +inf while history is shorter
/// than the window (so convergence cannot trigger early) and 0.0 when
/// the mean is exactly zero. Unrecognized metric names yield all-zero
/// values and therefore 0.0.
fn metric_stability(history: &[CycleResult], key: &str, window: usize) -> f64 {
    if history.len() < window {
        return f64::INFINITY;
    }
    let recent = &history[history.len() - window..];
    let mut values = Vec::with_capacity(recent.len());
    for h in recent {
        let metric = match key {
            "sigma" => h.graph_metrics_after.sigma,
            "cc" => h.graph_metrics_after.cc,
            "communities" => h.graph_metrics_after.communities as f64,
            _ => 0.0,
        };
        values.push(metric);
    }
    if values.len() < 2 {
        return f64::INFINITY;
    }
    let n = values.len() as f64;
    let mean = values.iter().sum::<f64>() / n;
    if mean == 0.0 {
        return 0.0;
    }
    let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / n;
    variance.sqrt() / mean.abs()
}
/// Decide whether the knowledge loop has converged over the last `window`
/// cycles: graph structure stable (low CV on sigma, clustering coefficient,
/// and community count) AND agents nearly idle (low average weighted delta).
pub fn check_convergence(history: &[CycleResult], window: usize) -> bool {
    if history.len() < window {
        return false;
    }
    let sigma_cv = metric_stability(history, "sigma", window);
    let cc_cv = metric_stability(history, "cc", window);
    let comm_cv = metric_stability(history, "communities", window);
    let recent = &history[history.len() - window..];
    let avg_delta = recent.iter().map(|r| r.weighted_delta).sum::<f64>() / recent.len() as f64;
    eprintln!("\n Convergence check (last {} cycles):", window);
    eprintln!(" sigma CV: {:.4} (< 0.05?)", sigma_cv);
    eprintln!(" CC CV: {:.4} (< 0.05?)", cc_cv);
    eprintln!(" community CV: {:.4} (< 0.10?)", comm_cv);
    eprintln!(" avg delta: {:.2} (< 1.00?)", avg_delta);
    let structural = sigma_cv < 0.05 && cc_cv < 0.05 && comm_cv < 0.10;
    let behavioral = avg_delta < 1.0;
    let converged = structural && behavioral;
    if converged {
        eprintln!(" → CONVERGED");
    }
    converged
}
// ---------------------------------------------------------------------------
// The knowledge loop
// ---------------------------------------------------------------------------
/// Tunables for the knowledge convergence loop.
pub struct KnowledgeLoopConfig {
    pub max_cycles: usize,    // hard cap on loop iterations
    pub batch_size: usize,    // items given to each agent per cycle
    pub window: usize,        // trailing cycles examined for convergence
    pub max_depth: i32,       // maximum allowed inference depth
    pub confidence_base: f64, // base for required_confidence()
}
impl Default for KnowledgeLoopConfig {
    /// Defaults: up to 20 cycles of 5-item batches, convergence judged
    /// over a 5-cycle window, inference depth capped at 4, confidence
    /// base 0.3.
    fn default() -> Self {
        Self {
            max_cycles: 20,
            batch_size: 5,
            window: 5,
            max_depth: 4,
            confidence_base: 0.3,
        }
    }
}
/// Run the full knowledge loop: cycles of all four agents until either
/// convergence (see check_convergence) or max_cycles. Returns per-cycle
/// metrics. On completion, persists the history as a `_knowledge-loop-*`
/// store node along with the updated depth database.
pub fn run_knowledge_loop(config: &KnowledgeLoopConfig) -> Result<Vec<CycleResult>, String> {
    let mut store = Store::load()?;
    let mut depth_db = DepthDb::load(&store);
    let mut history = Vec::new();
    eprintln!("Knowledge Loop — fixed-point iteration");
    eprintln!(" max_cycles={} batch_size={}", config.max_cycles, config.batch_size);
    eprintln!(" window={} max_depth={}", config.window, config.max_depth);
    for cycle in 1..=config.max_cycles {
        let result = run_cycle(cycle, config, &mut depth_db)?;
        history.push(result);
        if check_convergence(&history, config.window) {
            eprintln!("\n CONVERGED after {} cycles", cycle);
            break;
        }
    }
    // Save loop summary as a store node
    if let Some(first) = history.first() {
        let key = format!("_knowledge-loop-{}", first.timestamp);
        if let Ok(json) = serde_json::to_string_pretty(&history) {
            // Reload: run_cycle loads and saves its own Store, so the copy
            // opened above is stale by now.
            store = Store::load()?;
            store.upsert_provenance(&key, &json,
                store::Provenance::AgentKnowledgeObservation).ok();
            depth_db.save(&mut store);
            store.save()?;
            // NOTE(review): if serialization fails, the depth DB is never
            // saved either — confirm that coupling is intended.
        }
    }
    Ok(history)
}
/// Run one cycle of the knowledge loop: snapshot graph metrics, run the four
/// agents in a fixed order, gate and apply their proposed actions, persist
/// each agent's raw output as an audit node, and return a `CycleResult`
/// comparing metrics before and after.
///
/// Gating (WriteNode actions only): effective confidence = declared
/// confidence + a bonus from the average `uses` count of the covered source
/// nodes, capped at 1.0; it must meet the depth-scaled requirement from
/// `required_confidence`, and the action depth must not exceed
/// `config.max_depth`. Link and Refine actions are applied without gating.
fn run_cycle(
    cycle_num: usize,
    config: &KnowledgeLoopConfig,
    depth_db: &mut DepthDb,
) -> Result<CycleResult, String> {
    // One timestamp per cycle — also the suffix of every audit-node key below.
    let timestamp = chrono::Local::now().format("%Y%m%dT%H%M%S").to_string();
    eprintln!("\n{}", "=".repeat(60));
    eprintln!("CYCLE {}—{}", cycle_num, timestamp);
    eprintln!("{}", "=".repeat(60));
    let mut store = Store::load()?;
    let graph = store.build_graph();
    // Baseline metrics, echoed back in the returned CycleResult.
    let metrics_before = GraphMetrics::from_graph(&store, &graph);
    eprintln!(" Before: nodes={} edges={} cc={:.3} sigma={:.3}",
        metrics_before.nodes, metrics_before.edges, metrics_before.cc, metrics_before.sigma);
    let mut all_actions = Vec::new();
    let mut all_no_ops = 0;
    let mut depth_rejected = 0;
    let mut total_applied = 0;
    // Run each agent, rebuilding graph after mutations
    let agent_names = ["observation", "extractor", "connector", "challenger"];
    for agent_name in &agent_names {
        eprintln!("\n --- {} (n={}) ---", agent_name, config.batch_size);
        // Rebuild graph to reflect any mutations from previous agents
        let graph = store.build_graph();
        let output = match *agent_name {
            "observation" => run_observation_extractor(&store, &graph, config.batch_size),
            "extractor" => run_extractor(&store, &graph, config.batch_size),
            "connector" => run_connector(&store, &graph, config.batch_size),
            "challenger" => run_challenger(&store, &graph, config.batch_size),
            _ => unreachable!(),
        };
        // A failed agent is skipped; it does not abort the whole cycle.
        let output = match output {
            Ok(o) => o,
            Err(e) => {
                eprintln!(" ERROR: {}", e);
                continue;
            }
        };
        // Store raw output as a node (for debugging/audit)
        let raw_key = format!("_knowledge-{}-{}", agent_name, timestamp);
        let raw_content = format!("# {} Agent Results — {}\n\n{}", agent_name, timestamp, output);
        // .ok(): audit nodes are best-effort, failures are ignored.
        store.upsert_provenance(&raw_key, &raw_content,
            agent_provenance(agent_name)).ok();
        let mut actions = parse_all_actions(&output);
        let no_ops = count_no_ops(&output);
        all_no_ops += no_ops;
        eprintln!(" Actions: {} No-ops: {}", actions.len(), no_ops);
        let mut applied = 0;
        for action in &mut actions {
            let depth = compute_action_depth(depth_db, action, agent_name);
            action.depth = depth;
            match &action.kind {
                ActionKind::WriteNode { key, covers, .. } => {
                    let conf_val = action.confidence.value();
                    // Confidence bar scales with inference depth.
                    let req = required_confidence(depth, config.confidence_base);
                    // Average `uses` across covered source nodes feeds a
                    // confidence bonus: well-used sources are trusted more.
                    let source_uses: Vec<u32> = covers.iter()
                        .filter_map(|k| store.nodes.get(k).map(|n| n.uses))
                        .collect();
                    let avg_uses = if source_uses.is_empty() { 0 }
                        else { source_uses.iter().sum::<u32>() / source_uses.len() as u32 };
                    let eff_conf = (conf_val + use_bonus(avg_uses)).min(1.0);
                    if eff_conf < req {
                        action.applied = Some(false);
                        action.rejected_reason = Some("depth_threshold".into());
                        depth_rejected += 1;
                        continue;
                    }
                    if depth > config.max_depth {
                        action.applied = Some(false);
                        action.rejected_reason = Some("max_depth".into());
                        depth_rejected += 1;
                        continue;
                    }
                    eprintln!(" WRITE {} depth={} conf={:.2} eff={:.2} req={:.2}",
                        key, depth, conf_val, eff_conf, req);
                }
                ActionKind::Link { source, target } => {
                    eprintln!(" LINK {}→{}", source, target);
                }
                ActionKind::Refine { key, .. } => {
                    eprintln!(" REFINE {} depth={}", key, depth);
                }
            }
            if apply_action(&mut store, action, agent_name, &timestamp, depth) {
                applied += 1;
                action.applied = Some(true);
                // Record the depth of anything written so later cycles can
                // derive depths of actions built on top of it.
                if let ActionKind::WriteNode { key, .. } | ActionKind::Refine { key, .. } = &action.kind {
                    depth_db.set(key.clone(), depth);
                }
            } else {
                action.applied = Some(false);
            }
        }
        eprintln!(" Applied: {}/{}", applied, actions.len());
        total_applied += applied;
        all_actions.extend(actions);
    }
    depth_db.save(&mut store);
    // Recompute spectral if anything changed
    if total_applied > 0 {
        eprintln!("\n Recomputing spectral embedding...");
        let graph = store.build_graph();
        let result = spectral::decompose(&graph, 8);
        let emb = spectral::to_embedding(&result);
        // Best-effort: a failed embedding write does not fail the cycle.
        spectral::save_embedding(&emb).ok();
    }
    // NOTE(review): no explicit store.save() in this function — upserts appear
    // to persist via append-on-write in Store::upsert_provenance; confirm
    // apply_action and depth_db.save do the same.
    let graph = store.build_graph();
    let metrics_after = GraphMetrics::from_graph(&store, &graph);
    // Convergence signal: total weight of actions that were actually applied.
    let weighted_delta: f64 = all_actions.iter()
        .filter(|a| a.applied == Some(true))
        .map(|a| a.weight)
        .sum();
    eprintln!("\n CYCLE {} SUMMARY", cycle_num);
    eprintln!(" Applied: {}/{} depth-rejected: {} no-ops: {}",
        total_applied, all_actions.len(), depth_rejected, all_no_ops);
    eprintln!(" Weighted delta: {:.2}", weighted_delta);
    Ok(CycleResult {
        cycle: cycle_num,
        timestamp,
        total_actions: all_actions.len(),
        total_applied,
        total_no_ops: all_no_ops,
        depth_rejected,
        weighted_delta,
        graph_metrics_before: metrics_before,
        graph_metrics_after: metrics_after,
    })
}

View file

@ -1,6 +1,6 @@
// LLM utilities: Sonnet invocation and response parsing // LLM utilities: model invocation and response parsing
// //
// Shared by digest, audit, enrich, and consolidate modules. // Shared by digest, audit, enrich, consolidate, knowledge, and fact_mine.
use crate::store::Store; use crate::store::Store;
@ -8,8 +8,8 @@ use regex::Regex;
use std::fs; use std::fs;
use std::process::Command; use std::process::Command;
/// Call Sonnet via claude CLI. Returns the response text. /// Call a model via claude CLI. Returns the response text.
pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, String> { fn call_model(model: &str, prompt: &str) -> Result<String, String> {
// Write prompt to temp file (claude CLI needs file input for large prompts) // Write prompt to temp file (claude CLI needs file input for large prompts)
// Use thread ID + PID to avoid collisions under parallel rayon calls // Use thread ID + PID to avoid collisions under parallel rayon calls
let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt", let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
@ -18,7 +18,7 @@ pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, St
.map_err(|e| format!("write temp prompt: {}", e))?; .map_err(|e| format!("write temp prompt: {}", e))?;
let result = Command::new("claude") let result = Command::new("claude")
.args(["-p", "--model", "sonnet", "--tools", ""]) .args(["-p", "--model", model, "--tools", "", "--no-session-persistence"])
.stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?) .stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
.env_remove("CLAUDECODE") .env_remove("CLAUDECODE")
.output(); .output();
@ -38,7 +38,17 @@ pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, St
} }
} }
/// Parse a JSON response from Sonnet, handling markdown fences. /// Call Sonnet via claude CLI.
pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, String> {
call_model("sonnet", prompt)
}
/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction).
pub(crate) fn call_haiku(prompt: &str) -> Result<String, String> {
call_model("haiku", prompt)
}
/// Parse a JSON response, handling markdown fences.
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> { pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
let cleaned = response.trim(); let cleaned = response.trim();
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned); let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);

View file

@ -29,6 +29,8 @@ mod query;
mod spectral; mod spectral;
mod lookups; mod lookups;
mod daemon; mod daemon;
mod fact_mine;
mod knowledge;
pub mod memory_capnp { pub mod memory_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs")); include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
@ -132,6 +134,9 @@ fn main() {
"lookup-bump" => cmd_lookup_bump(&args[2..]), "lookup-bump" => cmd_lookup_bump(&args[2..]),
"lookups" => cmd_lookups(&args[2..]), "lookups" => cmd_lookups(&args[2..]),
"daemon" => cmd_daemon(&args[2..]), "daemon" => cmd_daemon(&args[2..]),
"knowledge-loop" => cmd_knowledge_loop(&args[2..]),
"fact-mine" => cmd_fact_mine(&args[2..]),
"fact-mine-store" => cmd_fact_mine_store(&args[2..]),
_ => { _ => {
eprintln!("Unknown command: {}", args[1]); eprintln!("Unknown command: {}", args[1]);
usage(); usage();
@ -216,7 +221,14 @@ Commands:
lookups [DATE] Show daily lookup counts (default: today) lookups [DATE] Show daily lookup counts (default: today)
daemon Start background job daemon daemon Start background job daemon
daemon status Show daemon status daemon status Show daemon status
daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)"); daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)
knowledge-loop [OPTIONS] Run knowledge agents to convergence
--max-cycles N (default 20)
--batch-size N (default 5)
--window N (default 5)
--max-depth N (default 4)
fact-mine JSONL [OPTIONS] Extract atomic facts from conversation transcripts
fact-mine --batch DIR Mine all .jsonl files in directory");
} }
fn cmd_search(args: &[String]) -> Result<(), String> { fn cmd_search(args: &[String]) -> Result<(), String> {
@ -850,8 +862,10 @@ fn cmd_digest(args: &[String]) -> Result<(), String> {
fn cmd_digest_links(args: &[String]) -> Result<(), String> { fn cmd_digest_links(args: &[String]) -> Result<(), String> {
let do_apply = args.iter().any(|a| a == "--apply"); let do_apply = args.iter().any(|a| a == "--apply");
let links = digest::parse_all_digest_links()?; let store = store::Store::load()?;
println!("Found {} unique links from digest files", links.len()); let links = digest::parse_all_digest_links(&store);
drop(store);
println!("Found {} unique links from digest nodes", links.len());
if !do_apply { if !do_apply {
for (i, link) in links.iter().enumerate() { for (i, link) in links.iter().enumerate() {
@ -1821,10 +1835,19 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
match args[0].as_str() { match args[0].as_str() {
"status" => daemon::show_status(), "status" => daemon::show_status(),
"log" => { "log" => {
let job = args.get(1).map(|s| s.as_str()); // daemon log [N] — last N lines (default 20)
let lines = args.get(2) // daemon log JOB [N] — last N lines for job
.and_then(|s| s.parse().ok()) let (job, lines) = match args.get(1) {
.unwrap_or(50); None => (None, 20),
Some(s) => {
if let Ok(n) = s.parse::<usize>() {
(None, n)
} else {
let n = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(20);
(Some(s.as_str()), n)
}
}
};
daemon::show_log(job, lines) daemon::show_log(job, lines)
} }
_ => { _ => {
@ -1833,3 +1856,125 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
} }
} }
} }
fn cmd_knowledge_loop(args: &[String]) -> Result<(), String> {
if args.iter().any(|a| a == "--help" || a == "-h") {
eprintln!("Usage: poc-memory knowledge-loop [OPTIONS]
Run knowledge agents (observation, extractor, connector, challenger) in
a convergence loop. Each cycle runs all agents, applies actions to the
graph, and checks structural stability metrics.
Options:
--max-cycles N Maximum cycles before stopping (default: 20)
--batch-size N Items per agent per cycle (default: 5)
--window N Cycles to check for convergence (default: 5)
--max-depth N Maximum inference depth (default: 4)");
return Ok(());
}
let mut config = knowledge::KnowledgeLoopConfig::default();
let mut i = 0;
while i < args.len() {
match args[i].as_str() {
"--max-cycles" => { i += 1; config.max_cycles = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.max_cycles); }
"--batch-size" => { i += 1; config.batch_size = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.batch_size); }
"--window" => { i += 1; config.window = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.window); }
"--max-depth" => { i += 1; config.max_depth = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.max_depth); }
other => return Err(format!("Unknown arg: {}. Use --help for usage.", other)),
}
i += 1;
}
let results = knowledge::run_knowledge_loop(&config)?;
eprintln!("\nCompleted {} cycles, {} total actions applied",
results.len(),
results.iter().map(|r| r.total_applied).sum::<usize>());
Ok(())
}
fn cmd_fact_mine(args: &[String]) -> Result<(), String> {
if args.is_empty() || args.iter().any(|a| a == "--help" || a == "-h") {
eprintln!("Usage: poc-memory fact-mine <JSONL> [OPTIONS]
poc-memory fact-mine --batch <DIR> [OPTIONS]
Extract atomic factual claims from conversation transcripts using Haiku.
Options:
--batch Process all .jsonl files in directory
--dry-run Show chunks without calling model
--output FILE Write JSON to file (default: stdout)
--min-messages N Skip transcripts with fewer messages (default: 10)");
return Ok(());
}
let mut batch = false;
let mut dry_run = false;
let mut output_file: Option<String> = None;
let mut min_messages = 10usize;
let mut path: Option<String> = None;
let mut i = 0;
while i < args.len() {
match args[i].as_str() {
"--batch" => batch = true,
"--dry-run" => dry_run = true,
"--output" | "-o" => { i += 1; output_file = args.get(i).cloned(); }
"--min-messages" => { i += 1; min_messages = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(min_messages); }
s if !s.starts_with('-') => path = Some(s.to_string()),
other => return Err(format!("Unknown arg: {}", other)),
}
i += 1;
}
let path = path.ok_or("Missing path argument")?;
let p = std::path::Path::new(&path);
let paths: Vec<std::path::PathBuf> = if batch {
if !p.is_dir() {
return Err(format!("Not a directory: {}", path));
}
let mut files: Vec<_> = std::fs::read_dir(p)
.map_err(|e| format!("read dir: {}", e))?
.filter_map(|e| e.ok())
.map(|e| e.path())
.filter(|p| p.extension().map(|x| x == "jsonl").unwrap_or(false))
.collect();
files.sort();
eprintln!("Found {} transcripts", files.len());
files
} else {
vec![p.to_path_buf()]
};
let path_refs: Vec<&std::path::Path> = paths.iter().map(|p| p.as_path()).collect();
let facts = fact_mine::mine_batch(&path_refs, min_messages, dry_run)?;
if !dry_run {
let json = serde_json::to_string_pretty(&facts)
.map_err(|e| format!("serialize: {}", e))?;
if let Some(out) = &output_file {
std::fs::write(out, &json).map_err(|e| format!("write: {}", e))?;
eprintln!("\nWrote {} facts to {}", facts.len(), out);
} else {
println!("{}", json);
}
}
eprintln!("\nTotal: {} facts from {} transcripts", facts.len(), paths.len());
Ok(())
}
fn cmd_fact_mine_store(args: &[String]) -> Result<(), String> {
if args.len() != 1 {
return Err("Usage: poc-memory fact-mine-store <JSONL>".into());
}
let path = std::path::Path::new(&args[0]);
if !path.exists() {
return Err(format!("File not found: {}", args[0]));
}
let count = fact_mine::mine_and_store(path)?;
eprintln!("Stored {} facts", count);
Ok(())
}

View file

@ -202,11 +202,20 @@ fn node_type_label(nt: NodeType) -> &'static str {
fn provenance_label(p: Provenance) -> &'static str { fn provenance_label(p: Provenance) -> &'static str {
match p { match p {
Provenance::Manual => "manual", Provenance::Manual => "manual",
Provenance::Journal => "journal", Provenance::Journal => "journal",
Provenance::Agent => "agent", Provenance::Agent => "agent",
Provenance::Dream => "dream", Provenance::Dream => "dream",
Provenance::Derived => "derived", Provenance::Derived => "derived",
Provenance::AgentExperienceMine => "agent:experience-mine",
Provenance::AgentKnowledgeObservation => "agent:knowledge-observation",
Provenance::AgentKnowledgePattern => "agent:knowledge-pattern",
Provenance::AgentKnowledgeConnector => "agent:knowledge-connector",
Provenance::AgentKnowledgeChallenger => "agent:knowledge-challenger",
Provenance::AgentConsolidate => "agent:consolidate",
Provenance::AgentDigest => "agent:digest",
Provenance::AgentFactMine => "agent:fact-mine",
Provenance::AgentDecay => "agent:decay",
} }
} }

View file

@ -30,18 +30,25 @@ impl Store {
/// Upsert a node: update if exists (and content changed), create if not. /// Upsert a node: update if exists (and content changed), create if not.
/// Returns: "created", "updated", or "unchanged". /// Returns: "created", "updated", or "unchanged".
pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> { pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> {
self.upsert_provenance(key, content, Provenance::Manual)
}
/// Upsert with explicit provenance (for agent-created nodes).
pub fn upsert_provenance(&mut self, key: &str, content: &str, provenance: Provenance) -> Result<&'static str, String> {
if let Some(existing) = self.nodes.get(key) { if let Some(existing) = self.nodes.get(key) {
if existing.content == content { if existing.content == content {
return Ok("unchanged"); return Ok("unchanged");
} }
let mut node = existing.clone(); let mut node = existing.clone();
node.content = content.to_string(); node.content = content.to_string();
node.provenance = provenance;
node.version += 1; node.version += 1;
self.append_nodes(std::slice::from_ref(&node))?; self.append_nodes(std::slice::from_ref(&node))?;
self.nodes.insert(key.to_string(), node); self.nodes.insert(key.to_string(), node);
Ok("updated") Ok("updated")
} else { } else {
let node = new_node(key, content); let mut node = new_node(key, content);
node.provenance = provenance;
self.append_nodes(std::slice::from_ref(&node))?; self.append_nodes(std::slice::from_ref(&node))?;
self.uuid_to_key.insert(node.uuid, node.key.clone()); self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(key.to_string(), node); self.nodes.insert(key.to_string(), node);

View file

@ -236,9 +236,18 @@ pub enum NodeType {
pub enum Provenance { pub enum Provenance {
Manual, Manual,
Journal, Journal,
Agent, Agent, // legacy catch-all, prefer specific variants below
Dream, Dream,
Derived, Derived,
AgentExperienceMine,
AgentKnowledgeObservation,
AgentKnowledgePattern,
AgentKnowledgeConnector,
AgentKnowledgeChallenger,
AgentConsolidate,
AgentDigest,
AgentFactMine,
AgentDecay,
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
@ -296,7 +305,10 @@ capnp_enum!(NodeType, memory_capnp::NodeType,
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic]); [EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic]);
capnp_enum!(Provenance, memory_capnp::Provenance, capnp_enum!(Provenance, memory_capnp::Provenance,
[Manual, Journal, Agent, Dream, Derived]); [Manual, Journal, Agent, Dream, Derived,
AgentExperienceMine, AgentKnowledgeObservation, AgentKnowledgePattern,
AgentKnowledgeConnector, AgentKnowledgeChallenger, AgentConsolidate,
AgentDigest, AgentFactMine, AgentDecay]);
capnp_enum!(Category, memory_capnp::Category, capnp_enum!(Category, memory_capnp::Category,
[General, Core, Technical, Observation, Task]); [General, Core, Technical, Observation, Task]);