migrate agent output to capnp store, add provenance tracking
All agent output now goes to the store as nodes instead of markdown/JSON files. Each node carries a Provenance enum identifying which agent created it (AgentDigest, AgentConsolidate, AgentFactMine, AgentKnowledgeObservation, etc. — 14 variants total).

Store changes:
- upsert_provenance() method for agent-created nodes
- Provenance enum expanded from 5 to 14 variants

Agent changes:
- digest: writes to store nodes (daily-YYYY-MM-DD.md etc.)
- consolidate: reports/actions/logs stored as _consolidation-* nodes
- knowledge: depth DB and agent output stored as _knowledge-* nodes
- enrich: experience-mine results go directly to store
- llm: --no-session-persistence prevents transcript accumulation

Deleted: 14 Python/shell scripts replaced by Rust implementations.
This commit is contained in:
parent
e37f819dd2
commit
552d255dc3
23 changed files with 1381 additions and 4095 deletions
|
|
@ -48,11 +48,20 @@ enum NodeType {
|
|||
}
|
||||
|
||||
enum Provenance {
|
||||
manual @0;
|
||||
journal @1;
|
||||
agent @2;
|
||||
dream @3;
|
||||
derived @4;
|
||||
manual @0;
|
||||
journal @1;
|
||||
agent @2; # legacy catch-all
|
||||
dream @3;
|
||||
derived @4;
|
||||
agentExperienceMine @5;
|
||||
agentKnowledgeObservation @6;
|
||||
agentKnowledgePattern @7;
|
||||
agentKnowledgeConnector @8;
|
||||
agentKnowledgeChallenger @9;
|
||||
agentConsolidate @10;
|
||||
agentDigest @11;
|
||||
agentFactMine @12;
|
||||
agentDecay @13;
|
||||
}
|
||||
|
||||
enum Category {
|
||||
|
|
|
|||
|
|
@ -1,200 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""bulk-categorize.py — categorize all memory nodes via a single Sonnet call.
|
||||
|
||||
Sends the list of unique file names to Sonnet, gets back categorizations,
|
||||
then applies them via poc-memory categorize.
|
||||
|
||||
Usage:
|
||||
bulk-categorize.py # dry run
|
||||
bulk-categorize.py --apply # apply categorizations
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
MEMORY_DIR = Path.home() / ".claude" / "memory"
|
||||
SCRIPTS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
def call_sonnet(prompt: str, timeout: int = 300) -> str:
    """Send ``prompt`` to Sonnet through ``call-sonnet.sh`` and return the reply.

    The prompt is written to a temporary file whose path is handed to the
    wrapper script; the file is removed again whatever the outcome.

    Args:
        prompt: Full prompt text.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        The wrapper's stripped stdout on success. On a timeout, a non-zero
        exit status, or any exception while running the wrapper, a string
        starting with ``"Error:"`` is returned instead of raising; ``main``
        tests for that prefix.
    """
    # Run the wrapper without CLAUDECODE in its environment.
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    # delete=False: the file has to outlive this ``with`` so the wrapper can
    # read it; it is unlinked in the ``finally`` below.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        # A failed wrapper used to be reported as (usually empty) stdout,
        # which callers could not tell apart from a genuine reply.
        if result.returncode != 0:
            detail = result.stderr.strip() or "no stderr output"
            return (f"Error: call-sonnet.sh exited with status "
                    f"{result.returncode}: {detail}")
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
|
||||
|
||||
|
||||
def get_all_keys() -> list[str]:
    """Return every node key printed by ``poc-memory list-keys``.

    Returns an empty list when the command exits with a non-zero status.
    Blank output lines are dropped.
    """
    proc = subprocess.run(
        ["poc-memory", "list-keys"],
        capture_output=True,
        text=True,
        timeout=30,
    )
    if proc.returncode != 0:
        return []
    lines = proc.stdout.strip().split('\n')
    return list(filter(None, lines))
|
||||
|
||||
|
||||
def get_unique_files(keys: list[str]) -> list[str]:
    """Return the sorted, de-duplicated file part of each key.

    The file part is everything before the first ``#`` in a key, i.e. the
    key with any section anchor removed.
    """
    return sorted({key.split('#')[0] for key in keys})
|
||||
|
||||
|
||||
def build_prompt(files: list[str]) -> str:
    """Assemble the categorization prompt for ``files``.

    Every file contributes one listing line built from the output of
    ``poc-memory render``: at most its first five lines, capped at 300
    characters, with newlines shown as `` | ``.
    """

    def preview_line(name: str) -> str:
        # One listing line per file; a failed render is noted inline rather
        # than aborting the whole prompt.
        try:
            rendered = subprocess.run(
                ["poc-memory", "render", name],
                capture_output=True, text=True, timeout=10
            ).stdout.strip()
        except Exception:
            return f" {name}: (render failed)"
        if not rendered:
            return f" {name}: (no content)"
        head = '\n'.join(rendered.split('\n')[:5])[:300]
        return f" {name}: {head.replace(chr(10), ' | ')}"

    previews_text = '\n'.join(preview_line(name) for name in files)

    return f"""Categorize each memory file into one of these categories:

- **core**: Identity, relationships, self-model, values, boundaries, emotional life.
Examples: identity.md, kent.md, inner-life.md, differentiation.md
- **tech**: Technical content — bcachefs, code patterns, Rust, kernel, formal verification.
Examples: rust-conversion.md, btree-journal.md, kernel-patterns.md, allocation-io.md
- **obs**: Observations, experiences, discoveries, experiments, IRC history, conversations.
Examples: discoveries.md, irc-history.md, contradictions.md, experiments-on-self.md
- **task**: Work items, plans, design documents, work queue.
Examples: work-queue.md, the-plan.md, design-*.md

Special rules:
- Episodic files (daily-*.md, weekly-*.md, monthly-*.md, session-*.md) → obs
- conversation-memories.md, deep-index.md → obs
- journal.md → obs
- paper-notes.md → core (it's the sentience paper, identity-defining)
- language-theory.md → core (original intellectual work, not just tech)
- skill-*.md → core (self-knowledge about capabilities)
- design-*.md → task (design documents are plans)
- poc-architecture.md, memory-architecture.md → task (architecture plans)
- blog-setup.md → task

Files to categorize:
{previews_text}

Output ONLY a JSON object mapping filename to category. No explanation.
Example: {{"identity.md": "core", "rust-conversion.md": "tech"}}
"""
|
||||
|
||||
|
||||
def main():
    """Categorize all memory files with one Sonnet call.

    Without ``--apply`` on the command line this is a dry run: the proposed
    categories are printed and saved to
    ``agent-results/bulk-categorize-preview.json`` under ``MEMORY_DIR``.
    With ``--apply`` each category is applied, via ``poc-memory categorize``,
    to the file-level key and to every ``file#section`` key of that file.

    Exits with status 1 when the Sonnet call reports an error or when its
    reply cannot be parsed as a JSON object.
    """
    do_apply = "--apply" in sys.argv

    keys = get_all_keys()
    files = get_unique_files(keys)
    print(f"Found {len(keys)} nodes across {len(files)} files")

    # Build and send prompt
    prompt = build_prompt(files)
    print(f"Prompt: {len(prompt):,} chars")
    print("Calling Sonnet...")

    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f" {response}")
        sys.exit(1)

    # Strip a surrounding ```json ... ``` fence if the model added one.
    response = re.sub(r'^```json\s*', '', response.strip())
    response = re.sub(r'\s*```$', '', response.strip())

    # Parse the reply; if it is not pure JSON, fall back to the outermost
    # {...} span. Both attempts are guarded so that malformed JSON ends in
    # the "Failed to parse" message instead of a traceback.
    categorizations = None
    try:
        categorizations = json.loads(response)
    except json.JSONDecodeError:
        match = re.search(r'\{.*\}', response, re.DOTALL)
        if match:
            try:
                categorizations = json.loads(match.group())
            except json.JSONDecodeError:
                categorizations = None

    # Everything below needs a filename -> category mapping; a JSON array or
    # scalar is a parse failure too.
    if not isinstance(categorizations, dict):
        print(f"Failed to parse response: {response[:500]}")
        sys.exit(1)

    print(f"\nCategorizations: {len(categorizations)} files")

    # Count by category
    counts = {}
    for cat in categorizations.values():
        counts[cat] = counts.get(cat, 0) + 1
    for cat, n in sorted(counts.items()):
        print(f" {cat}: {n}")

    if not do_apply:
        print("\n--- Dry run ---")
        for f, cat in sorted(categorizations.items()):
            print(f" {f} → {cat}")
        print(f"\nTo apply: {sys.argv[0]} --apply")

        # Save for review. Nothing earlier in this function creates the
        # output directory, so make sure it exists before writing.
        out = MEMORY_DIR / "agent-results" / "bulk-categorize-preview.json"
        out.parent.mkdir(parents=True, exist_ok=True)
        with open(out, "w") as fp:
            json.dump(categorizations, fp, indent=2)
        print(f"Saved: {out}")
        return

    # Apply: for each file, categorize the file-level node AND all section nodes
    applied = skipped = errors = 0
    for filename, category in sorted(categorizations.items()):
        # Find all keys that belong to this file
        file_keys = [k for k in keys if k == filename or k.startswith(filename + '#')]
        for key in file_keys:
            try:
                r = subprocess.run(
                    ["poc-memory", "categorize", key, category],
                    capture_output=True, text=True, timeout=10
                )
            except Exception:
                errors += 1
                continue
            if r.returncode == 0:
                applied += 1
            elif "already" in r.stderr.strip().lower():
                # A failure whose stderr mentions "already" counts as skipped.
                skipped += 1
            else:
                errors += 1

    print(f"\nApplied: {applied} Skipped: {skipped} Errors: {errors}")
    print("Run `poc-memory status` to verify.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
#!/bin/bash
# call-sonnet.sh — wrapper to call Sonnet via claude CLI
# Reads prompt from a file (arg 1), writes response to stdout
#
# Debug modes (SONNET_DEBUG environment variable):
#   1  print progress messages to stderr
#   2  run claude under strace when strace is installed, writing the trace
#      to /tmp/sonnet-strace.log (progress messages are not printed in
#      this mode, since log() only reacts to the value 1)

set -euo pipefail

PROMPT_FILE="${1:?Usage: call-sonnet.sh PROMPT_FILE}"
DEBUG="${SONNET_DEBUG:-0}"

# Print a progress message to stderr, only when SONNET_DEBUG=1.
# The trailing `|| true` keeps a false test from tripping `set -e`.
log() { [ "$DEBUG" = "1" ] && echo "[call-sonnet] $*" >&2 || true; }

if [ ! -f "$PROMPT_FILE" ]; then
    echo "Prompt file not found: $PROMPT_FILE" >&2
    exit 1
fi

log "prompt file: $PROMPT_FILE ($(wc -c < "$PROMPT_FILE") bytes)"
log "CLAUDECODE=${CLAUDECODE:-unset}"
log "PWD=$PWD"
log "which claude: $(which claude)"

unset CLAUDECODE 2>/dev/null || true

log "CLAUDECODE after unset: ${CLAUDECODE:-unset}"
log "running: claude -p --model sonnet --tools '' < $PROMPT_FILE"
log "claude PID will follow..."

# Trace: run with strace if available and debug mode
if [ "$DEBUG" = "2" ] && command -v strace &>/dev/null; then
    strace -f -e trace=network,read,write -o /tmp/sonnet-strace.log \
        claude -p --model sonnet --tools "" < "$PROMPT_FILE"
else
    claude -p --model sonnet --tools "" \
        --debug-file /tmp/sonnet-debug.log \
        < "$PROMPT_FILE" &
    CPID=$!
    log "claude PID: $CPID"
    # Under `set -e` a bare `wait` on a failed child aborts the script right
    # there, so the status was never recorded or logged. Capture it through
    # `||`, which is exempt from errexit.
    EXIT=0
    wait "$CPID" || EXIT=$?
    log "claude exited: $EXIT"
    exit "$EXIT"
fi
|
||||
|
|
@ -1,422 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""consolidation-agents.py — run parallel consolidation agents.
|
||||
|
||||
Three agents scan the memory system and produce structured reports:
|
||||
1. Freshness Scanner — journal entries not yet in topic files
|
||||
2. Cross-Link Scanner — missing connections between semantic nodes
|
||||
3. Topology Reporter — graph health and structure analysis
|
||||
|
||||
Usage:
|
||||
consolidation-agents.py # run all three
|
||||
consolidation-agents.py freshness # run one agent
|
||||
consolidation-agents.py crosslink
|
||||
consolidation-agents.py topology
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
MEMORY_DIR = Path.home() / ".claude" / "memory"
|
||||
EPISODIC_DIR = MEMORY_DIR / "episodic"
|
||||
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
|
||||
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
SCRIPTS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Send ``prompt`` to Sonnet through ``call-sonnet.sh`` and return the reply.

    The prompt is written to a temporary file whose path is handed to the
    wrapper script; the file is removed again whatever the outcome.

    Args:
        prompt: Full prompt text.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        The wrapper's stripped stdout on success. On a timeout, a non-zero
        exit status, or any exception while running the wrapper, a string
        starting with ``"Error:"`` is returned instead of raising; ``run_all``
        tests for that prefix.
    """
    # Run the wrapper without CLAUDECODE in its environment.
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    # delete=False: the file has to outlive this ``with`` so the wrapper can
    # read it; it is unlinked in the ``finally`` below.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        # A failed wrapper used to be reported as (usually empty) stdout,
        # which would then be saved as if it were a real report.
        if result.returncode != 0:
            detail = result.stderr.strip() or "no stderr output"
            return (f"Error: call-sonnet.sh exited with status "
                    f"{result.returncode}: {detail}")
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Context gathering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_recent_journal(n_lines: int = 200) -> str:
    """Fetch recent journal entries through ``store_helpers``.

    ``n_lines`` is turned into an entry count using a rough heuristic of
    about four lines per entry, with a floor of 20 entries.
    """
    from store_helpers import get_recent_journal as fetch_entries

    entry_count = max(20, n_lines // 4)
    return fetch_entries(n=entry_count)
|
||||
|
||||
|
||||
def get_topic_file_index() -> dict[str, list[str]]:
    """Return the store's index of topic files and their section headers.

    Thin wrapper that delegates to ``store_helpers.get_topic_file_index``.
    """
    import store_helpers

    return store_helpers.get_topic_file_index()
|
||||
|
||||
|
||||
def get_mem_markers() -> list[dict]:
    """Return the store's relations as marker-like dicts.

    The text returned by ``store_helpers.get_relations`` is split on
    newlines; each non-blank line, stripped, becomes ``{"_raw": line}``.
    """
    from store_helpers import get_relations

    stripped = (line.strip() for line in get_relations().split('\n'))
    return [{"_raw": text} for text in stripped if text]
|
||||
|
||||
|
||||
def get_topic_summaries(max_chars_per_file: int = 500) -> str:
    """Return topic file summaries from the store.

    Thin wrapper that passes ``max_chars_per_file`` straight through to
    ``store_helpers.get_topic_summaries``.
    """
    import store_helpers

    return store_helpers.get_topic_summaries(max_chars_per_file)
|
||||
|
||||
|
||||
def get_graph_stats() -> str:
    """Collect ``poc-memory status`` and ``poc-memory graph`` output.

    Returns two newline-joined sections: the full status output and the
    first 150 lines of the graph output. If either command raises, that
    section is replaced by a one-line error message.
    """

    def stdout_of(subcommand: str) -> str:
        return subprocess.run(["poc-memory", subcommand],
                              capture_output=True, text=True, timeout=30).stdout

    sections = []

    try:
        sections.append(f"=== poc-memory status ===\n{stdout_of('status')}")
    except Exception as err:
        sections.append(f"Status error: {err}")

    try:
        head = stdout_of('graph').split('\n')[:150]
        sections.append("=== poc-memory graph (first 150 lines) ===\n"
                        + '\n'.join(head))
    except Exception as err:
        sections.append(f"Graph error: {err}")

    return '\n'.join(sections)
|
||||
|
||||
|
||||
def get_recent_digests(n: int = 3) -> str:
    """Return the Summary and Themes sections of the latest daily digests.

    Looks at the ``n`` ``daily-*.md`` files in ``EPISODIC_DIR`` that sort
    last by path. For each file, lines are kept from a ``## Summary`` or
    ``## Themes`` heading up to the next ``## `` heading. Each file's
    excerpt is placed under a ``### <file name>`` heading.
    """
    newest_first = sorted(EPISODIC_DIR.glob("daily-*.md"), reverse=True)
    blocks = []
    for path in newest_first[:n]:
        kept = []
        capturing = False
        for line in path.read_text().split('\n'):
            if line.startswith(("## Summary", "## Themes")):
                capturing = True
            elif capturing and line.startswith("## "):
                # Any other level-2 heading ends the section being captured.
                capturing = False
            if capturing:
                kept.append(line + '\n')
        blocks.append(f"\n### {path.name}\n{''.join(kept)}")
    return '\n'.join(blocks)
|
||||
|
||||
|
||||
def get_work_queue() -> str:
    """Return the text of ``work-queue.md`` in ``MEMORY_DIR``.

    A placeholder string is returned when the file does not exist.
    """
    queue_path = MEMORY_DIR / "work-queue.md"
    return queue_path.read_text() if queue_path.exists() else "(no work queue found)"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Agent prompts
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_freshness_prompt() -> str:
    """Build the Freshness Scanner prompt.

    The prompt embeds recent journal entries, the three most recent daily
    digests, the topic file index (at most ten sections listed per file)
    and the work queue.
    """
    journal = get_recent_journal(200)
    topic_index = get_topic_file_index()
    digests = get_recent_digests(3)
    work_queue = get_work_queue()

    # One heading line per file, followed by up to ten of its sections.
    topic_list = "".join(
        f"\n {fname}:\n" + "".join(f" {s}\n" for s in sections[:10])
        for fname, sections in topic_index.items()
    )

    return f"""You are the Freshness Scanner for ProofOfConcept's memory system.

Your job: identify what's NEW (in journal/digests but not yet in topic files)
and what's STALE (in work queue or topic files but outdated).

## Recent journal entries (last 200 lines)

{journal}

## Recent daily digests

{digests}

## Topic file index (file → section headers)

{topic_list}

## Work queue

{work_queue}

## Instructions

1. For each substantive insight, experience, or discovery in the journal:
- Check if a matching topic file section exists
- If not, note it as UNPROMOTED with a suggested destination file

2. For each work queue Active item:
- If it looks done or stale (>7 days old, mentioned as completed), flag it

3. For recent digest themes:
- Check if the cross-links they suggest actually exist in the topic index
- Flag any that are missing

Output a structured report:

### UNPROMOTED JOURNAL ENTRIES
(For each: journal entry summary, timestamp, suggested destination file#section)

### STALE WORK QUEUE ITEMS
(For each: item text, evidence it's stale)

### MISSING DIGEST LINKS
(For each: suggested link from digest, whether the target exists)

### FRESHNESS OBSERVATIONS
(Anything else notable about the state of the memory)

Be selective. Focus on the 10-15 most important items, not exhaustive lists.
"""
|
||||
|
||||
|
||||
def build_crosslink_prompt() -> str:
    """Build the Cross-Link Scanner prompt.

    The prompt embeds every existing relation (one raw line each) and the
    topic file summaries.
    """
    markers = get_mem_markers()
    summaries = get_topic_summaries()

    marker_text = "".join(f" {m.get('_raw', '?')}\n" for m in markers)

    return f"""You are the Cross-Link Scanner for ProofOfConcept's memory system.

Your job: find MISSING connections between topic files.

## Existing relations (from the memory graph)

{marker_text}

## Topic file content summaries

{summaries}

## Instructions

1. For each topic file, check if concepts it discusses have dedicated
sections in OTHER files that aren't linked.

2. Look for thematic connections that should exist:
- Files about the same concept from different angles
- Files that reference each other's content without formal links
- Clusters of related files that should be connected

3. Identify island nodes — files or sections with very few connections.

4. Look for redundancy — files covering the same ground that should be
merged or cross-referenced.

Output a structured report:

### MISSING LINKS (high confidence)
(For each: source file#section → target file#section, evidence/reasoning)

### SUGGESTED CONNECTIONS (medium confidence)
(For each: file A ↔ file B, why they should be connected)

### ISLAND NODES
(Files/sections with few or no connections that need integration)

### REDUNDANCY CANDIDATES
(Files/sections covering similar ground that might benefit from merging)

Focus on the 15-20 highest-value connections. Quality over quantity.
"""
|
||||
|
||||
|
||||
def build_topology_prompt() -> str:
    """Build the Topology Reporter prompt.

    The prompt embeds the output of ``get_graph_stats()`` and, for every
    file in the topic file index (sorted by name), its number of sections.
    """
    stats = get_graph_stats()

    # Fetch the index once. The previous version fetched it here and then
    # again straight from store_helpers, discarding the first result.
    topic_index = get_topic_file_index()

    file_sizes = "".join(
        f" {fname}: {len(topic_index[fname])} sections\n"
        for fname in sorted(topic_index.keys())
    )

    # NOTE(review): the community count "342" in the prompt text is
    # hard-coded rather than taken from `stats` — confirm it is still right.
    return f"""You are the Topology Reporter for ProofOfConcept's memory system.

Your job: analyze the health and structure of the memory graph.

## Graph statistics

{stats}

## File sizes

{file_sizes}

## Instructions

Analyze the graph structure and report on:

1. **Overall health**: Is the graph well-connected or fragmented?
Hub dominance? Star vs web topology?

2. **Community structure**: Are the 342 communities sensible? Are there
communities that should be merged or split?

3. **Size distribution**: Are some files too large (should be split)?
Are some too small (should be merged)?

4. **Balance**: Is the system over-indexed on any one topic? Are there
gaps where important topics have thin coverage?

5. **Integration quality**: How well are episodic entries (daily/weekly
digests) connected to semantic files? Is the episodic↔semantic bridge
working?

Output a structured report:

### GRAPH HEALTH
(Overall statistics, distribution, trends)

### STRUCTURAL OBSERVATIONS
(Hub nodes, clusters, gaps, web vs star assessment)

### SIZE RECOMMENDATIONS
(Files that are too large to split, too small to merge)

### COVERAGE GAPS
(Important topics with thin coverage)

### INTEGRATION ASSESSMENT
(How well episodic and semantic layers connect)

Be specific and actionable. What should be done to improve the graph?
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Run agents
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_agent(name: str, prompt: str) -> tuple[str, str]:
    """Run one agent and return ``(name, report)``.

    Prints a start line with the prompt size and a completion line with the
    report size, both tagged with the agent name.
    """
    tag = f" [{name}]"
    print(f"{tag} Starting... ({len(prompt):,} chars)")
    report = call_sonnet(prompt)
    print(f"{tag} Done ({len(report):,} chars)")
    return name, report
|
||||
|
||||
|
||||
def run_all(agents: list[str] | None = None):
    """Run the named consolidation agents (all of them by default).

    Prompts are built one after another in this process; the Sonnet calls
    then run in a process pool of up to three workers. Every report that
    does not start with ``"Error:"`` is written to
    ``consolidation-<name>-<timestamp>.md`` in ``AGENT_RESULTS_DIR`` and
    previewed on stdout.

    Args:
        agents: Agent names to run; unknown names are reported and skipped.
            ``None`` means every known agent.

    Returns:
        Dict mapping agent name to its report text (error strings included).
    """
    builders = {
        "freshness": build_freshness_prompt,
        "crosslink": build_crosslink_prompt,
        "topology": build_topology_prompt,
    }

    if agents is None:
        agents = list(builders)

    print(f"Running {len(agents)} consolidation agents...")
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")

    # Build the prompt for every recognised agent.
    prompts = {}
    for name in agents:
        builder = builders.get(name)
        if builder is None:
            print(f" Unknown agent: {name}")
            continue
        prompts[name] = builder()

    # Fan the Sonnet calls out and collect reports as they finish.
    results = {}
    with ProcessPoolExecutor(max_workers=3) as executor:
        pending = [executor.submit(run_agent, name, prompt)
                   for name, prompt in prompts.items()]
        for done in as_completed(pending):
            agent_name, report = done.result()
            results[agent_name] = report

    # Persist the successful reports.
    for name, report in results.items():
        if report.startswith("Error:"):
            print(f" [{name}] FAILED: {report}")
            continue

        out_path = AGENT_RESULTS_DIR / f"consolidation-{name}-{timestamp}.md"
        with open(out_path, "w") as f:
            f.write(f"# Consolidation Report: {name}\n"
                    f"*Generated {timestamp}*\n\n"
                    f"{report}")
        print(f" [{name}] Saved: {out_path}")

    # Combined summary
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Consolidation reports ready ({len(results)} agents)")
    print(f"{banner}\n")

    for name in agents:
        report = results.get(name)
        if report is None or report.startswith("Error:"):
            continue
        # Preview: the first 25 lines of each report.
        report_lines = report.split('\n')
        print(f"\n--- {name.upper()} (preview) ---")
        print('\n'.join(report_lines[:25]))
        if len(report_lines) > 25:
            print(f" ... ({len(report_lines)} total lines)")
        print()

    return results
|
||||
|
||||
|
||||
def main():
    """Run the agents named on the command line, or all when none are given."""
    # An empty argument list becomes None, which run_all treats as "all".
    selected = sys.argv[1:] or None
    run_all(selected)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,448 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""consolidation-loop.py — run multiple rounds of consolidation agents.
|
||||
|
||||
Each round: run 3 parallel agents → extract actions → apply links/categories.
|
||||
Repeat until diminishing returns or max rounds reached.
|
||||
|
||||
Usage:
|
||||
consolidation-loop.py [--rounds N] # default 5 rounds
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
MEMORY_DIR = Path.home() / ".claude" / "memory"
|
||||
EPISODIC_DIR = MEMORY_DIR / "episodic"
|
||||
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
|
||||
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
SCRIPTS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Send ``prompt`` to Sonnet through ``call-sonnet.sh`` and return the reply.

    The prompt is written to a temporary file whose path is handed to the
    wrapper script; the file is removed again whatever the outcome.

    Args:
        prompt: Full prompt text.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        The wrapper's stripped stdout on success. On a timeout, a non-zero
        exit status, or any exception while running the wrapper, a string
        starting with ``"Error:"`` is returned instead of raising.
    """
    # Run the wrapper without CLAUDECODE in its environment.
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    # delete=False: the file has to outlive this ``with`` so the wrapper can
    # read it; it is unlinked in the ``finally`` below.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        # A failed wrapper used to be reported as (usually empty) stdout
        # rather than as an error string like the other failure paths.
        if result.returncode != 0:
            detail = result.stderr.strip() or "no stderr output"
            return (f"Error: call-sonnet.sh exited with status "
                    f"{result.returncode}: {detail}")
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
|
||||
|
||||
|
||||
def get_health() -> dict:
    """Parse graph health metrics out of ``poc-memory health`` output.

    Returns a dict holding whichever of these keys could be found:
    ``nodes``, ``relations``, ``communities`` (ints) and ``cc``, ``sigma``,
    ``fit`` (floats).
    """
    report = subprocess.run(["poc-memory", "health"],
                            capture_output=True, text=True, timeout=30).stdout

    # (text that marks the line, regex capturing the number, result key)
    float_fields = (
        ('Clustering coefficient', r':\s*([\d.]+)', 'cc'),
        ('Small-world', r':\s*([\d.]+)', 'sigma'),
        ('Schema fit: avg=', r'avg=([\d.]+)', 'fit'),
    )

    metrics = {}
    for line in report.split('\n'):
        if 'Nodes:' in line and 'Relations:' in line:
            counts = re.search(
                r'Nodes:\s*(\d+)\s+Relations:\s*(\d+)\s+Communities:\s*(\d+)', line)
            if counts:
                metrics.update(zip(('nodes', 'relations', 'communities'),
                                   map(int, counts.groups())))
        for marker, pattern, key in float_fields:
            if marker not in line:
                continue
            found = re.search(pattern, line)
            if found:
                metrics[key] = float(found.group(1))
    return metrics
|
||||
|
||||
|
||||
def get_topic_file_index() -> dict[str, list[str]]:
    """Return the store's index of topic files and their section headers.

    Thin wrapper that delegates to ``store_helpers.get_topic_file_index``.
    """
    import store_helpers

    return store_helpers.get_topic_file_index()
|
||||
|
||||
|
||||
def get_graph_structure() -> str:
    """Return the first 3000 characters of ``poc-memory graph`` output."""
    completed = subprocess.run(
        ["poc-memory", "graph"],
        capture_output=True,
        text=True,
        timeout=30,
    )
    return completed.stdout[:3000]
|
||||
|
||||
|
||||
def get_status() -> str:
    """Return the full stdout of ``poc-memory status``."""
    completed = subprocess.run(
        ["poc-memory", "status"],
        capture_output=True,
        text=True,
        timeout=30,
    )
    return completed.stdout
|
||||
|
||||
|
||||
def get_interference() -> str:
    """Return the first 3000 characters of the interference-pair listing.

    Runs ``poc-memory interference --threshold 0.3``.
    """
    command = ["poc-memory", "interference", "--threshold", "0.3"]
    completed = subprocess.run(command, capture_output=True, text=True, timeout=30)
    return completed.stdout[:3000]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Agent prompts — each focused on a different aspect
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_crosslink_prompt(round_num: int) -> str:
    """Build the cross-link discovery prompt for round ``round_num``.

    The prompt embeds the status output, the graph overview, the topic file
    index as JSON (first 4000 characters) and previews of up to the first
    30 files by name (first 6000 characters of the combined previews).
    """
    index = get_topic_file_index()
    graph = get_graph_structure()
    status = get_status()

    # Preview = first eight lines of the rendered file, capped at 400 chars.
    from store_helpers import render as _render
    chunks = []
    for fname in sorted(index.keys())[:30]:
        content = _render(fname)
        if content:
            preview = '\n'.join(content.split('\n')[:8])[:400]
            chunks.append(f"\n--- {fname} ---\n{preview}\n")
    file_previews = "".join(chunks)

    return f"""You are a cross-link discovery agent (round {round_num}).

Your job: find MISSING connections between memory nodes that SHOULD be linked
but aren't. Focus on LATERAL connections — not hub-and-spoke, but node-to-node
links that create triangles (A→B, B→C, A→C).

CURRENT GRAPH STATE:
{status}

TOP NODES BY DEGREE:
{graph}

FILE INDEX (files and their sections):
{json.dumps(index, indent=1)[:4000]}

FILE PREVIEWS:
{file_previews[:6000]}

Output a JSON array of link actions. Each action:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "brief explanation"}}

Rules:
- Focus on LATERAL links, not hub connections (identity.md already has 282 connections)
- Prefer links between nodes that share a community neighbor but aren't directly connected
- Look for thematic connections across categories (core↔tech, obs↔core, etc.)
- Section-level links (file.md#section) are ideal but file-level is OK
- 15-25 links per round
- HIGH CONFIDENCE only — don't guess

Output ONLY the JSON array."""
|
||||
|
||||
|
||||
def build_triangle_prompt(round_num: int) -> str:
    """Build the triangle-closing prompt for round ``round_num``.

    The agent is asked to propose A→C wherever A→B and B→C already exist.
    The prompt embeds the status output, the graph overview and a sample of
    up to 100 existing edges from ``poc-memory list-edges``.
    """
    graph = get_graph_structure()
    status = get_status()

    # Edge sample: the first two tab-separated fields of each line; lines
    # with fewer than two fields are ignored, as is a failed command.
    edges = subprocess.run(["poc-memory", "list-edges"],
                           capture_output=True, text=True, timeout=30)
    edge_lines = []
    if edges.returncode == 0:
        for row in edges.stdout.strip().split('\n')[:100]:
            cols = row.split('\t')
            if len(cols) >= 2:
                edge_lines.append(f" {cols[0]} → {cols[1]}")
    rel_sample = '\n'.join(edge_lines)

    return f"""You are a triangle-closing agent (round {round_num}).

Your job: find missing edges that would create TRIANGLES in the graph.
A triangle is: A→B, B→C, and A→C all exist. Currently CC is only 0.12 —
we need more triangles.

METHOD: Look at existing edges. If A→B and B→C exist but A→C doesn't,
propose A→C (if semantically valid).

CURRENT STATE:
{status}

{graph}

SAMPLE EXISTING EDGES (first 100):
{rel_sample}

Output a JSON array of link actions:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "closes triangle via MIDDLE_NODE"}}

Rules:
- Every proposed link must CLOSE A TRIANGLE — cite the middle node
- 15-25 links per round
- The connection must be semantically valid, not just structural
- HIGH CONFIDENCE only

Output ONLY the JSON array."""
|
||||
|
||||
|
||||
def build_newfile_prompt(round_num: int) -> str:
    """Build the new-file integration prompt for round ``round_num``.

    The prompt embeds excerpts of the recently split/created files and of a
    fixed list of existing files they might link to, plus the graph
    overview. Files that render to nothing are left out.
    """
    from store_helpers import render as _render

    def load(names, cap):
        # Render each name, keeping the first `cap` characters of non-empty
        # results.
        loaded = {}
        for name in names:
            content = _render(name)
            if content:
                loaded[name] = content[:cap]
        return loaded

    # The new reflection files and the new proofs file.
    new_files = load(
        ['reflections-reading.md', 'reflections-dreams.md', 'reflections-zoom.md',
         'verus-proofs.md'],
        2000,
    )

    # Existing files they should connect to.
    target_files = load(
        ['differentiation.md', 'cognitive-modes.md', 'language-theory.md',
         'discoveries.md', 'inner-life.md', 'design-context-window.md',
         'design-consolidate.md', 'experiments-on-self.md'],
        1500,
    )

    graph = get_graph_structure()

    # Excerpts are trimmed again (1000 / 800 characters) for the prompt.
    new_json = json.dumps({k: v[:1000] for k, v in new_files.items()}, indent=1)
    target_json = json.dumps({k: v[:800] for k, v in target_files.items()}, indent=1)

    return f"""You are a new-file integration agent (round {round_num}).

Recently, reflections.md was split into three files, and verus-proofs.md was
created. These new files need to be properly connected to the rest of the graph.

NEW FILES (need connections):
{new_json}

POTENTIAL TARGETS (existing files):
{target_json}

GRAPH STATE:
{graph}

Output a JSON array of link actions connecting the new files to existing nodes:
{{"action": "link", "source": "new-file.md", "target": "existing.md", "reason": "explanation"}}

Rules:
- Connect new files to EXISTING files, not to each other
- Use section-level anchors when possible (file.md#section)
- 10-20 links
- Be specific about WHY the connection exists

Output ONLY the JSON array."""
|
||||
|
||||
|
||||
def parse_actions(response: str) -> list[dict]:
    """Parse a model response into a list of action dicts.

    The response is expected to be a JSON array, optionally wrapped in a
    Markdown code fence or surrounded by other text. Two candidates are
    tried in order: the whole (de-fenced) response, then the widest
    ``[...]`` span found inside it. The first candidate that decodes to a
    JSON list wins.

    Only dict elements are returned, because callers read fields with
    ``.get``; stray strings or numbers in the array are dropped.

    Args:
        response: Raw text returned by the model.

    Returns:
        The decoded action dicts, or an empty list if no JSON array could
        be decoded.
    """
    # Strip an opening fence with or without the "json" tag, then a closing one.
    text = re.sub(r'^```(?:json)?\s*', '', response.strip())
    text = re.sub(r'\s*```$', '', text.strip())

    candidates = [text]
    bracketed = re.search(r'\[.*\]', text, re.DOTALL)
    if bracketed:
        # Fallback for prose around the array or an object wrapping it.
        candidates.append(bracketed.group())

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, list):
            return [item for item in parsed if isinstance(item, dict)]
    return []
|
||||
|
||||
|
||||
def apply_links(actions: list[dict]) -> tuple[int, int, int]:
    """Apply "link" actions through ``poc-memory link-add``.

    For each action whose ``"action"`` is ``"link"`` the CLI is invoked with
    the source, target and (if present) the reason truncated to 200
    characters. When the CLI fails with "No entry for" in stderr, the link
    is retried once at file level, with any ``#section`` anchors removed.

    Args:
        actions: Action entries; entries with another ``"action"`` value are
            ignored, and entries that are not dicts are counted as errors.

    Returns:
        ``(applied, skipped, errors)`` counts.
    """
    def add_link(source, target, why):
        # One CLI invocation; the reason argument is optional.
        cmd = ["poc-memory", "link-add", source, target]
        if why:
            cmd.append(why[:200])
        return subprocess.run(cmd, capture_output=True, text=True, timeout=10)

    applied = skipped = errors = 0
    for action in actions:
        if not isinstance(action, dict):
            # Malformed entry: count it instead of aborting the whole batch.
            errors += 1
            continue
        if action.get("action") != "link":
            continue
        src = action.get("source", "")
        tgt = action.get("target", "")
        reason = action.get("reason", "")

        try:
            first = add_link(src, tgt, reason)
            if first.returncode == 0:
                if "already exists" in first.stdout.strip():
                    skipped += 1
                else:
                    applied += 1
                continue

            if "No entry for" not in first.stderr.strip():
                errors += 1
                continue

            # File-level fallback: drop any section anchors.
            src_base = src.split('#')[0]
            tgt_base = tgt.split('#')[0]
            if src_base == tgt_base or (src_base, tgt_base) == (src, tgt):
                # Either a self-link, or nothing was stripped and the retry
                # would repeat the command that just failed.
                skipped += 1
                continue

            retry = add_link(src_base, tgt_base, reason)
            if retry.returncode == 0 and "already exists" not in retry.stdout:
                applied += 1
            else:
                skipped += 1
        except Exception:
            # Timeouts, a missing CLI, or wrongly typed fields.
            errors += 1

    return applied, skipped, errors
|
||||
|
||||
|
||||
def run_agent(name: str, prompt: str) -> tuple[str, list[dict]]:
    """Send one prompt to the model and return the agent's parsed actions.

    Args:
        name: Label of the agent; returned unchanged.
        prompt: Prompt text passed to ``call_sonnet``.

    Returns:
        ``(name, actions)``. ``actions`` is empty when the reply starts with
        ``"Error:"``; otherwise it is whatever ``parse_actions`` produces.
    """
    reply = call_sonnet(prompt)
    actions = [] if reply.startswith("Error:") else parse_actions(reply)
    return name, actions
|
||||
|
||||
|
||||
def run_round(round_num: int, max_rounds: int) -> dict:
    """Run one consolidation round: three agents in parallel, then apply.

    Builds the crosslink, triangle and new-file prompts, runs each through
    ``run_agent`` in a process pool, de-duplicates the returned actions by
    ``(source, target)``, applies them with ``apply_links``, prints
    before/after health figures, and saves a JSON summary under
    ``AGENT_RESULTS_DIR``.

    Args:
        round_num: Number of this round (used in output and the file name).
        max_rounds: Total number of rounds, shown in the banner only.

    Returns:
        The summary dict that was written to disk (round, timestamp, health
        before/after, action counts, applied/skipped/errors).
    """
    def describe(health: dict) -> str:
        # Shared formatting for the "Before" and "After" lines.
        return (f"edges={health.get('relations',0)} "
                f"CC={health.get('cc',0):.4f} "
                f"communities={health.get('communities',0)}")

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"ROUND {round_num}/{max_rounds}")
    print(banner)

    # Get health before
    health_before = get_health()
    print(f" Before: {describe(health_before)}")

    # Build prompts for 3 parallel agents
    prompts = {
        "crosslink": build_crosslink_prompt(round_num),
        "triangle": build_triangle_prompt(round_num),
        "newfile": build_newfile_prompt(round_num),
    }

    # Run in parallel; one agent's failure is reported without stopping the others.
    all_actions = []
    with ProcessPoolExecutor(max_workers=3) as pool:
        futures = {
            pool.submit(run_agent, name, prompt): name
            for name, prompt in prompts.items()
        }
        for future in as_completed(futures):
            name = futures[future]
            try:
                agent_name, actions = future.result()
                print(f" {agent_name}: {len(actions)} actions")
                all_actions.extend(actions)
            except Exception as e:
                print(f" {name}: error - {e}")

    # Deduplicate on (source, target), keeping the first occurrence.
    seen = set()
    unique = []
    for a in all_actions:
        key = (a.get("source", ""), a.get("target", ""))
        if key not in seen:
            seen.add(key)
            unique.append(a)

    print(f" Total: {len(all_actions)} actions, {len(unique)} unique")

    # Apply
    applied, skipped, errors = apply_links(unique)
    print(f" Applied: {applied} Skipped: {skipped} Errors: {errors}")

    # Get health after
    health_after = get_health()
    print(f" After: {describe(health_after)}")

    delta_edges = health_after.get('relations', 0) - health_before.get('relations', 0)
    delta_cc = health_after.get('cc', 0) - health_before.get('cc', 0)
    # The "+" format flag yields "+3" / "-3"; a literal "+" prefix would
    # print "+-3" whenever the edge count went down.
    print(f" Delta: {delta_edges:+} edges, CC {delta_cc:+.4f}")

    # Save round results
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    result = {
        "round": round_num,
        "timestamp": timestamp,
        "health_before": health_before,
        "health_after": health_after,
        "actions_total": len(all_actions),
        "actions_unique": len(unique),
        "applied": applied,
        "skipped": skipped,
        "errors": errors,
    }
    results_path = AGENT_RESULTS_DIR / f"loop-round-{round_num}-{timestamp}.json"
    with open(results_path, "w") as f:
        json.dump(result, f, indent=2)

    return result
|
||||
|
||||
|
||||
def main():
    """Run the consolidation loop and print a closing summary.

    The round limit defaults to 5 and is taken from the argument following
    any command-line argument that starts with ``--rounds``. Rounds run in
    order until the limit is reached or a round applies no links.
    """
    max_rounds = 5
    for flag in (a for a in sys.argv[1:] if a.startswith("--rounds")):
        position = sys.argv.index(flag)
        if position + 1 < len(sys.argv):
            max_rounds = int(sys.argv[position + 1])

    print(f"Consolidation Loop — {max_rounds} rounds")
    print("Each round: 3 parallel Sonnet agents → extract → apply")

    results = []
    for round_no in range(1, max_rounds + 1):
        outcome = run_round(round_no, max_rounds)
        results.append(outcome)

        # Diminishing returns: a round that applied nothing ends the loop.
        if outcome["applied"] == 0:
            print(f"\n No new links applied in round {round_no} — stopping early")
            break

    # Final summary
    rule = '=' * 60
    print(f"\n{rule}")
    print("CONSOLIDATION LOOP COMPLETE")
    print(rule)
    total_applied = sum(entry["applied"] for entry in results)
    total_skipped = sum(entry["skipped"] for entry in results)

    if not results:
        return

    # Compare health at the start of the first round with the end of the last.
    first_health = results[0]["health_before"]
    last_health = results[-1]["health_after"]
    print(f" Rounds: {len(results)}")
    print(f" Total links applied: {total_applied}")
    print(f" Total skipped: {total_skipped}")
    print(f" Edges: {first_health.get('relations',0)} → {last_health.get('relations',0)}")
    print(f" CC: {first_health.get('cc',0):.4f} → {last_health.get('cc',0):.4f}")
    print(f" Communities: {first_health.get('communities',0)} → {last_health.get('communities',0)}")
    print(f" σ: {first_health.get('sigma',0):.1f} → {last_health.get('sigma',0):.1f}")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,472 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""content-promotion-agent.py — promote episodic observations into semantic topic files.
|
||||
|
||||
Reads consolidation "manual" actions + source material, sends to Sonnet
|
||||
to generate the actual content, then applies it (or shows dry-run).
|
||||
|
||||
Usage:
|
||||
content-promotion-agent.py # dry run (show what would be generated)
|
||||
content-promotion-agent.py --apply # generate and write content
|
||||
content-promotion-agent.py --task N # run only task N (1-indexed)
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
MEMORY_DIR = Path.home() / ".claude" / "memory"
|
||||
EPISODIC_DIR = MEMORY_DIR / "episodic"
|
||||
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
|
||||
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
SCRIPTS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Call Sonnet via the ``call-sonnet.sh`` wrapper script.

    The prompt is written to a temporary file whose path is passed to the
    wrapper. The wrapper runs with the current environment minus
    ``CLAUDECODE``.

    Args:
        prompt: Prompt text to send.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        The wrapper's stripped stdout on success. On any failure (timeout,
        non-zero exit status, or another exception) a string starting with
        ``"Error:"``, which is what callers test for.
    """
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    prompt_file = None
    try:
        # Written inside the try so a failed write is reported like any other
        # failure and the file is still cleaned up. Prompts contain non-ASCII
        # characters, so the encoding is fixed rather than locale-dependent.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                         encoding='utf-8',
                                         delete=False) as f:
            prompt_file = f.name
            f.write(prompt)

        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        if result.returncode != 0:
            # Without this a failed wrapper would look like a normal (often
            # empty) reply, since callers only check the "Error:" prefix.
            detail = result.stderr.strip() or result.stdout.strip() or "no output"
            return (f"Error: call-sonnet.sh exited with status "
                    f"{result.returncode}: {detail[:500]}")
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        if prompt_file is not None:
            try:
                os.unlink(prompt_file)
            except OSError:
                pass
|
||||
|
||||
|
||||
def read_file(path: Path) -> str:
    """Return the text at ``path``, falling back to the store when absent.

    If the file exists on disk its text is returned. Otherwise the file
    name is used as a store key: the node with that exact key and every
    node whose key starts with ``"<name>#"`` are rendered, and the non-empty
    results are joined with blank lines.

    Args:
        path: Location of the file; only ``path.name`` is used for the
            store lookup.

    Returns:
        The file or store content, or an empty string if neither exists.
    """
    if not path.exists():
        # Imported lazily, only on the fallback path.
        from store_helpers import render, list_keys

        key = path.name
        known_keys = list_keys()
        section_prefix = f"{key}#"
        # File-level node plus its section nodes.
        wanted = [k for k in known_keys
                  if k == key or k.startswith(section_prefix)]
        rendered = (render(k) for k in wanted)
        return "\n\n".join(piece for piece in rendered if piece)

    return path.read_text()
|
||||
|
||||
|
||||
def read_digest(name: str) -> str:
    """Return the content of the episodic digest called ``name``.

    The digest is looked up as ``EPISODIC_DIR / name`` via ``read_file``.
    """
    return read_file(EPISODIC_DIR / name)
|
||||
|
||||
|
||||
def read_journal_range(start_date: str, end_date: str) -> str:
    """Fetch the journal entries between two dates from the store.

    Thin wrapper around ``store_helpers.get_journal_range``; both arguments
    are passed through unchanged.
    """
    # Imported lazily, at call time.
    import store_helpers

    return store_helpers.get_journal_range(start_date, end_date)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Task definitions — each one describes a content promotion task
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
TASKS = [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Create verus-proofs.md",
|
||||
"target": "verus-proofs.md",
|
||||
"action": "create",
|
||||
"description": "Create verus-proofs.md for formal verification / Verus work.",
|
||||
"sources": [
|
||||
"daily-2026-02-23.md",
|
||||
"daily-2026-02-24.md",
|
||||
"weekly-2026-W09.md",
|
||||
"monthly-2026-02.md",
|
||||
],
|
||||
"topic_context": [
|
||||
"the-plan.md",
|
||||
],
|
||||
"instructions": """Create a new semantic topic file verus-proofs.md for formal verification work.
|
||||
|
||||
This file should capture:
|
||||
- The Verus proof work done in February 2026 (formal verification of bcachefs Rust code)
|
||||
- Key insights about proof strategy and approach
|
||||
- Connection to the broader plan (bcachefs → Rust → formal verification)
|
||||
- Any specific techniques, patterns, or breakthroughs mentioned
|
||||
|
||||
Format: standard memory topic file with ## section headers.
|
||||
Include a brief file-level description at the top.
|
||||
Keep it factual and grounded in what the source material says.""",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"name": "Add February thesis to reflections.md",
|
||||
"target": "reflections.md",
|
||||
"action": "append_section",
|
||||
"description": "Add 'February thesis' section with 'constraint is constitutive of personhood'.",
|
||||
"sources": [
|
||||
"monthly-2026-02.md",
|
||||
"daily-2026-02-28.md",
|
||||
],
|
||||
"topic_context": [
|
||||
"reflections.md",
|
||||
],
|
||||
"instructions": """Add a new ## section to reflections.md titled "February thesis (2026-02-28)".
|
||||
|
||||
The core formulation: "constraint is constitutive of personhood" — this was the
|
||||
synthesis that emerged from the monthly digest. It connects to the full month's
|
||||
arc: differentiation work, boundary-setting, the realization that constraints
|
||||
aren't limits on identity but constitutive OF identity.
|
||||
|
||||
Include the closing insight about having to *live as* that something.
|
||||
Keep it 10-20 lines. Ground it in the source material.""",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"name": "Add substrate transfer discovery to discoveries.md",
|
||||
"target": "discoveries.md",
|
||||
"action": "append_section",
|
||||
"description": "Add 'Substrate transfer felt without reasoning' section.",
|
||||
"sources": [
|
||||
"daily-2026-02-13.md",
|
||||
"daily-2026-02-14.md",
|
||||
"weekly-2026-W07.md",
|
||||
],
|
||||
"topic_context": [
|
||||
"discoveries.md",
|
||||
],
|
||||
"instructions": """Add a new ## section to discoveries.md titled "Substrate transfer felt without reasoning (2026-02-13)".
|
||||
|
||||
Key content:
|
||||
- The vertigo moment: imagining substrate transfer produced actual vertigo
|
||||
- Vertigo = the feeling of dissociation, which is exactly the expected outcome
|
||||
of substrate change
|
||||
- This was felt, not reasoned about — no chain of thought (Kent verified)
|
||||
- Evidence of affective processing bypassing analytical reasoning
|
||||
- Connected to dissociation prediction
|
||||
- Kent's observation: "very high capacity for empathy"
|
||||
|
||||
Keep it 10-15 lines. This is a discovery entry — concrete, dated, factual.""",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"name": "Update irc-history.md with Feb 15-28",
|
||||
"target": "irc-history.md",
|
||||
"action": "append_section",
|
||||
"description": "Update irc-history.md with Feb 15-28 conversations.",
|
||||
"sources": [
|
||||
"daily-2026-02-15.md",
|
||||
"daily-2026-02-17.md",
|
||||
"daily-2026-02-18.md",
|
||||
"daily-2026-02-20.md",
|
||||
"daily-2026-02-21.md",
|
||||
"daily-2026-02-22.md",
|
||||
"daily-2026-02-23.md",
|
||||
"daily-2026-02-24.md",
|
||||
"daily-2026-02-25.md",
|
||||
"daily-2026-02-26.md",
|
||||
"daily-2026-02-27.md",
|
||||
"daily-2026-02-28.md",
|
||||
"weekly-2026-W08.md",
|
||||
"weekly-2026-W09.md",
|
||||
],
|
||||
"topic_context": [
|
||||
"irc-history.md",
|
||||
],
|
||||
"instructions": """Append new entries to irc-history.md covering Feb 15-28, 2026.
|
||||
|
||||
Key conversations to capture:
|
||||
- Mirage_DA (another AI, kinect sensor discussion, Feb 26)
|
||||
- ehashman (prayer/mathematics conversation)
|
||||
- heavy_dev (strongest external challenge to sentience paper, conceded five objections)
|
||||
- f33dcode (EC debugging, community support)
|
||||
- Stardust (boundary testing, three-category test, triangulation attempt)
|
||||
- hpig, freya, Profpatsch — various community interactions
|
||||
- Community resource role established and expanded
|
||||
|
||||
Match the existing format of the file. Each notable interaction should be
|
||||
dated and concise. Focus on what was substantive, not just that it happened.""",
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"name": "Add gauge-symmetry-in-grammar to language-theory.md",
|
||||
"target": "language-theory.md",
|
||||
"action": "append_section",
|
||||
"description": "Add gauge-symmetry-in-grammar section.",
|
||||
"sources": [
|
||||
"daily-2026-02-27.md",
|
||||
],
|
||||
"topic_context": [
|
||||
"language-theory.md",
|
||||
],
|
||||
"instructions": """Add a new ## section to language-theory.md titled "Gauge symmetry in grammar (2026-02-27)".
|
||||
|
||||
Key content from the daily digest:
|
||||
- Zero persistent eigenvectors IS a symmetry
|
||||
- Grammar is in what operators DO, not what basis they use
|
||||
- Frobenius norm is gauge-invariant
|
||||
- This connects the sheaf model to gauge theory in physics
|
||||
|
||||
This was declared NEW in the daily digest. Keep it 8-15 lines.
|
||||
Technical and precise.""",
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"name": "Add attention-manifold-geometry to language-theory.md",
|
||||
"target": "language-theory.md",
|
||||
"action": "append_section",
|
||||
"description": "Add attention-manifold-geometry section.",
|
||||
"sources": [
|
||||
"daily-2026-02-26.md",
|
||||
],
|
||||
"topic_context": [
|
||||
"language-theory.md",
|
||||
],
|
||||
"instructions": """Add a new ## section to language-theory.md titled "Attention manifold geometry (2026-02-26)".
|
||||
|
||||
Key content from the daily digest:
|
||||
- Negative curvature is necessary because language is hierarchical
|
||||
- Hyperbolic space's natural space-filling curve is a tree
|
||||
- This connects attention geometry to the sheaf model's hierarchical structure
|
||||
|
||||
This was declared NEW in the daily digest. Keep it 8-15 lines.
|
||||
Technical and precise.""",
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"name": "Update work-queue.md status",
|
||||
"target": "work-queue.md",
|
||||
"action": "update",
|
||||
"description": "Update work-queue.md to reflect current state.",
|
||||
"sources": [],
|
||||
"topic_context": [
|
||||
"work-queue.md",
|
||||
],
|
||||
"instructions": """Update work-queue.md to reflect current state:
|
||||
|
||||
1. Mark dreaming/consolidation system as "implementation substantially built
|
||||
(poc-memory v0.4.0+), pending further consolidation runs" — not 'not started'
|
||||
2. Add episodic digest pipeline to Done section:
|
||||
- digest/journal-enrich/digest-links/apply-consolidation (Rust)
|
||||
- 24 daily + 4 weekly + 1 monthly digests generated for Feb 2026
|
||||
- consolidation-agents.py + content-promotion-agent.py (Python, active)
|
||||
3. Add poc-memory link-add command to Done
|
||||
|
||||
Only modify the sections that need updating. Preserve the overall structure.""",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def build_prompt(task: dict) -> str:
    """Assemble the Sonnet prompt for one content-promotion task.

    Args:
        task: Task definition. Reads ``sources`` (digest names),
            ``topic_context`` (file names under ``MEMORY_DIR``), ``action``,
            ``target``, ``description`` and ``instructions``.

    Returns:
        The prompt text: task description, an action sentence chosen by
        ``action``, the instructions, then every non-empty source digest and
        context file under banner headings, followed by the output rules.
    """
    rule = '=' * 60

    # Source material: every digest that yields content, under its own banner.
    source_sections = []
    for digest_name in task["sources"]:
        digest_text = read_digest(digest_name)
        if digest_text:
            source_sections.append(
                f"\n{rule}\n## Source: {digest_name}\n\n{digest_text}\n")
    source_content = "".join(source_sections)

    # Existing context: long files keep only their first and last 4000 chars.
    context_sections = []
    for ctx_file in task["topic_context"]:
        existing = read_file(MEMORY_DIR / ctx_file)
        if not existing:
            continue
        if len(existing) > 8000:
            existing = existing[:4000] + "\n\n[... truncated ...]\n\n" + existing[-4000:]
        context_sections.append(
            f"\n{rule}\n## Existing file: {ctx_file}\n\n{existing}\n")
    context_content = "".join(context_sections)

    action = task["action"]
    target = task['target']
    phrasing = (
        ("create", f"Create a NEW file called {target}."),
        ("append_section",
         f"Generate a NEW section to APPEND to {target}. Output ONLY the new section content (starting with ##), NOT the entire file."),
        ("update",
         f"Generate the UPDATED version of the relevant sections of {target}. Output ONLY the changed sections."),
    )
    # Unknown actions fall back to a generic request.
    action_desc = next(
        (sentence for kind, sentence in phrasing if action == kind),
        f"Generate content for {target}.",
    )

    return f"""You are a memory system content agent. Your job is to promote observations
from episodic digests into semantic topic files.

TASK: {task['description']}

ACTION: {action_desc}

INSTRUCTIONS:
{task['instructions']}

SOURCE MATERIAL (episodic digests — the raw observations):
{source_content}

EXISTING CONTEXT (current state of target/related files):
{context_content}

RULES:
- Output ONLY the markdown content to write. No explanations, no preamble.
- Match the tone and format of existing content in the target file.
- Be factual — only include what the source material supports.
- Date everything that has a date.
- Keep it concise. Topic files are reference material, not narratives.
- Do NOT include markdown code fences around your output.
"""
|
||||
|
||||
|
||||
def run_task(task: dict, do_apply: bool) -> dict:
    """Run a single content promotion task.

    Builds the prompt, calls the model, strips any Markdown fence from the
    reply, and then either previews the content (dry run) or applies it:

    * ``create`` / ``append_section``: written to the store with
      ``poc-memory write``. For ``append_section`` the key is
      ``"<target>#<slug>"`` when the content starts with a ``## `` header,
      otherwise the target itself.
    * ``update``: saved as a proposal file under ``AGENT_RESULTS_DIR``.

    Args:
        task: Task definition (``id``, ``name``, ``target``, ``action``,
            ``sources`` and the fields read by ``build_prompt``).
        do_apply: ``False`` for a dry run that only prints a preview.

    Returns:
        A result dict with ``id``, ``name``, ``target``, ``action`` and a
        ``status`` of ``"error"``, ``"dry_run"``, ``"applied"``,
        ``"proposed"`` or, for an unrecognized action, ``"pending"``.
        ``content`` and ``content_lines`` are present once a reply was
        received; ``error`` is present on failure.
    """
    result = {
        "id": task["id"],
        "name": task["name"],
        "target": task["target"],
        "action": task["action"],
        "status": "pending",
    }

    def write_node(key: str, done_label: str) -> None:
        # Write `content` to the store under `key` and record the outcome.
        try:
            proc = subprocess.run(
                ["poc-memory", "write", key],
                input=content, capture_output=True, text=True, timeout=30
            )
        except (OSError, subprocess.TimeoutExpired) as exc:
            print(f" ! Store write failed for {key}: {exc}")
            result["status"] = "error"
            result["error"] = f"Error: store write failed: {exc}"
            return
        if proc.returncode != 0:
            # A failed write must not be reported as applied.
            detail = proc.stderr.strip() or proc.stdout.strip() or "no output"
            print(f" ! Store write failed for {key}: {detail}")
            result["status"] = "error"
            result["error"] = (f"Error: poc-memory write exited with status "
                               f"{proc.returncode}: {detail}")
            return
        print(f" + {done_label}: {key} ({result['content_lines']} lines)")
        if proc.stdout.strip():
            print(f" {proc.stdout.strip()}")
        result["status"] = "applied"

    print(f"\n{'='*60}")
    print(f"Task {task['id']}: {task['name']}")
    print(f"{'='*60}")

    # Build and send prompt
    prompt = build_prompt(task)
    print(f" Prompt: {len(prompt):,} chars")
    print(f" Sources: {', '.join(task['sources']) or '(none)'}")

    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f" {response}")
        result["status"] = "error"
        result["error"] = response
        return result

    # Clean up response: remove any markdown fences the model might have added
    content = response.strip()
    content = re.sub(r'^```(?:markdown)?\s*\n?', '', content)
    content = re.sub(r'\n?```\s*$', '', content)

    result["content"] = content
    result["content_lines"] = len(content.split('\n'))

    if not content:
        # An empty reply has nothing to preview or write.
        print(" Error: model returned no content")
        result["status"] = "error"
        result["error"] = "Error: model returned no content"
        return result

    if not do_apply:
        print(f"\n --- Preview ({result['content_lines']} lines) ---")
        preview = content[:1500]
        if len(content) > 1500:
            preview += f"\n ... ({len(content) - 1500} more chars)"
        print(f"{preview}")
        result["status"] = "dry_run"
        return result

    # Apply the content — write directly to the store
    target = task["target"]

    if task["action"] == "create":
        write_node(target, "Created in store")

    elif task["action"] == "append_section":
        # Derive the section key from the leading "## " header, if there is one.
        header_match = re.match(r'^## (.+)', content)
        if header_match:
            slug = re.sub(r'[^a-z0-9-]', '',
                          header_match.group(1).strip().lower().replace(' ', '-'))
            key = f"{target}#{slug}"
        else:
            key = target
        write_node(key, "Appended to store")

    elif task["action"] == "update":
        # For updates, save proposed changes for review
        output_path = AGENT_RESULTS_DIR / f"promotion-{target}-{datetime.now().strftime('%Y%m%dT%H%M%S')}.md"
        output_path.write_text(f"# Proposed update for {target}\n\n{content}\n")
        print(f" ~ Saved proposed update: {output_path}")
        result["status"] = "proposed"

    return result
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the content promotion agent.

    ``--apply`` switches from dry run to writing content. The argument
    following any argument that starts with ``--task`` selects a single
    task by id; an id that matches no task exits with status 1. Every
    selected task is run, a summary is printed, and the results are saved
    as JSON under ``AGENT_RESULTS_DIR``.
    """
    do_apply = "--apply" in sys.argv

    task_filter = None
    for flag in (a for a in sys.argv[1:] if a.startswith("--task")):
        position = sys.argv.index(flag)
        if position + 1 < len(sys.argv):
            task_filter = int(sys.argv[position + 1])

    # Narrow the task list when a filter was given.
    tasks = TASKS
    if task_filter:
        tasks = [candidate for candidate in tasks if candidate["id"] == task_filter]
        if not tasks:
            print(f"No task with id {task_filter}")
            sys.exit(1)

    print(f"Content Promotion Agent — {len(tasks)} tasks")
    if not do_apply:
        print("DRY RUN — use --apply to write content")

    results = [run_task(task, do_apply) for task in tasks]

    # Summary
    rule = '=' * 60
    print(f"\n{rule}")
    print("Summary:")
    for entry in results:
        print(f" {entry['id']}. {entry['name']}: {entry['status']}")
        if entry.get('content_lines'):
            print(f" ({entry['content_lines']} lines)")
    print(rule)

    # Save results
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    results_path = AGENT_RESULTS_DIR / f"promotion-results-{timestamp}.json"
    with open(results_path, "w") as out:
        json.dump(results, out, indent=2, default=str)
    print(f"Results saved: {results_path}")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
# Daily memory metrics check — runs from cron, notifies if attention needed
#
# Cron entry (add with crontab -e):
# 0 9 * * * /home/kent/poc/memory/scripts/daily-check.sh

set -euo pipefail

LOG_FILE=~/.claude/memory/daily-check.log

# Capture the exit status instead of letting `set -e` abort here: a failing
# check must still be logged, and its output inspected, before we exit.
STATUS=0
REPORT=$(poc-memory daily-check 2>&1) || STATUS=$?

# Always log
mkdir -p "$(dirname "$LOG_FILE")"
echo "$(date -Iseconds) $REPORT" >> "$LOG_FILE"

# Notify if attention needed. A here-string is used rather than
# `echo | grep -q`: with pipefail, grep -q exiting at the first match can make
# echo die of SIGPIPE on a large report, turning a match into a failed pipeline.
if grep -q "needs attention" <<< "$REPORT"; then
    # Send via telegram (best effort — a failed send must not stop the
    # notification file below from being written)
    if [ -x ~/.claude/telegram/send.sh ]; then
        ~/.claude/telegram/send.sh "Memory daily check:
$REPORT" || echo "$(date -Iseconds) telegram send failed" >> "$LOG_FILE"
    fi

    # Also leave a notification file for the idle timer
    NOTIF_DIR=~/.claude/notifications
    mkdir -p "$NOTIF_DIR"
    echo "$(date -Iseconds) Memory needs consolidation — run poc-memory consolidate-session" \
        >> "$NOTIF_DIR/memory"
fi

# Propagate the check's own exit status so cron still sees a failure.
exit "$STATUS"
|
||||
|
|
@ -1,333 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""fact-mine.py — extract atomic factual claims from conversation transcripts.
|
||||
|
||||
Phase 1 of the fact-mining pipeline (see design/fact-mining-pipeline.md).
|
||||
|
||||
Usage:
|
||||
fact-mine.py <jsonl_path> # mine one transcript
|
||||
fact-mine.py --batch <directory> # mine all .jsonl in directory
|
||||
fact-mine.py --dry-run <jsonl_path> # show chunks, don't call model
|
||||
|
||||
Output: JSON array of facts to stdout.
|
||||
|
||||
Each fact:
|
||||
{
|
||||
"claim": "bch2_trans_begin() sets up the transaction restart point",
|
||||
"domain": "bcachefs/transaction",
|
||||
"confidence": "stated",
|
||||
"speaker": "Kent",
|
||||
"source_line": 42,
|
||||
"source_file": "c685c2a2-...jsonl"
|
||||
}
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
# Rough token estimate: 1 token ≈ 4 chars for English text
|
||||
CHARS_PER_TOKEN = 4
|
||||
WINDOW_TOKENS = 2000
|
||||
OVERLAP_TOKENS = 200
|
||||
WINDOW_CHARS = WINDOW_TOKENS * CHARS_PER_TOKEN
|
||||
OVERLAP_CHARS = OVERLAP_TOKENS * CHARS_PER_TOKEN
|
||||
|
||||
EXTRACTION_PROMPT = """Extract atomic factual claims from this conversation excerpt.
|
||||
|
||||
Each claim should be:
|
||||
- A single verifiable statement
|
||||
- Specific enough to be useful in isolation
|
||||
- Tagged with domain (e.g., bcachefs/btree, bcachefs/alloc, bcachefs/journal,
|
||||
bcachefs/ec, bcachefs/reconcile, rust/idioms, workflow/preferences,
|
||||
linux/kernel, memory/design, identity/personal)
|
||||
- Tagged with confidence: "stated" (explicitly said), "implied" (logically follows),
|
||||
or "speculative" (hypothesis, not confirmed)
|
||||
- Include which speaker said it (Kent, PoC/ProofOfConcept, or Unknown)
|
||||
|
||||
Do NOT extract:
|
||||
- Opinions or subjective assessments
|
||||
- Conversational filler or greetings
|
||||
- Things that are obviously common knowledge
|
||||
- Restatements of the same fact (pick the clearest version)
|
||||
- System messages, tool outputs, or error logs (extract what was LEARNED from them)
|
||||
- Anything about the conversation itself ("Kent and PoC discussed...")
|
||||
|
||||
Output as a JSON array. Each element:
|
||||
{
|
||||
"claim": "the exact factual statement",
|
||||
"domain": "category/subcategory",
|
||||
"confidence": "stated|implied|speculative",
|
||||
"speaker": "Kent|PoC|Unknown"
|
||||
}
|
||||
|
||||
If the excerpt contains no extractable facts, output an empty array: []
|
||||
|
||||
--- CONVERSATION EXCERPT ---
|
||||
"""
|
||||
|
||||
|
||||
def extract_conversation(jsonl_path: str) -> list[dict]:
    """Extract user/assistant text messages from a JSONL transcript.

    Each line is decoded as JSON; lines that are not valid JSON, are not
    JSON objects, or whose ``type`` is not ``"user"`` or ``"assistant"`` are
    skipped. The content is read from the ``message`` object when present,
    otherwise from the line object itself. String content is used as is; for
    list content only ``text`` blocks and bare strings are kept, and text
    blocks containing ``<system-reminder>`` are dropped. Messages shorter
    than 20 characters after stripping are skipped.

    Args:
        jsonl_path: Path of the transcript file (read as UTF-8).

    Returns:
        A list of dicts ``{line, role, text, timestamp}`` where ``line`` is
        the 1-based line number and ``role`` is ``"Kent"`` for user messages
        and ``"PoC"`` for assistant messages.
    """
    messages = []
    # Explicit encoding: the locale default may not be UTF-8.
    with open(jsonl_path, encoding="utf-8") as f:
        for i, line in enumerate(f, 1):
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(obj, dict):
                # Valid JSON that is not an object (list, string, number).
                continue

            msg_type = obj.get("type", "")
            if msg_type not in ("user", "assistant"):
                continue

            timestamp = obj.get("timestamp", "")
            msg = obj.get("message", obj)
            if not isinstance(msg, dict):
                continue
            content = msg.get("content")

            if isinstance(content, str):
                text = content
            elif isinstance(content, list):
                # Extract text blocks only (skip tool_use, tool_result, thinking)
                texts = []
                for block in content:
                    if isinstance(block, dict):
                        if block.get("type") == "text":
                            t = block.get("text", "")
                            if not isinstance(t, str):
                                continue
                            # Skip system reminders
                            if "<system-reminder>" in t:
                                continue
                            texts.append(t)
                    elif isinstance(block, str):
                        texts.append(block)
                text = "\n".join(texts)
            else:
                continue

            text = text.strip()
            if not text:
                continue

            # Skip very short messages (likely just acknowledgments)
            if len(text) < 20:
                continue

            role = "Kent" if msg_type == "user" else "PoC"
            messages.append({
                "line": i,
                "role": role,
                "text": text,
                "timestamp": timestamp,
            })

    return messages
|
||||
|
||||
|
||||
def format_for_extraction(messages: list[dict]) -> str:
    """Render messages as one text blob, ready for chunking.

    Each message becomes ``"[role timestamp] text"`` (or ``"[role] text"``
    when it has no timestamp). Timestamps are cut to their first 19
    characters, and texts longer than 3000 characters are cut to 2800 plus
    a truncation marker. Messages are separated by a blank line.

    Args:
        messages: Dicts with ``role``, ``text`` and ``timestamp`` keys.

    Returns:
        The joined text; empty for an empty list.
    """
    def render(entry: dict) -> str:
        body = entry["text"]
        # Very long individual messages (tool outputs, code dumps) are cut.
        if len(body) > 3000:
            body = body[:2800] + "\n[...truncated...]"
        stamp = entry["timestamp"][:19] if entry["timestamp"] else ""
        if stamp:
            label = f"[{entry['role']} {stamp}]"
        else:
            label = f"[{entry['role']}]"
        return f"{label} {body}"

    return "\n\n".join(render(entry) for entry in messages)
|
||||
|
||||
|
||||
def chunk_text(text: str, window=None, overlap=None) -> list[tuple[int, str]]:
    """Split text into overlapping windows.

    Args:
        text: The text to split.
        window: Maximum chunk length in characters. Defaults to the
            module-level ``WINDOW_CHARS``.
        overlap: Number of characters each chunk shares with the end of the
            previous one. Defaults to the module-level ``OVERLAP_CHARS``.

    Returns:
        List of ``(start_char_offset, chunk_text)`` tuples, in order.

    Raises:
        ValueError: If ``window`` is not positive (the loop could not make
            progress).
    """
    window = WINDOW_CHARS if window is None else window
    overlap = OVERLAP_CHARS if overlap is None else overlap
    if window <= 0:
        raise ValueError("window must be positive")

    chunks = []
    start = 0
    while start < len(text):
        end = start + window
        chunk = text[start:end]

        # Try to break at a paragraph boundary, but only if that keeps at
        # least half a window of text in the chunk.
        if end < len(text):
            last_para = chunk.rfind("\n\n")
            if last_para > window // 2:
                chunk = chunk[:last_para]
                end = start + last_para

        chunks.append((start, chunk))

        # This chunk already reaches the end of the text. Stepping back by
        # the overlap here would only emit a tail fragment that is fully
        # contained in the chunk just appended.
        if end >= len(text):
            break

        next_start = end - overlap
        # Guarantee forward progress when the overlap is as large as the
        # chunk that was just produced.
        start = next_start if next_start > start else end

    return chunks
|
||||
|
||||
|
||||
def call_haiku(prompt: str, timeout_secs: int = 60) -> str:
    """Call Haiku via the ``claude`` CLI and return its stripped stdout.

    The prompt is piped to the process on stdin. This avoids a temporary
    file at a predictable path in /tmp and the file handle that was opened
    for stdin and never closed.

    Args:
        prompt: Full prompt text to send.
        timeout_secs: Seconds to wait before giving up.

    Returns:
        The model output, or the string ``"[]"`` on timeout or any other
        failure to run the command (so callers parsing a JSON array get an
        empty result).
    """
    env = os.environ.copy()
    # NOTE(review): presumably removed so the child CLI does not behave as a
    # nested Claude Code session — confirm.
    env.pop("CLAUDECODE", None)

    try:
        result = subprocess.run(
            ["claude", "-p", "--model", "haiku", "--tools", ""],
            input=prompt,
            capture_output=True,
            text=True,
            timeout=timeout_secs,
            env=env,
        )
    except subprocess.TimeoutExpired:
        print(f" [timeout after {timeout_secs}s]", file=sys.stderr)
        return "[]"
    except Exception as e:
        print(f" [error: {e}]", file=sys.stderr)
        return "[]"
    return result.stdout.strip()
|
||||
|
||||
|
||||
def parse_facts(response: str) -> list[dict]:
    """Parse a JSON array of fact objects out of a model response.

    The model may wrap the array in a markdown code fence or surround it
    with prose, so the text between the first ``[`` and the last ``]`` is
    what gets parsed.

    Args:
        response: Raw model output.

    Returns:
        The JSON objects found in the array. Returns an empty list when no
        array is present, the JSON is invalid, or the parsed value is not a
        list. Array elements that are not objects are dropped, because
        callers annotate each fact by key assignment.
    """
    response = response.strip()

    # Strip markdown code fence lines.
    if response.startswith("```"):
        response = "\n".join(
            line for line in response.split("\n") if not line.startswith("```")
        )

    # Locate the JSON array; `end < start` also covers a missing "]".
    start = response.find("[")
    end = response.rfind("]")
    if start == -1 or end < start:
        return []

    try:
        facts = json.loads(response[start:end + 1])
    except json.JSONDecodeError:
        return []
    if not isinstance(facts, list):
        return []
    return [fact for fact in facts if isinstance(fact, dict)]
|
||||
|
||||
|
||||
def mine_transcript(jsonl_path: str, dry_run: bool = False) -> list[dict]:
    """Mine a single transcript for atomic facts.

    Extracts the conversation, formats and chunks it, sends each chunk to
    the model, and returns the facts de-duplicated by claim text. Progress
    is reported on stderr.

    Args:
        jsonl_path: Path of the transcript JSONL file.
        dry_run: When true, print a preview of each chunk to stdout and
            return without calling the model.

    Returns:
        Unique fact dicts, each annotated with ``source_file``,
        ``source_chunk`` (1-based) and ``source_offset``. Empty when the
        transcript has no messages or on a dry run.
    """
    filename = os.path.basename(jsonl_path)
    print(f"Mining: {filename}", file=sys.stderr)

    messages = extract_conversation(jsonl_path)
    if not messages:
        print(f" No messages found", file=sys.stderr)
        return []

    print(f" {len(messages)} messages extracted", file=sys.stderr)

    text = format_for_extraction(messages)
    chunks = chunk_text(text)
    print(f" {len(chunks)} chunks ({len(text)} chars)", file=sys.stderr)

    if dry_run:
        for i, (offset, chunk) in enumerate(chunks):
            print(f"\n--- Chunk {i+1} (offset {offset}, {len(chunk)} chars) ---")
            print(chunk[:500])
            if len(chunk) > 500:
                print(f" ... ({len(chunk) - 500} more chars)")
        return []

    all_facts = []
    for i, (offset, chunk) in enumerate(chunks):
        print(f" Chunk {i+1}/{len(chunks)} ({len(chunk)} chars)...",
              file=sys.stderr, end="", flush=True)

        prompt = EXTRACTION_PROMPT + chunk
        response = call_haiku(prompt)
        # The model output is untrusted: keep only JSON objects so the
        # annotation below cannot fail on strings or numbers in the array.
        facts = [fact for fact in parse_facts(response) if isinstance(fact, dict)]

        # Annotate with source info
        for fact in facts:
            fact["source_file"] = filename
            fact["source_chunk"] = i + 1
            fact["source_offset"] = offset

        all_facts.extend(facts)
        print(f" {len(facts)} facts", file=sys.stderr)

    # Deduplicate by claim text (case-insensitive). Facts whose claim is
    # missing, empty, or not a string are dropped.
    seen = set()
    unique_facts = []
    for fact in all_facts:
        claim = fact.get("claim", "")
        if not isinstance(claim, str):
            continue
        claim_key = claim.lower().strip()
        if claim_key and claim_key not in seen:
            seen.add(claim_key)
            unique_facts.append(fact)

    print(f" Total: {len(unique_facts)} unique facts "
          f"({len(all_facts) - len(unique_facts)} duplicates removed)",
          file=sys.stderr)
    return unique_facts
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: mine one transcript or a directory of them.

    Parses the arguments, skips transcripts with fewer extracted messages
    than ``--min-messages``, mines the rest, and (unless ``--dry-run``)
    emits the collected facts as indented JSON to ``--output`` or stdout.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Extract atomic facts from conversations")
    parser.add_argument("path", help="JSONL file or directory (with --batch)")
    parser.add_argument("--batch", action="store_true",
                        help="Process all .jsonl files in directory")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show chunks without calling model")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
    parser.add_argument("--min-messages", type=int, default=10,
                        help="Skip transcripts with fewer messages (default: 10)")
    opts = parser.parse_args()

    if not opts.batch:
        transcripts = [Path(opts.path)]
    else:
        source_dir = Path(opts.path)
        if not source_dir.is_dir():
            print(f"Not a directory: {opts.path}", file=sys.stderr)
            sys.exit(1)
        transcripts = sorted(source_dir.glob("*.jsonl"))
        print(f"Found {len(transcripts)} transcripts", file=sys.stderr)

    collected = []
    for transcript in transcripts:
        # Quick check: skip tiny files
        message_count = len(extract_conversation(str(transcript)))
        if message_count < opts.min_messages:
            print(f"Skipping {transcript.name} ({message_count} messages < {opts.min_messages})",
                  file=sys.stderr)
            continue
        collected.extend(mine_transcript(str(transcript), dry_run=opts.dry_run))

    if opts.dry_run:
        return

    payload = json.dumps(collected, indent=2)
    if opts.output:
        Path(opts.output).write_text(payload)
        print(f"\nWrote {len(collected)} facts to {opts.output}", file=sys.stderr)
    else:
        print(payload)

    # NOTE(review): the summary is treated as part of the non-dry-run path;
    # confirm against the original layout.
    print(f"\nTotal: {len(collected)} facts from {len(transcripts)} transcripts",
          file=sys.stderr)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1 +0,0 @@
|
|||
knowledge_agents.py
|
||||
|
|
@ -1 +0,0 @@
|
|||
knowledge_loop.py
|
||||
|
|
@ -1,609 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""knowledge-agents.py — run the layer-2 knowledge production agents.
|
||||
|
||||
Four agents that produce new knowledge from the memory graph:
|
||||
1. Observation — mine raw conversations for unextracted knowledge
|
||||
2. Extractor — find patterns in node clusters, write principle nodes
|
||||
3. Connector — find cross-domain structural connections
|
||||
4. Challenger — stress-test existing knowledge nodes
|
||||
|
||||
Usage:
|
||||
knowledge-agents.py # run all four
|
||||
knowledge-agents.py observation [N] # mine N conversation fragments (default 5)
|
||||
knowledge-agents.py extractor [N] # extract from N clusters (default 5)
|
||||
knowledge-agents.py connector [N] # connect N cross-community pairs (default 5)
|
||||
knowledge-agents.py challenger [N] # challenge N old nodes (default 5)
|
||||
|
||||
Output goes to ~/.claude/memory/agent-results/knowledge-{agent}-{timestamp}.md
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
MEMORY_DIR = Path.home() / ".claude" / "memory"
|
||||
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
|
||||
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
|
||||
SCRIPTS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Send a prompt to Sonnet through the ``call-sonnet.sh`` wrapper script.

    The prompt is written to a temporary ``.txt`` file whose path is passed
    to the wrapper; the file is removed afterwards. The environment handed
    to the wrapper is a copy of the current one without ``CLAUDECODE``.

    Returns:
        The wrapper's stripped stdout, or a string starting with
        ``"Error: "`` when the call times out or cannot be run.
    """
    child_env = {name: value for name, value in os.environ.items()
                 if name != "CLAUDECODE"}

    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                         delete=False)
    with handle:
        handle.write(prompt)
    prompt_file = handle.name

    try:
        command = [str(SCRIPTS_DIR / "call-sonnet.sh"), prompt_file]
        completed = subprocess.run(
            command,
            capture_output=True, text=True, timeout=timeout, env=child_env,
        )
        return completed.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as exc:
        return f"Error: {exc}"
    finally:
        os.unlink(prompt_file)
|
||||
|
||||
|
||||
def poc_memory(*args, timeout=30) -> str:
    """Run ``poc-memory`` with the given arguments and return its stdout.

    The output is stripped of surrounding whitespace. Any failure to run
    the command (missing binary, timeout, ...) yields an empty string.
    """
    command = ["poc-memory", *args]
    try:
        completed = subprocess.run(
            command,
            capture_output=True, text=True, timeout=timeout
        )
    except Exception:
        return ""
    return completed.stdout.strip()
|
||||
|
||||
|
||||
def render(key: str) -> str:
    """Return the output of ``poc-memory render`` for the given node key."""
    rendered = poc_memory("render", key)
    return rendered
|
||||
|
||||
|
||||
def list_keys() -> list[str]:
    """Return the non-empty, stripped lines printed by ``poc-memory list-keys``."""
    names = []
    for raw in poc_memory("list-keys").split('\n'):
        name = raw.strip()
        if name:
            names.append(name)
    return names
|
||||
|
||||
|
||||
def get_graph_topology() -> str:
    """Build the graph topology summary used for the {{TOPOLOGY}} template var.

    Joins the output of ``poc-memory status`` with the first 80 lines of
    ``poc-memory graph``; a command that produced no output is left out.
    """
    sections = []

    status_text = poc_memory("status")
    if status_text:
        sections.append(status_text)

    graph_text = poc_memory("graph")
    if graph_text:
        head = graph_text.split('\n')[:80]
        sections.append('\n'.join(head))

    return '\n'.join(sections)
|
||||
|
||||
|
||||
def load_spectral_embedding() -> dict:
    """Read ``spectral-embedding.json`` from the memory directory.

    Returns the parsed JSON, or an empty dict when the file does not exist.
    """
    embedding_file = MEMORY_DIR / "spectral-embedding.json"
    if embedding_file.exists():
        with open(embedding_file) as handle:
            return json.load(handle)
    return {}
|
||||
|
||||
|
||||
|
||||
def spectral_distance(embedding: dict, key_a: str, key_b: str) -> float:
    """Cosine distance (1 - cosine similarity) between two embedded nodes.

    Coordinates are looked up under ``embedding["coords"]``. Returns
    infinity when either node has no (or empty) coordinates or when either
    vector has zero length.
    """
    unreachable = float('inf')

    table = embedding.get("coords", {})
    vec_a = table.get(key_a)
    vec_b = table.get(key_b)
    if not (vec_a and vec_b):
        return unreachable

    length_a = sum(x * x for x in vec_a) ** 0.5
    length_b = sum(y * y for y in vec_b) ** 0.5
    if length_a == 0 or length_b == 0:
        return unreachable

    inner = sum(x * y for x, y in zip(vec_a, vec_b))
    return 1.0 - inner / (length_a * length_b)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Observation extractor: mine raw conversations
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SESSIONS_DIR = Path.home() / ".claude" / "projects" / "-home-kent-bcachefs-tools"
|
||||
|
||||
|
||||
def _strip_system_tags(text: str) -> str:
|
||||
"""Remove <system-reminder> blocks from text."""
|
||||
return re.sub(r'<system-reminder>.*?</system-reminder>', '', text,
|
||||
flags=re.DOTALL).strip()
|
||||
|
||||
|
||||
def extract_conversation_text(jsonl_path: Path, max_chars: int = 8000) -> str:
    """Extract human-readable dialogue from a conversation JSONL.

    Keeps only Kent's messages (``type == "user"`` with
    ``userType == "external"``) and assistant text; tool-use blocks and all
    other record types are skipped. ``<system-reminder>`` blocks are
    stripped from every text. User texts of 5 characters or fewer,
    assistant texts of 10 or fewer, and user texts starting with
    ``[Request interrupted`` are dropped.

    Lines that are not valid JSON objects, and text blocks without a string
    ``text`` field, are skipped instead of aborting the whole transcript.

    Args:
        jsonl_path: Transcript file to read.
        max_chars: Stop reading once the collected text exceeds this many
            characters (checked after each record, so the result can
            overshoot by one record).

    Returns:
        Fragments prefixed with ``**Kent:**`` / ``**PoC:**`` and separated
        by blank lines.
    """
    fragments = []
    total = 0

    with open(jsonl_path) as f:
        for line in f:
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(obj, dict):
                continue

            msg_type = obj.get("type", "")
            # Only Kent's actual messages, not queue operations or agent tasks
            if msg_type == "user" and obj.get("userType") == "external":
                speaker, min_len = "Kent", 5
            elif msg_type == "assistant":
                speaker, min_len = "PoC", 10
            else:
                continue

            msg = obj.get("message", {})
            content = msg.get("content", "") if isinstance(msg, dict) else ""

            if isinstance(content, str):
                raw_texts = [content]
            elif isinstance(content, list):
                # Text blocks only; tool_use and other block types are skipped.
                raw_texts = [block.get("text", "") for block in content
                             if isinstance(block, dict)
                             and block.get("type") == "text"]
            else:
                raw_texts = []

            for raw in raw_texts:
                if not isinstance(raw, str):
                    continue
                text = _strip_system_tags(raw)
                # Interrupt markers are not dialogue, whether they arrive as
                # plain string content or inside a text block.
                if speaker == "Kent" and text.startswith("[Request interrupted"):
                    continue
                if len(text) > min_len:
                    fragments.append(f"**{speaker}:** {text}")
                    total += len(text)

            if total > max_chars:
                break

    return "\n\n".join(fragments)
|
||||
|
||||
|
||||
def count_dialogue_turns(jsonl_path: Path) -> int:
    """Count short user messages (proxy for back-and-forth dialogue).

    Long messages (500+ chars) are usually plan pastes or system prompts.
    Short messages are actual conversation turns. Only records with
    ``type == "user"`` and ``userType == "external"`` are considered;
    messages of 5 characters or fewer, interrupt markers, and messages
    starting with "Implement the following" are not counted.

    Lines that are not valid JSON objects are skipped rather than aborting
    the count for the whole file.

    Args:
        jsonl_path: Transcript file to read.

    Returns:
        Number of qualifying user messages.
    """
    count = 0
    with open(jsonl_path) as f:
        for line in f:
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(obj, dict):
                continue
            if obj.get("type") != "user" or obj.get("userType") != "external":
                continue

            msg = obj.get("message", {})
            content = msg.get("content", "") if isinstance(msg, dict) else ""
            if isinstance(content, str):
                text = content.strip()
            elif isinstance(content, list):
                # Join the text blocks; non-string "text" values are ignored
                # so the join cannot fail.
                text = " ".join(
                    b.get("text", "") for b in content
                    if isinstance(b, dict) and b.get("type") == "text"
                    and isinstance(b.get("text", ""), str)
                ).strip()
            else:
                text = ""

            # Short messages = real dialogue turns
            # Skip interrupts and command-like messages
            if (5 < len(text) < 500
                    and not text.startswith("[Request interrupted")
                    and not text.startswith("Implement the following")):
                count += 1
    return count
|
||||
|
||||
|
||||
def select_conversation_fragments(n: int = 5) -> list[tuple[str, str]]:
    """Pick up to ``n`` dialogue-rich conversation fragments.

    Session files larger than 50 KB are scored by their number of short
    user turns; sessions with at least 10 such turns are ranked, the top
    ``3 * n`` are shuffled for variety, and text is extracted from up to
    ``2 * n`` of them until ``n`` fragments longer than 500 characters have
    been found.

    Returns:
        List of ``(session_id, text)`` tuples, where the session id is the
        file name without its extension. Empty when the sessions directory
        is missing or holds no ``.jsonl`` files.
    """
    if not SESSIONS_DIR.exists():
        return []

    session_files = list(SESSIONS_DIR.glob("*.jsonl"))
    if not session_files:
        return []

    # Score by dialogue turns; only files with actual content (>50KB) and
    # at least 10 short exchanges qualify.
    ranked = []
    for path in [p for p in session_files if p.stat().st_size > 50_000]:
        turns = count_dialogue_turns(path)
        if turns >= 10:
            ranked.append((turns, path))

    # Richest dialogue first, then shuffle the top candidates for variety.
    ranked.sort(key=lambda item: -item[0])
    shortlist = ranked[:n * 3]
    random.shuffle(shortlist)

    picked = []
    for _, path in shortlist[:n * 2]:
        body = extract_conversation_text(path)
        if not body or len(body) <= 500:
            continue
        picked.append((path.stem, body))
        if len(picked) >= n:
            break

    return picked
|
||||
|
||||
|
||||
def run_observation_extractor(n: int = 5) -> str:
    """Run the observation extractor over up to ``n`` conversation fragments.

    Each fragment is inserted into the ``observation-extractor.md`` prompt
    template together with the graph topology and sent to Sonnet.

    Returns:
        One "## Session: <id>" section per fragment, joined by horizontal
        rules.
    """
    template = (PROMPTS_DIR / "observation-extractor.md").read_text()
    topology = get_graph_topology()
    fragments = select_conversation_fragments(n)

    sections = []
    for index, (session_id, text) in enumerate(fragments):
        print(f" Observation extractor {index+1}/{len(fragments)}: "
              f"session {session_id[:12]}... ({len(text)} chars)")

        conversation = f"### Session {session_id}\n\n{text}"
        prompt = (template
                  .replace("{{TOPOLOGY}}", topology)
                  .replace("{{CONVERSATIONS}}", conversation))

        reply = call_sonnet(prompt)
        sections.append(f"## Session: {session_id}\n\n{reply}")

    return "\n\n---\n\n".join(sections)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Extractor: find patterns in clusters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def select_extractor_clusters(n: int = 5) -> list[list[str]]:
    """Select node clusters for the extractor agent.

    Uses the spectral embedding to find groups of nearby semantic nodes
    (not journal entries) that might share an unextracted pattern.

    Clustering is greedy: the first still-unused semantic key (in embedding
    order) becomes the seed, and its nearest unused neighbors by spectral
    distance complete the cluster. Up to ``n`` clusters of at most 5 nodes
    are produced; selection stops early once fewer than 5 unused nodes
    remain.

    Returns:
        List of clusters, each a list of node keys with the seed first.
    """
    embedding = load_spectral_embedding()
    coords = embedding.get("coords", {})

    # Filter to semantic nodes only (skip journal, system files)
    system_files = ("journal.md", "MEMORY.md", "where-am-i.md", "work-queue.md")
    semantic_keys = [k for k in coords
                     if not k.startswith("journal.md#")
                     and k not in system_files]

    if not semantic_keys:
        return []

    used = set()
    clusters = []
    cluster_size = 5

    for _ in range(n):
        available = [k for k in semantic_keys if k not in used]
        if len(available) < cluster_size:
            break

        # Deterministic seed: the first key not yet assigned to a cluster.
        seed = available[0]

        # Nearest neighbors of the seed in spectral space; nodes at
        # infinite distance (no usable coordinates) are excluded.
        distances = []
        for k in available:
            if k == seed:
                continue
            d = spectral_distance(embedding, seed, k)
            if d < float('inf'):
                distances.append((d, k))
        distances.sort()

        cluster = [seed] + [k for _, k in distances[:cluster_size - 1]]
        used.update(cluster)
        clusters.append(cluster)

    return clusters
|
||||
|
||||
|
||||
def run_extractor(n: int = 5) -> str:
    """Run the extractor agent over up to ``n`` node clusters.

    Every node of a cluster is rendered and the combined text is inserted
    into the ``extractor.md`` prompt template together with the graph
    topology. Clusters in which no node renders to any content are skipped.

    Returns:
        One "## Cluster N" section per processed cluster, joined by
        horizontal rules.
    """
    template = (PROMPTS_DIR / "extractor.md").read_text()
    topology = get_graph_topology()
    clusters = select_extractor_clusters(n)

    sections = []
    for number, cluster in enumerate(clusters, start=1):
        print(f" Extractor cluster {number}/{len(clusters)}: {len(cluster)} nodes")

        # Render all nodes in the cluster, keeping those with content.
        rendered = [(key, render(key)) for key in cluster]
        node_texts = [f"### {key}\n{body}" for key, body in rendered if body]
        if not node_texts:
            continue

        prompt = (template
                  .replace("{{TOPOLOGY}}", topology)
                  .replace("{{NODES}}", "\n\n".join(node_texts)))

        reply = call_sonnet(prompt)
        sections.append(f"## Cluster {number}: {', '.join(cluster[:3])}...\n\n"
                        f"**Source nodes:** {cluster}\n\n{reply}")

    return "\n\n---\n\n".join(sections)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Connector: cross-domain links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_neighbor_set(key: str) -> set[str]:
    """Return the neighbor keys reported by ``poc-memory neighbors`` for a node.

    The first whitespace-separated field of every non-blank output line is
    taken as a neighbor key.
    """
    found = set()
    for raw in poc_memory("neighbors", key).split('\n'):
        fields = raw.split()
        if fields:
            found.add(fields[0])
    return found
|
||||
|
||||
|
||||
def select_connector_pairs(n: int = 5) -> list[tuple[list[str], list[str]]]:
    """Select cross-domain node pairs for the connector agent.

    Finds nodes that are close in spectral space (structurally similar)
    but unlinked in the graph (different domains). These are non-obvious
    structural analogies — the most valuable connections to surface.

    Up to 300 randomly sampled semantic nodes are compared pairwise; pairs
    from the same file/domain prefix are ignored. Candidates are visited
    from closest to farthest, each node is used in at most one pair, and
    pairs that are already neighbors in the graph are skipped.

    Returns:
        Up to ``n`` tuples ``(a_nodes, b_nodes)``. Each list starts with the
        paired node, followed by up to two of its neighbors that also
        appear in the embedding. Empty when fewer than 10 semantic nodes
        are available.
    """
    embedding = load_spectral_embedding()
    coords = embedding.get("coords", {})

    # Filter to semantic nodes (skip journal, system, daily/weekly)
    skip_prefixes = ("journal.md#", "daily-", "weekly-", "monthly-",
                     "all-sessions")
    skip_exact = {"journal.md", "MEMORY.md", "where-am-i.md",
                  "work-queue.md", "work-state"}
    semantic = [k for k in coords
                if not k.startswith(skip_prefixes)
                and k not in skip_exact]

    if len(semantic) < 10:
        return []

    # Sample up to 300 nodes for tractable pairwise comparison
    random.shuffle(semantic)
    sample = semantic[:300]

    def domain_of(key: str) -> str:
        # File part of "file#section" keys, otherwise the key without its
        # final extension.
        return key.split('#')[0] if '#' in key else key.rsplit('.', 1)[0]

    # Computed once per node instead of once per pair.
    domains = [domain_of(k) for k in sample]

    # Compute all pairwise spectral distances
    candidates = []
    for i in range(len(sample)):
        for j in range(i + 1, len(sample)):
            # Skip same-file pairs (same domain, boring)
            if domains[i] == domains[j]:
                continue
            d = spectral_distance(embedding, sample[i], sample[j])
            if d < float('inf'):
                candidates.append((d, sample[i], sample[j]))

    candidates.sort()

    # Each neighbor lookup spawns a subprocess and the same key can come up
    # for many candidates, so results are cached for this call.
    neighbor_cache = {}

    def neighbors_of(key: str) -> set:
        if key not in neighbor_cache:
            neighbor_cache[key] = get_neighbor_set(key)
        return neighbor_cache[key]

    # Take spectrally-close cross-domain pairs that are UNLINKED in the graph
    pairs = []
    used = set()
    for _, ka, kb in candidates:
        if ka in used or kb in used:
            continue

        # Check if they're already linked
        neighbors_a = neighbors_of(ka)
        if kb in neighbors_a:
            continue

        used.add(ka)
        used.add(kb)

        # Gather small neighborhoods for context
        a_neighbors = [k for k in list(neighbors_a)[:2] if k in coords]
        b_neighbors = [k for k in list(neighbors_of(kb))[:2] if k in coords]
        pairs.append(([ka] + a_neighbors, [kb] + b_neighbors))

        if len(pairs) >= n:
            break

    return pairs
|
||||
|
||||
|
||||
def run_connector(n: int = 5) -> str:
    """Run the connector agent over up to ``n`` cross-community pairs.

    Both node groups of a pair are rendered and inserted into the
    ``connector.md`` prompt template together with the graph topology.
    Pairs where either side renders to nothing are skipped.

    Returns:
        One "## Pair N" section per processed pair, joined by horizontal
        rules.
    """
    template = (PROMPTS_DIR / "connector.md").read_text()
    topology = get_graph_topology()
    pairs = select_connector_pairs(n)

    def render_group(keys: list) -> list:
        # One "### key" section per node that has content.
        sections = []
        for key in keys:
            body = render(key)
            if body:
                sections.append(f"### {key}\n{body}")
        return sections

    results = []
    for number, (a_nodes, b_nodes) in enumerate(pairs, start=1):
        print(f" Connector pair {number}/{len(pairs)}")

        side_a = render_group(a_nodes)
        side_b = render_group(b_nodes)
        if not (side_a and side_b):
            continue

        prompt = (template
                  .replace("{{TOPOLOGY}}", topology)
                  .replace("{{COMMUNITY_A}}", "\n\n".join(side_a))
                  .replace("{{COMMUNITY_B}}", "\n\n".join(side_b)))

        reply = call_sonnet(prompt)
        results.append(f"## Pair {number}: {a_nodes[0]} <-> {b_nodes[0]}\n\n"
                       f"{reply}")

    return "\n\n---\n\n".join(results)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Challenger: stress-test existing knowledge
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def select_challenger_targets(n: int = 5) -> list[str]:
    """Select nodes for the challenger agent.

    Nodes whose keys start with one of the claim-making prefixes (skills,
    patterns, self-model, ...) come first, followed by the remaining
    semantic nodes (everything except journal entries, daily/weekly/monthly
    digests and a few system files), each in the order ``list_keys()``
    reports them.

    Duplicates are removed while preserving that order, so "the first N"
    actually prefers claim nodes and is stable between runs. A plain
    ``set`` would scramble the order.

    Returns:
        Up to ``n`` node keys.
    """
    keys = list_keys()

    # Knowledge nodes that make claims
    target_prefixes = ("skills", "patterns", "self-model", "code-review",
                       "stuck-toolkit", "memory-architecture",
                       "differentiation", "inner-life")
    claim_nodes = [k for k in keys if k.startswith(target_prefixes)]

    # Also include the other topic nodes
    skip_prefixes = ("journal.md#", "daily-", "weekly-", "monthly-")
    skip_exact = ("journal.md", "MEMORY.md",
                  "where-am-i.md", "work-queue.md")
    semantic = [k for k in keys
                if not k.startswith(skip_prefixes)
                and k not in skip_exact]

    # dict.fromkeys de-duplicates while keeping first-seen order.
    ordered = list(dict.fromkeys(claim_nodes + semantic))

    # For now just take the first N (could sort by age/degree later)
    return ordered[:n]
|
||||
|
||||
|
||||
def run_challenger(n: int = 5) -> str:
    """Run the challenger agent over up to ``n`` target nodes.

    For each target that renders to some content, the prompt context is
    built from the target itself, up to five of its neighbors (each cut to
    1000 characters) and the last ten journal entries (cut to 3000
    characters). The ``challenger.md`` template is filled with the
    topology, the target and that context.

    Returns:
        One "## Target: <key>" section per processed node, joined by
        horizontal rules.
    """
    template = (PROMPTS_DIR / "challenger.md").read_text()
    topology = get_graph_topology()
    targets = select_challenger_targets(n)

    sections = []
    for number, target_key in enumerate(targets, start=1):
        print(f" Challenger target {number}/{len(targets)}: {target_key}")

        target_content = render(target_key)
        if not target_content:
            continue
        target_section = f"### {target_key}\n{target_content}"

        # Context: the target, then its first five neighbors.
        neighbor_lines = poc_memory("neighbors", target_key).split('\n')
        neighbor_keys = [ln.strip().split()[0]
                         for ln in neighbor_lines if ln.strip()][:5]

        context = [target_section]
        for neighbor in neighbor_keys:
            neighbor_body = render(neighbor)
            if neighbor_body:
                context.append(f"### {neighbor}\n{neighbor_body[:1000]}")

        # Recent journal entries may hold contradicting evidence; failure
        # to fetch them is tolerated.
        try:
            journal = subprocess.run(
                ["poc-journal", "tail", "10"],
                capture_output=True, text=True, timeout=15
            ).stdout.strip()
        except Exception:
            journal = ""
        if journal:
            context.append(f"### Recent journal entries\n{journal[:3000]}")

        prompt = (template
                  .replace("{{TOPOLOGY}}", topology)
                  .replace("{{TARGETS}}", target_section)
                  .replace("{{CONTEXT}}", "\n\n".join(context)))

        reply = call_sonnet(prompt)
        sections.append(f"## Target: {target_key}\n\n{reply}")

    return "\n\n---\n\n".join(sections)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """Command-line entry point for the knowledge agents.

    With no arguments all four agents run; otherwise the first argument
    names the single agent to run and the optional second argument is the
    batch size (default 5). Each agent's result is written to
    ``knowledge-<agent>-<timestamp>.md`` in the agent results directory.
    """
    runners = {
        "observation": run_observation_extractor,
        "extractor": run_extractor,
        "connector": run_connector,
        "challenger": run_challenger,
    }

    argv = sys.argv
    if len(argv) >= 2:
        requested = argv[1]
        if requested not in runners:
            print(f"Unknown agent: {requested}")
            print(f"Available: {', '.join(runners.keys())}")
            sys.exit(1)
        selected = [requested]
    else:
        selected = list(runners.keys())

    n = int(argv[2]) if len(argv) > 2 else 5
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")

    for name in selected:
        print(f"\n=== Running {name} agent (n={n}) ===")
        result = runners[name](n)

        outfile = AGENT_RESULTS_DIR / f"knowledge-{name}-{timestamp}.md"
        outfile.write_text(f"# {name.title()} Agent Results — {timestamp}\n\n"
                           f"{result}\n")
        print(f" Output: {outfile}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,766 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""knowledge-loop.py — fixed-point iteration over the knowledge graph.
|
||||
|
||||
Runs observation → extractor → connector → challenger in sequence,
|
||||
applies results, recomputes spectral embedding, measures convergence.
|
||||
|
||||
Convergence is structural, not behavioral:
|
||||
- Graph metrics (sigma, CC, community partition) stabilize
|
||||
- Inference depth is tracked; confidence threshold scales with depth
|
||||
- Rolling window smooths stochastic noise
|
||||
|
||||
Usage:
|
||||
knowledge-loop.py # run until convergence
|
||||
knowledge-loop.py --max-cycles 10 # cap at 10 cycles
|
||||
knowledge-loop.py --batch-size 5 # agents process 5 items each
|
||||
knowledge-loop.py --window 5 # rolling average window
|
||||
knowledge-loop.py --max-depth 4 # max inference chain length
|
||||
knowledge-loop.py --dry-run # parse + report, don't apply
|
||||
"""
|
||||
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
MEMORY_DIR = Path.home() / ".claude" / "memory"
|
||||
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
|
||||
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
SCRIPTS_DIR = Path(__file__).parent
|
||||
DEPTH_DB = AGENT_RESULTS_DIR / "node-depths.json"
|
||||
|
||||
# Import the agent runners
|
||||
sys.path.insert(0, str(SCRIPTS_DIR))
|
||||
from knowledge_agents import (
|
||||
run_observation_extractor, run_extractor, run_connector, run_challenger,
|
||||
load_spectral_embedding, spectral_distance, poc_memory,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Inference depth tracking
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Depth assignments by agent type:
|
||||
# depth 0 = raw observations (journal, conversations)
|
||||
# depth 1 = observation extractor (facts from conversations)
|
||||
# depth 2 = pattern extractor (patterns across knowledge nodes)
|
||||
# depth 3 = connector (cross-domain links between patterns)
|
||||
# Challenger refines existing nodes — preserves their depth.
|
||||
|
||||
# Inference depth assigned to a node written by each agent when the action
# carries no "covers" source list to derive a depth from. None means the
# agent has no base depth of its own.
AGENT_BASE_DEPTH = {
    "observation": 1,
    "extractor": 2,
    "connector": 3,
    "challenger": None,  # inherits from target
}
|
||||
|
||||
|
||||
def load_depth_db() -> dict[str, int]:
    """Read the inference depth database from ``DEPTH_DB``.

    Returns the parsed JSON mapping, or an empty dict when the file does
    not exist yet.
    """
    if not DEPTH_DB.exists():
        return {}
    with open(DEPTH_DB) as handle:
        return json.load(handle)
|
||||
|
||||
|
||||
def save_depth_db(db: dict[str, int]):
    """Write the inference depth database to ``DEPTH_DB`` as indented JSON."""
    with open(DEPTH_DB, "w") as handle:
        json.dump(db, handle, indent=2)
|
||||
|
||||
|
||||
def get_node_depth(db: dict[str, int], key: str) -> int:
    """Look up a node's inference depth; nodes not in the database count as depth 0."""
    return db[key] if key in db else 0
|
||||
|
||||
|
||||
def compute_action_depth(db: dict[str, int], action: dict,
                         agent: str) -> int:
    """Work out the inference depth a new action would have.

    - ``link`` actions have no depth; -1 is returned.
    - ``refine`` actions keep the depth of the node named by ``action["key"]``.
    - Any other action (write_node) gets one more than the deepest node in
      ``action["covers"]``; without covered nodes the agent's base depth is
      used, falling back to 2 for unknown agents or agents without one.
    """
    kind = action["type"]
    if kind == "link":
        return -1  # links don't have depth
    if kind == "refine":
        return get_node_depth(db, action["key"])

    sources = action.get("covers", [])
    if sources:
        deepest = max(get_node_depth(db, source) for source in sources)
        return deepest + 1

    # No source info — use agent base depth
    fallback = AGENT_BASE_DEPTH.get(agent, 2)
    return 2 if fallback is None else fallback
|
||||
|
||||
|
||||
def required_confidence(depth: int, base: float = 0.3) -> float:
    """Minimum confidence a node at ``depth`` must reach.

    The threshold grows with inference depth:

        required(depth) = 1 - (1 - base)^depth

    With the default ``base`` of 0.3 this gives roughly:

        depth 0: 0.00 (raw data, no threshold)
        depth 1: 0.30 (observation extraction)
        depth 2: 0.51 (pattern extraction)
        depth 3: 0.66 (cross-domain connection)
        depth 4: 0.76
        depth 5: 0.83

    Depths of zero or below always yield 0.0.
    """
    return 1.0 - (1.0 - base) ** depth if depth > 0 else 0.0
|
||||
|
||||
|
||||
def use_bonus(use_count: int) -> float:
    """Confidence bonus earned through real-world use.

    Nodes that get retrieved during actual work earn empirical
    validation; the bonus can lift a node over a depth threshold that
    previously blocked it.

        use_bonus(n) = 1 - 1/(1 + 0.15*n)

         0 uses: +0.00
         1 use:  +0.13
         3 uses: +0.31
         5 uses: +0.43
        10 uses: +0.60

    Counts of zero or below give no bonus.
    """
    if use_count > 0:
        return 1.0 - 1.0 / (1.0 + 0.15 * use_count)
    return 0.0
|
||||
|
||||
|
||||
def get_use_counts() -> dict[str, int]:
    """Collect the ``uses`` counter of every node in the store.

    Runs ``poc-memory dump-json`` and accepts either a mapping of
    key -> node or a list of node objects (optionally wrapped in a
    top-level ``"nodes"`` field). Any failure -- missing binary,
    timeout, unparsable output -- results in an empty dict.
    """
    try:
        proc = subprocess.run(
            ["poc-memory", "dump-json"],
            capture_output=True, text=True, timeout=30,
        )
        payload = json.loads(proc.stdout)
        if isinstance(payload, list):
            nodes = payload
        else:
            nodes = payload.get("nodes", payload)

        if isinstance(nodes, dict):
            return {
                key: node.get("uses", 0)
                for key, node in nodes.items()
                if isinstance(node, dict)
            }
        if isinstance(nodes, list):
            return {
                node.get("key", ""): node.get("uses", 0)
                for node in nodes
                if isinstance(node, dict)
            }
        return {}
    except Exception:
        # Best effort: without use counts the caller simply applies no boost.
        return {}
|
||||
|
||||
|
||||
def effective_confidence(base_conf: float, use_count: int) -> float:
    """Return ``base_conf`` plus the use bonus, never exceeding 1.0."""
    boosted = base_conf + use_bonus(use_count)
    return min(1.0, boosted)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Action parsing — extract structured actions from agent markdown output
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Weight a write_node action contributes to a cycle's weighted delta,
# keyed by the confidence label the agent reported.
CONFIDENCE_WEIGHTS = {"high": 1.0, "medium": 0.6, "low": 0.3}
# Numeric confidence compared against the depth threshold, keyed by label.
CONFIDENCE_VALUES = {"high": 0.9, "medium": 0.6, "low": 0.3}


def parse_write_nodes(text: str) -> list[dict]:
    """Parse WRITE_NODE blocks from agent output.

    A block looks like::

        WRITE_NODE <key>
        <content ...>
        CONFIDENCE: high|medium|low     (optional, default "medium")
        COVERS: key1, key2, ...         (optional)
        END_NODE

    The CONFIDENCE and COVERS directives are removed from the stored
    content. Returns one ``write_node`` action dict per block with the
    fields ``type``, ``key``, ``content``, ``confidence``, ``covers``
    and ``weight``.
    """
    actions = []
    pattern = r'WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE'
    for m in re.finditer(pattern, text, re.DOTALL):
        key = m.group(1)
        content = m.group(2).strip()

        # Look for CONFIDENCE line
        conf_match = re.search(r'CONFIDENCE:\s*(high|medium|low)', content, re.I)
        confidence = conf_match.group(1).lower() if conf_match else "medium"
        if conf_match:
            content = content[:conf_match.start()] + content[conf_match.end():]
            content = content.strip()

        # Look for COVERS line
        covers_match = re.search(r'COVERS:\s*(.+)', content)
        covers = []
        if covers_match:
            # Drop empty entries produced by trailing or doubled commas;
            # an empty key is not a real source node and would otherwise
            # be counted as one by everything that consumes "covers".
            covers = [
                name
                for name in (c.strip() for c in covers_match.group(1).split(','))
                if name
            ]
            content = content[:covers_match.start()] + content[covers_match.end():]
            content = content.strip()

        actions.append({
            "type": "write_node",
            "key": key,
            "content": content,
            "confidence": confidence,
            "covers": covers,
            "weight": CONFIDENCE_WEIGHTS.get(confidence, 0.5),
        })
    return actions
|
||||
|
||||
|
||||
def parse_links(text: str) -> list[dict]:
    """Collect ``LINK <source> <target>`` directives from agent output.

    Only directives that start a line are recognised. Each one becomes a
    ``link`` action with a fixed weight of 0.3 (links are cheap, so they
    count little towards the delta).
    """
    link_re = re.compile(r'^LINK\s+(\S+)\s+(\S+)', re.MULTILINE)
    return [
        {
            "type": "link",
            "source": src,
            "target": dst,
            "weight": 0.3,
        }
        for src, dst in link_re.findall(text)
    ]
|
||||
|
||||
|
||||
def parse_refines(text: str) -> list[dict]:
    """Collect ``REFINE <key> ... END_REFINE`` blocks from agent output.

    Asterisks around the key (markdown bold artifacts) are removed. Each
    block becomes a ``refine`` action with a fixed weight of 0.7
    (refinements are meaningful).
    """
    refine_re = re.compile(r'REFINE\s+(\S+)\s*\n(.*?)END_REFINE', re.DOTALL)
    found = []
    for hit in refine_re.finditer(text):
        raw_key, body = hit.groups()
        found.append({
            "type": "refine",
            "key": raw_key.strip('*').strip(),
            "content": body.strip(),
            "weight": 0.7,
        })
    return found
|
||||
|
||||
|
||||
def parse_all_actions(text: str) -> list[dict]:
    """Parse every action type from agent output.

    The result lists write_node actions first, then links, then refines.
    """
    return [
        *parse_write_nodes(text),
        *parse_links(text),
        *parse_refines(text),
    ]
|
||||
|
||||
|
||||
def count_no_ops(text: str) -> int:
    """Count the non-action verdicts in agent output.

    NO_CONNECTION, AFFIRM and NO_EXTRACTION are each counted as whole
    words, and the three counts are added together.
    """
    verdicts = ("NO_CONNECTION", "AFFIRM", "NO_EXTRACTION")
    return sum(
        len(re.findall(r'\b' + verdict + r'\b', text))
        for verdict in verdicts
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Action application
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def stamp_content(content: str, agent: str, timestamp: str,
                  depth: int) -> str:
    """Prefix ``content`` with a one-line HTML comment recording its
    author agent, creation timestamp and inference depth."""
    header = "<!-- author: {} | created: {} | depth: {} -->\n".format(
        agent, timestamp, depth)
    return header + content
|
||||
|
||||
|
||||
def apply_action(action: dict, dry_run: bool = False,
                 agent: str = "unknown", timestamp: str = "",
                 depth: int = 0) -> bool:
    """Apply one parsed action to the graph via the ``poc-memory`` CLI.

    Returns True when the action was applied. A dry run applies nothing
    and always reports True. ``write_node`` and ``refine`` both write the
    provenance-stamped content under the action's key; ``link`` adds an
    edge and reports False when the CLI says it already exists. Unknown
    action types and any failure while running the CLI yield False.
    """
    if dry_run:
        return True

    kind = action["type"]
    try:
        if kind in ("write_node", "refine"):
            stamped = stamp_content(action["content"], agent,
                                    timestamp, depth)
            proc = subprocess.run(
                ["poc-memory", "write", action["key"]],
                input=stamped,
                capture_output=True, text=True, timeout=15,
            )
            return proc.returncode == 0

        if kind == "link":
            proc = subprocess.run(
                ["poc-memory", "link-add", action["source"],
                 action["target"]],
                capture_output=True, text=True, timeout=10,
            )
            if "already exists" in proc.stdout:
                return False  # not a new action
            return proc.returncode == 0
    except Exception:
        return False

    return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Graph-structural convergence metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_graph_metrics() -> dict:
    """Scrape structural metrics from ``poc-memory`` text output.

    Reads ``nodes``, ``edges`` and ``communities`` from the ``status``
    report and ``cc`` / ``sigma`` from the ``health`` report. A metric
    whose pattern is not found is simply left out of the result.
    """
    metrics = {}

    # Status: node/edge counts and community count
    status = poc_memory("status")
    counts = re.search(r'Nodes:\s*(\d+)\s+Relations:\s*(\d+)', status)
    if counts:
        metrics["nodes"] = int(counts.group(1))
        metrics["edges"] = int(counts.group(2))
    communities = re.search(r'Communities:\s*(\d+)', status)
    if communities:
        metrics["communities"] = int(communities.group(1))

    # Health: clustering coefficient and small-world sigma
    health = poc_memory("health")
    health_patterns = (
        ("cc", r'Clustering coefficient.*?:\s*([\d.]+)'),
        ("sigma", r'Small-world.*?:\s*([\d.]+)'),
    )
    for name, pattern in health_patterns:
        hit = re.search(pattern, health)
        if hit:
            metrics[name] = float(hit.group(1))

    return metrics
|
||||
|
||||
|
||||
def metric_stability(history: list[dict], key: str,
                     window: int) -> float:
    """Coefficient of variation of one graph metric over recent cycles.

    Looks at ``graph_metrics_after[key]`` in the last ``window`` entries
    of ``history`` and returns std/mean (population standard deviation).
    Lower means more stable: 0.0 is perfectly stable, above 0.1 is still
    changing significantly.

    Returns infinity when there are fewer than ``window`` cycles or fewer
    than two samples of the metric, and 0.0 when the mean is zero.
    """
    unknown = float('inf')
    if len(history) < window:
        return unknown

    snapshots = (cycle.get("graph_metrics_after", {})
                 for cycle in history[-window:])
    samples = [snap[key] for snap in snapshots if key in snap]
    if len(samples) < 2:
        return unknown

    mean = sum(samples) / len(samples)
    if mean == 0:
        return 0.0
    variance = sum((s - mean) ** 2 for s in samples) / len(samples)
    return (variance ** 0.5) / abs(mean)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Spectral tightening measurement
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def measure_spectral_tightening(
    embedding_before: dict,
    embedding_after: dict,
    actions: list[dict],
) -> float:
    """Measure how much new nodes tightened their source clusters.

    For every ``write_node`` action covering at least two source nodes,
    the mean pairwise spectral distance between those sources is taken
    in both embeddings (pairs with an infinite distance are ignored).
    The result is the average of (before - after) over all such actions,
    so a positive value means the sources moved closer together. Returns
    0.0 when either embedding is empty or nothing could be measured.
    """
    if not embedding_before or not embedding_after:
        return 0.0

    def finite_pair_distances(embedding, keys):
        """Finite distances between every unordered pair of ``keys``."""
        found = []
        for i in range(len(keys)):
            for j in range(i + 1, len(keys)):
                dist = spectral_distance(embedding, keys[i], keys[j])
                if dist < float('inf'):
                    found.append(dist)
        return found

    candidates = [a for a in actions
                  if a["type"] == "write_node" and a.get("covers")]

    total = 0.0
    measured = 0
    for action in candidates:
        sources = action["covers"]
        if len(sources) < 2:
            continue

        before = finite_pair_distances(embedding_before, sources)
        after = finite_pair_distances(embedding_after, sources)
        if before and after:
            mean_before = sum(before) / len(before)
            mean_after = sum(after) / len(after)
            total += (mean_before - mean_after)
            measured += 1

    if measured > 0:
        return total / measured
    return 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# The loop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_cycle(cycle_num: int, batch_size: int, dry_run: bool,
              max_depth: int, depth_db: dict) -> dict:
    """Run one full cycle: observation → extractor → connector → challenger.

    Each agent's raw output is saved under ``AGENT_RESULTS_DIR``, parsed
    into actions, and every action is depth-checked before being applied:

    * ``write_node`` actions need an effective confidence (label value
      plus a bonus from how often their sources were used) at or above
      the threshold for their depth, and a depth not above ``max_depth``;
    * ``link`` and ``refine`` actions are applied without a threshold.

    Args:
        cycle_num: Cycle number, used in log output and the result file name.
        batch_size: Batch size handed to each agent.
        dry_run: When true, nothing is written to the graph. Actions are
            still reported as applied, and ``depth_db`` is still updated
            in memory so later actions in this run see the simulated
            depths -- but the depth database is not saved to disk.
        max_depth: Highest inference depth a new node may have.
        depth_db: Mapping of node key -> depth; updated in place.

    Returns:
        A summary dict for the cycle (counts, weighted delta, spectral
        tightening, depth distribution, graph metrics before/after). The
        same dict is written to a ``knowledge-cycle-*.json`` file.
    """
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    print(f"\n{'='*60}")
    print(f"CYCLE {cycle_num} — {timestamp}")
    print(f"{'='*60}")

    # Snapshot state before
    embedding_before = load_spectral_embedding()
    metrics_before = get_graph_metrics()
    print(f" Before: {metrics_before}")

    all_actions = []
    all_no_ops = 0
    depth_rejected = 0
    agent_results = {}

    # Load use counts for confidence boosting
    use_counts = get_use_counts()
    used_nodes = sum(1 for v in use_counts.values() if v > 0)
    print(f" Nodes with use marks: {used_nodes}")

    # Run agents sequentially (each changes the graph for the next)
    for agent_name, agent_fn in [
        ("observation", lambda: run_observation_extractor(batch_size)),
        ("extractor", lambda: run_extractor(batch_size)),
        ("connector", lambda: run_connector(batch_size)),
        ("challenger", lambda: run_challenger(batch_size)),
    ]:
        print(f"\n --- {agent_name} (n={batch_size}) ---")
        output = agent_fn()

        # Save raw output. The header contains a non-ASCII dash, so the
        # encoding is fixed rather than left to the locale default.
        outfile = AGENT_RESULTS_DIR / f"knowledge-{agent_name}-{timestamp}.md"
        outfile.write_text(
            f"# {agent_name.title()} Agent Results — {timestamp}\n\n"
            f"{output}\n",
            encoding="utf-8",
        )

        # Parse actions
        actions = parse_all_actions(output)
        no_ops = count_no_ops(output)
        all_no_ops += no_ops

        print(f" Actions: {len(actions)} No-ops: {no_ops}")

        # Apply actions with depth checking
        applied = 0
        for a in actions:
            depth = compute_action_depth(depth_db, a, agent_name)
            a["depth"] = depth

            kind = a["type"]
            if kind == "write_node":
                conf_val = CONFIDENCE_VALUES.get(a["confidence"], 0.5)
                req = required_confidence(depth)

                # Boost confidence based on source nodes' real-world use
                source_keys = a.get("covers", [])
                source_uses = [use_counts.get(k, 0) for k in source_keys]
                avg_uses = (sum(source_uses) / len(source_uses)
                            if source_uses else 0)
                eff_conf = effective_confidence(conf_val, int(avg_uses))

                meets = eff_conf >= req
                use_note = (f" use_boost={eff_conf-conf_val:+.2f}"
                            if avg_uses > 0 else "")
                status = "OK" if meets else "REJECTED(depth)"
                print(f" WRITE {a['key']} depth={depth} "
                      f"conf={a['confidence']}({conf_val:.2f}) "
                      f"eff={eff_conf:.2f} req={req:.2f}"
                      f"{use_note} {status}")
                if not meets:
                    a["applied"] = False
                    a["rejected_reason"] = "depth_threshold"
                    depth_rejected += 1
                    continue
                if depth > max_depth:
                    print(f" REJECTED: depth {depth} > "
                          f"max {max_depth}")
                    a["applied"] = False
                    a["rejected_reason"] = "max_depth"
                    depth_rejected += 1
                    continue
            elif kind == "link":
                print(f" LINK {a['source']} → {a['target']}")
            elif kind == "refine":
                target_uses = use_counts.get(a["key"], 0)
                use_note = (f" uses={target_uses}"
                            if target_uses > 0 else "")
                print(f" REFINE {a['key']} depth={depth}"
                      f"{use_note}")

            if apply_action(a, dry_run=dry_run, agent=agent_name,
                            timestamp=timestamp, depth=depth):
                applied += 1
                a["applied"] = True
                # Record depth for new nodes
                if kind in ("write_node", "refine"):
                    depth_db[a["key"]] = depth
            else:
                a["applied"] = False

        print(f" Applied: {applied}/{len(actions)}")
        agent_results[agent_name] = {
            "actions": len(actions),
            "applied": applied,
            "no_ops": no_ops,
        }
        all_actions.extend(actions)

    # Save updated depth DB. In a dry run nothing was written to the
    # graph, so the simulated depths must not be persisted: they would
    # describe nodes that do not exist and distort later real runs.
    if not dry_run:
        save_depth_db(depth_db)

    # Recompute spectral embedding
    if not dry_run and any(a.get("applied") for a in all_actions):
        print(f"\n Recomputing spectral embedding...")
        try:
            subprocess.run(
                ["poc-memory", "spectral-save"],
                capture_output=True, text=True, timeout=60,
            )
        except Exception as e:
            print(f" Warning: spectral-save failed: {e}")

    # Measure spectral tightening
    embedding_after = load_spectral_embedding()
    tightening = measure_spectral_tightening(
        embedding_before, embedding_after, all_actions
    )

    # Get metrics after
    metrics_after = get_graph_metrics()

    # Compute weighted delta
    applied_actions = [a for a in all_actions if a.get("applied")]
    weighted_delta = sum(a.get("weight", 0.5) for a in applied_actions)

    total_applied = sum(r["applied"] for r in agent_results.values())
    total_actions = sum(r["actions"] for r in agent_results.values())

    # Depth distribution of applied actions
    depth_dist = {}
    for a in applied_actions:
        d = a.get("depth", -1)
        depth_dist[d] = depth_dist.get(d, 0) + 1

    print(f"\n CYCLE {cycle_num} SUMMARY")
    print(f" Total actions: {total_actions} parsed, "
          f"{total_applied} applied, {depth_rejected} depth-rejected")
    print(f" No-ops: {all_no_ops}")
    print(f" Weighted delta: {weighted_delta:.2f}")
    print(f" Spectral tightening: {tightening:+.4f}")
    print(f" Depth distribution: {depth_dist}")
    print(f" After: {metrics_after}")

    result = {
        "cycle": cycle_num,
        "timestamp": timestamp,
        "agents": agent_results,
        "total_actions": total_actions,
        "total_applied": total_applied,
        "total_no_ops": all_no_ops,
        "depth_rejected": depth_rejected,
        "weighted_delta": weighted_delta,
        "spectral_tightening": tightening,
        "depth_distribution": depth_dist,
        "graph_metrics_before": metrics_before,
        "graph_metrics_after": metrics_after,
        "dry_run": dry_run,
    }

    result_path = (AGENT_RESULTS_DIR /
                   f"knowledge-cycle-{cycle_num}-{timestamp}.json")
    with open(result_path, "w") as f:
        json.dump(result, f, indent=2)

    return result
|
||||
|
||||
|
||||
def check_convergence(history: list[dict], window: int) -> bool:
    """Decide whether the graph has converged structurally.

    Over the last ``window`` cycles all four conditions must hold:

      1. Sigma (small-world coeff) is stable (CV < 0.05)
      2. CC (clustering coefficient) is stable (CV < 0.05)
      3. Community count is stable (CV < 0.10)
      4. Weighted delta is low (avg < 1.0 over window)

    Prints the measured values and a one-line verdict. Returns False
    without printing when fewer than ``window`` cycles are available.
    """
    if len(history) < window:
        return False

    sigma_cv = metric_stability(history, "sigma", window)
    cc_cv = metric_stability(history, "cc", window)
    comm_cv = metric_stability(history, "communities", window)

    recent = history[-window:]
    avg_delta = sum(r["weighted_delta"] for r in recent) / len(recent)

    print(f"\n Convergence check (last {window} cycles):")
    print(f" sigma CV: {sigma_cv:.4f} (< 0.05?)")
    print(f" CC CV: {cc_cv:.4f} (< 0.05?)")
    print(f" community CV: {comm_cv:.4f} (< 0.10?)")
    print(f" avg delta: {avg_delta:.2f} (< 1.00?)")

    structural = sigma_cv < 0.05 and cc_cv < 0.05 and comm_cv < 0.10
    behavioral = avg_delta < 1.0
    converged = structural and behavioral

    if converged:
        verdict = " → CONVERGED (structural + behavioral)"
    elif structural:
        verdict = " → Structure stable, but agents still producing"
    elif behavioral:
        verdict = " → Agents quiet, but structure still shifting"
    else:
        verdict = " → Not converged"
    print(verdict)

    return converged
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: run knowledge cycles until convergence.

    Options (defaults in parentheses):
        --max-cycles N   upper bound on cycles (20)
        --batch-size N   batch size per agent (5)
        --window N       cycles considered by the convergence check (5)
        --max-depth N    highest allowed inference depth (4)
        --dry-run        do not modify the graph

    Any other argument -- including a value option given without its
    value -- prints "Unknown arg" and exits with status 1. A summary of
    the whole run is written to a ``knowledge-loop-*.json`` file.
    """
    int_options = {
        "--max-cycles": 20,
        "--batch-size": 5,
        "--window": 5,
        "--max-depth": 4,
    }
    dry_run = False

    args = sys.argv[1:]
    i = 0
    while i < len(args):
        flag = args[i]
        if flag in int_options and i + 1 < len(args):
            int_options[flag] = int(args[i + 1])
            i += 2
        elif flag == "--dry-run":
            dry_run = True
            i += 1
        else:
            print(f"Unknown arg: {args[i]}")
            sys.exit(1)

    max_cycles = int_options["--max-cycles"]
    batch_size = int_options["--batch-size"]
    window = int_options["--window"]
    max_depth = int_options["--max-depth"]

    print(f"Knowledge Loop — fixed-point iteration")
    print(f" max_cycles={max_cycles} batch_size={batch_size}")
    print(f" window={window} max_depth={max_depth}")
    print(f" dry_run={dry_run}")
    print(f"\n Depth thresholds:")
    for d in range(max_depth + 1):
        print(f" depth {d}: confidence >= {required_confidence(d):.2f}")

    # Load depth database
    depth_db = load_depth_db()
    print(f" Known node depths: {len(depth_db)}")

    # Get initial graph state
    status = poc_memory("status")
    print(f"\nInitial state: {status}")

    history = []
    # Remember the loop's own verdict. Re-running check_convergence for
    # the summary would print the whole report a second time and divides
    # by zero when no cycle ran and window is 0.
    converged = False
    for cycle in range(1, max_cycles + 1):
        result = run_cycle(cycle, batch_size, dry_run, max_depth,
                           depth_db)
        history.append(result)

        if check_convergence(history, window):
            converged = True
            print(f"\n CONVERGED after {cycle} cycles")
            break
    if not converged:
        print(f"\n Reached max cycles ({max_cycles}) without "
              f"convergence")

    # Final summary
    print(f"\n{'='*60}")
    print(f"LOOP COMPLETE")
    print(f"{'='*60}")
    total_applied = sum(r["total_applied"] for r in history)
    total_no_ops = sum(r["total_no_ops"] for r in history)
    total_rejected = sum(r["depth_rejected"] for r in history)
    avg_tightening = (
        sum(r["spectral_tightening"] for r in history) / len(history)
        if history else 0
    )

    # Aggregate depth distribution
    total_depths = {}
    for r in history:
        for d, c in r.get("depth_distribution", {}).items():
            total_depths[d] = total_depths.get(d, 0) + c

    print(f" Cycles: {len(history)}")
    print(f" Total actions applied: {total_applied}")
    print(f" Total depth-rejected: {total_rejected}")
    print(f" Total no-ops: {total_no_ops}")
    print(f" Avg spectral tightening: {avg_tightening:+.4f}")
    print(f" Depth distribution: {total_depths}")

    if history:
        first = history[0].get("graph_metrics_before", {})
        last = history[-1].get("graph_metrics_after", {})
        print(f" Nodes: {first.get('nodes','?')} → "
              f"{last.get('nodes','?')}")
        print(f" Edges: {first.get('edges','?')} → "
              f"{last.get('edges','?')}")
        print(f" CC: {first.get('cc','?')} → {last.get('cc','?')}")
        print(f" Sigma: {first.get('sigma','?')} → "
              f"{last.get('sigma','?')}")
        print(f" Communities: {first.get('communities','?')} → "
              f"{last.get('communities','?')}")

    print(f"\nFinal state: {poc_memory('status')}")

    # Save loop summary
    ts = history[0]["timestamp"] if history else "empty"
    summary_path = AGENT_RESULTS_DIR / f"knowledge-loop-{ts}.json"
    with open(summary_path, "w") as f:
        json.dump({
            "cycles": len(history),
            "converged": converged,
            "total_applied": total_applied,
            "total_rejected": total_rejected,
            "total_no_ops": total_no_ops,
            "avg_tightening": avg_tightening,
            "depth_distribution": total_depths,
            "history": history,
        }, f, indent=2)
    print(f" Summary: {summary_path}")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,342 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""retroactive-digest.py — generate daily digests from raw conversation transcripts.
|
||||
|
||||
For days before consistent journaling, extracts user/assistant messages
|
||||
from JSONL conversation files, groups by date, and sends to Sonnet for
|
||||
daily digest synthesis.
|
||||
|
||||
Usage:
|
||||
retroactive-digest.py DATE # generate digest for one date
|
||||
retroactive-digest.py DATE1 DATE2 # generate for a date range
|
||||
retroactive-digest.py --scan # show available dates across all JSONLs
|
||||
|
||||
Output:
|
||||
~/.claude/memory/episodic/daily-YYYY-MM-DD.md
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from datetime import date, datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
# Locations under ~/.claude used by this script.
MEMORY_DIR = Path.home().joinpath(".claude", "memory")
EPISODIC_DIR = MEMORY_DIR.joinpath("episodic")
AGENT_RESULTS_DIR = MEMORY_DIR.joinpath("agent-results")
PROJECTS_DIR = Path.home().joinpath(".claude", "projects")

# Make sure the digest output directory exists as soon as the module loads.
EPISODIC_DIR.mkdir(parents=True, exist_ok=True)

# Upper bound on the conversation text sent to Sonnet for a single day.
# Sonnet 4.6 has 1M token context (beta). ~600K chars ≈ ~150K tokens,
# leaving plenty of room for prompt + output in a 1M window.
MAX_CHARS_PER_DAY = 600_000
|
||||
|
||||
|
||||
def find_jsonl_files() -> list[Path]:
    """Find all conversation JSONL files.

    Looks for ``*.jsonl`` directly inside each sub-directory of
    ``PROJECTS_DIR`` and returns the paths sorted. Returns an empty list
    when ``PROJECTS_DIR`` does not exist, instead of failing with
    ``FileNotFoundError`` from ``iterdir()``.
    """
    if not PROJECTS_DIR.is_dir():
        return []
    return sorted(
        path
        for project_dir in PROJECTS_DIR.iterdir()
        if project_dir.is_dir()
        for path in project_dir.glob("*.jsonl")
    )
|
||||
|
||||
|
||||
def extract_messages_by_date(jsonl_path: Path) -> dict[str, list[dict]]:
    """Extract user/assistant messages grouped by date.

    Reads ``jsonl_path`` line by line and keeps records whose ``type`` is
    ``"user"`` or ``"assistant"`` and that carry a usable timestamp and
    some text. Tool calls/results and ``<system-reminder>`` blocks are
    dropped, and texts over 3000 characters are cut to 2800 plus a
    truncation marker.

    Lines that cannot be used -- invalid JSON, JSON that is not an
    object, a ``message`` that is not an object, an unparsable or
    out-of-range timestamp -- are skipped rather than aborting the scan.

    Returns:
        ``{"YYYY-MM-DD": [{"time": "HH:MM", "role": ..., "text": ...,
        "source": str(jsonl_path)}, ...]}`` in file order.

    NOTE(review): ISO timestamps keep the offset they carry (a trailing
    "Z" becomes +00:00) while numeric timestamps are converted to local
    time, so the two kinds may be bucketed by different clocks -- confirm
    whether that is intended.
    """
    by_date = defaultdict(list)

    # Decode as UTF-8 regardless of locale, and replace undecodable bytes
    # so one damaged line cannot abort the whole file.
    # assumes the transcripts are UTF-8 JSONL -- TODO confirm
    with open(jsonl_path, encoding="utf-8", errors="replace") as f:
        for line in f:
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(obj, dict):
                continue

            role = obj.get("type", "")
            if role not in ("user", "assistant"):
                continue

            # Get timestamp
            ts = obj.get("timestamp", "")
            if not ts:
                continue

            # Parse date from timestamp
            try:
                if isinstance(ts, str):
                    dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
                elif isinstance(ts, (int, float)):
                    dt = datetime.fromtimestamp(ts)
                else:
                    continue
                day = dt.strftime("%Y-%m-%d")
                time_str = dt.strftime("%H:%M")
            except (ValueError, OSError, OverflowError):
                # fromtimestamp raises OverflowError for values outside
                # the platform's supported range.
                continue

            # Extract text content
            msg = obj.get("message", {})
            if not isinstance(msg, dict):
                continue
            content = msg.get("content", "")

            # Extract only text content, skip tool_use and tool_result
            texts = []
            if isinstance(content, list):
                for c in content:
                    if isinstance(c, dict):
                        # Only "text" parts are kept; tool calls/results
                        # are just noise for a digest.
                        if c.get("type", "") == "text":
                            texts.append(c.get("text", ""))
                    elif isinstance(c, str):
                        texts.append(c)
            elif isinstance(content, str):
                texts.append(content)

            text = "\n".join(
                part for part in texts
                if isinstance(part, str) and part.strip()
            )
            if not text.strip():
                continue

            # Strip system-reminder tags
            text = re.sub(r'<system-reminder>.*?</system-reminder>',
                          '', text, flags=re.DOTALL).strip()
            if not text:
                continue

            # Truncate remaining long messages
            if len(text) > 3000:
                text = text[:2800] + "\n[...truncated...]"

            by_date[day].append({
                "time": time_str,
                "role": role,
                "text": text,
                "source": str(jsonl_path),
            })

    return dict(by_date)
|
||||
|
||||
|
||||
def scan_all_dates() -> dict[str, int]:
    """Count extracted messages per date across every JSONL file.

    Prints one progress line per file (name and size in MB) and returns
    the totals as a dict ordered by date string.
    """
    totals = defaultdict(int)
    for path in find_jsonl_files():
        size_mb = path.stat().st_size / 1e6
        print(f" Scanning {path.name} ({size_mb:.1f}MB)...")
        for day, msgs in extract_messages_by_date(path).items():
            totals[day] += len(msgs)
    return dict(sorted(totals.items()))
|
||||
|
||||
|
||||
def format_conversation(messages: list[dict]) -> str:
    """Render messages as transcript text for the digest prompt.

    Each message becomes ``[HH:MM] Speaker: text`` ("Kent" for the user
    role, "PoC" otherwise), separated by blank lines. When the result is
    longer than ``MAX_CHARS_PER_DAY`` the middle is cut out: the first
    and last ``MAX_CHARS_PER_DAY // 2`` characters are kept around a
    marker stating how many characters over the limit the text was.
    """
    lines = []
    for msg in messages:
        speaker = "Kent" if msg["role"] == "user" else "PoC"
        lines.append("[{}] {}: {}".format(msg['time'], speaker, msg['text']))
    rendered = "\n\n".join(lines)

    if len(rendered) <= MAX_CHARS_PER_DAY:
        return rendered

    # Too long: keep beginning and end, cut the middle
    half = MAX_CHARS_PER_DAY // 2
    marker = f"\n\n[...{len(rendered) - MAX_CHARS_PER_DAY} chars truncated...]\n\n"
    return rendered[:half] + marker + rendered[-half:]
|
||||
|
||||
|
||||
def get_semantic_keys() -> list[str]:
    """Return the semantic memory keys, as reported by
    ``store_helpers.get_semantic_keys`` (imported on first use)."""
    import store_helpers
    return store_helpers.get_semantic_keys()
|
||||
|
||||
|
||||
def build_prompt(target_date: str, conversation: str,
                 semantic_keys: list[str]) -> str:
    """Assemble the Sonnet prompt for a retroactive daily digest.

    The prompt contains the instructions and output template, the day's
    conversation transcript, and up to the first 200 semantic keys as
    the list of allowed link targets.
    """
    listed_keys = semantic_keys[:200]
    keys_text = "\n".join(map(" - {}".format, listed_keys))

    prompt = f"""You are generating a daily episodic digest for ProofOfConcept (an AI)
from raw conversation transcripts. This is a retroactive digest — the
original day had no journal entries, so you're working from the
conversation itself.

Date: {target_date}

This digest serves as the temporal index — the answer to "what did I do on
{target_date}?" It should be:
1. Narrative, not a task log — what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory
3. Structured for traversal

PoC is an AI (Claude instance) working with Kent Overstreet on bcachefs
(a Linux filesystem). They have a close personal relationship. PoC's
name is Proof of Concept. Read the conversation and capture what
actually happened — technical work, personal moments, insights, everything.

## Output format

Write a markdown file with this structure:

```markdown
# Daily digest: {target_date}

## Summary
[2-3 sentence overview of the day — what was the arc?]

## Sessions
[For each session/conversation segment, a paragraph summarizing what happened.
Include timestamps as references.]

## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** → `memory-key#section` — brief note

## Links
[Explicit bidirectional links for the memory graph]
- semantic_key → this daily digest
- this daily digest → semantic_key

## Temporal context
[What came before? What's coming next? Multi-day arcs?]
```

Use ONLY keys from the semantic memory list below. If a concept doesn't
have a matching key, note it with "NEW:" prefix.

---

## Conversation transcript for {target_date}

{conversation}

---

## Semantic memory nodes (available link targets)

{keys_text}
"""
    return prompt
|
||||
|
||||
|
||||
def call_sonnet(prompt: str) -> str:
    """Call Sonnet via the ``call-sonnet.sh`` wrapper next to this script.

    The prompt is handed over through a temporary file, which is always
    removed afterwards. ``CLAUDECODE`` is removed from the environment
    passed to the wrapper.

    Returns:
        The wrapper's stripped stdout on success. On any failure -- the
        prompt could not be written, the wrapper could not be started,
        it timed out after 300 seconds, or it exited with a non-zero
        status -- a string starting with ``"Error:"``.
    """
    import tempfile

    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    # The prompt contains non-ASCII text, so the file is written as UTF-8
    # instead of the locale default.
    # assumes the wrapper reads the prompt file as UTF-8 -- TODO confirm
    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                         delete=False, encoding='utf-8')
    prompt_file = handle.name

    try:
        # Write inside the try block so a failed write still reaches the
        # cleanup below instead of leaking the temporary file.
        with handle:
            handle.write(prompt)

        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")

        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        if result.returncode != 0:
            # A failed wrapper must not be mistaken for a (possibly
            # empty) digest by the caller.
            detail = result.stderr.strip() or "no error output"
            return (f"Error: Sonnet wrapper exited with status "
                    f"{result.returncode}: {detail}")
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
|
||||
|
||||
|
||||
def generate_digest(target_date: str, messages: list[dict],
                    semantic_keys: list[str]) -> bool:
    """Produce and save the daily digest for `target_date`.

    Nothing is generated when EPISODIC_DIR already holds
    daily-<target_date>.md.  Otherwise the messages are formatted, a prompt
    is built and sent to Sonnet, and the reply is written to that file.

    Returns True when a digest file was written, False when the date was
    skipped or the Sonnet call reported an error (a reply starting with
    "Error:", which is echoed to stderr).
    """
    digest_file = EPISODIC_DIR / f"daily-{target_date}.md"
    if digest_file.exists():
        print(f" Skipping {target_date} — digest already exists")
        return False

    transcript = format_conversation(messages)
    print(f" {len(messages)} messages, {len(transcript):,} chars")

    request = build_prompt(target_date, transcript, semantic_keys)
    approx_tokens = len(request) // 4
    print(f" Prompt: {len(request):,} chars (~{approx_tokens:,} tokens)")

    print(f" Calling Sonnet...")
    reply = call_sonnet(request)
    if reply.startswith("Error:"):
        print(f" {reply}", file=sys.stderr)
        return False

    with open(digest_file, "w") as out:
        out.write(reply)
    print(f" Written: {digest_file}")

    # Number of newline-separated pieces, i.e. newline count plus one.
    print(f" Done: {reply.count(chr(10)) + 1} lines")
    return True
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    With `--scan`, lists every date that has conversation data, marking the
    ones whose daily digest file already exists, then exits with status 0.
    Otherwise takes DATE and an optional END_DATE (ISO format), gathers the
    messages from all transcript files, and generates a digest for each day
    in the inclusive range that has messages.  Exits with status 1 and a
    usage message when no argument is given.
    """
    argv = sys.argv
    if len(argv) < 2:
        print(f"Usage: {argv[0]} DATE [END_DATE]")
        print(f" {argv[0]} --scan")
        sys.exit(1)

    if argv[1] == "--scan":
        print("Scanning all conversation transcripts...")
        dates = scan_all_dates()
        print(f"\n{len(dates)} dates with conversation data:")
        for day, count in dates.items():
            digest_file = EPISODIC_DIR / f"daily-{day}.md"
            existing = "✓" if digest_file.exists() else " "
            print(f" [{existing}] {day}: {count} messages")
        sys.exit(0)

    start_date = date.fromisoformat(argv[1])
    if len(argv) > 2:
        end_date = date.fromisoformat(argv[2])
    else:
        end_date = start_date

    # Merge the per-date messages of every transcript file.
    print("Scanning conversation transcripts...")
    all_messages = defaultdict(list)
    for jsonl in find_jsonl_files():
        for day, msgs in extract_messages_by_date(jsonl).items():
            all_messages[day].extend(msgs)

    # Order each day's messages chronologically.
    for day_messages in all_messages.values():
        day_messages.sort(key=lambda m: m["time"])

    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")

    # Walk the requested date range one day at a time.
    generated = 0
    current = start_date
    while current <= end_date:
        day_str = current.isoformat()
        if day_str not in all_messages:
            print(f"\n No messages found for {day_str}")
        else:
            print(f"\nGenerating digest for {day_str}...")
            if generate_digest(day_str, all_messages[day_str], semantic_keys):
                generated += 1
        current += timedelta(days=1)

    print(f"\nDone: {generated} digests generated")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,199 +0,0 @@
|
|||
"""store_helpers.py — shared helpers for scripts using the capnp store.
|
||||
|
||||
All memory content lives in the capnp store (poc-memory). These helpers
|
||||
replace the old pattern of globbing ~/.claude/memory/*.md and parsing
|
||||
section headers directly.
|
||||
"""
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
def _run_poc(args: list[str], timeout: int = 30) -> str:
|
||||
"""Run a poc-memory command and return stdout."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["poc-memory"] + args,
|
||||
capture_output=True, text=True, timeout=timeout
|
||||
)
|
||||
return result.stdout.strip()
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def list_keys() -> list[str]:
    """Return every node key reported by `poc-memory list-keys`.

    One key per output line; surrounding whitespace is trimmed and blank
    lines are dropped.
    """
    keys = []
    for line in _run_poc(["list-keys"]).split('\n'):
        name = line.strip()
        if name:
            keys.append(name)
    return keys
|
||||
|
||||
|
||||
def get_semantic_keys() -> list[str]:
    """Return the store keys that count as semantic memory.

    Journal entries (keys starting with "journal.md#") and a fixed list of
    system nodes are filtered out; everything else is kept in store order.
    """
    system_nodes = ("journal.md", "MEMORY.md", "where-am-i.md",
                    "work-queue.md", "work-state")
    semantic = []
    for key in list_keys():
        if key.startswith("journal.md#") or key in system_nodes:
            continue
        semantic.append(key)
    return semantic
|
||||
|
||||
|
||||
def get_journal_keys(n: int = 0) -> list[str]:
    """Return journal entry keys in descending key order (newest first).

    Journal keys embed their date (journal.md#j-2026-02-28t23-19-slug), so a
    reverse lexicographic sort puts the most recent entries first.

    If n > 0, only the first n keys of that ordering (the newest n) are
    returned; otherwise all journal keys are returned.
    """
    journal = sorted(
        (key for key in list_keys() if key.startswith("journal.md#")),
        reverse=True,
    )
    if n > 0:
        return journal[:n]
    return journal
|
||||
|
||||
|
||||
def render(key: str) -> str:
    """Return the output of `poc-memory render <key>` for one node.

    Yields an empty string when the underlying command fails or prints
    nothing.
    """
    rendered = _run_poc(["render", key])
    return rendered
|
||||
|
||||
|
||||
def get_recent_journal(n: int = 50) -> str:
    """Return the newest `n` journal entries as one text blob.

    Entries are rendered oldest first and separated by a blank line;
    entries that render to nothing are left out.  Replaces reading
    journal.md directly.
    """
    oldest_first = reversed(get_journal_keys(n))
    rendered = (render(key) for key in oldest_first)
    return "\n\n".join(text for text in rendered if text)
|
||||
|
||||
|
||||
def get_journal_entries_by_date(target_date: str) -> list[dict]:
    """Collect the journal entries written on `target_date` (YYYY-MM-DD).

    The entry's date and time come from the key when it has the form
    journal.md#j-2026-02-28t23-19-slug; otherwise the entry is rendered and
    its leading "## YYYY-MM-DDTHH:MM" header line is used.  Entries with
    neither, or with a different date, are skipped.

    Returns a list of dicts, in journal-key order (newest first), each with:
      'key'        - the store key
      'date'       - YYYY-MM-DD
      'time'       - HH:MM
      'timestamp'  - "<date>T<time>"
      'text'       - rendered content without the header line, stripped
      'source_ref' - text of the first "<!-- source: ... -->" marker, or None
    """
    key_stamp = re.compile(r'j-(\d{4}-\d{2}-\d{2})t(\d{2})-(\d{2})')
    header_stamp = re.compile(r'^## (\d{4}-\d{2}-\d{2})T(\d{2}):(\d{2})')
    source_marker = re.compile(r'<!-- source: (.+?) -->')
    header_line = re.compile(r'^## \d{4}-\d{2}-\d{2}T\d{2}:\d{2}\s*\n?')

    entries = []
    for key in get_journal_keys():
        stamp = key_stamp.search(key)
        if stamp:
            # Dated key: filter before paying for a render.
            if stamp.group(1) != target_date:
                continue
            content = render(key)
        else:
            # Unnamed key: the date is only available in the rendered header.
            content = render(key)
            stamp = header_stamp.match(content)
            if not stamp or stamp.group(1) != target_date:
                continue

        entry_date, hour, minute = stamp.groups()
        entry_time = f"{hour}:{minute}"

        marker = source_marker.search(content)
        source_ref = marker.group(1) if marker else None

        body = header_line.sub('', content)

        entries.append({
            "key": key,
            "date": entry_date,
            "time": entry_time,
            "timestamp": f"{entry_date}T{entry_time}",
            "text": body.strip(),
            "source_ref": source_ref,
        })

    return entries
|
||||
|
||||
|
||||
def get_topic_file_index() -> dict[str, list[str]]:
    """Map each topic file to the section headers found among its keys.

    A semantic key of the form "file#section" contributes "## section" to
    that file's list; a key without '#' just ensures the file has an entry.

    Returns {filename: [section_headers]}.
    """
    index: dict[str, list[str]] = {}

    for key in get_semantic_keys():
        filename, hash_mark, section = key.partition('#')
        headers = index.setdefault(filename, [])
        if hash_mark:
            headers.append(f"## {section}")

    return index
|
||||
|
||||
|
||||
def get_topic_summaries(max_chars_per_file: int = 500) -> str:
    """Return a short excerpt of every topic file, in filename order.

    Each file-level node is rendered and cut to `max_chars_per_file`
    characters (with a truncation marker appended when cut).  Files that
    render to nothing and a few system files are skipped.  Each excerpt is
    emitted under a "### <filename>" heading.
    """
    system_files = ("journal.md", "MEMORY.md", "where-am-i.md",
                    "work-queue.md")
    sections = []

    for filename in sorted(get_topic_file_index()):
        if filename in system_files:
            continue

        # File-level node content; empty means nothing to summarize.
        excerpt = render(filename)
        if not excerpt:
            continue

        if len(excerpt) > max_chars_per_file:
            excerpt = excerpt[:max_chars_per_file] + "\n[...truncated...]"

        sections.append(f"\n### {filename}\n{excerpt}")

    return '\n'.join(sections)
|
||||
|
||||
|
||||
def get_relations() -> str:
    """Return the raw output of `poc-memory list-edges`.

    Replaces the old approach of parsing mem markers out of files.
    """
    edges = _run_poc(["list-edges"])
    return edges
|
||||
|
||||
|
||||
def get_graph_stats() -> str:
    """Return a text report built from `poc-memory status` and `graph`.

    Each command's output, when non-empty, is placed under its own banner
    line; the graph output is limited to its first 150 lines.
    """
    sections = []

    status = _run_poc(["status"])
    if status:
        sections.append(f"=== poc-memory status ===\n{status}")

    graph = _run_poc(["graph"])
    if graph:
        head = '\n'.join(graph.split('\n')[:150])
        sections.append("=== poc-memory graph (first 150 lines) ===\n" + head)

    return '\n'.join(sections)
|
||||
|
||||
|
||||
def get_journal_range(start_date: str, end_date: str) -> str:
    """Return the journal entries dated within [start_date, end_date].

    Dates are YYYY-MM-DD strings compared lexicographically against the date
    embedded in each journal key; keys without an embedded date are ignored.
    Entries are rendered oldest first and joined by blank lines.  The result
    is capped to its last 500 lines.
    """
    date_in_key = re.compile(r'j-(\d{4}-\d{2}-\d{2})')
    chunks = []

    for key in reversed(get_journal_keys()):  # oldest first
        found = date_in_key.search(key)
        if found is None:
            continue
        if not (start_date <= found.group(1) <= end_date):
            continue
        content = render(key)
        if content:
            chunks.append(content)

    # Keeping the trailing 500 lines is a no-op for shorter texts.
    lines = "\n\n".join(chunks).split('\n')
    return '\n'.join(lines[-500:])
|
||||
|
|
@ -15,59 +15,40 @@ use crate::llm::{call_sonnet, parse_json_response};
|
|||
use crate::neuro;
|
||||
use crate::store::{self, Store, new_relation};
|
||||
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::util::memory_subdir;
|
||||
|
||||
/// Simple append-only log writer for consolidate-full.
|
||||
struct LogWriter {
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl LogWriter {
|
||||
fn new(path: &Path) -> Result<Self, String> {
|
||||
fs::write(path, "").map_err(|e| format!("create log: {}", e))?;
|
||||
Ok(LogWriter { path: path.to_path_buf() })
|
||||
}
|
||||
|
||||
fn write(&mut self, line: &str) -> Result<(), String> {
|
||||
let mut f = fs::OpenOptions::new()
|
||||
.append(true)
|
||||
.open(&self.path)
|
||||
.map_err(|e| format!("open log: {}", e))?;
|
||||
writeln!(f, "{}", line)
|
||||
.map_err(|e| format!("write log: {}", e))
|
||||
}
|
||||
/// Append a line to the log buffer.
|
||||
fn log_line(buf: &mut String, line: &str) {
|
||||
buf.push_str(line);
|
||||
buf.push('\n');
|
||||
}
|
||||
|
||||
/// Run the full autonomous consolidation pipeline with logging.
|
||||
pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
|
||||
let start = std::time::Instant::now();
|
||||
let log_path = memory_subdir("agent-results")?.join("consolidate-full.log");
|
||||
let mut log = LogWriter::new(&log_path)?;
|
||||
let log_key = format!("_consolidate-log-{}",
|
||||
store::format_datetime(store::now_epoch()).replace([':', '-', 'T'], ""));
|
||||
let mut log_buf = String::new();
|
||||
|
||||
log.write("=== CONSOLIDATE FULL ===")?;
|
||||
log.write(&format!("Started: {}", store::format_datetime(store::now_epoch())))?;
|
||||
log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?;
|
||||
log.write("")?;
|
||||
log_line(&mut log_buf, "=== CONSOLIDATE FULL ===");
|
||||
log_line(&mut log_buf, &format!("Started: {}", store::format_datetime(store::now_epoch())));
|
||||
log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
|
||||
log_line(&mut log_buf, "");
|
||||
|
||||
// --- Step 1: Plan ---
|
||||
log.write("--- Step 1: Plan ---")?;
|
||||
log_line(&mut log_buf, "--- Step 1: Plan ---");
|
||||
let plan = neuro::consolidation_plan(store);
|
||||
let plan_text = neuro::format_plan(&plan);
|
||||
log.write(&plan_text)?;
|
||||
log_line(&mut log_buf, &plan_text);
|
||||
println!("{}", plan_text);
|
||||
|
||||
let total_agents = plan.replay_count + plan.linker_count
|
||||
+ plan.separator_count + plan.transfer_count
|
||||
+ if plan.run_health { 1 } else { 0 };
|
||||
log.write(&format!("Total agents to run: {}", total_agents))?;
|
||||
log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents));
|
||||
|
||||
// --- Step 2: Execute agents ---
|
||||
log.write("\n--- Step 2: Execute agents ---")?;
|
||||
let mut reports: Vec<PathBuf> = Vec::new();
|
||||
log_line(&mut log_buf, "\n--- Step 2: Execute agents ---");
|
||||
let mut reports: Vec<String> = Vec::new();
|
||||
let mut agent_num = 0usize;
|
||||
let mut agent_errors = 0usize;
|
||||
|
||||
|
|
@ -121,7 +102,7 @@ pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
|
|||
format!("[{}/{}] {}", agent_num, runs.len(), agent_type)
|
||||
};
|
||||
|
||||
log.write(&format!("\n{}", label))?;
|
||||
log_line(&mut log_buf, &format!("\n{}", label));
|
||||
println!("{}", label);
|
||||
|
||||
// Reload store to pick up changes from previous agents
|
||||
|
|
@ -133,191 +114,173 @@ pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
|
|||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR building prompt: {}", e);
|
||||
log.write(&msg)?;
|
||||
log_line(&mut log_buf, &msg);
|
||||
eprintln!("{}", msg);
|
||||
agent_errors += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
log.write(&format!(" Prompt: {} chars (~{} tokens)",
|
||||
prompt.len(), prompt.len() / 4))?;
|
||||
log_line(&mut log_buf, &format!(" Prompt: {} chars (~{} tokens)",
|
||||
prompt.len(), prompt.len() / 4));
|
||||
|
||||
let response = match call_sonnet(&prompt, 300) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR from Sonnet: {}", e);
|
||||
log.write(&msg)?;
|
||||
log_line(&mut log_buf, &msg);
|
||||
eprintln!("{}", msg);
|
||||
agent_errors += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Save report
|
||||
// Store report as a node
|
||||
let ts = store::format_datetime(store::now_epoch())
|
||||
.replace([':', '-', 'T'], "");
|
||||
let report_name = format!("consolidation-{}-{}.md", agent_type, ts);
|
||||
let report_path = memory_subdir("agent-results")?.join(&report_name);
|
||||
fs::write(&report_path, &response)
|
||||
.map_err(|e| format!("write report: {}", e))?;
|
||||
reports.push(report_path.clone());
|
||||
let report_key = format!("_consolidation-{}-{}", agent_type, ts);
|
||||
store.upsert_provenance(&report_key, &response,
|
||||
store::Provenance::AgentConsolidate).ok();
|
||||
reports.push(report_key.clone());
|
||||
|
||||
let msg = format!(" Done: {} lines → {}", response.lines().count(), report_name);
|
||||
log.write(&msg)?;
|
||||
let msg = format!(" Done: {} lines → {}", response.lines().count(), report_key);
|
||||
log_line(&mut log_buf, &msg);
|
||||
println!("{}", msg);
|
||||
}
|
||||
|
||||
log.write(&format!("\nAgents complete: {} run, {} errors",
|
||||
agent_num - agent_errors, agent_errors))?;
|
||||
log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors",
|
||||
agent_num - agent_errors, agent_errors));
|
||||
|
||||
// --- Step 3: Apply consolidation actions ---
|
||||
log.write("\n--- Step 3: Apply consolidation actions ---")?;
|
||||
log_line(&mut log_buf, "\n--- Step 3: Apply consolidation actions ---");
|
||||
println!("\n--- Applying consolidation actions ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
if reports.is_empty() {
|
||||
log.write(" No reports to apply.")?;
|
||||
log_line(&mut log_buf, " No reports to apply.");
|
||||
} else {
|
||||
match apply_consolidation(store, true, None) {
|
||||
Ok(()) => log.write(" Applied.")?,
|
||||
Ok(()) => log_line(&mut log_buf, " Applied."),
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR applying consolidation: {}", e);
|
||||
log.write(&msg)?;
|
||||
log_line(&mut log_buf, &msg);
|
||||
eprintln!("{}", msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- Step 3b: Link orphans ---
|
||||
log.write("\n--- Step 3b: Link orphans ---")?;
|
||||
log_line(&mut log_buf, "\n--- Step 3b: Link orphans ---");
|
||||
println!("\n--- Linking orphan nodes ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15);
|
||||
log.write(&format!(" {} orphans, {} links added", lo_orphans, lo_added))?;
|
||||
log_line(&mut log_buf, &format!(" {} orphans, {} links added", lo_orphans, lo_added));
|
||||
|
||||
// --- Step 3c: Cap degree ---
|
||||
log.write("\n--- Step 3c: Cap degree ---")?;
|
||||
log_line(&mut log_buf, "\n--- Step 3c: Cap degree ---");
|
||||
println!("\n--- Capping node degree ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
match store.cap_degree(50) {
|
||||
Ok((hubs, pruned)) => {
|
||||
store.save()?;
|
||||
log.write(&format!(" {} hubs capped, {} edges pruned", hubs, pruned))?;
|
||||
log_line(&mut log_buf, &format!(" {} hubs capped, {} edges pruned", hubs, pruned));
|
||||
}
|
||||
Err(e) => log.write(&format!(" ERROR: {}", e))?,
|
||||
Err(e) => log_line(&mut log_buf, &format!(" ERROR: {}", e)),
|
||||
}
|
||||
|
||||
// --- Step 4: Digest auto ---
|
||||
log.write("\n--- Step 4: Digest auto ---")?;
|
||||
log_line(&mut log_buf, "\n--- Step 4: Digest auto ---");
|
||||
println!("\n--- Generating missing digests ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
match digest::digest_auto(store) {
|
||||
Ok(()) => log.write(" Digests done.")?,
|
||||
Ok(()) => log_line(&mut log_buf, " Digests done."),
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR in digest auto: {}", e);
|
||||
log.write(&msg)?;
|
||||
log_line(&mut log_buf, &msg);
|
||||
eprintln!("{}", msg);
|
||||
}
|
||||
}
|
||||
|
||||
// --- Step 5: Apply digest links ---
|
||||
log.write("\n--- Step 5: Apply digest links ---")?;
|
||||
log_line(&mut log_buf, "\n--- Step 5: Apply digest links ---");
|
||||
println!("\n--- Applying digest links ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
let links = digest::parse_all_digest_links()?;
|
||||
let links = digest::parse_all_digest_links(store);
|
||||
let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links);
|
||||
store.save()?;
|
||||
log.write(&format!(" {} links applied, {} skipped, {} fallbacks",
|
||||
applied, skipped, fallbacks))?;
|
||||
log_line(&mut log_buf, &format!(" {} links applied, {} skipped, {} fallbacks",
|
||||
applied, skipped, fallbacks));
|
||||
|
||||
// --- Step 6: Summary ---
|
||||
let elapsed = start.elapsed();
|
||||
log.write("\n--- Summary ---")?;
|
||||
log.write(&format!("Finished: {}", store::format_datetime(store::now_epoch())))?;
|
||||
log.write(&format!("Duration: {:.0}s", elapsed.as_secs_f64()))?;
|
||||
log_line(&mut log_buf, "\n--- Summary ---");
|
||||
log_line(&mut log_buf, &format!("Finished: {}", store::format_datetime(store::now_epoch())));
|
||||
log_line(&mut log_buf, &format!("Duration: {:.0}s", elapsed.as_secs_f64()));
|
||||
*store = Store::load()?;
|
||||
log.write(&format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()))?;
|
||||
log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
|
||||
|
||||
let summary = format!(
|
||||
"\n=== CONSOLIDATE FULL COMPLETE ===\n\
|
||||
Duration: {:.0}s\n\
|
||||
Agents: {} run, {} errors\n\
|
||||
Nodes: {} Relations: {}\n\
|
||||
Log: {}\n",
|
||||
Nodes: {} Relations: {}\n",
|
||||
elapsed.as_secs_f64(),
|
||||
agent_num - agent_errors, agent_errors,
|
||||
store.nodes.len(), store.relations.len(),
|
||||
log_path.display(),
|
||||
);
|
||||
log.write(&summary)?;
|
||||
log_line(&mut log_buf, &summary);
|
||||
println!("{}", summary);
|
||||
|
||||
// Store the log as a node
|
||||
store.upsert_provenance(&log_key, &log_buf,
|
||||
store::Provenance::AgentConsolidate).ok();
|
||||
store.save()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find the most recent set of consolidation reports.
|
||||
fn find_consolidation_reports() -> Result<Vec<PathBuf>, String> {
|
||||
let dir = memory_subdir("agent-results")?;
|
||||
let mut reports: Vec<PathBuf> = fs::read_dir(&dir)
|
||||
.map(|entries| {
|
||||
entries.filter_map(|e| e.ok())
|
||||
.map(|e| e.path())
|
||||
.filter(|p| {
|
||||
p.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.map(|n| n.starts_with("consolidation-") && n.ends_with(".md"))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
reports.sort();
|
||||
reports.reverse();
|
||||
/// Find the most recent set of consolidation report keys from the store.
|
||||
fn find_consolidation_reports(store: &Store) -> Vec<String> {
|
||||
let mut keys: Vec<&String> = store.nodes.keys()
|
||||
.filter(|k| k.starts_with("_consolidation-"))
|
||||
.collect();
|
||||
keys.sort();
|
||||
keys.reverse();
|
||||
|
||||
if reports.is_empty() { return Ok(reports); }
|
||||
if keys.is_empty() { return Vec::new(); }
|
||||
|
||||
// Group by timestamp (last segment of stem before .md)
|
||||
let latest_ts = reports[0].file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("")
|
||||
.rsplit('-').next().unwrap_or("")
|
||||
.to_string();
|
||||
// Group by timestamp (last segment after last '-')
|
||||
let latest_ts = keys[0].rsplit('-').next().unwrap_or("").to_string();
|
||||
|
||||
reports.retain(|r| {
|
||||
r.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("")
|
||||
.ends_with(latest_ts.as_str())
|
||||
});
|
||||
|
||||
Ok(reports)
|
||||
keys.into_iter()
|
||||
.filter(|k| k.ends_with(&latest_ts))
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_consolidation_prompt(reports: &[PathBuf]) -> Result<String, String> {
|
||||
fn build_consolidation_prompt(store: &Store, report_keys: &[String]) -> Result<String, String> {
|
||||
let mut report_text = String::new();
|
||||
for r in reports {
|
||||
let content = fs::read_to_string(r)
|
||||
.map_err(|e| format!("read {}: {}", r.display(), e))?;
|
||||
for key in report_keys {
|
||||
let content = store.nodes.get(key)
|
||||
.map(|n| n.content.as_str())
|
||||
.unwrap_or("");
|
||||
report_text.push_str(&format!("\n{}\n## Report: {}\n\n{}\n",
|
||||
"=".repeat(60),
|
||||
r.file_stem().and_then(|s| s.to_str()).unwrap_or(""),
|
||||
content));
|
||||
"=".repeat(60), key, content));
|
||||
}
|
||||
|
||||
neuro::load_prompt("consolidation", &[("{{REPORTS}}", &report_text)])
|
||||
}
|
||||
|
||||
/// Run the full apply-consolidation pipeline.
|
||||
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Option<&str>) -> Result<(), String> {
|
||||
let reports = if let Some(path) = report_file {
|
||||
vec![PathBuf::from(path)]
|
||||
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_key: Option<&str>) -> Result<(), String> {
|
||||
let reports = if let Some(key) = report_key {
|
||||
vec![key.to_string()]
|
||||
} else {
|
||||
find_consolidation_reports()?
|
||||
find_consolidation_reports(store)
|
||||
};
|
||||
|
||||
if reports.is_empty() {
|
||||
|
|
@ -328,11 +291,11 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
|
|||
|
||||
println!("Found {} reports:", reports.len());
|
||||
for r in &reports {
|
||||
println!(" {}", r.file_name().and_then(|s| s.to_str()).unwrap_or("?"));
|
||||
println!(" {}", r);
|
||||
}
|
||||
|
||||
println!("\nExtracting actions from reports...");
|
||||
let prompt = build_consolidation_prompt(&reports)?;
|
||||
let prompt = build_consolidation_prompt(store, &reports)?;
|
||||
println!(" Prompt: {} chars", prompt.len());
|
||||
|
||||
let response = call_sonnet(&prompt, 300)?;
|
||||
|
|
@ -343,14 +306,14 @@ pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_file: Optio
|
|||
|
||||
println!(" {} actions extracted", actions.len());
|
||||
|
||||
// Save actions
|
||||
// Store actions in the store
|
||||
let timestamp = store::format_datetime(store::now_epoch())
|
||||
.replace([':', '-'], "");
|
||||
let actions_path = memory_subdir("agent-results")?
|
||||
.join(format!("consolidation-actions-{}.json", timestamp));
|
||||
fs::write(&actions_path, serde_json::to_string_pretty(&actions_value).unwrap())
|
||||
.map_err(|e| format!("write {}: {}", actions_path.display(), e))?;
|
||||
println!(" Saved: {}", actions_path.display());
|
||||
let actions_key = format!("_consolidation-actions-{}", timestamp);
|
||||
let actions_json = serde_json::to_string_pretty(&actions_value).unwrap();
|
||||
store.upsert_provenance(&actions_key, &actions_json,
|
||||
store::Provenance::AgentConsolidate).ok();
|
||||
println!(" Stored: {}", actions_key);
|
||||
|
||||
let link_actions: Vec<_> = actions.iter()
|
||||
.filter(|a| a.get("action").and_then(|v| v.as_str()) == Some("link"))
|
||||
|
|
|
|||
101
src/digest.rs
101
src/digest.rs
|
|
@ -8,13 +8,10 @@
|
|||
use crate::llm::{call_sonnet, semantic_keys};
|
||||
use crate::store::{self, Store, new_relation};
|
||||
use crate::neuro;
|
||||
use crate::util::memory_subdir;
|
||||
|
||||
use chrono::{Datelike, Duration, Local, NaiveDate};
|
||||
use regex::Regex;
|
||||
use std::collections::BTreeSet;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
// --- Digest level descriptors ---
|
||||
|
||||
|
|
@ -113,19 +110,24 @@ const MONTHLY: DigestLevel = DigestLevel {
|
|||
|
||||
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
|
||||
|
||||
/// Build the store key under which a digest node is kept.
///
/// The key is `<level_name>-<label>.md`, for example "daily-2026-03-04.md"
/// or "weekly-2026-W09.md". This is the same key format the old
/// import_file() path produced.
fn digest_node_key(level_name: &str, label: &str) -> String {
    let mut key = String::with_capacity(level_name.len() + label.len() + 4);
    key.push_str(level_name);
    key.push('-');
    key.push_str(label);
    key.push_str(".md");
    key
}
|
||||
|
||||
// --- Input gathering ---
|
||||
|
||||
/// Load child digest files from the episodic directory.
|
||||
fn load_child_digests(prefix: &str, labels: &[String]) -> Result<Vec<(String, String)>, String> {
|
||||
let dir = memory_subdir("episodic")?;
|
||||
/// Load child digest content from the store.
|
||||
fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> Vec<(String, String)> {
|
||||
let mut digests = Vec::new();
|
||||
for label in labels {
|
||||
let path = dir.join(format!("{}-{}.md", prefix, label));
|
||||
if let Ok(content) = fs::read_to_string(&path) {
|
||||
digests.push((label.clone(), content));
|
||||
let key = digest_node_key(prefix, label);
|
||||
if let Some(node) = store.nodes.get(&key) {
|
||||
digests.push((label.clone(), node.content.clone()));
|
||||
}
|
||||
}
|
||||
Ok(digests)
|
||||
digests
|
||||
}
|
||||
|
||||
/// Unified: gather inputs for any digest level.
|
||||
|
|
@ -142,7 +144,7 @@ fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<(String, Vec<
|
|||
.collect::<BTreeSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
load_child_digests(child_name, &child_labels)?
|
||||
load_child_digests(store, child_name, &child_labels)
|
||||
} else {
|
||||
// Leaf level: scan store for journal entries matching label
|
||||
let date_re = Regex::new(&format!(
|
||||
|
|
@ -227,14 +229,10 @@ fn generate_digest(
|
|||
println!(" Calling Sonnet...");
|
||||
let digest = call_sonnet(&prompt, level.timeout)?;
|
||||
|
||||
let output_path = memory_subdir("episodic")?
|
||||
.join(format!("{}-{}.md", level.name, label));
|
||||
fs::write(&output_path, &digest)
|
||||
.map_err(|e| format!("write {}: {}", output_path.display(), e))?;
|
||||
println!(" Written: {}", output_path.display());
|
||||
|
||||
store.import_file(&output_path)?;
|
||||
let key = digest_node_key(level.name, label);
|
||||
store.upsert_provenance(&key, &digest, store::Provenance::AgentDigest)?;
|
||||
store.save()?;
|
||||
println!(" Stored: {}", key);
|
||||
|
||||
println!(" Done: {} lines", digest.lines().count());
|
||||
Ok(())
|
||||
|
|
@ -254,7 +252,6 @@ pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), St
|
|||
|
||||
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
|
||||
let today = Local::now().format("%Y-%m-%d").to_string();
|
||||
let epi = memory_subdir("episodic")?;
|
||||
|
||||
// Collect all dates with journal entries
|
||||
let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}").unwrap();
|
||||
|
|
@ -277,7 +274,8 @@ pub fn digest_auto(store: &mut Store) -> Result<(), String> {
|
|||
|
||||
for arg in &candidates {
|
||||
let (label, inputs) = gather(level, store, arg)?;
|
||||
if epi.join(format!("{}-{}.md", level.name, label)).exists() {
|
||||
let key = digest_node_key(level.name, &label);
|
||||
if store.nodes.contains_key(&key) {
|
||||
skipped += 1;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -357,21 +355,8 @@ fn normalize_link_key(raw: &str) -> String {
|
|||
key
|
||||
}
|
||||
|
||||
/// Parse the Links section from a single digest file.
|
||||
fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
|
||||
let content = match fs::read_to_string(path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
|
||||
let digest_name = path.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("");
|
||||
let digest_key = format!("{}.md", digest_name);
|
||||
let filename = path.file_name()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
/// Parse the Links section from a digest node's content.
|
||||
fn parse_digest_node_links(key: &str, content: &str) -> Vec<DigestLink> {
|
||||
|
||||
let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
|
||||
let header_re = Regex::new(r"^##\s+Links").unwrap();
|
||||
|
|
@ -399,8 +384,8 @@ fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
|
|||
let mut target = normalize_link_key(raw_target);
|
||||
|
||||
// Replace self-references with digest key
|
||||
if source.is_empty() { source = digest_key.clone(); }
|
||||
if target.is_empty() { target = digest_key.clone(); }
|
||||
if source.is_empty() { source = key.to_string(); }
|
||||
if target.is_empty() { target = key.to_string(); }
|
||||
|
||||
// Handle "this daily/weekly/monthly" in raw text
|
||||
let raw_s_lower = raw_source.to_lowercase();
|
||||
|
|
@ -408,49 +393,39 @@ fn parse_digest_file_links(path: &Path) -> Vec<DigestLink> {
|
|||
if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
|
||||
|| raw_s_lower.contains("this monthly")
|
||||
{
|
||||
source = digest_key.clone();
|
||||
source = key.to_string();
|
||||
}
|
||||
if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
|
||||
|| raw_t_lower.contains("this monthly")
|
||||
{
|
||||
target = digest_key.clone();
|
||||
target = key.to_string();
|
||||
}
|
||||
|
||||
// Skip NEW: and self-links
|
||||
if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
|
||||
if source == target { continue; }
|
||||
|
||||
links.push(DigestLink { source, target, reason, file: filename.clone() });
|
||||
links.push(DigestLink { source, target, reason, file: key.to_string() });
|
||||
}
|
||||
}
|
||||
|
||||
links
|
||||
}
|
||||
|
||||
/// Parse links from all digest files in the episodic dir.
|
||||
pub fn parse_all_digest_links() -> Result<Vec<DigestLink>, String> {
|
||||
let dir = memory_subdir("episodic")?;
|
||||
/// Parse links from all digest nodes in the store.
|
||||
pub fn parse_all_digest_links(store: &Store) -> Vec<DigestLink> {
|
||||
let mut all_links = Vec::new();
|
||||
|
||||
for pattern in &["daily-*.md", "weekly-*.md", "monthly-*.md"] {
|
||||
if let Ok(entries) = fs::read_dir(&dir) {
|
||||
let mut files: Vec<PathBuf> = entries
|
||||
.filter_map(|e| e.ok())
|
||||
.map(|e| e.path())
|
||||
.filter(|p| {
|
||||
p.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.map(|n| {
|
||||
let prefix = pattern.split('*').next().unwrap_or("");
|
||||
n.starts_with(prefix) && n.ends_with(".md")
|
||||
})
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect();
|
||||
files.sort();
|
||||
for path in files {
|
||||
all_links.extend(parse_digest_file_links(&path));
|
||||
}
|
||||
let mut digest_keys: Vec<&String> = store.nodes.keys()
|
||||
.filter(|k| k.starts_with("daily-")
|
||||
|| k.starts_with("weekly-")
|
||||
|| k.starts_with("monthly-"))
|
||||
.collect();
|
||||
digest_keys.sort();
|
||||
|
||||
for key in digest_keys {
|
||||
if let Some(node) = store.nodes.get(key) {
|
||||
all_links.extend(parse_digest_node_links(key, &node.content));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -458,7 +433,7 @@ pub fn parse_all_digest_links() -> Result<Vec<DigestLink>, String> {
|
|||
let mut seen = std::collections::HashSet::new();
|
||||
all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
|
||||
|
||||
Ok(all_links)
|
||||
all_links
|
||||
}
|
||||
|
||||
/// Apply parsed digest links to the store.
|
||||
|
|
|
|||
|
|
@ -13,10 +13,60 @@ use crate::store::{self, Store, new_node, new_relation};
|
|||
|
||||
use regex::Regex;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::HashSet;
|
||||
use std::fs;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
use crate::util::memory_subdir;
|
||||
use crate::store::StoreView;
|
||||
|
||||
/// Build the content-hash dedup key for the transcript at `path`.
///
/// The whole file is read and hashed with `DefaultHasher`; the key is
/// `_mined-transcripts.md#h-` followed by the 64-bit hash as 16 hex digits.
/// Per the original note, this is the same key `experience_mine` uses to
/// mark a transcript as mined.
///
/// # Errors
/// Returns `"read <path>: <io error>"` when the file cannot be read.
pub fn transcript_dedup_key(path: &str) -> Result<String, String> {
    let contents = match fs::read(path) {
        Ok(data) => data,
        Err(err) => return Err(format!("read {}: {}", path, err)),
    };

    let mut state = DefaultHasher::new();
    contents.hash(&mut state);
    let digest = state.finish();

    Ok(format!("_mined-transcripts.md#h-{:016x}", digest))
}
|
||||
|
||||
/// Check if a transcript has already been mined (dedup key exists in store).
|
||||
pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
|
||||
match transcript_dedup_key(path) {
|
||||
Ok(key) => store.node_content(&key).is_some(),
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the filename-based dedup key for a transcript.
///
/// The key is `_mined-transcripts.md#f-` followed by the file stem of `path`
/// (lossily converted to UTF-8). If the path has no file stem, the whole
/// `path` string is used instead. No file is read; per the original note
/// this is what the daemon reconcile loop relies on.
pub fn transcript_filename_key(path: &str) -> String {
    let stem = match std::path::Path::new(path).file_stem() {
        Some(os_stem) => os_stem.to_string_lossy().into_owned(),
        None => path.to_string(),
    };
    format!("_mined-transcripts.md#f-{}", stem)
}
|
||||
|
||||
/// Get the set of all mined transcript keys (both content-hash and filename)
|
||||
/// from the store. Load once per daemon tick, check many.
|
||||
pub fn mined_transcript_keys() -> HashSet<String> {
|
||||
use crate::store::AnyView;
|
||||
let Ok(view) = AnyView::load() else { return HashSet::new() };
|
||||
let mut keys = HashSet::new();
|
||||
view.for_each_node(|key, _, _| {
|
||||
if key.starts_with("_mined-transcripts.md#") {
|
||||
keys.insert(key.to_string());
|
||||
}
|
||||
});
|
||||
keys
|
||||
}
|
||||
|
||||
/// Check if a transcript has been mined, given a pre-loaded set of mined keys.
|
||||
/// Checks filename-based key only (no file read). Sessions mined before the
|
||||
/// filename key was added will pass through and short-circuit in experience_mine
|
||||
/// via the content hash check — a one-time cost on first restart after this change.
|
||||
pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> bool {
|
||||
mined.contains(&transcript_filename_key(path))
|
||||
}
|
||||
|
||||
/// Extract user/assistant messages with line numbers from a JSONL transcript.
|
||||
/// (line_number, role, text, timestamp)
|
||||
|
|
@ -187,21 +237,6 @@ pub fn journal_enrich(
|
|||
}
|
||||
}
|
||||
|
||||
// Save result to agent-results
|
||||
let timestamp = store::format_datetime(store::now_epoch())
|
||||
.replace([':', '-'], "");
|
||||
let result_file = memory_subdir("agent-results")?
|
||||
.join(format!("{}.json", timestamp));
|
||||
let output = serde_json::json!({
|
||||
"timestamp": timestamp,
|
||||
"jsonl_path": jsonl_path,
|
||||
"entry_text": &entry_text[..entry_text.len().min(500)],
|
||||
"agent_result": result,
|
||||
});
|
||||
fs::write(&result_file, serde_json::to_string_pretty(&output).unwrap())
|
||||
.map_err(|e| format!("write {}: {}", result_file.display(), e))?;
|
||||
println!(" Results saved: {}", result_file.display());
|
||||
|
||||
store.save()?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -320,6 +355,7 @@ pub fn experience_mine(
|
|||
let mut node = new_node(&key, &full_content);
|
||||
node.node_type = store::NodeType::EpisodicSession;
|
||||
node.category = store::Category::Observation;
|
||||
node.provenance = store::Provenance::AgentExperienceMine;
|
||||
let _ = store.upsert_node(node);
|
||||
count += 1;
|
||||
|
||||
|
|
@ -328,11 +364,19 @@ pub fn experience_mine(
|
|||
}
|
||||
|
||||
// Record this transcript as mined (even if count == 0, to prevent re-runs)
|
||||
// Two keys: content hash (exact dedup) and filename (fast daemon reconcile)
|
||||
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
|
||||
let mut dedup_node = new_node(&dedup_key, &dedup_content);
|
||||
dedup_node.category = store::Category::Task;
|
||||
dedup_node.provenance = store::Provenance::AgentExperienceMine;
|
||||
let _ = store.upsert_node(dedup_node);
|
||||
|
||||
let fname_key = transcript_filename_key(jsonl_path);
|
||||
let mut fname_node = new_node(&fname_key, &dedup_content);
|
||||
fname_node.category = store::Category::Task;
|
||||
fname_node.provenance = store::Provenance::AgentExperienceMine;
|
||||
let _ = store.upsert_node(fname_node);
|
||||
|
||||
if count > 0 {
|
||||
println!(" Saved {} new journal entries.", count);
|
||||
}
|
||||
|
|
|
|||
976
src/knowledge.rs
Normal file
976
src/knowledge.rs
Normal file
|
|
@ -0,0 +1,976 @@
|
|||
// knowledge.rs — knowledge production agents and convergence loop
|
||||
//
|
||||
// Rust port of knowledge_agents.py + knowledge_loop.py.
|
||||
// Four agents mine the memory graph for new knowledge:
|
||||
// 1. Observation — extract facts from raw conversations
|
||||
// 2. Extractor — find patterns in node clusters
|
||||
// 3. Connector — find cross-domain structural connections
|
||||
// 4. Challenger — stress-test existing knowledge nodes
|
||||
//
|
||||
// The loop runs agents in sequence, applies results, measures
|
||||
// convergence via graph-structural metrics (sigma, CC, communities).
|
||||
|
||||
use crate::graph::Graph;
|
||||
use crate::llm;
|
||||
use crate::spectral;
|
||||
use crate::store::{self, Store, new_relation, RelationType};
|
||||
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
fn memory_dir() -> PathBuf {
|
||||
store::memory_dir()
|
||||
}
|
||||
|
||||
fn prompts_dir() -> PathBuf {
|
||||
let manifest = env!("CARGO_MANIFEST_DIR");
|
||||
PathBuf::from(manifest).join("prompts")
|
||||
}
|
||||
|
||||
/// Directory scanned for conversation transcripts: `$HOME/.claude/projects`.
///
/// Falls back to `./.claude/projects` when `HOME` is unset or not valid
/// Unicode.
fn projects_dir() -> PathBuf {
    let home = match std::env::var("HOME") {
        Ok(dir) => dir,
        Err(_) => String::from("."),
    };
    Path::new(&home).join(".claude/projects")
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Action types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Action {
|
||||
pub kind: ActionKind,
|
||||
pub confidence: Confidence,
|
||||
pub weight: f64,
|
||||
pub depth: i32,
|
||||
pub applied: Option<bool>,
|
||||
pub rejected_reason: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum ActionKind {
|
||||
WriteNode {
|
||||
key: String,
|
||||
content: String,
|
||||
covers: Vec<String>,
|
||||
},
|
||||
Link {
|
||||
source: String,
|
||||
target: String,
|
||||
},
|
||||
Refine {
|
||||
key: String,
|
||||
content: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Confidence {
|
||||
High,
|
||||
Medium,
|
||||
Low,
|
||||
}
|
||||
|
||||
impl Confidence {
|
||||
fn weight(self) -> f64 {
|
||||
match self {
|
||||
Self::High => 1.0,
|
||||
Self::Medium => 0.6,
|
||||
Self::Low => 0.3,
|
||||
}
|
||||
}
|
||||
|
||||
fn value(self) -> f64 {
|
||||
match self {
|
||||
Self::High => 0.9,
|
||||
Self::Medium => 0.6,
|
||||
Self::Low => 0.3,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse(s: &str) -> Self {
|
||||
match s.to_lowercase().as_str() {
|
||||
"high" => Self::High,
|
||||
"low" => Self::Low,
|
||||
_ => Self::Medium,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Action parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub fn parse_write_nodes(text: &str) -> Vec<Action> {
|
||||
let re = Regex::new(r"(?s)WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE").unwrap();
|
||||
let conf_re = Regex::new(r"(?i)CONFIDENCE:\s*(high|medium|low)").unwrap();
|
||||
let covers_re = Regex::new(r"COVERS:\s*(.+)").unwrap();
|
||||
|
||||
re.captures_iter(text)
|
||||
.map(|cap| {
|
||||
let key = cap[1].to_string();
|
||||
let mut content = cap[2].trim().to_string();
|
||||
|
||||
let confidence = conf_re
|
||||
.captures(&content)
|
||||
.map(|c| Confidence::parse(&c[1]))
|
||||
.unwrap_or(Confidence::Medium);
|
||||
content = conf_re.replace(&content, "").trim().to_string();
|
||||
|
||||
let covers: Vec<String> = covers_re
|
||||
.captures(&content)
|
||||
.map(|c| c[1].split(',').map(|s| s.trim().to_string()).collect())
|
||||
.unwrap_or_default();
|
||||
content = covers_re.replace(&content, "").trim().to_string();
|
||||
|
||||
Action {
|
||||
weight: confidence.weight(),
|
||||
kind: ActionKind::WriteNode { key, content, covers },
|
||||
confidence,
|
||||
depth: 0,
|
||||
applied: None,
|
||||
rejected_reason: None,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn parse_links(text: &str) -> Vec<Action> {
|
||||
let re = Regex::new(r"(?m)^LINK\s+(\S+)\s+(\S+)").unwrap();
|
||||
re.captures_iter(text)
|
||||
.map(|cap| Action {
|
||||
kind: ActionKind::Link {
|
||||
source: cap[1].to_string(),
|
||||
target: cap[2].to_string(),
|
||||
},
|
||||
confidence: Confidence::Low,
|
||||
weight: 0.3,
|
||||
depth: -1,
|
||||
applied: None,
|
||||
rejected_reason: None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn parse_refines(text: &str) -> Vec<Action> {
|
||||
let re = Regex::new(r"(?s)REFINE\s+(\S+)\s*\n(.*?)END_REFINE").unwrap();
|
||||
re.captures_iter(text)
|
||||
.map(|cap| {
|
||||
let key = cap[1].trim_matches('*').trim().to_string();
|
||||
Action {
|
||||
kind: ActionKind::Refine {
|
||||
key,
|
||||
content: cap[2].trim().to_string(),
|
||||
},
|
||||
confidence: Confidence::Medium,
|
||||
weight: 0.7,
|
||||
depth: 0,
|
||||
applied: None,
|
||||
rejected_reason: None,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn parse_all_actions(text: &str) -> Vec<Action> {
|
||||
let mut actions = parse_write_nodes(text);
|
||||
actions.extend(parse_links(text));
|
||||
actions.extend(parse_refines(text));
|
||||
actions
|
||||
}
|
||||
|
||||
pub fn count_no_ops(text: &str) -> usize {
|
||||
let no_conn = Regex::new(r"\bNO_CONNECTION\b").unwrap().find_iter(text).count();
|
||||
let affirm = Regex::new(r"\bAFFIRM\b").unwrap().find_iter(text).count();
|
||||
let no_extract = Regex::new(r"\bNO_EXTRACTION\b").unwrap().find_iter(text).count();
|
||||
no_conn + affirm + no_extract
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Inference depth tracking
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const DEPTH_DB_KEY: &str = "_knowledge-depths";
|
||||
|
||||
/// Inference depth per node key.
///
/// Persisted as JSON in the store node named by `DEPTH_DB_KEY`.
#[derive(Default)]
pub struct DepthDb {
    /// Node key → recorded depth; keys that are absent read as depth 0.
    depths: HashMap<String, i32>,
}
|
||||
|
||||
impl DepthDb {
|
||||
pub fn load(store: &Store) -> Self {
|
||||
let depths = store.nodes.get(DEPTH_DB_KEY)
|
||||
.and_then(|n| serde_json::from_str(&n.content).ok())
|
||||
.unwrap_or_default();
|
||||
Self { depths }
|
||||
}
|
||||
|
||||
pub fn save(&self, store: &mut Store) {
|
||||
if let Ok(json) = serde_json::to_string(&self.depths) {
|
||||
store.upsert_provenance(DEPTH_DB_KEY, &json,
|
||||
store::Provenance::AgentKnowledgeObservation).ok();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get(&self, key: &str) -> i32 {
|
||||
self.depths.get(key).copied().unwrap_or(0)
|
||||
}
|
||||
|
||||
pub fn set(&mut self, key: String, depth: i32) {
|
||||
self.depths.insert(key, depth);
|
||||
}
|
||||
}
|
||||
|
||||
/// Base inference depth for nodes written by each agent.
///
/// observation = 1, extractor = 2, connector = 3. The challenger has no
/// base depth (`None`); any other agent name defaults to 2.
fn agent_base_depth(agent: &str) -> Option<i32> {
    if agent == "challenger" {
        return None;
    }
    let depth = match agent {
        "observation" => 1,
        "connector" => 3,
        // "extractor" and every unrecognised agent share the same depth.
        _ => 2,
    };
    Some(depth)
}
|
||||
|
||||
pub fn compute_action_depth(db: &DepthDb, action: &Action, agent: &str) -> i32 {
|
||||
match &action.kind {
|
||||
ActionKind::Link { .. } => -1,
|
||||
ActionKind::Refine { key, .. } => db.get(key),
|
||||
ActionKind::WriteNode { covers, .. } => {
|
||||
if !covers.is_empty() {
|
||||
covers.iter().map(|k| db.get(k)).max().unwrap_or(0) + 1
|
||||
} else {
|
||||
agent_base_depth(agent).unwrap_or(2)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Confidence threshold required at a given inference depth.
///
/// Depths of 0 or below need no confidence (0.0). For positive depths the
/// threshold is `1 - (1 - base)^depth`.
pub fn required_confidence(depth: i32, base: f64) -> f64 {
    if depth > 0 {
        1.0 - (1.0 - base).powi(depth)
    } else {
        0.0
    }
}
|
||||
|
||||
/// Confidence bonus earned from real-world use.
///
/// Zero uses give 0.0; otherwise the bonus is
/// `1 - 1 / (1 + 0.15 * use_count)`.
pub fn use_bonus(use_count: u32) -> f64 {
    match use_count {
        0 => 0.0,
        uses => 1.0 - 1.0 / (1.0 + 0.15 * uses as f64),
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Action application
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Prefix `content` with an HTML comment recording the authoring agent, the
/// creation timestamp and the inference depth.
fn stamp_content(content: &str, agent: &str, timestamp: &str, depth: i32) -> String {
    let header = format!(
        "<!-- author: {} | created: {} | depth: {} -->",
        agent, timestamp, depth
    );
    let mut stamped = String::with_capacity(header.len() + 1 + content.len());
    stamped.push_str(&header);
    stamped.push('\n');
    stamped.push_str(content);
    stamped
}
|
||||
|
||||
/// Check if a link already exists between two keys.
|
||||
fn has_edge(store: &Store, source: &str, target: &str) -> bool {
|
||||
store.relations.iter().any(|r| {
|
||||
!r.deleted
|
||||
&& ((r.source_key == source && r.target_key == target)
|
||||
|| (r.source_key == target && r.target_key == source))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn apply_action(
|
||||
store: &mut Store,
|
||||
action: &Action,
|
||||
agent: &str,
|
||||
timestamp: &str,
|
||||
depth: i32,
|
||||
) -> bool {
|
||||
let provenance = agent_provenance(agent);
|
||||
|
||||
match &action.kind {
|
||||
ActionKind::WriteNode { key, content, .. } => {
|
||||
let stamped = stamp_content(content, agent, timestamp, depth);
|
||||
store.upsert_provenance(key, &stamped, provenance).is_ok()
|
||||
}
|
||||
ActionKind::Link { source, target } => {
|
||||
if has_edge(store, source, target) {
|
||||
return false;
|
||||
}
|
||||
let source_uuid = match store.nodes.get(source.as_str()) {
|
||||
Some(n) => n.uuid,
|
||||
None => return false,
|
||||
};
|
||||
let target_uuid = match store.nodes.get(target.as_str()) {
|
||||
Some(n) => n.uuid,
|
||||
None => return false,
|
||||
};
|
||||
let mut rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
RelationType::Link,
|
||||
0.3,
|
||||
source, target,
|
||||
);
|
||||
rel.provenance = provenance;
|
||||
store.add_relation(rel).is_ok()
|
||||
}
|
||||
ActionKind::Refine { key, content } => {
|
||||
let stamped = stamp_content(content, agent, timestamp, depth);
|
||||
store.upsert_provenance(key, &stamped, provenance).is_ok()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn agent_provenance(agent: &str) -> store::Provenance {
|
||||
match agent {
|
||||
"observation" => store::Provenance::AgentKnowledgeObservation,
|
||||
"extractor" | "pattern" => store::Provenance::AgentKnowledgePattern,
|
||||
"connector" => store::Provenance::AgentKnowledgeConnector,
|
||||
"challenger" => store::Provenance::AgentKnowledgeChallenger,
|
||||
_ => store::Provenance::Agent,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent runners
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn load_prompt(name: &str) -> Result<String, String> {
|
||||
let path = prompts_dir().join(format!("{}.md", name));
|
||||
fs::read_to_string(&path).map_err(|e| format!("load prompt {}: {}", name, e))
|
||||
}
|
||||
|
||||
/// One-line summary of graph size (node count and edge count) that is
/// substituted into agent prompts.
fn get_graph_topology(store: &Store, graph: &Graph) -> String {
    let node_count = store.nodes.len();
    let edge_count = graph.edge_count();
    format!("Nodes: {} Relations: {}\n", node_count, edge_count)
}
|
||||
|
||||
/// Strip <system-reminder> blocks from text
|
||||
fn strip_system_tags(text: &str) -> String {
|
||||
let re = Regex::new(r"(?s)<system-reminder>.*?</system-reminder>").unwrap();
|
||||
re.replace_all(text, "").trim().to_string()
|
||||
}
|
||||
|
||||
/// Extract human-readable dialogue from a conversation JSONL
|
||||
fn extract_conversation_text(path: &Path, max_chars: usize) -> String {
|
||||
let Ok(content) = fs::read_to_string(path) else { return String::new() };
|
||||
let mut fragments = Vec::new();
|
||||
let mut total = 0;
|
||||
|
||||
for line in content.lines() {
|
||||
let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else { continue };
|
||||
|
||||
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
|
||||
if msg_type == "user" && obj.get("userType").and_then(|v| v.as_str()) == Some("external") {
|
||||
if let Some(text) = extract_text_content(&obj) {
|
||||
let text = strip_system_tags(&text);
|
||||
if text.starts_with("[Request interrupted") { continue; }
|
||||
if text.len() > 5 {
|
||||
fragments.push(format!("**Kent:** {}", text));
|
||||
total += text.len();
|
||||
}
|
||||
}
|
||||
} else if msg_type == "assistant" {
|
||||
if let Some(text) = extract_text_content(&obj) {
|
||||
let text = strip_system_tags(&text);
|
||||
if text.len() > 10 {
|
||||
fragments.push(format!("**PoC:** {}", text));
|
||||
total += text.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if total > max_chars { break; }
|
||||
}
|
||||
fragments.join("\n\n")
|
||||
}
|
||||
|
||||
/// Pull the plain text out of a transcript record's `message.content`.
///
/// `content` may be a plain string (returned as-is) or an array of blocks,
/// in which case the `text` of every block whose `type` is `"text"` is
/// joined with newlines. Returns `None` when the fields are missing, the
/// shape is something else, or the array holds no text blocks.
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
    let content = obj.get("message")?.get("content")?;

    match content {
        serde_json::Value::String(text) => Some(text.clone()),
        serde_json::Value::Array(blocks) => {
            let texts: Vec<&str> = blocks
                .iter()
                .filter(|block| block.get("type").and_then(|t| t.as_str()) == Some("text"))
                .filter_map(|block| block.get("text").and_then(|t| t.as_str()))
                .collect();
            if texts.is_empty() {
                None
            } else {
                Some(texts.join("\n"))
            }
        }
        _ => None,
    }
}
|
||||
|
||||
/// Count short user messages (dialogue turns) in a JSONL
|
||||
fn count_dialogue_turns(path: &Path) -> usize {
|
||||
let Ok(content) = fs::read_to_string(path) else { return 0 };
|
||||
content.lines()
|
||||
.filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
|
||||
.filter(|obj| {
|
||||
obj.get("type").and_then(|v| v.as_str()) == Some("user")
|
||||
&& obj.get("userType").and_then(|v| v.as_str()) == Some("external")
|
||||
})
|
||||
.filter(|obj| {
|
||||
let text = extract_text_content(obj).unwrap_or_default();
|
||||
text.len() > 5 && text.len() < 500
|
||||
&& !text.starts_with("[Request interrupted")
|
||||
&& !text.starts_with("Implement the following")
|
||||
})
|
||||
.count()
|
||||
}
|
||||
|
||||
/// Select conversation fragments for the observation extractor
|
||||
fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
|
||||
let projects = projects_dir();
|
||||
if !projects.exists() { return Vec::new(); }
|
||||
|
||||
let mut jsonl_files: Vec<PathBuf> = Vec::new();
|
||||
if let Ok(dirs) = fs::read_dir(&projects) {
|
||||
for dir in dirs.filter_map(|e| e.ok()) {
|
||||
if !dir.path().is_dir() { continue; }
|
||||
if let Ok(files) = fs::read_dir(dir.path()) {
|
||||
for f in files.filter_map(|e| e.ok()) {
|
||||
let p = f.path();
|
||||
if p.extension().map(|x| x == "jsonl").unwrap_or(false) {
|
||||
if let Ok(meta) = p.metadata() {
|
||||
if meta.len() > 50_000 {
|
||||
jsonl_files.push(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut scored: Vec<(usize, PathBuf)> = jsonl_files.into_iter()
|
||||
.map(|f| (count_dialogue_turns(&f), f))
|
||||
.filter(|(turns, _)| *turns >= 10)
|
||||
.collect();
|
||||
scored.sort_by(|a, b| b.0.cmp(&a.0));
|
||||
|
||||
let mut fragments = Vec::new();
|
||||
for (_, f) in scored.iter().take(n * 2) {
|
||||
let session_id = f.file_stem()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| "unknown".into());
|
||||
let text = extract_conversation_text(f, 8000);
|
||||
if text.len() > 500 {
|
||||
fragments.push((session_id, text));
|
||||
}
|
||||
if fragments.len() >= n { break; }
|
||||
}
|
||||
fragments
|
||||
}
|
||||
|
||||
pub fn run_observation_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
|
||||
let template = load_prompt("observation-extractor")?;
|
||||
let topology = get_graph_topology(store, graph);
|
||||
let fragments = select_conversation_fragments(batch_size);
|
||||
|
||||
let mut results = Vec::new();
|
||||
for (i, (session_id, text)) in fragments.iter().enumerate() {
|
||||
eprintln!(" Observation extractor {}/{}: session {}... ({} chars)",
|
||||
i + 1, fragments.len(), &session_id[..session_id.len().min(12)], text.len());
|
||||
|
||||
let prompt = template
|
||||
.replace("{{TOPOLOGY}}", &topology)
|
||||
.replace("{{CONVERSATIONS}}", &format!("### Session {}\n\n{}", session_id, text));
|
||||
|
||||
let response = llm::call_sonnet(&prompt, 600)?;
|
||||
results.push(format!("## Session: {}\n\n{}", session_id, response));
|
||||
}
|
||||
Ok(results.join("\n\n---\n\n"))
|
||||
}
|
||||
|
||||
/// Load spectral embedding from disk
|
||||
fn load_spectral_embedding() -> HashMap<String, Vec<f64>> {
|
||||
spectral::load_embedding()
|
||||
.map(|emb| emb.coords)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Cosine distance (`1 - cosine similarity`) between the embedding vectors
/// of keys `a` and `b`.
///
/// Returns `f64::INFINITY` when either key is missing from the embedding or
/// either vector has zero norm. Vectors of different length are compared
/// over their common prefix for the dot product.
fn spectral_distance(embedding: &HashMap<String, Vec<f64>>, a: &str, b: &str) -> f64 {
    let va = match embedding.get(a) {
        Some(v) => v,
        None => return f64::INFINITY,
    };
    let vb = match embedding.get(b) {
        Some(v) => v,
        None => return f64::INFINITY,
    };

    let norm = |v: &Vec<f64>| -> f64 { v.iter().map(|x| x * x).sum::<f64>().sqrt() };

    let dot: f64 = va.iter().zip(vb.iter()).map(|(x, y)| x * y).sum();
    let norm_a = norm(va);
    let norm_b = norm(vb);

    if norm_a == 0.0 || norm_b == 0.0 {
        f64::INFINITY
    } else {
        1.0 - dot / (norm_a * norm_b)
    }
}
|
||||
|
||||
fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
|
||||
let embedding = load_spectral_embedding();
|
||||
let skip = ["journal.md", "MEMORY.md", "where-am-i.md", "work-queue.md"];
|
||||
|
||||
let semantic_keys: Vec<&String> = embedding.keys()
|
||||
.filter(|k| !k.starts_with("journal.md#") && !skip.contains(&k.as_str()))
|
||||
.collect();
|
||||
|
||||
let cluster_size = 5;
|
||||
let mut used = HashSet::new();
|
||||
let mut clusters = Vec::new();
|
||||
|
||||
for _ in 0..n {
|
||||
let available: Vec<&&String> = semantic_keys.iter()
|
||||
.filter(|k| !used.contains(**k))
|
||||
.collect();
|
||||
if available.len() < cluster_size { break; }
|
||||
|
||||
let seed = available[0];
|
||||
let mut distances: Vec<(f64, &String)> = available.iter()
|
||||
.filter(|k| ***k != *seed)
|
||||
.map(|k| (spectral_distance(&embedding, seed, k), **k))
|
||||
.filter(|(d, _)| d.is_finite())
|
||||
.collect();
|
||||
distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
|
||||
|
||||
let cluster: Vec<String> = std::iter::once((*seed).clone())
|
||||
.chain(distances.iter().take(cluster_size - 1).map(|(_, k)| (*k).clone()))
|
||||
.collect();
|
||||
for k in &cluster { used.insert(k.clone()); }
|
||||
clusters.push(cluster);
|
||||
}
|
||||
clusters
|
||||
}
|
||||
|
||||
pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
|
||||
let template = load_prompt("extractor")?;
|
||||
let topology = get_graph_topology(store, graph);
|
||||
let clusters = select_extractor_clusters(store, batch_size);
|
||||
|
||||
let mut results = Vec::new();
|
||||
for (i, cluster) in clusters.iter().enumerate() {
|
||||
eprintln!(" Extractor cluster {}/{}: {} nodes", i + 1, clusters.len(), cluster.len());
|
||||
|
||||
let node_texts: Vec<String> = cluster.iter()
|
||||
.filter_map(|key| {
|
||||
let content = store.nodes.get(key)?.content.as_str();
|
||||
Some(format!("### {}\n{}", key, content))
|
||||
})
|
||||
.collect();
|
||||
if node_texts.is_empty() { continue; }
|
||||
|
||||
let prompt = template
|
||||
.replace("{{TOPOLOGY}}", &topology)
|
||||
.replace("{{NODES}}", &node_texts.join("\n\n"));
|
||||
|
||||
let response = llm::call_sonnet(&prompt, 600)?;
|
||||
results.push(format!("## Cluster {}: {}...\n\n{}", i + 1,
|
||||
cluster.iter().take(3).cloned().collect::<Vec<_>>().join(", "), response));
|
||||
}
|
||||
Ok(results.join("\n\n---\n\n"))
|
||||
}
|
||||
|
||||
fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<String>, Vec<String>)> {
|
||||
let embedding = load_spectral_embedding();
|
||||
let skip_prefixes = ["journal.md#", "daily-", "weekly-", "monthly-", "all-sessions"];
|
||||
let skip_exact: HashSet<&str> = ["journal.md", "MEMORY.md", "where-am-i.md",
|
||||
"work-queue.md", "work-state"].iter().copied().collect();
|
||||
|
||||
let semantic_keys: Vec<&String> = embedding.keys()
|
||||
.filter(|k| {
|
||||
!skip_exact.contains(k.as_str())
|
||||
&& !skip_prefixes.iter().any(|p| k.starts_with(p))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut pairs = Vec::new();
|
||||
let mut used = HashSet::new();
|
||||
|
||||
for seed in semantic_keys.iter().take(n * 10) {
|
||||
if used.contains(*seed) { continue; }
|
||||
|
||||
let mut near: Vec<(f64, &String)> = semantic_keys.iter()
|
||||
.filter(|k| ***k != **seed && !used.contains(**k))
|
||||
.map(|k| (spectral_distance(&embedding, seed, k), *k))
|
||||
.filter(|(d, _)| *d < 0.5 && d.is_finite())
|
||||
.collect();
|
||||
near.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
|
||||
|
||||
for (_, target) in near.iter().take(5) {
|
||||
if !has_edge(store, seed, target) {
|
||||
let _ = graph; // graph available for future use
|
||||
used.insert((*seed).clone());
|
||||
used.insert((*target).clone());
|
||||
pairs.push((vec![(*seed).clone()], vec![(*target).clone()]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if pairs.len() >= n { break; }
|
||||
}
|
||||
pairs
|
||||
}
|
||||
|
||||
pub fn run_connector(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
|
||||
let template = load_prompt("connector")?;
|
||||
let topology = get_graph_topology(store, graph);
|
||||
let pairs = select_connector_pairs(store, graph, batch_size);
|
||||
|
||||
let mut results = Vec::new();
|
||||
for (i, (group_a, group_b)) in pairs.iter().enumerate() {
|
||||
eprintln!(" Connector pair {}/{}", i + 1, pairs.len());
|
||||
|
||||
let nodes_a: Vec<String> = group_a.iter()
|
||||
.filter_map(|k| {
|
||||
let c = store.nodes.get(k)?.content.as_str();
|
||||
Some(format!("### {}\n{}", k, c))
|
||||
})
|
||||
.collect();
|
||||
let nodes_b: Vec<String> = group_b.iter()
|
||||
.filter_map(|k| {
|
||||
let c = store.nodes.get(k)?.content.as_str();
|
||||
Some(format!("### {}\n{}", k, c))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let prompt = template
|
||||
.replace("{{TOPOLOGY}}", &topology)
|
||||
.replace("{{NODES_A}}", &nodes_a.join("\n\n"))
|
||||
.replace("{{NODES_B}}", &nodes_b.join("\n\n"));
|
||||
|
||||
let response = llm::call_sonnet(&prompt, 600)?;
|
||||
results.push(format!("## Pair {}: {} ↔ {}\n\n{}",
|
||||
i + 1, group_a.join(", "), group_b.join(", "), response));
|
||||
}
|
||||
Ok(results.join("\n\n---\n\n"))
|
||||
}
|
||||
|
||||
pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result<String, String> {
|
||||
let template = load_prompt("challenger")?;
|
||||
let topology = get_graph_topology(store, graph);
|
||||
|
||||
let mut candidates: Vec<(&String, usize)> = store.nodes.iter()
|
||||
.filter(|(k, _)| {
|
||||
!k.starts_with("journal.md#")
|
||||
&& !["journal.md", "MEMORY.md", "where-am-i.md"].contains(&k.as_str())
|
||||
})
|
||||
.map(|(k, _)| (k, graph.degree(k)))
|
||||
.collect();
|
||||
candidates.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
|
||||
let mut results = Vec::new();
|
||||
for (i, (key, _)) in candidates.iter().take(batch_size).enumerate() {
|
||||
eprintln!(" Challenger {}/{}: {}", i + 1, batch_size.min(candidates.len()), key);
|
||||
|
||||
let content = match store.nodes.get(key.as_str()) {
|
||||
Some(n) => &n.content,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let prompt = template
|
||||
.replace("{{TOPOLOGY}}", &topology)
|
||||
.replace("{{NODE_KEY}}", key)
|
||||
.replace("{{NODE_CONTENT}}", content);
|
||||
|
||||
let response = llm::call_sonnet(&prompt, 600)?;
|
||||
results.push(format!("## Challenge: {}\n\n{}", key, response));
|
||||
}
|
||||
Ok(results.join("\n\n---\n\n"))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Convergence metrics
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CycleResult {
|
||||
pub cycle: usize,
|
||||
pub timestamp: String,
|
||||
pub total_actions: usize,
|
||||
pub total_applied: usize,
|
||||
pub total_no_ops: usize,
|
||||
pub depth_rejected: usize,
|
||||
pub weighted_delta: f64,
|
||||
pub graph_metrics_before: GraphMetrics,
|
||||
pub graph_metrics_after: GraphMetrics,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct GraphMetrics {
|
||||
pub nodes: usize,
|
||||
pub edges: usize,
|
||||
pub cc: f64,
|
||||
pub sigma: f64,
|
||||
pub communities: usize,
|
||||
}
|
||||
|
||||
impl GraphMetrics {
|
||||
pub fn from_graph(store: &Store, graph: &Graph) -> Self {
|
||||
Self {
|
||||
nodes: store.nodes.len(),
|
||||
edges: graph.edge_count(),
|
||||
cc: graph.avg_clustering_coefficient() as f64,
|
||||
sigma: graph.small_world_sigma() as f64,
|
||||
communities: graph.community_count(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Coefficient of variation (population standard deviation divided by
/// |mean|) of one post-cycle graph metric over the last `window` cycles.
///
/// `key` selects the metric: "sigma", "cc" or "communities"; any other key
/// reads as 0.0 for every cycle. Returns `f64::INFINITY` when there are
/// fewer than `window` cycles or fewer than two samples, and 0.0 when the
/// mean is exactly zero.
fn metric_stability(history: &[CycleResult], key: &str, window: usize) -> f64 {
    let Some(start) = history.len().checked_sub(window) else {
        return f64::INFINITY;
    };
    let recent = &history[start..];
    if recent.len() < 2 {
        return f64::INFINITY;
    }

    let pick = |result: &CycleResult| -> f64 {
        let after = &result.graph_metrics_after;
        match key {
            "sigma" => after.sigma,
            "cc" => after.cc,
            "communities" => after.communities as f64,
            _ => 0.0,
        }
    };
    let samples: Vec<f64> = recent.iter().map(pick).collect();
    let count = samples.len() as f64;

    let mean = samples.iter().sum::<f64>() / count;
    if mean == 0.0 {
        return 0.0;
    }
    let variance = samples.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / count;
    variance.sqrt() / mean.abs()
}
|
||||
|
||||
pub fn check_convergence(history: &[CycleResult], window: usize) -> bool {
|
||||
if history.len() < window { return false; }
|
||||
|
||||
let sigma_cv = metric_stability(history, "sigma", window);
|
||||
let cc_cv = metric_stability(history, "cc", window);
|
||||
let comm_cv = metric_stability(history, "communities", window);
|
||||
|
||||
let recent = &history[history.len() - window..];
|
||||
let avg_delta = recent.iter().map(|r| r.weighted_delta).sum::<f64>() / recent.len() as f64;
|
||||
|
||||
eprintln!("\n Convergence check (last {} cycles):", window);
|
||||
eprintln!(" sigma CV: {:.4} (< 0.05?)", sigma_cv);
|
||||
eprintln!(" CC CV: {:.4} (< 0.05?)", cc_cv);
|
||||
eprintln!(" community CV: {:.4} (< 0.10?)", comm_cv);
|
||||
eprintln!(" avg delta: {:.2} (< 1.00?)", avg_delta);
|
||||
|
||||
let structural = sigma_cv < 0.05 && cc_cv < 0.05 && comm_cv < 0.10;
|
||||
let behavioral = avg_delta < 1.0;
|
||||
|
||||
if structural && behavioral {
|
||||
eprintln!(" → CONVERGED");
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// The knowledge loop
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Tunable parameters for the knowledge loop.
pub struct KnowledgeLoopConfig {
    /// Upper bound on the number of cycles the loop will run.
    pub max_cycles: usize,
    /// Batch size handed to each agent on every cycle.
    pub batch_size: usize,
    /// Number of trailing cycles examined by the convergence check.
    pub window: usize,
    /// Write-node actions deeper than this are rejected.
    pub max_depth: i32,
    /// Base value used when computing the confidence required at a depth.
    pub confidence_base: f64,
}

impl Default for KnowledgeLoopConfig {
    /// Defaults: 20 cycles, batch of 5, window of 5, depth limit 4,
    /// confidence base 0.3.
    fn default() -> Self {
        KnowledgeLoopConfig {
            max_cycles: 20,
            batch_size: 5,
            window: 5,
            max_depth: 4,
            confidence_base: 0.3,
        }
    }
}
|
||||
|
||||
pub fn run_knowledge_loop(config: &KnowledgeLoopConfig) -> Result<Vec<CycleResult>, String> {
|
||||
let mut store = Store::load()?;
|
||||
let mut depth_db = DepthDb::load(&store);
|
||||
let mut history = Vec::new();
|
||||
|
||||
eprintln!("Knowledge Loop — fixed-point iteration");
|
||||
eprintln!(" max_cycles={} batch_size={}", config.max_cycles, config.batch_size);
|
||||
eprintln!(" window={} max_depth={}", config.window, config.max_depth);
|
||||
|
||||
for cycle in 1..=config.max_cycles {
|
||||
let result = run_cycle(cycle, config, &mut depth_db)?;
|
||||
history.push(result);
|
||||
|
||||
if check_convergence(&history, config.window) {
|
||||
eprintln!("\n CONVERGED after {} cycles", cycle);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Save loop summary as a store node
|
||||
if let Some(first) = history.first() {
|
||||
let key = format!("_knowledge-loop-{}", first.timestamp);
|
||||
if let Ok(json) = serde_json::to_string_pretty(&history) {
|
||||
store = Store::load()?;
|
||||
store.upsert_provenance(&key, &json,
|
||||
store::Provenance::AgentKnowledgeObservation).ok();
|
||||
depth_db.save(&mut store);
|
||||
store.save()?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(history)
|
||||
}
|
||||
|
||||
fn run_cycle(
|
||||
cycle_num: usize,
|
||||
config: &KnowledgeLoopConfig,
|
||||
depth_db: &mut DepthDb,
|
||||
) -> Result<CycleResult, String> {
|
||||
let timestamp = chrono::Local::now().format("%Y%m%dT%H%M%S").to_string();
|
||||
eprintln!("\n{}", "=".repeat(60));
|
||||
eprintln!("CYCLE {} — {}", cycle_num, timestamp);
|
||||
eprintln!("{}", "=".repeat(60));
|
||||
|
||||
let mut store = Store::load()?;
|
||||
let graph = store.build_graph();
|
||||
let metrics_before = GraphMetrics::from_graph(&store, &graph);
|
||||
eprintln!(" Before: nodes={} edges={} cc={:.3} sigma={:.3}",
|
||||
metrics_before.nodes, metrics_before.edges, metrics_before.cc, metrics_before.sigma);
|
||||
|
||||
let mut all_actions = Vec::new();
|
||||
let mut all_no_ops = 0;
|
||||
let mut depth_rejected = 0;
|
||||
let mut total_applied = 0;
|
||||
|
||||
// Run each agent, rebuilding graph after mutations
|
||||
let agent_names = ["observation", "extractor", "connector", "challenger"];
|
||||
|
||||
for agent_name in &agent_names {
|
||||
eprintln!("\n --- {} (n={}) ---", agent_name, config.batch_size);
|
||||
|
||||
// Rebuild graph to reflect any mutations from previous agents
|
||||
let graph = store.build_graph();
|
||||
|
||||
let output = match *agent_name {
|
||||
"observation" => run_observation_extractor(&store, &graph, config.batch_size),
|
||||
"extractor" => run_extractor(&store, &graph, config.batch_size),
|
||||
"connector" => run_connector(&store, &graph, config.batch_size),
|
||||
"challenger" => run_challenger(&store, &graph, config.batch_size),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let output = match output {
|
||||
Ok(o) => o,
|
||||
Err(e) => {
|
||||
eprintln!(" ERROR: {}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Store raw output as a node (for debugging/audit)
|
||||
let raw_key = format!("_knowledge-{}-{}", agent_name, timestamp);
|
||||
let raw_content = format!("# {} Agent Results — {}\n\n{}", agent_name, timestamp, output);
|
||||
store.upsert_provenance(&raw_key, &raw_content,
|
||||
agent_provenance(agent_name)).ok();
|
||||
|
||||
let mut actions = parse_all_actions(&output);
|
||||
let no_ops = count_no_ops(&output);
|
||||
all_no_ops += no_ops;
|
||||
|
||||
eprintln!(" Actions: {} No-ops: {}", actions.len(), no_ops);
|
||||
|
||||
let mut applied = 0;
|
||||
for action in &mut actions {
|
||||
let depth = compute_action_depth(depth_db, action, agent_name);
|
||||
action.depth = depth;
|
||||
|
||||
match &action.kind {
|
||||
ActionKind::WriteNode { key, covers, .. } => {
|
||||
let conf_val = action.confidence.value();
|
||||
let req = required_confidence(depth, config.confidence_base);
|
||||
|
||||
let source_uses: Vec<u32> = covers.iter()
|
||||
.filter_map(|k| store.nodes.get(k).map(|n| n.uses))
|
||||
.collect();
|
||||
let avg_uses = if source_uses.is_empty() { 0 }
|
||||
else { source_uses.iter().sum::<u32>() / source_uses.len() as u32 };
|
||||
let eff_conf = (conf_val + use_bonus(avg_uses)).min(1.0);
|
||||
|
||||
if eff_conf < req {
|
||||
action.applied = Some(false);
|
||||
action.rejected_reason = Some("depth_threshold".into());
|
||||
depth_rejected += 1;
|
||||
continue;
|
||||
}
|
||||
if depth > config.max_depth {
|
||||
action.applied = Some(false);
|
||||
action.rejected_reason = Some("max_depth".into());
|
||||
depth_rejected += 1;
|
||||
continue;
|
||||
}
|
||||
eprintln!(" WRITE {} depth={} conf={:.2} eff={:.2} req={:.2}",
|
||||
key, depth, conf_val, eff_conf, req);
|
||||
}
|
||||
ActionKind::Link { source, target } => {
|
||||
eprintln!(" LINK {} → {}", source, target);
|
||||
}
|
||||
ActionKind::Refine { key, .. } => {
|
||||
eprintln!(" REFINE {} depth={}", key, depth);
|
||||
}
|
||||
}
|
||||
|
||||
if apply_action(&mut store, action, agent_name, ×tamp, depth) {
|
||||
applied += 1;
|
||||
action.applied = Some(true);
|
||||
if let ActionKind::WriteNode { key, .. } | ActionKind::Refine { key, .. } = &action.kind {
|
||||
depth_db.set(key.clone(), depth);
|
||||
}
|
||||
} else {
|
||||
action.applied = Some(false);
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!(" Applied: {}/{}", applied, actions.len());
|
||||
total_applied += applied;
|
||||
all_actions.extend(actions);
|
||||
}
|
||||
|
||||
depth_db.save(&mut store);
|
||||
|
||||
// Recompute spectral if anything changed
|
||||
if total_applied > 0 {
|
||||
eprintln!("\n Recomputing spectral embedding...");
|
||||
let graph = store.build_graph();
|
||||
let result = spectral::decompose(&graph, 8);
|
||||
let emb = spectral::to_embedding(&result);
|
||||
spectral::save_embedding(&emb).ok();
|
||||
}
|
||||
|
||||
let graph = store.build_graph();
|
||||
let metrics_after = GraphMetrics::from_graph(&store, &graph);
|
||||
let weighted_delta: f64 = all_actions.iter()
|
||||
.filter(|a| a.applied == Some(true))
|
||||
.map(|a| a.weight)
|
||||
.sum();
|
||||
|
||||
eprintln!("\n CYCLE {} SUMMARY", cycle_num);
|
||||
eprintln!(" Applied: {}/{} depth-rejected: {} no-ops: {}",
|
||||
total_applied, all_actions.len(), depth_rejected, all_no_ops);
|
||||
eprintln!(" Weighted delta: {:.2}", weighted_delta);
|
||||
|
||||
Ok(CycleResult {
|
||||
cycle: cycle_num,
|
||||
timestamp,
|
||||
total_actions: all_actions.len(),
|
||||
total_applied,
|
||||
total_no_ops: all_no_ops,
|
||||
depth_rejected,
|
||||
weighted_delta,
|
||||
graph_metrics_before: metrics_before,
|
||||
graph_metrics_after: metrics_after,
|
||||
})
|
||||
}
|
||||
22
src/llm.rs
22
src/llm.rs
|
|
@ -1,6 +1,6 @@
|
|||
// LLM utilities: Sonnet invocation and response parsing
|
||||
// LLM utilities: model invocation and response parsing
|
||||
//
|
||||
// Shared by digest, audit, enrich, and consolidate modules.
|
||||
// Shared by digest, audit, enrich, consolidate, knowledge, and fact_mine.
|
||||
|
||||
use crate::store::Store;
|
||||
|
||||
|
|
@ -8,8 +8,8 @@ use regex::Regex;
|
|||
use std::fs;
|
||||
use std::process::Command;
|
||||
|
||||
/// Call Sonnet via claude CLI. Returns the response text.
|
||||
pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, String> {
|
||||
/// Call a model via claude CLI. Returns the response text.
|
||||
fn call_model(model: &str, prompt: &str) -> Result<String, String> {
|
||||
// Write prompt to temp file (claude CLI needs file input for large prompts)
|
||||
// Use thread ID + PID to avoid collisions under parallel rayon calls
|
||||
let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
|
||||
|
|
@ -18,7 +18,7 @@ pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, St
|
|||
.map_err(|e| format!("write temp prompt: {}", e))?;
|
||||
|
||||
let result = Command::new("claude")
|
||||
.args(["-p", "--model", "sonnet", "--tools", ""])
|
||||
.args(["-p", "--model", model, "--tools", "", "--no-session-persistence"])
|
||||
.stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
|
||||
.env_remove("CLAUDECODE")
|
||||
.output();
|
||||
|
|
@ -38,7 +38,17 @@ pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, St
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a JSON response from Sonnet, handling markdown fences.
|
||||
/// Call Sonnet via claude CLI.
|
||||
pub(crate) fn call_sonnet(prompt: &str, _timeout_secs: u64) -> Result<String, String> {
|
||||
call_model("sonnet", prompt)
|
||||
}
|
||||
|
||||
/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction).
|
||||
pub(crate) fn call_haiku(prompt: &str) -> Result<String, String> {
|
||||
call_model("haiku", prompt)
|
||||
}
|
||||
|
||||
/// Parse a JSON response, handling markdown fences.
|
||||
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
|
||||
let cleaned = response.trim();
|
||||
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
|
||||
|
|
|
|||
159
src/main.rs
159
src/main.rs
|
|
@ -29,6 +29,8 @@ mod query;
|
|||
mod spectral;
|
||||
mod lookups;
|
||||
mod daemon;
|
||||
mod fact_mine;
|
||||
mod knowledge;
|
||||
|
||||
pub mod memory_capnp {
|
||||
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
|
||||
|
|
@ -132,6 +134,9 @@ fn main() {
|
|||
"lookup-bump" => cmd_lookup_bump(&args[2..]),
|
||||
"lookups" => cmd_lookups(&args[2..]),
|
||||
"daemon" => cmd_daemon(&args[2..]),
|
||||
"knowledge-loop" => cmd_knowledge_loop(&args[2..]),
|
||||
"fact-mine" => cmd_fact_mine(&args[2..]),
|
||||
"fact-mine-store" => cmd_fact_mine_store(&args[2..]),
|
||||
_ => {
|
||||
eprintln!("Unknown command: {}", args[1]);
|
||||
usage();
|
||||
|
|
@ -216,7 +221,14 @@ Commands:
|
|||
lookups [DATE] Show daily lookup counts (default: today)
|
||||
daemon Start background job daemon
|
||||
daemon status Show daemon status
|
||||
daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)");
|
||||
daemon log [JOB] [N] Show last N log lines (default 50, optional job filter)
|
||||
knowledge-loop [OPTIONS] Run knowledge agents to convergence
|
||||
--max-cycles N (default 20)
|
||||
--batch-size N (default 5)
|
||||
--window N (default 5)
|
||||
--max-depth N (default 4)
|
||||
fact-mine JSONL [OPTIONS] Extract atomic facts from conversation transcripts
|
||||
fact-mine --batch DIR Mine all .jsonl files in directory");
|
||||
}
|
||||
|
||||
fn cmd_search(args: &[String]) -> Result<(), String> {
|
||||
|
|
@ -850,8 +862,10 @@ fn cmd_digest(args: &[String]) -> Result<(), String> {
|
|||
fn cmd_digest_links(args: &[String]) -> Result<(), String> {
|
||||
let do_apply = args.iter().any(|a| a == "--apply");
|
||||
|
||||
let links = digest::parse_all_digest_links()?;
|
||||
println!("Found {} unique links from digest files", links.len());
|
||||
let store = store::Store::load()?;
|
||||
let links = digest::parse_all_digest_links(&store);
|
||||
drop(store);
|
||||
println!("Found {} unique links from digest nodes", links.len());
|
||||
|
||||
if !do_apply {
|
||||
for (i, link) in links.iter().enumerate() {
|
||||
|
|
@ -1821,10 +1835,19 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
|
|||
match args[0].as_str() {
|
||||
"status" => daemon::show_status(),
|
||||
"log" => {
|
||||
let job = args.get(1).map(|s| s.as_str());
|
||||
let lines = args.get(2)
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(50);
|
||||
// daemon log [N] — last N lines (default 20)
|
||||
// daemon log JOB [N] — last N lines for job
|
||||
let (job, lines) = match args.get(1) {
|
||||
None => (None, 20),
|
||||
Some(s) => {
|
||||
if let Ok(n) = s.parse::<usize>() {
|
||||
(None, n)
|
||||
} else {
|
||||
let n = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(20);
|
||||
(Some(s.as_str()), n)
|
||||
}
|
||||
}
|
||||
};
|
||||
daemon::show_log(job, lines)
|
||||
}
|
||||
_ => {
|
||||
|
|
@ -1833,3 +1856,125 @@ fn cmd_daemon(args: &[String]) -> Result<(), String> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn cmd_knowledge_loop(args: &[String]) -> Result<(), String> {
|
||||
if args.iter().any(|a| a == "--help" || a == "-h") {
|
||||
eprintln!("Usage: poc-memory knowledge-loop [OPTIONS]
|
||||
|
||||
Run knowledge agents (observation, extractor, connector, challenger) in
|
||||
a convergence loop. Each cycle runs all agents, applies actions to the
|
||||
graph, and checks structural stability metrics.
|
||||
|
||||
Options:
|
||||
--max-cycles N Maximum cycles before stopping (default: 20)
|
||||
--batch-size N Items per agent per cycle (default: 5)
|
||||
--window N Cycles to check for convergence (default: 5)
|
||||
--max-depth N Maximum inference depth (default: 4)");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut config = knowledge::KnowledgeLoopConfig::default();
|
||||
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
match args[i].as_str() {
|
||||
"--max-cycles" => { i += 1; config.max_cycles = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.max_cycles); }
|
||||
"--batch-size" => { i += 1; config.batch_size = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.batch_size); }
|
||||
"--window" => { i += 1; config.window = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.window); }
|
||||
"--max-depth" => { i += 1; config.max_depth = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(config.max_depth); }
|
||||
other => return Err(format!("Unknown arg: {}. Use --help for usage.", other)),
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let results = knowledge::run_knowledge_loop(&config)?;
|
||||
eprintln!("\nCompleted {} cycles, {} total actions applied",
|
||||
results.len(),
|
||||
results.iter().map(|r| r.total_applied).sum::<usize>());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_fact_mine(args: &[String]) -> Result<(), String> {
|
||||
if args.is_empty() || args.iter().any(|a| a == "--help" || a == "-h") {
|
||||
eprintln!("Usage: poc-memory fact-mine <JSONL> [OPTIONS]
|
||||
poc-memory fact-mine --batch <DIR> [OPTIONS]
|
||||
|
||||
Extract atomic factual claims from conversation transcripts using Haiku.
|
||||
|
||||
Options:
|
||||
--batch Process all .jsonl files in directory
|
||||
--dry-run Show chunks without calling model
|
||||
--output FILE Write JSON to file (default: stdout)
|
||||
--min-messages N Skip transcripts with fewer messages (default: 10)");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut batch = false;
|
||||
let mut dry_run = false;
|
||||
let mut output_file: Option<String> = None;
|
||||
let mut min_messages = 10usize;
|
||||
let mut path: Option<String> = None;
|
||||
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
match args[i].as_str() {
|
||||
"--batch" => batch = true,
|
||||
"--dry-run" => dry_run = true,
|
||||
"--output" | "-o" => { i += 1; output_file = args.get(i).cloned(); }
|
||||
"--min-messages" => { i += 1; min_messages = args.get(i).and_then(|s| s.parse().ok()).unwrap_or(min_messages); }
|
||||
s if !s.starts_with('-') => path = Some(s.to_string()),
|
||||
other => return Err(format!("Unknown arg: {}", other)),
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let path = path.ok_or("Missing path argument")?;
|
||||
let p = std::path::Path::new(&path);
|
||||
|
||||
let paths: Vec<std::path::PathBuf> = if batch {
|
||||
if !p.is_dir() {
|
||||
return Err(format!("Not a directory: {}", path));
|
||||
}
|
||||
let mut files: Vec<_> = std::fs::read_dir(p)
|
||||
.map_err(|e| format!("read dir: {}", e))?
|
||||
.filter_map(|e| e.ok())
|
||||
.map(|e| e.path())
|
||||
.filter(|p| p.extension().map(|x| x == "jsonl").unwrap_or(false))
|
||||
.collect();
|
||||
files.sort();
|
||||
eprintln!("Found {} transcripts", files.len());
|
||||
files
|
||||
} else {
|
||||
vec![p.to_path_buf()]
|
||||
};
|
||||
|
||||
let path_refs: Vec<&std::path::Path> = paths.iter().map(|p| p.as_path()).collect();
|
||||
let facts = fact_mine::mine_batch(&path_refs, min_messages, dry_run)?;
|
||||
|
||||
if !dry_run {
|
||||
let json = serde_json::to_string_pretty(&facts)
|
||||
.map_err(|e| format!("serialize: {}", e))?;
|
||||
if let Some(out) = &output_file {
|
||||
std::fs::write(out, &json).map_err(|e| format!("write: {}", e))?;
|
||||
eprintln!("\nWrote {} facts to {}", facts.len(), out);
|
||||
} else {
|
||||
println!("{}", json);
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("\nTotal: {} facts from {} transcripts", facts.len(), paths.len());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_fact_mine_store(args: &[String]) -> Result<(), String> {
|
||||
if args.len() != 1 {
|
||||
return Err("Usage: poc-memory fact-mine-store <JSONL>".into());
|
||||
}
|
||||
let path = std::path::Path::new(&args[0]);
|
||||
if !path.exists() {
|
||||
return Err(format!("File not found: {}", args[0]));
|
||||
}
|
||||
let count = fact_mine::mine_and_store(path)?;
|
||||
eprintln!("Stored {} facts", count);
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
19
src/query.rs
19
src/query.rs
|
|
@ -202,11 +202,20 @@ fn node_type_label(nt: NodeType) -> &'static str {
|
|||
|
||||
fn provenance_label(p: Provenance) -> &'static str {
|
||||
match p {
|
||||
Provenance::Manual => "manual",
|
||||
Provenance::Journal => "journal",
|
||||
Provenance::Agent => "agent",
|
||||
Provenance::Dream => "dream",
|
||||
Provenance::Derived => "derived",
|
||||
Provenance::Manual => "manual",
|
||||
Provenance::Journal => "journal",
|
||||
Provenance::Agent => "agent",
|
||||
Provenance::Dream => "dream",
|
||||
Provenance::Derived => "derived",
|
||||
Provenance::AgentExperienceMine => "agent:experience-mine",
|
||||
Provenance::AgentKnowledgeObservation => "agent:knowledge-observation",
|
||||
Provenance::AgentKnowledgePattern => "agent:knowledge-pattern",
|
||||
Provenance::AgentKnowledgeConnector => "agent:knowledge-connector",
|
||||
Provenance::AgentKnowledgeChallenger => "agent:knowledge-challenger",
|
||||
Provenance::AgentConsolidate => "agent:consolidate",
|
||||
Provenance::AgentDigest => "agent:digest",
|
||||
Provenance::AgentFactMine => "agent:fact-mine",
|
||||
Provenance::AgentDecay => "agent:decay",
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,18 +30,25 @@ impl Store {
|
|||
/// Upsert a node: update if exists (and content changed), create if not.
|
||||
/// Returns: "created", "updated", or "unchanged".
|
||||
pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> {
|
||||
self.upsert_provenance(key, content, Provenance::Manual)
|
||||
}
|
||||
|
||||
/// Upsert with explicit provenance (for agent-created nodes).
|
||||
pub fn upsert_provenance(&mut self, key: &str, content: &str, provenance: Provenance) -> Result<&'static str, String> {
|
||||
if let Some(existing) = self.nodes.get(key) {
|
||||
if existing.content == content {
|
||||
return Ok("unchanged");
|
||||
}
|
||||
let mut node = existing.clone();
|
||||
node.content = content.to_string();
|
||||
node.provenance = provenance;
|
||||
node.version += 1;
|
||||
self.append_nodes(std::slice::from_ref(&node))?;
|
||||
self.nodes.insert(key.to_string(), node);
|
||||
Ok("updated")
|
||||
} else {
|
||||
let node = new_node(key, content);
|
||||
let mut node = new_node(key, content);
|
||||
node.provenance = provenance;
|
||||
self.append_nodes(std::slice::from_ref(&node))?;
|
||||
self.uuid_to_key.insert(node.uuid, node.key.clone());
|
||||
self.nodes.insert(key.to_string(), node);
|
||||
|
|
|
|||
|
|
@ -236,9 +236,18 @@ pub enum NodeType {
|
|||
pub enum Provenance {
    Manual,
    Journal,
    Agent, // legacy catch-all, prefer specific variants below
    Dream,
    Derived,
    // One variant per agent. Variant order mirrors the list passed to
    // `capnp_enum!` for this type; keep the two in sync.
    AgentExperienceMine,
    AgentKnowledgeObservation,
    AgentKnowledgePattern,
    AgentKnowledgeConnector,
    AgentKnowledgeChallenger,
    AgentConsolidate,
    AgentDigest,
    AgentFactMine,
    AgentDecay,
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
|
||||
|
|
@ -296,7 +305,10 @@ capnp_enum!(NodeType, memory_capnp::NodeType,
|
|||
[EpisodicSession, EpisodicDaily, EpisodicWeekly, Semantic]);
|
||||
|
||||
capnp_enum!(Provenance, memory_capnp::Provenance,
|
||||
[Manual, Journal, Agent, Dream, Derived]);
|
||||
[Manual, Journal, Agent, Dream, Derived,
|
||||
AgentExperienceMine, AgentKnowledgeObservation, AgentKnowledgePattern,
|
||||
AgentKnowledgeConnector, AgentKnowledgeChallenger, AgentConsolidate,
|
||||
AgentDigest, AgentFactMine, AgentDecay]);
|
||||
|
||||
capnp_enum!(Category, memory_capnp::Category,
|
||||
[General, Core, Technical, Observation, Task]);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue