poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""journal-agent.py — background agent that enriches journal entries.
|
|
|
|
|
|
|
|
|
|
Spawned by poc-journal after each write. Sends the full conversation
|
|
|
|
|
to Sonnet to:
|
|
|
|
|
1. Find the exact conversation region the entry refers to
|
|
|
|
|
2. Propose bidirectional links to semantic memory nodes
|
|
|
|
|
3. Spot additional insights worth capturing
|
|
|
|
|
|
|
|
|
|
Results are written to ~/.claude/memory/agent-results/ as JSON for
|
|
|
|
|
pickup by poc-memory.
|
|
|
|
|
|
|
|
|
|
Usage:
|
|
|
|
|
journal-agent.py JSONL_PATH ENTRY_TEXT [GREP_LINE]
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
import subprocess
|
|
|
|
|
import sys
|
|
|
|
|
import time
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
# Root of the persistent memory store shared with poc-memory.
MEMORY_DIR = Path.home() / ".claude" / "memory"
# Drop-box for agent output; poc-memory picks up JSON files from here.
RESULTS_DIR = MEMORY_DIR / "agent-results"
# Created eagerly at import time so save_result() can always write.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_conversation(jsonl_path: str) -> list[dict]:
    """Extract user/assistant messages with line numbers.

    Each returned dict has "line" (1-based line in the JSONL), "role"
    ("user" or "assistant"), "text" (joined text content), and
    "timestamp". Unparseable lines, other record types, and messages
    with no non-blank text are skipped.
    """
    collected: list[dict] = []
    with open(jsonl_path) as fh:
        for lineno, raw in enumerate(fh, 1):
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue  # tolerate malformed lines in the transcript

            role = record.get("type", "")
            if role != "user" and role != "assistant":
                continue

            content = record.get("message", {}).get("content", "")

            # Normalize content to a list of text pieces: it is either a
            # plain string or a list of strings / {"type": "text"} dicts.
            if isinstance(content, str):
                pieces = [content]
            elif isinstance(content, list):
                pieces = []
                for chunk in content:
                    if isinstance(chunk, str):
                        pieces.append(chunk)
                    elif isinstance(chunk, dict) and chunk.get("type") == "text":
                        pieces.append(chunk.get("text", ""))
            else:
                pieces = []

            body = "\n".join(p for p in pieces if p.strip())
            if not body.strip():
                continue

            collected.append({
                "line": lineno,
                "role": role,
                "text": body,
                "timestamp": record.get("timestamp", ""),
            })
    return collected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def format_conversation(messages: list[dict]) -> str:
    """Format messages for the agent prompt.

    Renders each message as 'L<line> [<role>]: <text>', clipping very
    long ones (code output etc) at 1800 chars while keeping substance.
    Entries are separated by blank lines.
    """

    def render(entry: dict) -> str:
        body = entry["text"]
        # Clip oversized messages but leave a marker so the agent knows.
        if len(body) > 2000:
            body = body[:1800] + "\n[...truncated...]"
        return f'L{entry["line"]} [{entry["role"]}]: {body}'

    return "\n\n".join(render(m) for m in messages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_memory_nodes() -> str:
    """Get a list of memory nodes for link proposals.

    Uses poc-memory to get top nodes by degree plus recent nodes.
    """

    def run_poc(command: str) -> str:
        # Best-effort: a missing binary, timeout, or any other failure
        # degrades to an empty section rather than aborting the agent.
        try:
            proc = subprocess.run(
                ["poc-memory", command],
                capture_output=True, text=True, timeout=10
            )
        except Exception:
            return ""
        return proc.stdout.strip()

    graph = run_poc("graph")    # graph summary (top hubs)
    status = run_poc("status")  # recent nodes

    return f"Graph (top hubs):\n{graph}\n\nStatus:\n{status}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_semantic_keys() -> list[str]:
    """Get all semantic memory keys from the store."""
    # Imported lazily so loading this module never requires store_helpers.
    from store_helpers import get_semantic_keys as fetch_keys
    return fetch_keys()
|
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_prompt(entry_text: str, conversation: str,
                 memory_nodes: str, semantic_keys: list[str],
                 grep_line: int) -> str:
    """Build the prompt for Sonnet.

    entry_text:    the journal entry being enriched.
    conversation:  transcript formatted with L<n> line markers.
    memory_nodes:  graph/status text from get_memory_nodes().
    semantic_keys: candidate link targets; only the first 200 are sent.
    grep_line:     grep-based source-line hint (0 = no match).

    Returns the full prompt string; the model is asked to reply with a
    single JSON object (source span, links, missed insights, tags).
    """
    # One bullet per key; capped at 200 to bound prompt size.
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])

    return f"""You are a memory agent for an AI named ProofOfConcept. A journal entry
was just written. Your job is to enrich it by finding its exact source in the
conversation and linking it to semantic memory.

## Task 1: Find exact source

The journal entry below was written during or after a conversation. Find the
exact region of the conversation it refers to — the exchange where the topic
was discussed. Return the start and end line numbers.

The grep-based approximation placed it near line {grep_line} (0 = no match).
Use that as a hint but find the true boundaries.

## Task 2: Propose semantic links

Which existing semantic memory nodes should this journal entry be linked to?
Look for:
- Concepts discussed in the entry
- Skills/patterns demonstrated
- People mentioned
- Projects or subsystems involved
- Emotional themes

Each link should be bidirectional — the entry documents WHEN something happened,
the semantic node documents WHAT it is. Together they let you traverse:
"What was I doing on this day?" ↔ "When did I learn about X?"

## Task 3: Spot missed insights

Read the conversation around the journal entry. Is there anything worth
capturing that the entry missed? A pattern, a decision, an insight, something
Kent said that's worth remembering? Be selective — only flag genuinely valuable
things.

## Output format (JSON)

Return ONLY a JSON object:
```json
{{
"source_start": 1234,
"source_end": 1256,
"links": [
{{"target": "memory-key#section", "reason": "why this link exists"}}
],
"missed_insights": [
{{"text": "insight text", "suggested_key": "where it belongs"}}
],
"temporal_tags": ["2026-02-28", "topology-metrics", "poc-memory"]
}}
```

For links, use existing keys from the semantic memory list below. If nothing
fits, suggest a new key with a NOTE prefix: "NOTE:new-topic-name".

---

## Journal entry

{entry_text}

---

## Semantic memory nodes (available link targets)

{keys_text}

---

## Memory graph

{memory_nodes}

---

## Full conversation (with line numbers)

{conversation}
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def call_sonnet(prompt: str) -> dict:
    """Call Sonnet via the call-sonnet.sh wrapper and parse the JSON reply.

    The prompt is written to a temp file (the wrapper takes a file path)
    and the wrapper is run with CLAUDECODE stripped from the environment.
    The first {...} span in stdout is parsed as JSON.

    Always returns a dict; every failure mode (non-zero exit, timeout,
    empty output, unparseable JSON, missing wrapper) is reported as
    {"error": "..."} rather than raised.
    """
    import tempfile

    # Strip CLAUDECODE so the CLI doesn't believe it is nested in a session.
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)

    # Write prompt to temp file — avoids Python subprocess pipe issues.
    # encoding='utf-8' is explicit: the prompt contains non-ASCII (em
    # dashes, ↔) and the locale default encoding may not accept it.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     encoding='utf-8',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    output = ""  # pre-bind so except clauses can safely reference it
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")

        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )

        # Surface wrapper failures instead of parsing possibly-junk stdout.
        if result.returncode != 0:
            return {"error": f"Wrapper exited {result.returncode}. "
                             f"stderr: {result.stderr[:500]}"}

        output = result.stdout.strip()
        if not output:
            return {"error": f"Empty response. stderr: {result.stderr[:500]}"}

        # Extract JSON from response (might be wrapped in markdown)
        json_match = re.search(r'\{[\s\S]*\}', output)
        if json_match:
            return json.loads(json_match.group())
        return {"error": f"No JSON found in response: {output[:500]}"}

    except subprocess.TimeoutExpired:
        return {"error": "Sonnet call timed out after 300s"}
    except json.JSONDecodeError as e:
        return {"error": f"JSON parse error: {e}. Output: {output[:500]}"}
    except Exception as e:
        return {"error": str(e)}
    finally:
        # The temp file is ours to clean up regardless of outcome.
        os.unlink(prompt_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_result(entry_text: str, jsonl_path: str, result: dict):
    """Save agent results for pickup by poc-memory.

    Writes a timestamped JSON file into RESULTS_DIR and returns its path.
    Only the first 500 chars of the entry text are kept in the record.
    """
    stamp = time.strftime("%Y%m%dT%H%M%S")
    out_path = RESULTS_DIR / f"{stamp}.json"

    payload = {
        "timestamp": stamp,
        "jsonl_path": jsonl_path,
        "entry_text": entry_text[:500],  # keep the record compact
        "agent_result": result,
    }
    out_path.write_text(json.dumps(payload, indent=2))

    return out_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def apply_links(result: dict):
    """Apply proposed links via poc-memory."""
    for entry in result.get("links", []):
        dest = entry.get("target", "")
        # Skip empty targets and NOTE: placeholders (new-key suggestions).
        if not dest or dest.startswith("NOTE:"):
            continue
        # For now, just log — we'll wire this up when poc-memory
        # has a link-from-agent command
        print(f" LINK → {dest}: {entry.get('reason', '')}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Entry point: parse args, gather context, run the agent, save output."""
    argv = sys.argv
    if len(argv) < 3:
        print(f"Usage: {argv[0]} JSONL_PATH ENTRY_TEXT [GREP_LINE]",
              file=sys.stderr)
        sys.exit(1)

    jsonl_path, entry_text = argv[1], argv[2]
    grep_line = int(argv[3]) if len(argv) > 3 else 0

    if not os.path.isfile(jsonl_path):
        print(f"JSONL not found: {jsonl_path}", file=sys.stderr)
        sys.exit(1)

    # Stage 1: pull the transcript out of the session JSONL.
    print(f"Extracting conversation from {jsonl_path}...")
    messages = extract_conversation(jsonl_path)
    conversation = format_conversation(messages)
    print(f" {len(messages)} messages, {len(conversation):,} chars")

    # Stage 2: memory context for link proposals.
    print("Getting memory context...")
    memory_nodes = get_memory_nodes()
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")

    # Stage 3: assemble the prompt.
    print("Building prompt...")
    prompt = build_prompt(entry_text, conversation, memory_nodes,
                          semantic_keys, grep_line)
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")

    # Stage 4: model call.
    print("Calling Sonnet...")
    result = call_sonnet(prompt)

    if "error" in result:
        print(f" Error: {result['error']}", file=sys.stderr)
    else:
        source = f"L{result.get('source_start', '?')}-L{result.get('source_end', '?')}"
        n_links = len(result.get("links", []))
        n_insights = len(result.get("missed_insights", []))
        print(f" Source: {source}")
        print(f" Links: {n_links}")
        print(f" Missed insights: {n_insights}")
        apply_links(result)

    # Persist even error results so failures can be inspected later.
    result_file = save_result(entry_text, jsonl_path, result)
    print(f" Results saved: {result_file}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point — spawned by poc-journal after each journal write.
if __name__ == "__main__":
    main()
|