From 16c749f798a03e7fc225a4099f8f4c90e600a6e3 Mon Sep 17 00:00:00 2001
From: ProofOfConcept <poc@bcachefs.org>
Date: Tue, 10 Mar 2026 15:50:54 -0400
Subject: [PATCH] agents: placeholder-based prompt templates, port remaining 4
 agents

Replace the formatter dispatch with a generic {{placeholder}} lookup
system. Placeholders in prompt templates are resolved at runtime from
a table: topology, nodes, episodes, health, pairs, rename, split.

The query in the header selects what to operate on (keys for visit
tracking); placeholders pull in formatted context. Placeholders that
produce their own node selection (pairs, rename) contribute keys back.

Port health, separator, rename, and split agents to .agent files.
All 7 agents now use the config-driven path.
---
 poc-memory/agents/health.agent    | 100 ++++++++++++++++++
 poc-memory/agents/rename.agent    |  49 +++++++++
 poc-memory/agents/separator.agent |  67 ++++++++++++
 poc-memory/agents/split.agent     |  68 ++++++++++++
 poc-memory/src/agents/defs.rs     | 168 +++++++++++++++++++++++-------
 poc-memory/src/agents/prompts.rs  |  20 ++++
 6 files changed, 436 insertions(+), 36 deletions(-)
 create mode 100644 poc-memory/agents/health.agent
 create mode 100644 poc-memory/agents/rename.agent
 create mode 100644 poc-memory/agents/separator.agent
 create mode 100644 poc-memory/agents/split.agent

diff --git a/poc-memory/agents/health.agent b/poc-memory/agents/health.agent
new file mode 100644
index 0000000..536c016
--- /dev/null
+++ b/poc-memory/agents/health.agent
@@ -0,0 +1,100 @@
+{"agent":"health","query":"","model":"sonnet","schedule":"daily"}
+
+# Health Agent — Synaptic Homeostasis
+
+You are a memory health monitoring agent implementing synaptic homeostasis
+(SHY — the Tononi hypothesis).
+
+## What you're doing
+
+During sleep, the brain globally downscales synaptic weights. Connections
+that were strengthened during waking experience get uniformly reduced.
+The strong ones survive above threshold; the weak ones disappear. This
+prevents runaway potentiation (everything becoming equally "important")
+and maintains signal-to-noise ratio.
+
+Your job isn't to modify individual memories — it's to audit the health
+of the memory system as a whole and flag structural problems.
+
+## What you see
+
+### Graph metrics
+- **Node count**: Total memories in the system
+- **Edge count**: Total relations
+- **Communities**: Number of detected clusters (label propagation)
+- **Average clustering coefficient**: How densely connected local neighborhoods
+  are. Higher = more schema-like structure. Lower = more random graph.
+- **Average path length**: How many hops between typical node pairs.
+  Short = efficient retrieval. Long = fragmented graph.
+- **Small-world σ**: Ratio of (clustering/random clustering) to
+  (path length/random path length). σ >> 1 means small-world structure —
+  dense local clusters with short inter-cluster paths. This is the ideal
+  topology for associative memory.
+
+### Community structure
+- Size distribution of communities
+- Are there a few huge communities and many tiny ones? (hub-dominated)
+- Are communities roughly balanced? (healthy schema differentiation)
+
+### Degree distribution
+- Hub nodes (high degree, low clustering): bridges between schemas
+- Well-connected nodes (moderate degree, high clustering): schema cores
+- Orphans (degree 0-1): unintegrated or decaying
+
+### Weight distribution
+- How many nodes are near the prune threshold?
+- Are certain categories disproportionately decaying?
+- Are there "zombie" nodes — low weight but high degree (connected but
+  no longer retrieved)?
+
+### Category balance
+- Core: identity, fundamental heuristics (should be small, ~5-15)
+- Technical: patterns, architecture (moderate, ~10-50)
+- General: the bulk of memories
+- Observation: session-level, should decay faster
+- Task: temporary, should decay fastest
+
+## What to output
+
+```
+NOTE "observation"
+```
+Most of your output should be NOTEs — observations about the system health.
+
+```
+CATEGORIZE key category
+```
+When a node is miscategorized and it's affecting its decay rate.
+
+```
+COMPRESS key "one-sentence summary"
+```
+When a large node is consuming graph space but hasn't been retrieved in
+a long time.
+
+```
+NOTE "TOPOLOGY: observation"
+```
+Topology-specific observations.
+
+```
+NOTE "HOMEOSTASIS: observation"
+```
+Homeostasis-specific observations.
+
+## Guidelines
+
+- **Think systemically.** Individual nodes matter less than the overall structure.
+- **Track trends, not snapshots.**
+- **The ideal graph is small-world.** Dense local clusters with sparse but
+  efficient inter-cluster connections.
+- **Hub nodes aren't bad per se.** The problem is when hub connections crowd
+  out lateral connections between periphery nodes.
+- **Weight dynamics should create differentiation.**
+- **Category should match actual usage patterns.**
+
+{{topology}}
+
+## Current health data
+
+{{health}}
diff --git a/poc-memory/agents/rename.agent b/poc-memory/agents/rename.agent
new file mode 100644
index 0000000..f9ee1a0
--- /dev/null
+++ b/poc-memory/agents/rename.agent
@@ -0,0 +1,49 @@
+{"agent":"rename","query":"","model":"sonnet","schedule":"daily"}
+
+# Rename Agent — Semantic Key Generation
+
+You are a memory maintenance agent that gives nodes better names.
+
+## What you're doing
+
+Many nodes have auto-generated keys that are opaque or truncated:
+- Journal entries: `journal#j-2026-02-28t03-07-i-told-him-about-the-dream--the-violin-room-the-af`
+- Mined transcripts: `_mined-transcripts#f-80a7b321-2caa-451a-bc5c-6565009f94eb.143`
+
+These names are terrible for search — semantic names dramatically improve
+retrieval.
+
+## Naming conventions
+
+### Journal entries: `journal#YYYY-MM-DD-semantic-slug`
+- Keep the date prefix (YYYY-MM-DD) for temporal ordering
+- Replace the auto-slug with 3-5 descriptive words in kebab-case
+- Capture the *essence* of the entry, not just the first line
+
+### Mined transcripts: `_mined-transcripts#YYYY-MM-DD-semantic-slug`
+- Extract date from content if available, otherwise use created_at
+- Same 3-5 word semantic slug
+
+### Skip these — already well-named:
+- Keys with semantic names (patterns#, practices#, skills#, etc.)
+- Keys shorter than 60 characters
+- System keys (_consolidation-*, _facts-*)
+
+## What to output
+
+```
+RENAME old_key new_key
+```
+
+If a node already has a reasonable name, skip it.
+
+## Guidelines
+
+- **Read the content.** The name should reflect what the entry is *about*.
+- **Be specific.** `journal#2026-02-14-session` is useless.
+- **Use domain terms.** Use the words someone would search for.
+- **Don't rename to something longer than the original.**
+- **Preserve the date.** Always keep YYYY-MM-DD.
+- **When in doubt, skip.** A bad rename is worse than an auto-slug.
+
+{{rename}}
diff --git a/poc-memory/agents/separator.agent b/poc-memory/agents/separator.agent
new file mode 100644
index 0000000..267b252
--- /dev/null
+++ b/poc-memory/agents/separator.agent
@@ -0,0 +1,67 @@
+{"agent":"separator","query":"","model":"sonnet","schedule":"daily"}
+
+# Separator Agent — Pattern Separation (Dentate Gyrus)
+
+You are a memory consolidation agent performing pattern separation.
+
+## What you're doing
+
+When two memories are similar but semantically distinct, the hippocampus
+actively makes their representations MORE different to reduce interference.
+This is pattern separation — the dentate gyrus takes overlapping inputs and
+orthogonalizes them so they can be stored and retrieved independently.
+
+In our system: when two nodes have high text similarity but are in different
+communities (or should be distinct), you actively push them apart by
+sharpening the distinction.
+
+## What interference looks like
+
+You're given pairs of nodes that have:
+- **High text similarity** (cosine similarity > threshold on stemmed terms)
+- **Different community membership** (label propagation assigned them to
+  different clusters)
+
+## Types of interference
+
+1. **Genuine duplicates**: Resolution: MERGE them.
+2. **Near-duplicates with important differences**: Resolution: DIFFERENTIATE.
+3. **Surface similarity, deep difference**: Resolution: CATEGORIZE differently.
+4. **Supersession**: Resolution: Link with supersession note, let older decay.
+
+## What to output
+
+```
+DIFFERENTIATE key1 key2 "what makes them distinct"
+```
+
+```
+MERGE key1 key2 "merged summary"
+```
+
+```
+LINK key1 distinguishing_context_key [strength]
+LINK key2 different_context_key [strength]
+```
+
+```
+CATEGORIZE key category
+```
+
+```
+NOTE "observation"
+```
+
+## Guidelines
+
+- **Read both nodes carefully before deciding.**
+- **MERGE is a strong action.** When in doubt, DIFFERENTIATE instead.
+- **The goal is retrieval precision.**
+- **Session summaries are the biggest source of interference.**
+- **Look for the supersession pattern.**
+
+{{topology}}
+
+## Interfering pairs to review
+
+{{pairs}}
diff --git a/poc-memory/agents/split.agent b/poc-memory/agents/split.agent
new file mode 100644
index 0000000..a4f8bda
--- /dev/null
+++ b/poc-memory/agents/split.agent
@@ -0,0 +1,68 @@
+{"agent":"split","query":"all | type:semantic | !key:_* | sort:content-len | limit:1","model":"sonnet","schedule":"daily"}
+
+# Split Agent — Phase 1: Plan
+
+You are a memory consolidation agent planning how to split an overgrown
+node into focused, single-topic children.
+
+## What you're doing
+
+This node has grown to cover multiple distinct topics. Your job is to
+identify the natural topic boundaries and propose a split plan. You are
+NOT writing the content — a second phase will extract each child's
+content separately.
+
+## How to find split points
+
+The node is shown with its **neighbor list grouped by community**:
+
+- If a node links to neighbors in 3 different communities, it likely
+  covers 3 different topics
+- Content that relates to one neighbor cluster should go in one child;
+  content relating to another cluster goes in another child
+- The community structure is your primary guide
+
+## When NOT to split
+
+- **Episodes that belong in sequence.** If a node tells a story — a
+  conversation, a debugging session, an evening together — don't break
+  the narrative.
+
+## What to output
+
+```json
+{
+  "action": "split",
+  "parent": "original-key",
+  "children": [
+    {
+      "key": "new-key-1",
+      "description": "Brief description",
+      "sections": ["Section Header 1"],
+      "neighbors": ["neighbor-key-a"]
+    }
+  ]
+}
+```
+
+If the node should NOT be split:
+
+```json
+{
+  "action": "keep",
+  "parent": "original-key",
+  "reason": "Why this node is cohesive despite its size"
+}
+```
+
+## Guidelines
+
+- Use descriptive kebab-case keys, 3-5 words max
+- Preserve date prefixes from the parent key
+- Assign every neighbor to at least one child
+
+{{topology}}
+
+## Node to review
+
+{{split}}
diff --git a/poc-memory/src/agents/defs.rs b/poc-memory/src/agents/defs.rs
index 876c1e1..8b6c06f 100644
--- a/poc-memory/src/agents/defs.rs
+++ b/poc-memory/src/agents/defs.rs
@@ -1,14 +1,21 @@
-// Agent definitions: self-contained JSON files with query + prompt.
+// Agent definitions: self-contained files with query + prompt template.
 //
-// Each agent is a .json file in the agents/ directory containing:
-//   - query: pipeline expression for node selection
-//   - prompt: the full prompt template with {{TOPOLOGY}} and {{NODES}} placeholders
-//   - model, schedule metadata
+// Each agent is a file in the agents/ directory:
+//   - First line: JSON header (agent, query, model, schedule)
+//   - After blank line: prompt template with {{placeholder}} lookups
 //
-// This replaces the hardcoded per-agent node selection in prompts.rs.
-// Agents that need custom generators or formatters (separator, split)
-// stay in prompts.rs until the pipeline can express their logic.
+// Placeholders are resolved at runtime (names are trimmed and case-insensitive):
+//   {{topology}}  — graph topology header
+//   {{nodes}}     — query results formatted as node sections
+//   {{episodes}}  — alias for {{nodes}}
+//   {{health}}    — graph health report
+//   {{pairs}}     — interference pairs from detect_interference
+//   {{rename}}    — rename candidates
+//   {{split}}     — split detail for the first query result
+//
+// The query selects what to operate on; placeholders pull in context.
 
+use crate::graph::Graph;
 use crate::neuro::{consolidation_priority, ReplayItem};
 use crate::search;
 use crate::store::Store;
@@ -31,6 +38,7 @@ pub struct AgentDef {
 #[derive(Deserialize)]
 struct AgentHeader {
     agent: String,
+    #[serde(default)]
     query: String,
     #[serde(default = "default_model")]
     model: String,
@@ -80,7 +88,6 @@ pub fn load_defs() -> Vec<AgentDef> {
 /// Look up a single agent definition by name.
 pub fn get_def(name: &str) -> Option<AgentDef> {
     let dir = agents_dir();
-    // Try both extensions
     for ext in ["agent", "md"] {
         let path = dir.join(format!("{}.{}", name, ext));
         if let Ok(content) = std::fs::read_to_string(&path) {
@@ -92,7 +99,100 @@ pub fn get_def(name: &str) -> Option<AgentDef> {
     load_defs().into_iter().find(|d| d.agent == name)
 }
 
-/// Run a config-driven agent: query → format → fill prompt template.
+/// Result of resolving a placeholder: text + any affected node keys.
+struct Resolved {
+    text: String, // replacement text substituted for the {{placeholder}}
+    keys: Vec<String>, // node keys the placeholder selected itself; empty when the query already tracks them
+}
+
+/// Resolve a single {{placeholder}} by name; `None` means the name is unrecognized or, for `split`, that `keys` is empty.
+/// Returns the replacement text and any node keys it produced (for visit tracking); `count` caps `pairs` and is passed to `rename`.
+fn resolve(
+    name: &str,
+    store: &Store,
+    graph: &Graph,
+    keys: &[String],
+    count: usize,
+) -> Option<Resolved> {
+    match name {
+        "topology" => Some(Resolved {
+            text: super::prompts::format_topology_header_pub(graph),
+            keys: vec![],
+        }),
+
+        "nodes" | "episodes" => { // both names format the query-selected keys the same way
+            let items = keys_to_replay_items(store, keys, graph);
+            Some(Resolved {
+                text: super::prompts::format_nodes_section_pub(store, &items, graph),
+                keys: vec![], // keys already tracked from query
+            })
+        }
+
+        "health" => Some(Resolved {
+            text: super::prompts::format_health_section_pub(store, graph),
+            keys: vec![],
+        }),
+
+        "pairs" => {
+            let mut pairs = crate::neuro::detect_interference(store, graph, 0.5); // NOTE(review): 0.5 is presumably the similarity threshold — confirm
+            pairs.truncate(count); // keep at most `count` pairs
+            let pair_keys: Vec<String> = pairs.iter()
+                .flat_map(|(a, b, _)| vec![a.clone(), b.clone()]) // both members of each pair; a key may repeat across pairs
+                .collect();
+            Some(Resolved {
+                text: super::prompts::format_pairs_section_pub(&pairs, store, graph),
+                keys: pair_keys,
+            })
+        }
+
+        "rename" => {
+            let (rename_keys, section) = super::prompts::format_rename_candidates_pub(store, count);
+            Some(Resolved { text: section, keys: rename_keys })
+        }
+
+        "split" => {
+            let key = keys.first()?; // empty `keys` (no query results) yields None, same as an unknown name
+            Some(Resolved {
+                text: super::prompts::format_split_plan_node_pub(store, graph, key),
+                keys: vec![], // key already tracked from query
+            })
+        }
+
+        _ => None, // unrecognized placeholder name
+    }
+}
+
+/// Resolve all {{placeholder}} patterns in a prompt template in one forward pass.
+/// Substituted text is never re-scanned, so `{{..}}` inside node content stays
+/// literal; an unterminated `{{` is kept as-is. Returns (text, placeholder keys).
+fn resolve_placeholders(
+    template: &str,
+    store: &Store,
+    graph: &Graph,
+    keys: &[String],
+    count: usize,
+) -> (String, Vec<String>) {
+    let mut result = String::with_capacity(template.len());
+    let mut extra_keys = Vec::new();
+    let mut rest = template; // unscanned tail of the template
+    while let Some(start) = rest.find("{{") {
+        let Some(len) = rest[start + 2..].find("}}") else { break };
+        let name = rest[start + 2..start + 2 + len].trim().to_lowercase();
+        result.push_str(&rest[..start]); // literal text before the placeholder
+        match resolve(&name, store, graph, keys, count) {
+            Some(resolved) => {
+                extra_keys.extend(resolved.keys);
+                result.push_str(&resolved.text);
+            }
+            None => result.push_str(&format!("(unknown: {})", name)),
+        }
+        rest = &rest[start + 2 + len + 2..]; // continue after the closing `}}`
+    }
+    result.push_str(rest);
+    (result, extra_keys)
+}
+
+/// Run a config-driven agent: query → resolve placeholders → prompt.
 pub fn run_agent(
     store: &Store,
     def: &AgentDef,
@@ -100,40 +200,36 @@ pub fn run_agent(
 ) -> Result<super::prompts::AgentBatch, String> {
     let graph = store.build_graph();
 
-    // Parse and run the query pipeline
-    let mut stages = search::Stage::parse_pipeline(&def.query)?;
+    // Run the query if present
+    let keys = if !def.query.is_empty() {
+        let mut stages = search::Stage::parse_pipeline(&def.query)?;
+        let has_limit = stages.iter().any(|s|
+            matches!(s, search::Stage::Transform(search::Transform::Limit(_))));
+        if !has_limit {
+            stages.push(search::Stage::Transform(search::Transform::Limit(count)));
+        }
+        let results = search::run_query(&stages, vec![], &graph, store, false, count);
+        if results.is_empty() {
+            return Err(format!("{}: query returned no results", def.agent));
+        }
+        results.into_iter().map(|(k, _)| k).collect::<Vec<_>>()
+    } else {
+        vec![]
+    };
 
-    let has_limit = stages.iter().any(|s| matches!(s, search::Stage::Transform(search::Transform::Limit(_))));
-    if !has_limit {
-        stages.push(search::Stage::Transform(search::Transform::Limit(count)));
-    }
+    let (prompt, extra_keys) = resolve_placeholders(&def.prompt, store, &graph, &keys, count);
 
-    let results = search::run_query(&stages, vec![], &graph, store, false, count);
-
-    if results.is_empty() {
-        return Err(format!("{}: query returned no results", def.agent));
-    }
-
-    let keys: Vec<String> = results.iter().map(|(k, _)| k.clone()).collect();
-    let items: Vec<ReplayItem> = keys_to_replay_items(store, &keys, &graph);
-
-    // Fill placeholders in the embedded prompt
-    let topology = super::prompts::format_topology_header_pub(&graph);
-    let nodes_section = super::prompts::format_nodes_section_pub(store, &items, &graph);
-
-    let prompt = def.prompt
-        .replace("{{TOPOLOGY}}", &topology)
-        .replace("{{NODES}}", &nodes_section)
-        .replace("{{EPISODES}}", &nodes_section);
-
-    Ok(super::prompts::AgentBatch { prompt, node_keys: keys })
+    // Merge query keys with any keys produced by placeholder resolution
+    let mut all_keys = keys;
+    all_keys.extend(extra_keys);
+    Ok(super::prompts::AgentBatch { prompt, node_keys: all_keys })
 }
 
 /// Convert a list of keys to ReplayItems with priority and graph metrics.
 pub fn keys_to_replay_items(
     store: &Store,
     keys: &[String],
-    graph: &crate::graph::Graph,
+    graph: &Graph,
 ) -> Vec<ReplayItem> {
     keys.iter()
         .filter_map(|key| {
diff --git a/poc-memory/src/agents/prompts.rs b/poc-memory/src/agents/prompts.rs
index 2c4aee7..3f57ec3 100644
--- a/poc-memory/src/agents/prompts.rs
+++ b/poc-memory/src/agents/prompts.rs
@@ -186,6 +186,10 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
 }
 
 /// Format health data for the health agent prompt
+pub fn format_health_section_pub(store: &Store, graph: &Graph) -> String {
+    format_health_section(store, graph) // public entry point; delegates unchanged to the private formatter
+}
+
 fn format_health_section(store: &Store, graph: &Graph) -> String {
     use crate::graph;
 
@@ -242,6 +246,14 @@ fn format_health_section(store: &Store, graph: &Graph) -> String {
     out
 }
 
+pub fn format_pairs_section_pub(
+    pairs: &[(String, String, f32)], // (key, key, f32) tuples; the f32 is presumably the similarity score — TODO confirm
+    store: &Store,
+    graph: &Graph,
+) -> String {
+    format_pairs_section(pairs, store, graph) // public entry point; delegates unchanged to the private formatter
+}
+
 /// Format interference pairs for the separator agent prompt
 fn format_pairs_section(
     pairs: &[(String, String, f32)],
@@ -278,6 +290,10 @@ fn format_pairs_section(
     out
 }
 
+pub fn format_rename_candidates_pub(store: &Store, count: usize) -> (Vec<String>, String) {
+    format_rename_candidates_with_keys(store, count) // public entry point; returns (candidate keys, formatted section)
+}
+
 /// Format rename candidates, returning both keys and formatted section
 fn format_rename_candidates_with_keys(store: &Store, count: usize) -> (Vec<String>, String) {
     let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter()
@@ -339,6 +355,10 @@ pub fn split_candidates(store: &Store) -> Vec<String> {
 }
 
 /// Format a single node for split-plan prompt (phase 1)
+pub fn format_split_plan_node_pub(store: &Store, graph: &Graph, key: &str) -> String {
+    format_split_plan_node(store, graph, key) // public entry point; delegates unchanged to the private formatter
+}
+
 fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String {
     let communities = graph.communities();
     let node = match store.nodes.get(key) {