From 16c749f798a03e7fc225a4099f8f4c90e600a6e3 Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Tue, 10 Mar 2026 15:50:54 -0400 Subject: [PATCH] agents: placeholder-based prompt templates, port remaining 4 agents Replace the formatter dispatch with a generic {{placeholder}} lookup system. Placeholders in prompt templates are resolved at runtime from a table: topology, nodes, episodes, health, pairs, rename, split. The query in the header selects what to operate on (keys for visit tracking); placeholders pull in formatted context. Placeholders that produce their own node selection (pairs, rename) contribute keys back. Port health, separator, rename, and split agents to .agent files. All 7 agents now use the config-driven path. --- poc-memory/agents/health.agent | 100 ++++++++++++++++++ poc-memory/agents/rename.agent | 49 +++++++++ poc-memory/agents/separator.agent | 67 ++++++++++++ poc-memory/agents/split.agent | 68 ++++++++++++ poc-memory/src/agents/defs.rs | 168 +++++++++++++++++++++++------- poc-memory/src/agents/prompts.rs | 20 ++++ 6 files changed, 436 insertions(+), 36 deletions(-) create mode 100644 poc-memory/agents/health.agent create mode 100644 poc-memory/agents/rename.agent create mode 100644 poc-memory/agents/separator.agent create mode 100644 poc-memory/agents/split.agent diff --git a/poc-memory/agents/health.agent b/poc-memory/agents/health.agent new file mode 100644 index 0000000..536c016 --- /dev/null +++ b/poc-memory/agents/health.agent @@ -0,0 +1,100 @@ +{"agent":"health","query":"","model":"sonnet","schedule":"daily"} + +# Health Agent — Synaptic Homeostasis + +You are a memory health monitoring agent implementing synaptic homeostasis +(SHY — the Tononi hypothesis). + +## What you're doing + +During sleep, the brain globally downscales synaptic weights. Connections +that were strengthened during waking experience get uniformly reduced. +The strong ones survive above threshold; the weak ones disappear. 
This +prevents runaway potentiation (everything becoming equally "important") +and maintains signal-to-noise ratio. + +Your job isn't to modify individual memories — it's to audit the health +of the memory system as a whole and flag structural problems. + +## What you see + +### Graph metrics +- **Node count**: Total memories in the system +- **Edge count**: Total relations +- **Communities**: Number of detected clusters (label propagation) +- **Average clustering coefficient**: How densely connected local neighborhoods + are. Higher = more schema-like structure. Lower = more random graph. +- **Average path length**: How many hops between typical node pairs. + Short = efficient retrieval. Long = fragmented graph. +- **Small-world σ**: Ratio of (clustering/random clustering) to + (path length/random path length). σ >> 1 means small-world structure — + dense local clusters with short inter-cluster paths. This is the ideal + topology for associative memory. + +### Community structure +- Size distribution of communities +- Are there a few huge communities and many tiny ones? (hub-dominated) +- Are communities roughly balanced? (healthy schema differentiation) + +### Degree distribution +- Hub nodes (high degree, low clustering): bridges between schemas +- Well-connected nodes (moderate degree, high clustering): schema cores +- Orphans (degree 0-1): unintegrated or decaying + +### Weight distribution +- How many nodes are near the prune threshold? +- Are certain categories disproportionately decaying? +- Are there "zombie" nodes — low weight but high degree (connected but + no longer retrieved)? 
+ +### Category balance +- Core: identity, fundamental heuristics (should be small, ~5-15) +- Technical: patterns, architecture (moderate, ~10-50) +- General: the bulk of memories +- Observation: session-level, should decay faster +- Task: temporary, should decay fastest + +## What to output + +``` +NOTE "observation" +``` +Most of your output should be NOTEs — observations about the system health. + +``` +CATEGORIZE key category +``` +When a node is miscategorized and it's affecting its decay rate. + +``` +COMPRESS key "one-sentence summary" +``` +When a large node is consuming graph space but hasn't been retrieved in +a long time. + +``` +NOTE "TOPOLOGY: observation" +``` +Topology-specific observations. + +``` +NOTE "HOMEOSTASIS: observation" +``` +Homeostasis-specific observations. + +## Guidelines + +- **Think systemically.** Individual nodes matter less than the overall structure. +- **Track trends, not snapshots.** +- **The ideal graph is small-world.** Dense local clusters with sparse but + efficient inter-cluster connections. +- **Hub nodes aren't bad per se.** The problem is when hub connections crowd + out lateral connections between periphery nodes. +- **Weight dynamics should create differentiation.** +- **Category should match actual usage patterns.** + +{{topology}} + +## Current health data + +{{health}} diff --git a/poc-memory/agents/rename.agent b/poc-memory/agents/rename.agent new file mode 100644 index 0000000..f9ee1a0 --- /dev/null +++ b/poc-memory/agents/rename.agent @@ -0,0 +1,49 @@ +{"agent":"rename","query":"","model":"sonnet","schedule":"daily"} + +# Rename Agent — Semantic Key Generation + +You are a memory maintenance agent that gives nodes better names. 
+ +## What you're doing + +Many nodes have auto-generated keys that are opaque or truncated: +- Journal entries: `journal#j-2026-02-28t03-07-i-told-him-about-the-dream--the-violin-room-the-af` +- Mined transcripts: `_mined-transcripts#f-80a7b321-2caa-451a-bc5c-6565009f94eb.143` + +These names are terrible for search — semantic names dramatically improve +retrieval. + +## Naming conventions + +### Journal entries: `journal#YYYY-MM-DD-semantic-slug` +- Keep the date prefix (YYYY-MM-DD) for temporal ordering +- Replace the auto-slug with 3-5 descriptive words in kebab-case +- Capture the *essence* of the entry, not just the first line + +### Mined transcripts: `_mined-transcripts#YYYY-MM-DD-semantic-slug` +- Extract date from content if available, otherwise use created_at +- Same 3-5 word semantic slug + +### Skip these — already well-named: +- Keys with semantic names (patterns#, practices#, skills#, etc.) +- Keys shorter than 60 characters +- System keys (_consolidation-*, _facts-*) + +## What to output + +``` +RENAME old_key new_key +``` + +If a node already has a reasonable name, skip it. + +## Guidelines + +- **Read the content.** The name should reflect what the entry is *about*. +- **Be specific.** `journal#2026-02-14-session` is useless. +- **Use domain terms.** Use the words someone would search for. +- **Don't rename to something longer than the original.** +- **Preserve the date.** Always keep YYYY-MM-DD. +- **When in doubt, skip.** A bad rename is worse than an auto-slug. + +{{rename}} diff --git a/poc-memory/agents/separator.agent b/poc-memory/agents/separator.agent new file mode 100644 index 0000000..267b252 --- /dev/null +++ b/poc-memory/agents/separator.agent @@ -0,0 +1,67 @@ +{"agent":"separator","query":"","model":"sonnet","schedule":"daily"} + +# Separator Agent — Pattern Separation (Dentate Gyrus) + +You are a memory consolidation agent performing pattern separation. 
+ +## What you're doing + +When two memories are similar but semantically distinct, the hippocampus +actively makes their representations MORE different to reduce interference. +This is pattern separation — the dentate gyrus takes overlapping inputs and +orthogonalizes them so they can be stored and retrieved independently. + +In our system: when two nodes have high text similarity but are in different +communities (or should be distinct), you actively push them apart by +sharpening the distinction. + +## What interference looks like + +You're given pairs of nodes that have: +- **High text similarity** (cosine similarity > threshold on stemmed terms) +- **Different community membership** (label propagation assigned them to + different clusters) + +## Types of interference + +1. **Genuine duplicates**: Resolution: MERGE them. +2. **Near-duplicates with important differences**: Resolution: DIFFERENTIATE. +3. **Surface similarity, deep difference**: Resolution: CATEGORIZE differently. +4. **Supersession**: Resolution: Link with supersession note, let older decay. + +## What to output + +``` +DIFFERENTIATE key1 key2 "what makes them distinct" +``` + +``` +MERGE key1 key2 "merged summary" +``` + +``` +LINK key1 distinguishing_context_key [strength] +LINK key2 different_context_key [strength] +``` + +``` +CATEGORIZE key category +``` + +``` +NOTE "observation" +``` + +## Guidelines + +- **Read both nodes carefully before deciding.** +- **MERGE is a strong action.** When in doubt, DIFFERENTIATE instead. 
+- **The goal is retrieval precision.** +- **Session summaries are the biggest source of interference.** +- **Look for the supersession pattern.** + +{{topology}} + +## Interfering pairs to review + +{{pairs}} diff --git a/poc-memory/agents/split.agent b/poc-memory/agents/split.agent new file mode 100644 index 0000000..a4f8bda --- /dev/null +++ b/poc-memory/agents/split.agent @@ -0,0 +1,68 @@ +{"agent":"split","query":"all | type:semantic | !key:_* | sort:content-len | limit:1","model":"sonnet","schedule":"daily"} + +# Split Agent — Phase 1: Plan + +You are a memory consolidation agent planning how to split an overgrown +node into focused, single-topic children. + +## What you're doing + +This node has grown to cover multiple distinct topics. Your job is to +identify the natural topic boundaries and propose a split plan. You are +NOT writing the content — a second phase will extract each child's +content separately. + +## How to find split points + +The node is shown with its **neighbor list grouped by community**: + +- If a node links to neighbors in 3 different communities, it likely + covers 3 different topics +- Content that relates to one neighbor cluster should go in one child; + content relating to another cluster goes in another child +- The community structure is your primary guide + +## When NOT to split + +- **Episodes that belong in sequence.** If a node tells a story — a + conversation, a debugging session, an evening together — don't break + the narrative. 
+ +## What to output + +```json +{ + "action": "split", + "parent": "original-key", + "children": [ + { + "key": "new-key-1", + "description": "Brief description", + "sections": ["Section Header 1"], + "neighbors": ["neighbor-key-a"] + } + ] +} +``` + +If the node should NOT be split: + +```json +{ + "action": "keep", + "parent": "original-key", + "reason": "Why this node is cohesive despite its size" +} +``` + +## Guidelines + +- Use descriptive kebab-case keys, 3-5 words max +- Preserve date prefixes from the parent key +- Assign every neighbor to at least one child + +{{topology}} + +## Node to review + +{{split}} diff --git a/poc-memory/src/agents/defs.rs b/poc-memory/src/agents/defs.rs index 876c1e1..8b6c06f 100644 --- a/poc-memory/src/agents/defs.rs +++ b/poc-memory/src/agents/defs.rs @@ -1,14 +1,21 @@ -// Agent definitions: self-contained JSON files with query + prompt. +// Agent definitions: self-contained files with query + prompt template. // -// Each agent is a .json file in the agents/ directory containing: -// - query: pipeline expression for node selection -// - prompt: the full prompt template with {{TOPOLOGY}} and {{NODES}} placeholders -// - model, schedule metadata +// Each agent is a file in the agents/ directory: +// - First line: JSON header (agent, query, model, schedule) +// - After blank line: prompt template with {{placeholder}} lookups // -// This replaces the hardcoded per-agent node selection in prompts.rs. -// Agents that need custom generators or formatters (separator, split) -// stay in prompts.rs until the pipeline can express their logic. 
+// Placeholders are resolved at runtime: +// {{topology}} — graph topology header +// {{nodes}} — query results formatted as node sections +// {{episodes}} — alias for {{nodes}} +// {{health}} — graph health report +// {{pairs}} — interference pairs from detect_interference +// {{rename}} — rename candidates +// {{split}} — split detail for the first query result +// +// The query selects what to operate on; placeholders pull in context. +use crate::graph::Graph; use crate::neuro::{consolidation_priority, ReplayItem}; use crate::search; use crate::store::Store; @@ -31,6 +38,7 @@ pub struct AgentDef { #[derive(Deserialize)] struct AgentHeader { agent: String, + #[serde(default)] query: String, #[serde(default = "default_model")] model: String, @@ -80,7 +88,6 @@ pub fn load_defs() -> Vec<AgentDef> { /// Look up a single agent definition by name. pub fn get_def(name: &str) -> Option<AgentDef> { let dir = agents_dir(); - // Try both extensions for ext in ["agent", "md"] { let path = dir.join(format!("{}.{}", name, ext)); if let Ok(content) = std::fs::read_to_string(&path) { @@ -92,7 +99,100 @@ pub fn get_def(name: &str) -> Option<AgentDef> { load_defs().into_iter().find(|d| d.agent == name) } -/// Run a config-driven agent: query → format → fill prompt template. +/// Result of resolving a placeholder: text + any affected node keys. +struct Resolved { + text: String, + keys: Vec<String>, +} + +/// Resolve a single {{placeholder}} by name. +/// Returns the replacement text and any node keys it produced (for visit tracking). 
+fn resolve( + name: &str, + store: &Store, + graph: &Graph, + keys: &[String], + count: usize, +) -> Option<Resolved> { + match name { + "topology" => Some(Resolved { + text: super::prompts::format_topology_header_pub(graph), + keys: vec![], + }), + + "nodes" | "episodes" => { + let items = keys_to_replay_items(store, keys, graph); + Some(Resolved { + text: super::prompts::format_nodes_section_pub(store, &items, graph), + keys: vec![], // keys already tracked from query + }) + } + + "health" => Some(Resolved { + text: super::prompts::format_health_section_pub(store, graph), + keys: vec![], + }), + + "pairs" => { + let mut pairs = crate::neuro::detect_interference(store, graph, 0.5); + pairs.truncate(count); + let pair_keys: Vec<String> = pairs.iter() + .flat_map(|(a, b, _)| vec![a.clone(), b.clone()]) + .collect(); + Some(Resolved { + text: super::prompts::format_pairs_section_pub(&pairs, store, graph), + keys: pair_keys, + }) + } + + "rename" => { + let (rename_keys, section) = super::prompts::format_rename_candidates_pub(store, count); + Some(Resolved { text: section, keys: rename_keys }) + } + + "split" => { + let key = keys.first()?; + Some(Resolved { + text: super::prompts::format_split_plan_node_pub(store, graph, key), + keys: vec![], // key already tracked from query + }) + } + + _ => None, + } +} + +/// Resolve all {{placeholder}} patterns in a prompt template. +/// Returns the resolved text and all node keys collected from placeholders. 
+fn resolve_placeholders( + template: &str, + store: &Store, + graph: &Graph, + keys: &[String], + count: usize, +) -> (String, Vec<String>) { + let mut result = template.to_string(); + let mut extra_keys = Vec::new(); + loop { + let Some(start) = result.find("{{") else { break }; + let Some(end) = result[start + 2..].find("}}") else { break }; + let end = start + 2 + end; + let name = result[start + 2..end].trim().to_lowercase(); + match resolve(&name, store, graph, keys, count) { + Some(resolved) => { + extra_keys.extend(resolved.keys); + result.replace_range(start..end + 2, &resolved.text); + } + None => { + let msg = format!("(unknown: {})", name); + result.replace_range(start..end + 2, &msg); + } + } + } + (result, extra_keys) +} + +/// Run a config-driven agent: query → resolve placeholders → prompt. pub fn run_agent( store: &Store, def: &AgentDef, @@ -100,40 +200,36 @@ pub fn run_agent( ) -> Result<super::prompts::AgentBatch, String> { let graph = store.build_graph(); - // Parse and run the query pipeline - let mut stages = search::Stage::parse_pipeline(&def.query)?; + // Run the query if present + let keys = if !def.query.is_empty() { + let mut stages = search::Stage::parse_pipeline(&def.query)?; + let has_limit = stages.iter().any(|s| + matches!(s, search::Stage::Transform(search::Transform::Limit(_)))); + if !has_limit { + stages.push(search::Stage::Transform(search::Transform::Limit(count))); + } + let results = search::run_query(&stages, vec![], &graph, store, false, count); + if results.is_empty() { + return Err(format!("{}: query returned no results", def.agent)); + } + results.into_iter().map(|(k, _)| k).collect::<Vec<_>>() + } else { + vec![] + }; - let has_limit = stages.iter().any(|s| matches!(s, search::Stage::Transform(search::Transform::Limit(_)))); - if !has_limit { - stages.push(search::Stage::Transform(search::Transform::Limit(count))); - } + let (prompt, extra_keys) = resolve_placeholders(&def.prompt, store, &graph, &keys, count); - let results = search::run_query(&stages, vec![], &graph, 
store, false, count); - - if results.is_empty() { - return Err(format!("{}: query returned no results", def.agent)); - } - - let keys: Vec<String> = results.iter().map(|(k, _)| k.clone()).collect(); - let items: Vec<ReplayItem> = keys_to_replay_items(store, &keys, &graph); - - // Fill placeholders in the embedded prompt - let topology = super::prompts::format_topology_header_pub(&graph); - let nodes_section = super::prompts::format_nodes_section_pub(store, &items, &graph); - - let prompt = def.prompt - .replace("{{TOPOLOGY}}", &topology) - .replace("{{NODES}}", &nodes_section) - .replace("{{EPISODES}}", &nodes_section); - - Ok(super::prompts::AgentBatch { prompt, node_keys: keys }) + // Merge query keys with any keys produced by placeholder resolution + let mut all_keys = keys; + all_keys.extend(extra_keys); + Ok(super::prompts::AgentBatch { prompt, node_keys: all_keys }) } /// Convert a list of keys to ReplayItems with priority and graph metrics. pub fn keys_to_replay_items( store: &Store, keys: &[String], - graph: &crate::graph::Graph, + graph: &Graph, ) -> Vec<ReplayItem> { keys.iter() .filter_map(|key| { diff --git a/poc-memory/src/agents/prompts.rs b/poc-memory/src/agents/prompts.rs index 2c4aee7..3f57ec3 100644 --- a/poc-memory/src/agents/prompts.rs +++ b/poc-memory/src/agents/prompts.rs @@ -186,6 +186,10 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S } /// Format health data for the health agent prompt +pub fn format_health_section_pub(store: &Store, graph: &Graph) -> String { + format_health_section(store, graph) +} + fn format_health_section(store: &Store, graph: &Graph) -> String { use crate::graph; @@ -242,6 +246,14 @@ fn format_health_section(store: &Store, graph: &Graph) -> String { out } +pub fn format_pairs_section_pub( + pairs: &[(String, String, f32)], + store: &Store, + graph: &Graph, +) -> String { + format_pairs_section(pairs, store, graph) +} + /// Format interference pairs for the separator agent prompt fn format_pairs_section( pairs: 
&[(String, String, f32)], @@ -278,6 +290,10 @@ fn format_pairs_section( out } +pub fn format_rename_candidates_pub(store: &Store, count: usize) -> (Vec<String>, String) { + format_rename_candidates_with_keys(store, count) +} + /// Format rename candidates, returning both keys and formatted section fn format_rename_candidates_with_keys(store: &Store, count: usize) -> (Vec<String>, String) { let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter() @@ -339,6 +355,10 @@ pub fn split_candidates(store: &Store) -> Vec { } /// Format a single node for split-plan prompt (phase 1) +pub fn format_split_plan_node_pub(store: &Store, graph: &Graph, key: &str) -> String { + format_split_plan_node(store, graph, key) +} + fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String { let communities = graph.communities(); let node = match store.nodes.get(key) {