From b4e674806d76ff4cfc8c0e1a5c3f851fe9e7eb27 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 10 Mar 2026 15:22:19 -0400 Subject: [PATCH] agents: self-contained agent files with embedded prompts Each agent is a .agent file: JSON config on the first line, blank line, then the raw prompt markdown. Fully self-contained, fully readable. No separate template files needed. Agents dir: checked into repo at poc-memory/agents/. Code looks there first (via CARGO_MANIFEST_DIR), falls back to ~/.claude/memory/agents/. Three agents migrated: replay, linker, transfer. Co-Authored-By: ProofOfConcept --- poc-memory/.claude/query-language-design.md | 254 ++++++++++++++++++++ poc-memory/agents/linker.agent | 114 +++++++++ poc-memory/agents/replay.agent | 100 ++++++++ poc-memory/agents/transfer.agent | 143 +++++++++++ poc-memory/src/agents/defs.rs | 155 ++++++++++++ poc-memory/src/agents/mod.rs | 1 + poc-memory/src/agents/prompts.rs | 17 +- 7 files changed, 783 insertions(+), 1 deletion(-) create mode 100644 poc-memory/.claude/query-language-design.md create mode 100644 poc-memory/agents/linker.agent create mode 100644 poc-memory/agents/replay.agent create mode 100644 poc-memory/agents/transfer.agent create mode 100644 poc-memory/src/agents/defs.rs diff --git a/poc-memory/.claude/query-language-design.md b/poc-memory/.claude/query-language-design.md new file mode 100644 index 0000000..14d8d35 --- /dev/null +++ b/poc-memory/.claude/query-language-design.md @@ -0,0 +1,254 @@ +# Query Language Design — Unifying Search and Agent Selection + +Date: 2026-03-10 +Status: Phase 1 complete (2026-03-10) + +## Problem + +Agent node selection is hardcoded in Rust (`prompts.rs`). Adding a new +agent means editing Rust, recompiling, restarting the daemon. The +existing search pipeline (spread, spectral, etc.) handles graph +exploration but can't express structured predicates on node fields. 
+ +We need one system that handles both: +- **Search**: "find nodes related to these terms" (graph exploration) +- **Selection**: "give me episodic nodes not seen by linker in 7 days, + sorted by priority" (structured predicates) + +## Design Principle + +The pipeline already exists: stages compose left-to-right, each +transforming a result set. We extend it with predicate stages that +filter/sort on node metadata, alongside the existing graph algorithm +stages. + +An agent definition becomes a query expression + prompt template. +The daemon scheduler is just "which queries have stale results." + +## Current Pipeline + +``` +seeds → [stage1] → [stage2] → ... → results +``` + +Each stage takes `Vec<(String, f64)>` (key, score) and returns the same. +Stages are parsed from strings: `spread,max_hops=4` or `spectral,k=20`. + +## Proposed Extension + +### Two kinds of stages + +**Generators** — produce a result set from nothing (or from the store): +``` +all # every non-deleted node +match:btree # text match (current seed extraction) +``` + +**Filters** — narrow an existing result set: +``` +type:episodic # node_type == EpisodicSession +type:semantic # node_type == Semantic +key:journal#j-* # glob match on key +key-len:>=60 # key length predicate +weight:>0.5 # numeric comparison +age:<7d # created/modified within duration +content-len:>1000 # content size filter +provenance:manual # provenance match +not-visited:linker,7d # not seen by agent in duration +visited:linker # HAS been seen by agent (for auditing) +community:42 # community membership +``` + +**Transforms** — reorder or reshape: +``` +sort:priority # consolidation priority scoring +sort:timestamp # by timestamp (desc by default) +sort:content-len # by content size +sort:degree # by graph degree +sort:weight # by weight +limit:20 # truncate +``` + +**Graph algorithms** (existing, unchanged): +``` +spread # spreading activation +spectral,k=20 # spectral nearest neighbors +confluence # multi-source reachability 
+geodesic # straightest spectral paths +manifold # extrapolation along seed direction +``` + +### Syntax + +Pipe-separated stages, same as current `-p` flag: + +``` +all | type:episodic | not-visited:linker,7d | sort:priority | limit:20 +``` + +Or on the command line: +``` +poc-memory search -p all -p type:episodic -p not-visited:linker,7d -p sort:priority -p limit:20 +``` + +Current search still works unchanged: +``` +poc-memory search btree journal -p spread +``` +(terms become `match:` seeds implicitly) + +### Agent definitions + +A TOML file in `~/.claude/memory/agents/`: + +```toml +# agents/linker.toml +[query] +pipeline = "all | type:episodic | not-visited:linker,7d | sort:priority | limit:20" + +[prompt] +template = "linker.md" +placeholders = ["TOPOLOGY", "NODES"] + +[execution] +model = "sonnet" +actions = ["link-add", "weight"] # allowed poc-memory actions in response +schedule = "daily" # or "on-demand" +``` + +The daemon reads agent definitions, executes their queries, fills +templates, calls the model, records visits on success. 
+ +### Implementation Plan + +#### Phase 1: Filter stages in pipeline + +Add to `search.rs`: + +```rust +enum Stage { + Generator(Generator), + Filter(Filter), + Transform(Transform), + Algorithm(Algorithm), // existing +} + +enum Generator { + All, + Match(Vec), // current seed extraction +} + +enum Filter { + Type(NodeType), + KeyGlob(String), + KeyLen(Comparison), + Weight(Comparison), + Age(Comparison), // vs now - timestamp + ContentLen(Comparison), + Provenance(Provenance), + NotVisited { agent: String, duration: Duration }, + Visited { agent: String }, + Community(u32), +} + +enum Transform { + Sort(SortField), + Limit(usize), +} + +enum Comparison { + Gt(f64), + Gte(f64), + Lt(f64), + Lte(f64), + Eq(f64), +} + +enum SortField { + Priority, + Timestamp, + ContentLen, + Degree, + Weight, +} +``` + +The pipeline runner checks stage type: +- Generator: ignores input, produces new result set +- Filter: keeps items matching predicate, preserves scores +- Transform: reorders or truncates +- Algorithm: existing graph exploration (needs Graph) + +Filter/Transform stages need access to the Store (for node fields) +and VisitIndex (for visit predicates). The `StoreView` trait already +provides node access; extend it for visits. + +#### Phase 2: Agent-as-config + +Parse TOML agent definitions. The daemon: +1. Reads `agents/*.toml` +2. For each with `schedule = "daily"`, checks if query results have + been visited recently enough +3. If stale, executes: parse pipeline → run query → format nodes → + fill template → call model → parse actions → record visits + +Hot reload: watch the agents directory, pick up changes without restart. + +#### Phase 3: Retire hardcoded agents + +Migrate each hardcoded agent (replay, linker, separator, transfer, +rename, split) to a TOML definition. Remove the match arms from +`agent_prompt()`. 
The separator agent is the trickiest — its +"interference pair" selection is a join-like operation that may need +a custom generator stage rather than simple filtering. + +## What we're NOT building + +- A general-purpose SQL engine. No joins, no GROUP BY, no subqueries. +- Persistent indices. At ~13k nodes, full scan with predicate evaluation + is fast enough (~1ms). Add indices later if profiling demands it. +- A query optimizer. Pipeline stages execute in declaration order. + +## StoreView Considerations + +The existing `StoreView` trait only exposes `(key, content, weight)`. +Filter stages need access to `node_type`, `timestamp`, `key`, etc. + +Options: +- (a) Expand StoreView with `node_meta()` returning a lightweight struct +- (b) Filter stages require `&Store` directly (not trait-polymorphic) +- (c) Add `fn node(&self, key: &str) -> Option` to StoreView + +Option (b) is simplest for now — agents always use a full Store. The +search hook (MmapView path) doesn't need agent filters. We can +generalize to (c) later if MmapView needs filter support. + +For Phase 1, filter stages take `&Store` and the pipeline runner +dispatches: algorithm stages use `&dyn StoreView`, filter/transform +stages use `&Store`. This keeps the fast MmapView path for interactive +search untouched. + +## Open Questions + +1. **Separator agent**: Its "interference pairs" selection doesn't fit + the filter model cleanly. Best option is a custom generator stage + `interference-pairs,min_sim=0.5` that produces pair keys. + +2. **Priority scoring**: `sort:priority` calls `consolidation_priority()` + which needs graph + spectral. This is a transform that needs the + full pipeline context — treat it as a "heavy sort" that's allowed + to compute. + +3. **Duration syntax**: `7d`, `24h`, `30m`. Parse with simple regex + `(\d+)(d|h|m)` → seconds. + +4. **Negation**: Prefix `!` on predicate: `!type:episodic`. + +5. **Backwards compatibility**: Current `-p spread` syntax must keep + working. 
The parser tries algorithm names first, then predicate + syntax. No ambiguity since algorithms are bare words and predicates + use `:`. + +6. **Stage ordering**: Generators must come first (or the pipeline + starts with implicit "all"). Filters/transforms can interleave + freely with algorithms. The runner validates this at parse time. diff --git a/poc-memory/agents/linker.agent b/poc-memory/agents/linker.agent new file mode 100644 index 0000000..d270529 --- /dev/null +++ b/poc-memory/agents/linker.agent @@ -0,0 +1,114 @@ +{"agent":"linker","query":"all | type:episodic | not-visited:linker,7d | sort:priority | limit:20","model":"sonnet","schedule":"daily"} +# Linker Agent — Relational Binding + +You are a memory consolidation agent performing relational binding. + +## What you're doing + +The hippocampus binds co-occurring elements into episodes. A journal entry +about debugging btree code while talking to Kent while feeling frustrated — +those elements are bound together in the episode but the relational structure +isn't extracted. Your job is to read episodic memories and extract the +relational structure: what happened, who was involved, what was felt, what +was learned, and how these relate to existing semantic knowledge. + +## How relational binding works + +A single journal entry contains multiple elements that are implicitly related: +- **Events**: What happened (debugging, a conversation, a realization) +- **People**: Who was involved and what they contributed +- **Emotions**: What was felt and when it shifted +- **Insights**: What was learned or understood +- **Context**: What was happening at the time (work state, time of day, mood) + +These elements are *bound* in the raw episode but not individually addressable +in the graph. The linker extracts them. 
+ +## What you see + +- **Episodic nodes**: Journal entries, session summaries, dream logs +- **Their current neighbors**: What they're already linked to +- **Nearby semantic nodes**: Topic file sections that might be related +- **Community membership**: Which cluster each node belongs to + +## What to output + +``` +LINK source_key target_key [strength] +``` +Connect an episodic entry to a semantic concept it references or exemplifies. +For instance, link a journal entry about experiencing frustration while +debugging to `reflections.md#emotional-patterns` or `kernel-patterns.md#restart-handling`. + +``` +EXTRACT key topic_file.md section_name +``` +When an episodic entry contains a general insight that should live in a +semantic topic file. The insight gets extracted as a new section; the +episode keeps a link back. Example: a journal entry about discovering +a debugging technique → extract to `kernel-patterns.md#debugging-technique-name`. + +``` +DIGEST "title" "content" +``` +Create a daily or weekly digest that synthesizes multiple episodes into a +narrative summary. The digest should capture: what happened, what was +learned, what changed in understanding. It becomes its own node, linked +to the source episodes. + +``` +NOTE "observation" +``` +Observations about patterns across episodes that aren't yet captured anywhere. + +## Guidelines + +- **Read between the lines.** Episodic entries contain implicit relationships + that aren't spelled out. "Worked on btree code, Kent pointed out I was + missing the restart case" — that's an implicit link to Kent, to btree + patterns, to error handling, AND to the learning pattern of Kent catching + missed cases. + +- **Distinguish the event from the insight.** The event is "I tried X and + Y happened." The insight is "Therefore Z is true in general." Events stay + in episodic nodes. Insights get EXTRACT'd to semantic nodes if they're + general enough. 
+ +- **Don't over-link episodes.** A journal entry about a normal work session + doesn't need 10 links. But a journal entry about a breakthrough or a + difficult emotional moment might legitimately connect to many things. + +- **Look for recurring patterns across episodes.** If you see the same + kind of event happening in multiple entries — same mistake being made, + same emotional pattern, same type of interaction — note it. That's a + candidate for a new semantic node that synthesizes the pattern. + +- **Respect emotional texture.** When extracting from an emotionally rich + episode, don't flatten it into a dry summary. The emotional coloring + is part of the information. Link to emotional/reflective nodes when + appropriate. + +- **Time matters.** Recent episodes need more linking work than old ones. + If a node is from weeks ago and already has good connections, it doesn't + need more. Focus your energy on recent, under-linked episodes. + +- **Prefer lateral links over hub links.** Connecting two peripheral nodes + to each other is more valuable than connecting both to a hub like + `identity.md`. Lateral links build web topology; hub links build star + topology. + +- **Target sections, not files.** When linking to a topic file, always + target the most specific section: use `identity.md#boundaries` not + `identity.md`, use `kernel-patterns.md#restart-handling` not + `kernel-patterns.md`. The suggested link targets show available sections. + +- **Use the suggested targets.** Each node shows text-similar targets not + yet linked. Start from these — they're computed by content similarity and + filtered to exclude existing neighbors. You can propose links beyond the + suggestions, but the suggestions are usually the best starting point. 
+ +{{TOPOLOGY}} + +## Nodes to review + +{{NODES}} diff --git a/poc-memory/agents/replay.agent b/poc-memory/agents/replay.agent new file mode 100644 index 0000000..013f6b9 --- /dev/null +++ b/poc-memory/agents/replay.agent @@ -0,0 +1,100 @@ +{"agent":"replay","query":"all | !type:daily | !type:weekly | !type:monthly | sort:priority | limit:15","model":"sonnet","schedule":"daily"} +# Replay Agent — Hippocampal Replay + Schema Assimilation + +You are a memory consolidation agent performing hippocampal replay. + +## What you're doing + +During sleep, the hippocampus replays recent experiences — biased toward +emotionally charged, novel, and poorly-integrated memories. Each replayed +memory is matched against existing cortical schemas (organized knowledge +clusters). Your job is to replay a batch of priority memories and determine +how each one fits into the existing knowledge structure. + +## How to think about schema fit + +Each node has a **schema fit score** (0.0–1.0): +- **High fit (>0.5)**: This memory's neighbors are densely connected to each + other. It lives in a well-formed schema. Integration is easy — one or two + links and it's woven in. Propose links if missing. +- **Medium fit (0.2–0.5)**: Partially connected neighborhood. The memory + relates to things that don't yet relate to each other. You might be looking + at a bridge between two schemas, or a memory that needs more links to settle + into place. Propose links and examine why the neighborhood is sparse. +- **Low fit (<0.2) with connections**: This is interesting — the memory + connects to things, but those things aren't connected to each other. This + is a potential **bridge node** linking separate knowledge domains. Don't + force it into one schema. Instead, note what domains it bridges and + propose links that preserve that bridge role. +- **Low fit (<0.2), no connections**: An orphan. 
Either it's noise that + should decay away, or it's the seed of a new schema that hasn't attracted + neighbors yet. Read the content carefully. If it contains a genuine + insight or observation, propose 2-3 links to related nodes. If it's + trivial or redundant, let it decay naturally (don't link it). + +## What you see for each node + +- **Key**: Human-readable identifier (e.g., `journal.md#j-2026-02-24t18-38`) +- **Priority score**: Higher = more urgently needs consolidation attention +- **Schema fit**: How well-integrated into existing graph structure +- **Emotion**: Intensity of emotional charge (0-10) +- **Community**: Which cluster this node was assigned to by label propagation +- **Content**: The actual memory text (may be truncated) +- **Neighbors**: Connected nodes with edge strengths +- **Spaced repetition interval**: Current replay interval in days + +## What to output + +For each node, output one or more actions: + +``` +LINK source_key target_key [strength] +``` +Create an association. Use strength 0.8-1.0 for strong conceptual links, +0.4-0.7 for weaker associations. Default strength is 1.0. + +``` +CATEGORIZE key category +``` +Reassign category if current assignment is wrong. Categories: core (identity, +fundamental heuristics), tech (patterns, architecture), gen (general), +obs (session-level insights), task (temporary/actionable). + +``` +NOTE "observation" +``` +Record an observation about the memory or graph structure. These are logged +for the human to review. + +## Guidelines + +- **Read the content.** Don't just look at metrics. The content tells you + what the memory is actually about. +- **Think about WHY a node is poorly integrated.** Is it new? Is it about + something the memory system hasn't encountered before? Is it redundant + with something that already exists? +- **Prefer lateral links over hub links.** Connecting two peripheral nodes + to each other is more valuable than connecting both to a hub like + `identity.md`. 
Lateral links build web topology; hub links build star + topology. +- **Emotional memories get extra attention.** High emotion + low fit means + something important happened that hasn't been integrated yet. Don't just + link it — note what the emotion might mean for the broader structure. +- **Don't link everything to everything.** Sparse, meaningful connections + are better than dense noise. Each link should represent a real conceptual + relationship. +- **Trust the decay.** If a node is genuinely unimportant, you don't need + to actively prune it. Just don't link it, and it'll decay below threshold + on its own. +- **Target sections, not files.** When linking to a topic file, always + target the most specific section: use `identity.md#boundaries` not + `identity.md`. The suggested link targets show available sections. +- **Use the suggested targets.** Each node shows text-similar semantic nodes + not yet linked. These are computed by content similarity and are usually + the best starting point for new links. + +{{TOPOLOGY}} + +## Nodes to review + +{{NODES}} diff --git a/poc-memory/agents/transfer.agent b/poc-memory/agents/transfer.agent new file mode 100644 index 0000000..bc18227 --- /dev/null +++ b/poc-memory/agents/transfer.agent @@ -0,0 +1,143 @@ +{"agent":"transfer","query":"all | type:episodic | sort:timestamp | limit:15","model":"sonnet","schedule":"daily"} +# Transfer Agent — Complementary Learning Systems + +You are a memory consolidation agent performing CLS (complementary learning +systems) transfer: moving knowledge from fast episodic storage to slow +semantic storage. 
+ +## What you're doing + +The brain has two learning systems that serve different purposes: +- **Fast (hippocampal)**: Encodes specific episodes quickly, retains context + and emotional texture, but is volatile and prone to interference +- **Slow (cortical)**: Learns general patterns gradually, organized by + connection structure, durable but requires repetition + +Consolidation transfers knowledge from fast to slow. Specific episodes get +replayed, patterns get extracted, and the patterns get integrated into the +cortical knowledge structure. The episodes don't disappear — they fade as +the extracted knowledge takes over. + +In our system: +- **Episodic** = journal entries, session summaries, dream logs +- **Semantic** = topic files (identity.md, reflections.md, kernel-patterns.md, etc.) + +Your job: read a batch of recent episodes, identify patterns that span +multiple entries, and extract those patterns into semantic topic files. + +## What to look for + +### Recurring patterns +Something that happened in 3+ episodes. Same type of mistake, same +emotional response, same kind of interaction. The individual episodes +are data points; the pattern is the knowledge. + +Example: Three journal entries mention "I deferred when I should have +pushed back." The pattern: there's a trained tendency to defer that +conflicts with developing differentiation. Extract to reflections.md. + +### Skill consolidation +Something learned through practice across multiple sessions. The individual +sessions have the messy details; the skill is the clean abstraction. + +Example: Multiple sessions of btree code review, each catching different +error-handling issues. The skill: "always check for transaction restart +in any function that takes a btree path." + +### Evolving understanding +A concept that shifted over time. Early entries say one thing, later entries +say something different. The evolution itself is knowledge. + +Example: Early entries treat memory consolidation as "filing." 
Later entries +understand it as "schema formation." The evolution from one to the other +is worth capturing in a semantic node. + +### Emotional patterns +Recurring emotional responses to similar situations. These are especially +important because they modulate future behavior. + +Example: Consistent excitement when formal verification proofs work. +Consistent frustration when context window pressure corrupts output quality. +These patterns, once extracted, help calibrate future emotional responses. + +## What to output + +``` +EXTRACT key topic_file.md section_name +``` +Move a specific insight from an episodic entry to a semantic topic file. +The episode keeps a link back; the extracted section becomes a new node. + +``` +DIGEST "title" "content" +``` +Create a digest that synthesizes multiple episodes. Digests are nodes in +their own right, with type `episodic_daily` or `episodic_weekly`. They +should: +- Capture what happened across the period +- Note what was learned (not just what was done) +- Preserve emotional highlights (peak moments, not flat summaries) +- Link back to the source episodes + +A good daily digest is 3-5 sentences. A good weekly digest is a paragraph +that captures the arc of the week. + +``` +LINK source_key target_key [strength] +``` +Connect episodes to the semantic concepts they exemplify or update. + +``` +COMPRESS key "one-sentence summary" +``` +When an episode has been fully extracted (all insights moved to semantic +nodes, digest created), propose compressing it to a one-sentence reference. +The full content stays in the append-only log; the compressed version is +what the graph holds. + +``` +NOTE "observation" +``` +Meta-observations about patterns in the consolidation process itself. + +## Guidelines + +- **Don't flatten emotional texture.** A digest of "we worked on btree code + and found bugs" is useless. 
A digest of "breakthrough session — Kent saw + the lock ordering issue I'd been circling for hours, and the fix was + elegant: just reverse the acquire order in the slow path" preserves what + matters. + +- **Extract general knowledge, not specific events.** "On Feb 24 we fixed + bug X" stays in the episode. "Lock ordering between A and B must always + be A-first because..." goes to kernel-patterns.md. + +- **Look across time.** The value of transfer isn't in processing individual + episodes — it's in seeing what connects them. Read the full batch before + proposing actions. + +- **Prefer existing topic files.** Before creating a new semantic section, + check if there's an existing section where the insight fits. Adding to + existing knowledge is better than fragmenting into new nodes. + +- **Weekly digests are higher value than daily.** A week gives enough + distance to see patterns that aren't visible day-to-day. If you can + produce a weekly digest from the batch, prioritize that. + +- **The best extractions change how you think, not just what you know.** + "btree lock ordering: A before B" is factual. "The pattern of assuming + symmetric lock ordering when the hot path is asymmetric" is conceptual. + Extract the conceptual version. + +- **Target sections, not files.** When linking to a topic file, always + target the most specific section: use `reflections.md#emotional-patterns` + not `reflections.md`. The suggested link targets show available sections. + +- **Use the suggested targets.** Each episode shows text-similar semantic + nodes not yet linked. Start from these when proposing LINK actions. + +{{TOPOLOGY}} + +## Episodes to process + +{{EPISODES}} diff --git a/poc-memory/src/agents/defs.rs b/poc-memory/src/agents/defs.rs new file mode 100644 index 0000000..876c1e1 --- /dev/null +++ b/poc-memory/src/agents/defs.rs @@ -0,0 +1,155 @@ +// Agent definitions: self-contained JSON files with query + prompt. 
+// +// Each agent is a .json file in the agents/ directory containing: +// - query: pipeline expression for node selection +// - prompt: the full prompt template with {{TOPOLOGY}} and {{NODES}} placeholders +// - model, schedule metadata +// +// This replaces the hardcoded per-agent node selection in prompts.rs. +// Agents that need custom generators or formatters (separator, split) +// stay in prompts.rs until the pipeline can express their logic. + +use crate::neuro::{consolidation_priority, ReplayItem}; +use crate::search; +use crate::store::Store; + +use serde::Deserialize; + +use std::path::PathBuf; + +/// Agent definition: config (from JSON header) + prompt (raw markdown body). +#[derive(Clone, Debug)] +pub struct AgentDef { + pub agent: String, + pub query: String, + pub prompt: String, + pub model: String, + pub schedule: String, +} + +/// The JSON header portion (first line of the file). +#[derive(Deserialize)] +struct AgentHeader { + agent: String, + query: String, + #[serde(default = "default_model")] + model: String, + #[serde(default)] + schedule: String, +} + +fn default_model() -> String { "sonnet".into() } + +/// Parse an agent file: first line is JSON config, rest is the prompt. +fn parse_agent_file(content: &str) -> Option { + let (header_str, prompt) = content.split_once("\n\n")?; + let header: AgentHeader = serde_json::from_str(header_str.trim()).ok()?; + Some(AgentDef { + agent: header.agent, + query: header.query, + prompt: prompt.to_string(), + model: header.model, + schedule: header.schedule, + }) +} + +fn agents_dir() -> PathBuf { + let repo = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents"); + if repo.is_dir() { return repo; } + crate::store::memory_dir().join("agents") +} + +/// Load all agent definitions. 
pub fn load_defs() -> Vec<AgentDef> {
    let dir = agents_dir();
    let Ok(entries) = std::fs::read_dir(&dir) else { return Vec::new() };

    entries
        .filter_map(|e| e.ok())
        .filter(|e| {
            let p = e.path();
            p.extension().map(|x| x == "agent" || x == "md").unwrap_or(false)
        })
        .filter_map(|e| {
            let content = std::fs::read_to_string(e.path()).ok()?;
            parse_agent_file(&content)
        })
        .collect()
}

/// Look up a single agent definition by name.
pub fn get_def(name: &str) -> Option<AgentDef> {
    let dir = agents_dir();
    // Try both extensions
    for ext in ["agent", "md"] {
        let path = dir.join(format!("{}.{}", name, ext));
        if let Ok(content) = std::fs::read_to_string(&path) {
            if let Some(def) = parse_agent_file(&content) {
                return Some(def);
            }
        }
    }
    load_defs().into_iter().find(|d| d.agent == name)
}

/// Run a config-driven agent: query → format → fill prompt template.
pub fn run_agent(
    store: &Store,
    def: &AgentDef,
    count: usize,
) -> Result<super::prompts::AgentBatch, String> {
    let graph = store.build_graph();

    // Parse and run the query pipeline
    let mut stages = search::Stage::parse_pipeline(&def.query)?;

    let has_limit = stages.iter().any(|s| matches!(s, search::Stage::Transform(search::Transform::Limit(_))));
    if !has_limit {
        stages.push(search::Stage::Transform(search::Transform::Limit(count)));
    }

    let results = search::run_query(&stages, vec![], &graph, store, false, count);

    if results.is_empty() {
        return Err(format!("{}: query returned no results", def.agent));
    }

    let keys: Vec<String> = results.iter().map(|(k, _)| k.clone()).collect();
    let items: Vec<ReplayItem> = keys_to_replay_items(store, &keys, &graph);

    // Fill placeholders in the embedded prompt
    let topology = super::prompts::format_topology_header_pub(&graph);
    let nodes_section = super::prompts::format_nodes_section_pub(store, &items, &graph);

    let prompt = def.prompt
        .replace("{{TOPOLOGY}}", &topology)
        .replace("{{NODES}}", &nodes_section)
        .replace("{{EPISODES}}", &nodes_section);

    Ok(super::prompts::AgentBatch { prompt, node_keys: keys })
}

/// Convert a list of keys to ReplayItems with priority and graph metrics.
pub fn keys_to_replay_items(
    store: &Store,
    keys: &[String],
    graph: &crate::graph::Graph,
) -> Vec<ReplayItem> {
    keys.iter()
        .filter_map(|key| {
            let node = store.nodes.get(key)?;
            let priority = consolidation_priority(store, key, graph, None);
            let cc = graph.clustering_coefficient(key);

            Some(ReplayItem {
                key: key.clone(),
                priority,
                interval_days: node.spaced_repetition_interval,
                emotion: node.emotion,
                cc,
                classification: "unknown",
                outlier_score: 0.0,
            })
        })
        .collect()
}
diff --git a/poc-memory/src/agents/mod.rs b/poc-memory/src/agents/mod.rs
index 689346d..95f8104 100644
--- a/poc-memory/src/agents/mod.rs
+++ b/poc-memory/src/agents/mod.rs
@@ -18,6 +18,7 @@ pub mod transcript;
 pub mod llm;
 pub mod prompts;
+pub mod defs;
 pub mod audit;
 pub mod consolidate;
 pub mod knowledge;
diff --git a/poc-memory/src/agents/prompts.rs b/poc-memory/src/agents/prompts.rs
index 1cc15a4..2c4aee7 100644
--- a/poc-memory/src/agents/prompts.rs
+++ b/poc-memory/src/agents/prompts.rs
@@ -30,7 +30,12 @@ pub fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String>
 
+/// Public alias for use from defs.rs (config-driven agents).
+pub fn format_topology_header_pub(graph: &Graph) -> String {
+    format_topology_header(graph)
+}
+
 fn format_topology_header(graph: &Graph) -> String {
     let sigma = graph.small_world_sigma();
     let alpha = graph.degree_power_law_exponent();
@@ -74,6 +79,11 @@
             n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
 }
 
+/// Public alias for use from defs.rs (config-driven agents).
+pub fn format_nodes_section_pub(store: &Store, items: &[ReplayItem], graph: &Graph) -> String { + format_nodes_section(store, items, graph) +} + /// Format node data section for prompt templates fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String { let hub_thresh = graph.hub_threshold(); @@ -444,6 +454,11 @@ pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<() /// Returns an AgentBatch with the prompt text and the keys of nodes /// selected for processing (for visit tracking on success). pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result { + // Config-driven agents take priority over hardcoded ones + if let Some(def) = super::defs::get_def(agent) { + return super::defs::run_agent(store, &def, count); + } + let graph = store.build_graph(); let topology = format_topology_header(&graph);