consciousness/poc-memory/src/agents/defs.rs
Kent Overstreet 53b63ab45b seen_recent: cap at 20 roots total across both seen sets
Budget of 20 roots split between current and prev. Current gets
priority, prev fills the remainder. Prevents flooding the agent
with hundreds of previously surfaced keys.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 14:28:03 -04:00

666 lines
25 KiB
Rust

// Agent definitions: self-contained files with query + prompt template.
//
// Each agent is a file in the agents/ directory:
// - First line: JSON header (agent, query, model, schedule)
// - After blank line: prompt template with {{placeholder}} lookups
//
// Placeholders are resolved at runtime:
// {{topology}} — graph topology header
// {{nodes}} — query results formatted as node sections
// {{episodes}} — alias for {{nodes}}
// {{health}} — graph health report
// {{pairs}} — interference pairs from detect_interference
// {{rename}} — rename candidates
// {{split}} — split detail for the first query result
//
// The query selects what to operate on; placeholders pull in context.
use crate::graph::Graph;
use crate::neuro::{consolidation_priority, ReplayItem};
use crate::search;
use crate::store::Store;
use serde::Deserialize;
use std::path::PathBuf;
/// Agent definition: config (from JSON header) + prompt (raw markdown body).
#[derive(Clone, Debug)]
pub struct AgentDef {
pub agent: String,
pub query: String,
pub prompt: String,
pub model: String,
pub schedule: String,
pub tools: Vec<String>,
pub count: Option<usize>,
pub chunk_size: Option<usize>,
pub chunk_overlap: Option<usize>,
}
/// The JSON header portion (first line of the file).
#[derive(Deserialize)]
struct AgentHeader {
agent: String,
#[serde(default)]
query: String,
#[serde(default = "default_model")]
model: String,
#[serde(default)]
schedule: String,
#[serde(default)]
tools: Vec<String>,
/// Number of seed nodes / conversation fragments (overrides --count)
#[serde(default)]
count: Option<usize>,
/// Max size of conversation chunks in bytes (default 50000)
#[serde(default)]
chunk_size: Option<usize>,
/// Overlap between chunks in bytes (default 10000)
#[serde(default)]
chunk_overlap: Option<usize>,
}
fn default_model() -> String { "sonnet".into() }
/// Parse an agent file: first line is JSON config, rest is the prompt.
fn parse_agent_file(content: &str) -> Option<AgentDef> {
let (first_line, rest) = content.split_once('\n')?;
let header: AgentHeader = serde_json::from_str(first_line.trim()).ok()?;
// Skip optional blank line between header and prompt body
let prompt = rest.strip_prefix('\n').unwrap_or(rest);
Some(AgentDef {
agent: header.agent,
query: header.query,
prompt: prompt.to_string(),
model: header.model,
schedule: header.schedule,
tools: header.tools,
count: header.count,
chunk_size: header.chunk_size,
chunk_overlap: header.chunk_overlap,
})
}
fn agents_dir() -> PathBuf {
let repo = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents");
if repo.is_dir() { return repo; }
crate::store::memory_dir().join("agents")
}
/// Load all agent definitions.
pub fn load_defs() -> Vec<AgentDef> {
let dir = agents_dir();
let Ok(entries) = std::fs::read_dir(&dir) else { return Vec::new() };
entries
.filter_map(|e| e.ok())
.filter(|e| {
let p = e.path();
p.extension().map(|x| x == "agent" || x == "md").unwrap_or(false)
})
.filter_map(|e| {
let content = std::fs::read_to_string(e.path()).ok()?;
parse_agent_file(&content)
})
.collect()
}
/// Look up a single agent definition by name.
pub fn get_def(name: &str) -> Option<AgentDef> {
let dir = agents_dir();
for ext in ["agent", "md"] {
let path = dir.join(format!("{}.{}", name, ext));
if let Ok(content) = std::fs::read_to_string(&path)
&& let Some(def) = parse_agent_file(&content) {
return Some(def);
}
}
load_defs().into_iter().find(|d| d.agent == name)
}
/// Result of resolving a placeholder: text + any affected node keys.
struct Resolved {
text: String,
keys: Vec<String>,
}
/// Resolve a single {{placeholder}} by name.
/// Returns the replacement text and any node keys it produced (for visit tracking).
fn resolve(
name: &str,
store: &Store,
graph: &Graph,
keys: &[String],
count: usize,
) -> Option<Resolved> {
match name {
"topology" => Some(Resolved {
text: super::prompts::format_topology_header(graph),
keys: vec![],
}),
"nodes" | "episodes" => {
let items = keys_to_replay_items(store, keys, graph);
Some(Resolved {
text: super::prompts::format_nodes_section(store, &items, graph),
keys: vec![], // keys already tracked from query
})
}
"health" => Some(Resolved {
text: super::prompts::format_health_section(store, graph),
keys: vec![],
}),
"pairs" => {
let mut pairs = crate::neuro::detect_interference(store, graph, 0.5);
pairs.truncate(count);
let pair_keys: Vec<String> = pairs.iter()
.flat_map(|(a, b, _)| vec![a.clone(), b.clone()])
.collect();
Some(Resolved {
text: super::prompts::format_pairs_section(&pairs, store, graph),
keys: pair_keys,
})
}
"rename" => {
let (rename_keys, section) = super::prompts::format_rename_candidates(store, count);
Some(Resolved { text: section, keys: rename_keys })
}
"split" => {
let key = keys.first()?;
Some(Resolved {
text: super::prompts::format_split_plan_node(store, graph, key),
keys: vec![], // key already tracked from query
})
}
// seed — render output for each seed node (content + deduped links)
"seed" => {
let mut text = String::new();
let mut result_keys = Vec::new();
for key in keys {
if let Some(rendered) = crate::cli::node::render_node(store, key) {
if !text.is_empty() { text.push_str("\n\n---\n\n"); }
text.push_str(&format!("## {}\n\n{}", key, rendered));
result_keys.push(key.clone());
}
}
if text.is_empty() { return None; }
Some(Resolved { text, keys: result_keys })
}
"organize" => {
// Show seed nodes with their neighbors for exploratory organizing
use crate::store::NodeType;
// Helper: shell-quote keys containing #
let sq = |k: &str| -> String {
if k.contains('#') { format!("'{}'", k) } else { k.to_string() }
};
let mut text = format!("### Seed nodes ({} starting points)\n\n", keys.len());
let mut result_keys = Vec::new();
for key in keys {
let Some(node) = store.nodes.get(key) else { continue };
if node.deleted { continue; }
let is_journal = node.node_type == NodeType::EpisodicSession;
let tag = if is_journal { " [JOURNAL — no delete]" } else { "" };
let words = node.content.split_whitespace().count();
text.push_str(&format!("#### {}{} ({} words)\n\n", sq(key), tag, words));
// Show first ~200 words of content as preview
let preview: String = node.content.split_whitespace()
.take(200).collect::<Vec<_>>().join(" ");
if words > 200 {
text.push_str(&format!("{}...\n\n", preview));
} else {
text.push_str(&format!("{}\n\n", node.content));
}
// Show neighbors with strengths
let neighbors = graph.neighbors(key);
if !neighbors.is_empty() {
text.push_str("**Neighbors:**\n");
for (nbr, strength) in neighbors.iter().take(15) {
let nbr_type = store.nodes.get(nbr.as_str())
.map(|n| match n.node_type {
NodeType::EpisodicSession => " [journal]",
NodeType::EpisodicDaily => " [daily]",
_ => "",
})
.unwrap_or("");
text.push_str(&format!(" [{:.1}] {}{}\n", strength, sq(nbr), nbr_type));
}
if neighbors.len() > 15 {
text.push_str(&format!(" ... and {} more\n", neighbors.len() - 15));
}
text.push('\n');
}
text.push_str("---\n\n");
result_keys.push(key.clone());
}
text.push_str("Use `poc-memory render KEY` and `poc-memory query \"neighbors('KEY')\"` to explore further.\n");
Some(Resolved { text, keys: result_keys })
}
"conversations" => {
let fragments = super::knowledge::select_conversation_fragments(count);
let fragment_ids: Vec<String> = fragments.iter()
.map(|(id, _)| id.clone())
.collect();
let text = fragments.iter()
.map(|(id, text)| format!("### Session {}\n\n{}", id, text))
.collect::<Vec<_>>()
.join("\n\n---\n\n");
Some(Resolved { text, keys: fragment_ids })
}
"siblings" | "neighborhood" => {
let mut out = String::new();
let mut all_keys: Vec<String> = Vec::new();
let mut included_nodes: std::collections::HashSet<String> = std::collections::HashSet::new();
const MAX_NEIGHBORS: usize = 25;
for key in keys {
if included_nodes.contains(key) { continue; }
included_nodes.insert(key.clone());
let Some(node) = store.nodes.get(key.as_str()) else { continue };
let neighbors = graph.neighbors(key);
// Seed node with full content
out.push_str(&format!("## {} (seed)\n\n{}\n\n", key, node.content));
all_keys.push(key.clone());
// Rank neighbors by link_strength * node_weight
// Include all if <= 10, otherwise take top MAX_NEIGHBORS
let mut ranked: Vec<(String, f32, f32)> = neighbors.iter()
.filter_map(|(nbr, strength)| {
store.nodes.get(nbr.as_str()).map(|n| {
let node_weight = n.weight.max(0.01);
let score = strength * node_weight;
(nbr.to_string(), *strength, score)
})
})
.collect();
ranked.sort_by(|a, b| b.2.total_cmp(&a.2));
let total = ranked.len();
let included: Vec<_> = if total <= 10 {
ranked
} else {
// Smooth cutoff: threshold scales with neighborhood size
// Generous — err on including too much so the agent can
// see and clean up junk. 20 → top 75%, 50 → top 30%
let top_score = ranked.first().map(|(_, _, s)| *s).unwrap_or(0.0);
let ratio = (15.0 / total as f32).min(1.0);
let threshold = top_score * ratio;
ranked.into_iter()
.enumerate()
.take_while(|(i, (_, _, score))| *i < 10 || *score >= threshold)
.take(MAX_NEIGHBORS)
.map(|(_, item)| item)
.collect()
};
if !included.is_empty() {
if total > included.len() {
out.push_str(&format!("### Neighbors (top {} of {}, ranked by importance)\n\n",
included.len(), total));
} else {
out.push_str("### Neighbors\n\n");
}
let included_keys: std::collections::HashSet<&str> = included.iter()
.map(|(k, _, _)| k.as_str()).collect();
// Budget: stop adding full content when prompt gets large.
// Remaining neighbors get header-only (key + first line).
const NEIGHBORHOOD_BUDGET: usize = 400_000; // ~100K tokens, leaves room for core-personality + instructions
let mut budget_exceeded = false;
for (nbr, strength, _score) in &included {
if included_nodes.contains(nbr) { continue; }
included_nodes.insert(nbr.clone());
if let Some(n) = store.nodes.get(nbr.as_str()) {
if budget_exceeded || out.len() > NEIGHBORHOOD_BUDGET {
// Header-only: key + first non-empty line
budget_exceeded = true;
let first_line = n.content.lines()
.find(|l| !l.trim().is_empty())
.unwrap_or("(empty)");
out.push_str(&format!("#### {} (link: {:.2}) — {}\n",
nbr, strength, first_line));
} else {
out.push_str(&format!("#### {} (link: {:.2})\n\n{}\n\n",
nbr, strength, n.content));
}
all_keys.push(nbr.to_string());
}
}
if budget_exceeded {
out.push_str("\n(remaining neighbors shown as headers only — prompt budget)\n\n");
}
// Cross-links between included neighbors
let mut cross_links = Vec::new();
for (nbr, _, _) in &included {
for (nbr2, strength) in graph.neighbors(nbr) {
if nbr2.as_str() != key
&& included_keys.contains(nbr2.as_str())
&& nbr.as_str() < nbr2.as_str()
{
cross_links.push((nbr.clone(), nbr2, strength));
}
}
}
if !cross_links.is_empty() {
out.push_str("### Cross-links between neighbors\n\n");
for (a, b, s) in &cross_links {
out.push_str(&format!(" {}{} ({:.2})\n", a, b, s));
}
out.push('\n');
}
}
}
Some(Resolved { text: out, keys: all_keys })
}
// targets/context: aliases for challenger-style presentation
"targets" => {
let items = keys_to_replay_items(store, keys, graph);
Some(Resolved {
text: super::prompts::format_nodes_section(store, &items, graph),
keys: vec![],
})
}
"hubs" => {
// Top hub nodes by degree, spread apart (skip neighbors of already-selected hubs)
let mut hubs: Vec<(String, usize)> = store.nodes.iter()
.filter(|(k, n)| !n.deleted && !k.starts_with('_'))
.map(|(k, _)| {
let degree = graph.neighbors(k).len();
(k.clone(), degree)
})
.collect();
hubs.sort_by(|a, b| b.1.cmp(&a.1));
let mut selected = Vec::new();
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
for (key, degree) in &hubs {
if seen.contains(key) { continue; }
selected.push(format!(" - {} (degree {})", key, degree));
// Mark neighbors as seen so we pick far-apart hubs
for (nbr, _) in graph.neighbors(key) {
seen.insert(nbr.clone());
}
seen.insert(key.clone());
if selected.len() >= 20 { break; }
}
let text = format!("## Hub nodes (link targets)\n\n{}", selected.join("\n"));
Some(Resolved { text, keys: vec![] })
}
// node:KEY — inline a node's content by key
other if other.starts_with("node:") => {
let key = &other[5..];
store.nodes.get(key).map(|n| Resolved {
text: n.content.clone(),
keys: vec![key.to_string()],
})
}
// conversation — tail of the current session transcript (post-compaction)
"conversation" => {
let text = resolve_conversation();
if text.is_empty() { None }
else { Some(Resolved { text, keys: vec![] }) }
}
// seen_recent — recently surfaced memory keys for this session
"seen_recent" => {
let text = resolve_seen_recent();
Some(Resolved { text, keys: vec![] })
}
_ => None,
}
}
/// Get the tail of the current session's conversation.
/// Reads POC_SESSION_ID to find the transcript, extracts the last
/// segment (post-compaction), returns the tail (~100K chars).
fn resolve_conversation() -> String {
let session_id = std::env::var("POC_SESSION_ID").unwrap_or_default();
if session_id.is_empty() { return String::new(); }
let projects = crate::config::get().projects_dir.clone();
// Find the transcript file matching this session
let mut transcript = None;
if let Ok(dirs) = std::fs::read_dir(&projects) {
for dir in dirs.filter_map(|e| e.ok()) {
let path = dir.path().join(format!("{}.jsonl", session_id));
if path.exists() {
transcript = Some(path);
break;
}
}
}
let Some(path) = transcript else { return String::new() };
let path_str = path.to_string_lossy();
let Some(iter) = crate::transcript::TailMessages::open(&path_str) else {
return String::new();
};
let cfg = crate::config::get();
let mut fragments: Vec<String> = Vec::new();
let mut total_bytes = 0;
const MAX_BYTES: usize = 200_000;
for (role, content, ts) in iter {
if total_bytes >= MAX_BYTES { break; }
let name = if role == "user" { &cfg.user_name } else { &cfg.assistant_name };
let formatted = if !ts.is_empty() {
format!("**{}** {}: {}", name, &ts[..ts.len().min(19)], content)
} else {
format!("**{}:** {}", name, content)
};
total_bytes += content.len();
fragments.push(formatted);
}
// Reverse back to chronological order
fragments.reverse();
fragments.join("\n\n")
}
/// Get recently surfaced memory keys for the current session.
fn resolve_seen_recent() -> String {
let session_id = std::env::var("POC_SESSION_ID").unwrap_or_default();
if session_id.is_empty() {
return "(no session ID — cannot load seen set)".to_string();
}
let state_dir = std::path::PathBuf::from("/tmp/claude-memory-search");
let parse_seen = |suffix: &str| -> Vec<(String, String)> {
let path = state_dir.join(format!("seen{}-{}", suffix, session_id));
std::fs::read_to_string(&path).ok()
.map(|content| {
content.lines()
.filter(|s| !s.is_empty())
.filter_map(|line| {
let (ts, key) = line.split_once('\t')?;
Some((ts.to_string(), key.to_string()))
})
.collect()
})
.unwrap_or_default()
};
let current = parse_seen("");
let prev = parse_seen("-prev");
if current.is_empty() && prev.is_empty() {
return "(no memories surfaced yet this session)".to_string();
}
let mut out = String::new();
const MAX_ROOTS: usize = 20;
// Current: already in this context, don't re-surface
// Sort newest first, dedup, cap
let mut current_sorted = current.clone();
current_sorted.sort_by(|a, b| b.0.cmp(&a.0));
let mut seen_keys = std::collections::HashSet::new();
let current_deduped: Vec<_> = current_sorted.into_iter()
.filter(|(_, key)| seen_keys.insert(key.clone()))
.take(MAX_ROOTS)
.collect();
if !current_deduped.is_empty() {
out.push_str("Already surfaced this context (don't re-surface unless conversation shifted):\n");
for (ts, key) in &current_deduped {
out.push_str(&format!("- {} (surfaced {})\n", key, ts));
}
}
// Prev: surfaced before compaction, MAY need re-surfacing
// Exclude anything already in current, sort newest first, cap at remaining budget
let remaining = MAX_ROOTS.saturating_sub(current_deduped.len());
if remaining > 0 && !prev.is_empty() {
let mut prev_sorted = prev.clone();
prev_sorted.sort_by(|a, b| b.0.cmp(&a.0));
let prev_deduped: Vec<_> = prev_sorted.into_iter()
.filter(|(_, key)| seen_keys.insert(key.clone()))
.take(remaining)
.collect();
if !prev_deduped.is_empty() {
out.push_str("\nSurfaced before compaction (context was reset — re-surface if still relevant):\n");
for (ts, key) in &prev_deduped {
out.push_str(&format!("- {} (pre-compaction, {})\n", key, ts));
}
}
}
out.trim_end().to_string()
}
/// Resolve all {{placeholder}} patterns in a prompt template.
/// Returns the resolved text and all node keys collected from placeholders.
pub fn resolve_placeholders(
template: &str,
store: &Store,
graph: &Graph,
keys: &[String],
count: usize,
) -> (String, Vec<String>) {
let mut result = template.to_string();
let mut extra_keys = Vec::new();
loop {
let Some(start) = result.find("{{") else { break };
let Some(end) = result[start + 2..].find("}}") else { break };
let end = start + 2 + end;
let name = result[start + 2..end].trim().to_lowercase();
match resolve(&name, store, graph, keys, count) {
Some(resolved) => {
extra_keys.extend(resolved.keys);
result.replace_range(start..end + 2, &resolved.text);
}
None => {
let msg = format!("(unknown: {})", name);
result.replace_range(start..end + 2, &msg);
}
}
}
(result, extra_keys)
}
/// Run a config-driven agent: query → resolve placeholders → prompt.
/// `exclude` filters out nodes (and their neighborhoods) already being
/// worked on by other agents, preventing concurrent collisions.
pub fn run_agent(
store: &Store,
def: &AgentDef,
count: usize,
exclude: &std::collections::HashSet<String>,
) -> Result<super::prompts::AgentBatch, String> {
let graph = store.build_graph();
// Run the query if present
let keys = if !def.query.is_empty() {
let mut stages = search::Stage::parse_pipeline(&def.query)?;
let has_limit = stages.iter().any(|s|
matches!(s, search::Stage::Transform(search::Transform::Limit(_))));
if !has_limit {
// Request extra results to compensate for exclusion filtering
let padded = count + exclude.len().min(100);
stages.push(search::Stage::Transform(search::Transform::Limit(padded)));
}
let results = search::run_query(&stages, vec![], &graph, store, false, count + exclude.len().min(100));
let filtered: Vec<String> = results.into_iter()
.map(|(k, _)| k)
.filter(|k| !exclude.contains(k))
.take(count)
.collect();
if filtered.is_empty() {
return Err(format!("{}: query returned no results (after exclusion)", def.agent));
}
filtered
} else {
vec![]
};
// Substitute {agent_name} before resolving {{...}} placeholders,
// so agents can reference their own notes: {{node:subconscious-notes-{agent_name}}}
let template = def.prompt.replace("{agent_name}", &def.agent);
let (prompt, extra_keys) = resolve_placeholders(&template, store, &graph, &keys, count);
// Identity and instructions are now pulled in via {{node:KEY}} placeholders.
// Agents should include {{node:core-personality}} and {{node:memory-instructions-core}}
// in their prompt templates. The resolve_placeholders call below handles this.
// Merge query keys with any keys produced by placeholder resolution
let mut all_keys = keys;
all_keys.extend(extra_keys);
Ok(super::prompts::AgentBatch { prompt, node_keys: all_keys })
}
/// Convert a list of keys to ReplayItems with priority and graph metrics.
pub fn keys_to_replay_items(
store: &Store,
keys: &[String],
graph: &Graph,
) -> Vec<ReplayItem> {
keys.iter()
.filter_map(|key| {
let node = store.nodes.get(key)?;
let priority = consolidation_priority(store, key, graph, None);
let cc = graph.clustering_coefficient(key);
Some(ReplayItem {
key: key.clone(),
priority,
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
cc,
classification: "unknown",
outlier_score: 0.0,
})
})
.collect()
}