consciousness/poc-memory/src/agents/defs.rs

// Agent definitions: self-contained files with query + prompt template.
//
// Each agent is a file in the agents/ directory:
//   - First line: JSON header (agent, query, model, schedule, tools, count, chunk_size, chunk_overlap)
//   - After blank line: prompt template with {{placeholder}} lookups
//
// Placeholders are resolved at runtime:
//   {{topology}}  — graph topology header
//   {{nodes}}     — query results formatted as node sections
//   {{episodes}}  — alias for {{nodes}}
//   {{health}}    — graph health report
//   {{pairs}}     — interference pairs from detect_interference
//   {{rename}}    — rename candidates
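//   {{split}}     — split detail for the first query result
//   Further placeholders (seed, organize, conversations, siblings/neighborhood,
//   targets, hubs, node:KEY, conversation, seen_recent) are handled in `resolve`.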
//
// The query selects what to operate on; placeholders pull in context.

use crate::graph::Graph;
use crate::neuro::{consolidation_priority, ReplayItem};
use crate::search;
use crate::store::Store;

use serde::Deserialize;

use std::path::PathBuf;

/// Agent definition: config (from JSON header) + prompt (raw markdown body).
///
/// Built by `parse_agent_file` from an `AgentHeader` plus the remainder of
/// the file; every field except `prompt` is copied straight from the header.
#[derive(Clone, Debug)]
pub struct AgentDef {
    /// Agent name; `get_def` matches definitions on this field.
    pub agent: String,
    /// Search pipeline text handed to `search::Stage::parse_pipeline`.
    /// Empty means `run_agent` runs no query.
    pub query: String,
    /// Prompt template with `{{placeholder}}` markers still unresolved.
    pub prompt: String,
    /// Model name; the header default is "sonnet" when omitted.
    pub model: String,
    /// Schedule string, copied verbatim from the header (empty when omitted).
    pub schedule: String,
    /// `tools` list from the header (empty when omitted).
    pub tools: Vec<String>,
    /// Number of seed nodes / conversation fragments (overrides --count).
    pub count: Option<usize>,
    /// Max size of conversation chunks in bytes.
    pub chunk_size: Option<usize>,
    /// Overlap between chunks in bytes.
    pub chunk_overlap: Option<usize>,
}

/// The JSON header portion (first line of the file).
///
/// Only `agent` is required; every other field falls back to a default
/// when the key is missing from the JSON object.
#[derive(Deserialize)]
struct AgentHeader {
    /// Agent name (required).
    agent: String,
    /// Search pipeline text; defaults to the empty string.
    #[serde(default)]
    query: String,
    /// Model name; defaults to the value of `default_model()`.
    #[serde(default = "default_model")]
    model: String,
    /// Schedule string; defaults to the empty string.
    #[serde(default)]
    schedule: String,
    /// Tool list; defaults to an empty vector.
    #[serde(default)]
    tools: Vec<String>,
    /// Number of seed nodes / conversation fragments (overrides --count)
    #[serde(default)]
    count: Option<usize>,
    /// Max size of conversation chunks in bytes (default 50000)
    #[serde(default)]
    chunk_size: Option<usize>,
    /// Overlap between chunks in bytes (default 10000)
    #[serde(default)]
    chunk_overlap: Option<usize>,
}

/// Serde default for `AgentHeader::model` when the header omits the key.
fn default_model() -> String {
    String::from("sonnet")
}

/// Parse an agent file: first line is JSON config, rest is the prompt.
///
/// Returns `None` when the content has no newline at all (there is no
/// prompt body to split off) or when the first line is not a valid
/// `AgentHeader` JSON object.
///
/// One optional blank line between header and body is dropped. Both LF and
/// CRLF line endings are accepted for that separator, matching the header
/// line, which already tolerates a trailing `\r` via `trim()`.
fn parse_agent_file(content: &str) -> Option<AgentDef> {
    let (first_line, rest) = content.split_once('\n')?;
    let header: AgentHeader = serde_json::from_str(first_line.trim()).ok()?;
    // Skip optional blank line between header and prompt body. Try the
    // CRLF form first so a Windows-edited file does not leave "\r\n" at
    // the start of the prompt.
    let prompt = rest
        .strip_prefix("\r\n")
        .or_else(|| rest.strip_prefix('\n'))
        .unwrap_or(rest);
    Some(AgentDef {
        agent: header.agent,
        query: header.query,
        prompt: prompt.to_string(),
        model: header.model,
        schedule: header.schedule,
        tools: header.tools,
        count: header.count,
        chunk_size: header.chunk_size,
        chunk_overlap: header.chunk_overlap,
    })
}

/// Directory that holds the agent definition files.
///
/// Prefers `<CARGO_MANIFEST_DIR>/agents` (path baked in at compile time)
/// when that directory exists; otherwise uses `agents` under the store's
/// memory directory.
fn agents_dir() -> PathBuf {
    let in_repo = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents");
    if in_repo.is_dir() {
        in_repo
    } else {
        crate::store::memory_dir().join("agents")
    }
}

/// Load every agent definition found in the agents directory.
///
/// Only files with an `.agent` or `.md` extension are considered. Files
/// that cannot be read or do not parse are skipped silently; an unreadable
/// directory yields an empty list.
pub fn load_defs() -> Vec<AgentDef> {
    let dir = agents_dir();
    let entries = match std::fs::read_dir(&dir) {
        Ok(entries) => entries,
        Err(_) => return Vec::new(),
    };

    let mut defs = Vec::new();
    for entry in entries.filter_map(Result::ok) {
        let path = entry.path();
        let is_agent_file =
            matches!(path.extension(), Some(ext) if ext == "agent" || ext == "md");
        if !is_agent_file {
            continue;
        }
        let Ok(content) = std::fs::read_to_string(&path) else { continue };
        if let Some(def) = parse_agent_file(&content) {
            defs.push(def);
        }
    }
    defs
}

/// Look up a single agent definition by name.
///
/// Fast path: try `<name>.agent`, then `<name>.md`, in the agents directory
/// and return the first one that reads and parses. Otherwise fall back to
/// scanning all definitions for one whose `agent` field equals `name`.
pub fn get_def(name: &str) -> Option<AgentDef> {
    let dir = agents_dir();
    let by_filename = ["agent", "md"].iter().find_map(|ext| {
        let candidate = dir.join(format!("{}.{}", name, ext));
        let content = std::fs::read_to_string(&candidate).ok()?;
        parse_agent_file(&content)
    });
    by_filename.or_else(|| load_defs().into_iter().find(|d| d.agent == name))
}

/// Result of resolving a placeholder: text + any affected node keys.
struct Resolved {
    /// Replacement text substituted for the `{{placeholder}}`.
    text: String,
    /// Keys this placeholder introduced. Left empty by arms whose keys are
    /// already tracked from the query.
    keys: Vec<String>,
}

/// Resolve a single {{placeholder}} by name.
/// Returns the replacement text and any node keys it produced (for visit tracking).
///
/// Supported names: `topology`, `nodes` / `episodes`, `health`, `pairs`,
/// `rename`, `split`, `seed`, `organize`, `conversations`,
/// `siblings` / `neighborhood`, `targets`, `hubs`, `node:KEY`,
/// `conversation`, `seen_recent`.
///
/// - `keys` are the query result keys the agent is operating on.
/// - `count` caps the number of items for `pairs`, `rename` and
///   `conversations`.
///
/// Returns `None` for an unknown name, and also when a known placeholder
/// has nothing to show (`split` with no keys, `seed` with nothing rendered,
/// `node:KEY` with a missing key, `conversation` with an empty transcript).
fn resolve(
    name: &str,
    store: &Store,
    graph: &Graph,
    keys: &[String],
    count: usize,
) -> Option<Resolved> {
    match name {
        "topology" => Some(Resolved {
            text: super::prompts::format_topology_header(graph),
            keys: vec![],
        }),

        "nodes" | "episodes" => {
            let items = keys_to_replay_items(store, keys, graph);
            Some(Resolved {
                text: super::prompts::format_nodes_section(store, &items, graph),
                keys: vec![], // keys already tracked from query
            })
        }

        "health" => Some(Resolved {
            text: super::prompts::format_health_section(store, graph),
            keys: vec![],
        }),

        "pairs" => {
            // NOTE(review): 0.5 is presumably the interference threshold —
            // confirm against detect_interference's signature.
            let mut pairs = crate::neuro::detect_interference(store, graph, 0.5);
            pairs.truncate(count);
            // Both members of every pair count as touched keys
            let pair_keys: Vec<String> = pairs.iter()
                .flat_map(|(a, b, _)| vec![a.clone(), b.clone()])
                .collect();
            Some(Resolved {
                text: super::prompts::format_pairs_section(&pairs, store, graph),
                keys: pair_keys,
            })
        }

        "rename" => {
            let (rename_keys, section) = super::prompts::format_rename_candidates(store, count);
            Some(Resolved { text: section, keys: rename_keys })
        }

        "split" => {
            // Only the first query result is used; no keys means no output
            let key = keys.first()?;
            Some(Resolved {
                text: super::prompts::format_split_plan_node(store, graph, key),
                keys: vec![], // key already tracked from query
            })
        }

        // seed — render output for each seed node (content + deduped links)
        "seed" => {
            let mut text = String::new();
            let mut result_keys = Vec::new();
            for key in keys {
                if let Some(rendered) = crate::cli::node::render_node(store, key) {
                    if !text.is_empty() { text.push_str("\n\n---\n\n"); }
                    text.push_str(&format!("## {}\n\n{}", key, rendered));
                    result_keys.push(key.clone());
                }
            }
            // Nothing rendered: report the placeholder as unresolved
            if text.is_empty() { return None; }
            Some(Resolved { text, keys: result_keys })
        }

        "organize" => {
            // Show seed nodes with their neighbors for exploratory organizing
            use crate::store::NodeType;

            // Helper: shell-quote keys containing #
            let sq = |k: &str| -> String {
                if k.contains('#') { format!("'{}'", k) } else { k.to_string() }
            };

            let mut text = format!("### Seed nodes ({} starting points)\n\n", keys.len());
            let mut result_keys = Vec::new();

            for key in keys {
                // Missing and deleted nodes are skipped, though the header
                // above still counts them
                let Some(node) = store.nodes.get(key) else { continue };
                if node.deleted { continue; }

                let is_journal = node.node_type == NodeType::EpisodicSession;
                let tag = if is_journal { " [JOURNAL — no delete]" } else { "" };
                let words = node.content.split_whitespace().count();

                text.push_str(&format!("#### {}{} ({} words)\n\n", sq(key), tag, words));

                // Show first ~200 words of content as preview
                let preview: String = node.content.split_whitespace()
                    .take(200).collect::<Vec<_>>().join(" ");
                if words > 200 {
                    text.push_str(&format!("{}...\n\n", preview));
                } else {
                    // Short nodes are shown verbatim, original whitespace included
                    text.push_str(&format!("{}\n\n", node.content));
                }

                // Show neighbors with strengths
                let neighbors = graph.neighbors(key);
                if !neighbors.is_empty() {
                    text.push_str("**Neighbors:**\n");
                    for (nbr, strength) in neighbors.iter().take(15) {
                        let nbr_type = store.nodes.get(nbr.as_str())
                            .map(|n| match n.node_type {
                                NodeType::EpisodicSession => " [journal]",
                                NodeType::EpisodicDaily => " [daily]",
                                _ => "",
                            })
                            .unwrap_or("");
                        text.push_str(&format!("  [{:.1}] {}{}\n", strength, sq(nbr), nbr_type));
                    }
                    if neighbors.len() > 15 {
                        text.push_str(&format!("  ... and {} more\n", neighbors.len() - 15));
                    }
                    text.push('\n');
                }

                text.push_str("---\n\n");
                result_keys.push(key.clone());
            }

            text.push_str("Use `poc-memory render KEY` and `poc-memory query \"neighbors('KEY')\"` to explore further.\n");

            Some(Resolved { text, keys: result_keys })
        }

        "conversations" => {
            // Fragments are (id, text) pairs; the ids are reported as keys
            let fragments = super::knowledge::select_conversation_fragments(count);
            let fragment_ids: Vec<String> = fragments.iter()
                .map(|(id, _)| id.clone())
                .collect();
            let text = fragments.iter()
                .map(|(id, text)| format!("### Session {}\n\n{}", id, text))
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
            Some(Resolved { text, keys: fragment_ids })
        }

        "siblings" | "neighborhood" => {
            let mut out = String::new();
            let mut all_keys: Vec<String> = Vec::new();
            // Every key already emitted (seed or neighbor), so no node is
            // printed twice across seeds
            let mut included_nodes: std::collections::HashSet<String> = std::collections::HashSet::new();
            const MAX_NEIGHBORS: usize = 25;

            for key in keys {
                if included_nodes.contains(key) { continue; }
                included_nodes.insert(key.clone());
                let Some(node) = store.nodes.get(key.as_str()) else { continue };
                let neighbors = graph.neighbors(key);

                // Seed node with full content
                out.push_str(&format!("## {} (seed)\n\n{}\n\n", key, node.content));
                all_keys.push(key.clone());

                // Rank neighbors by link_strength * node_weight
                // Include all if <= 10, otherwise take top MAX_NEIGHBORS
                // Neighbors missing from the store are dropped here.
                let mut ranked: Vec<(String, f32, f32)> = neighbors.iter()
                    .filter_map(|(nbr, strength)| {
                        store.nodes.get(nbr.as_str()).map(|n| {
                            // Floor the weight so the score never collapses to zero
                            let node_weight = n.weight.max(0.01);
                            let score = strength * node_weight;
                            (nbr.to_string(), *strength, score)
                        })
                    })
                    .collect();
                ranked.sort_by(|a, b| b.2.total_cmp(&a.2));

                let total = ranked.len();
                let included: Vec<_> = if total <= 10 {
                    ranked
                } else {
                    // Smooth cutoff: threshold scales with neighborhood size
                    // Generous — err on including too much so the agent can
                    // see and clean up junk. 20 → top 75%, 50 → top 30%
                    let top_score = ranked.first().map(|(_, _, s)| *s).unwrap_or(0.0);
                    let ratio = (15.0 / total as f32).min(1.0);
                    let threshold = top_score * ratio;
                    // Always keep the first 10; beyond that keep going only
                    // while the score stays at or above the threshold
                    ranked.into_iter()
                        .enumerate()
                        .take_while(|(i, (_, _, score))| *i < 10 || *score >= threshold)
                        .take(MAX_NEIGHBORS)
                        .map(|(_, item)| item)
                        .collect()
                };

                if !included.is_empty() {
                    if total > included.len() {
                        out.push_str(&format!("### Neighbors (top {} of {}, ranked by importance)\n\n",
                            included.len(), total));
                    } else {
                        out.push_str("### Neighbors\n\n");
                    }
                    let included_keys: std::collections::HashSet<&str> = included.iter()
                        .map(|(k, _, _)| k.as_str()).collect();

                    // Budget: stop adding full content when prompt gets large.
                    // Remaining neighbors get header-only (key + first line).
                    // The budget is checked against the whole output so far,
                    // so it is shared across all seeds.
                    const NEIGHBORHOOD_BUDGET: usize = 400_000; // ~100K tokens, leaves room for core-personality + instructions
                    let mut budget_exceeded = false;

                    for (nbr, strength, _score) in &included {
                        if included_nodes.contains(nbr) { continue; }
                        included_nodes.insert(nbr.clone());
                        if let Some(n) = store.nodes.get(nbr.as_str()) {
                            if budget_exceeded || out.len() > NEIGHBORHOOD_BUDGET {
                                // Header-only: key + first non-empty line
                                budget_exceeded = true;
                                let first_line = n.content.lines()
                                    .find(|l| !l.trim().is_empty())
                                    .unwrap_or("(empty)");
                                out.push_str(&format!("#### {} (link: {:.2}) — {}\n",
                                    nbr, strength, first_line));
                            } else {
                                out.push_str(&format!("#### {} (link: {:.2})\n\n{}\n\n",
                                    nbr, strength, n.content));
                            }
                            all_keys.push(nbr.to_string());
                        }
                    }
                    if budget_exceeded {
                        out.push_str("\n(remaining neighbors shown as headers only — prompt budget)\n\n");
                    }

                    // Cross-links between included neighbors
                    // The `<` comparison lists each pair once, from its
                    // lexicographically smaller endpoint.
                    let mut cross_links = Vec::new();
                    for (nbr, _, _) in &included {
                        for (nbr2, strength) in graph.neighbors(nbr) {
                            if nbr2.as_str() != key
                                && included_keys.contains(nbr2.as_str())
                                && nbr.as_str() < nbr2.as_str()
                            {
                                cross_links.push((nbr.clone(), nbr2, strength));
                            }
                        }
                    }
                    if !cross_links.is_empty() {
                        out.push_str("### Cross-links between neighbors\n\n");
                        for (a, b, s) in &cross_links {
                            out.push_str(&format!("  {} ↔ {} ({:.2})\n", a, b, s));
                        }
                        out.push('\n');
                    }
                }
            }

            Some(Resolved { text: out, keys: all_keys })
        }

        // targets — rendered exactly like {{nodes}} (alias for
        // challenger-style presentation)
        "targets" => {
            let items = keys_to_replay_items(store, keys, graph);
            Some(Resolved {
                text: super::prompts::format_nodes_section(store, &items, graph),
                keys: vec![],
            })
        }

        "hubs" => {
            // Top hub nodes by degree, spread apart (skip neighbors of already-selected hubs)
            // Deleted nodes and keys starting with '_' are not candidates.
            let mut hubs: Vec<(String, usize)> = store.nodes.iter()
                .filter(|(k, n)| !n.deleted && !k.starts_with('_'))
                .map(|(k, _)| {
                    let degree = graph.neighbors(k).len();
                    (k.clone(), degree)
                })
                .collect();
            hubs.sort_by(|a, b| b.1.cmp(&a.1));

            let mut selected = Vec::new();
            let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
            for (key, degree) in &hubs {
                if seen.contains(key) { continue; }
                selected.push(format!("  - {} (degree {})", key, degree));
                // Mark neighbors as seen so we pick far-apart hubs
                for (nbr, _) in graph.neighbors(key) {
                    seen.insert(nbr.clone());
                }
                seen.insert(key.clone());
                // At most 20 hubs are listed
                if selected.len() >= 20 { break; }
            }

            let text = format!("## Hub nodes (link targets)\n\n{}", selected.join("\n"));
            Some(Resolved { text, keys: vec![] })
        }

        // node:KEY — inline a node's content by key
        // `resolve_placeholders` lowercases the name before calling, so KEY
        // arrives lowercased on that path. A missing key yields None.
        other if other.starts_with("node:") => {
            let key = &other[5..];
            store.nodes.get(key).map(|n| Resolved {
                text: n.content.clone(),
                keys: vec![key.to_string()],
            })
        }

        // conversation — tail of the current session transcript (post-compaction)
        "conversation" => {
            let text = resolve_conversation();
            if text.is_empty() { None }
            else { Some(Resolved { text, keys: vec![] }) }
        }

        // seen_recent — recently surfaced memory keys for this session
        "seen_recent" => {
            let text = resolve_seen_recent();
            Some(Resolved { text, keys: vec![] })
        }

        _ => None,
    }
}

/// Get the tail of the current session's conversation.
///
/// Reads `POC_SESSION_ID`, looks for `<session_id>.jsonl` under each entry
/// of the configured projects directory, and formats the messages yielded
/// by `TailMessages`. Collection stops once about `MAX_BYTES` (200 000)
/// bytes of message content have been gathered; the message that crosses
/// the limit is still included.
///
/// Returns an empty string when the session id is unset, no transcript
/// file is found, or the transcript cannot be opened.
fn resolve_conversation() -> String {
    const MAX_BYTES: usize = 200_000;
    // Timestamps are cut to "YYYY-MM-DDTHH:MM:SS" — NOTE(review): assumes
    // ISO-8601 style timestamps; confirm against the transcript format.
    const TS_DISPLAY_LEN: usize = 19;

    let session_id = std::env::var("POC_SESSION_ID").unwrap_or_default();
    if session_id.is_empty() { return String::new(); }

    let projects = crate::config::get().projects_dir.clone();
    // Find the transcript file matching this session
    let mut transcript = None;
    if let Ok(dirs) = std::fs::read_dir(&projects) {
        for dir in dirs.filter_map(|e| e.ok()) {
            let path = dir.path().join(format!("{}.jsonl", session_id));
            if path.exists() {
                transcript = Some(path);
                break;
            }
        }
    }

    let Some(path) = transcript else { return String::new() };
    let path_str = path.to_string_lossy();

    let Some(iter) = crate::transcript::TailMessages::open(&path_str) else {
        return String::new();
    };

    let cfg = crate::config::get();
    let mut fragments: Vec<String> = Vec::new();
    let mut total_bytes = 0;

    for (role, content, ts) in iter {
        if total_bytes >= MAX_BYTES { break; }
        let name = if role == "user" { &cfg.user_name } else { &cfg.assistant_name };
        let formatted = if !ts.is_empty() {
            // `get` returns None both when the timestamp is shorter than the
            // cut and when the cut would land inside a multi-byte character.
            // Either way, show the timestamp whole instead of panicking.
            let shown = ts.get(..TS_DISPLAY_LEN).unwrap_or(&ts[..]);
            format!("**{}** {}: {}", name, shown, content)
        } else {
            format!("**{}:** {}", name, content)
        };
        // Only the message body counts against the budget, not the label
        total_bytes += content.len();
        fragments.push(formatted);
    }

    // Reverse back to chronological order
    // (presumably TailMessages yields newest-first — confirm in transcript module)
    fragments.reverse();
    fragments.join("\n\n")
}

/// Get recently surfaced memory keys for the current session.
///
/// Reads two tab-separated `timestamp<TAB>key` files from
/// `/tmp/claude-memory-search`: `seen-<session>` (current context) and
/// `seen-prev-<session>` (before compaction). Each list is shown newest
/// first with duplicate keys removed, and both lists together are capped
/// at `MAX_ROOTS` entries. Lines without a tab are ignored; a missing or
/// unreadable file counts as empty.
fn resolve_seen_recent() -> String {
    const MAX_ROOTS: usize = 20;

    let session_id = match std::env::var("POC_SESSION_ID") {
        Ok(id) if !id.is_empty() => id,
        _ => return "(no session ID — cannot load seen set)".to_string(),
    };

    let state_dir = std::path::PathBuf::from("/tmp/claude-memory-search");

    // Load one seen-file as (timestamp, key) pairs in file order
    let load = |suffix: &str| -> Vec<(String, String)> {
        let path = state_dir.join(format!("seen{}-{}", suffix, session_id));
        let content = match std::fs::read_to_string(&path) {
            Ok(content) => content,
            Err(_) => return Vec::new(),
        };
        let mut entries = Vec::new();
        for line in content.lines() {
            if let Some((ts, key)) = line.split_once('\t') {
                entries.push((ts.to_string(), key.to_string()));
            }
        }
        entries
    };

    let mut current = load("");
    let mut prev = load("-prev");

    if current.is_empty() && prev.is_empty() {
        return "(no memories surfaced yet this session)".to_string();
    }

    // Timestamps compare as strings; descending order puts newest first
    let newest_first = |a: &(String, String), b: &(String, String)| b.0.cmp(&a.0);

    // Keys already listed, shared by both sections so a key appears once
    let mut seen_keys = std::collections::HashSet::new();
    let mut out = String::new();

    // Current: already in this context, don't re-surface
    current.sort_by(newest_first);
    let current_unique: Vec<(String, String)> = current.into_iter()
        .filter(|(_, key)| seen_keys.insert(key.clone()))
        .take(MAX_ROOTS)
        .collect();

    if !current_unique.is_empty() {
        out.push_str("Already surfaced this context (don't re-surface unless conversation shifted):\n");
        out.extend(
            current_unique.iter()
                .map(|(ts, key)| format!("- {} (surfaced {})\n", key, ts)),
        );
    }

    // Prev: surfaced before compaction, MAY need re-surfacing.
    // It only gets whatever room the current list left under the cap.
    let remaining = MAX_ROOTS.saturating_sub(current_unique.len());
    if remaining > 0 && !prev.is_empty() {
        prev.sort_by(newest_first);
        let prev_unique: Vec<(String, String)> = prev.into_iter()
            .filter(|(_, key)| seen_keys.insert(key.clone()))
            .take(remaining)
            .collect();
        if !prev_unique.is_empty() {
            out.push_str("\nSurfaced before compaction (context was reset — re-surface if still relevant):\n");
            out.extend(
                prev_unique.iter()
                    .map(|(ts, key)| format!("- {} (pre-compaction, {})\n", key, ts)),
            );
        }
    }

    out.trim_end().to_string()
}

/// Resolve all {{placeholder}} patterns in a prompt template.
/// Returns the resolved text and all node keys collected from placeholders.
///
/// Each placeholder name is trimmed and lowercased before lookup. A name
/// that `resolve` does not handle is replaced with `(unknown: NAME)`.
///
/// Substituted text is never re-scanned: the search resumes right after
/// each replacement. Node bodies and transcripts are content, not
/// template, so a literal `{{…}}` inside them stays as written. A stray
/// `{{` in content cannot swallow a later placeholder, and a node that
/// mentions its own `{{node:…}}` cannot loop forever.
///
/// An opening `{{` in the template with no closing `}}` after it ends the
/// scan; the remainder is left untouched.
pub fn resolve_placeholders(
    template: &str,
    store: &Store,
    graph: &Graph,
    keys: &[String],
    count: usize,
) -> (String, Vec<String>) {
    let mut result = template.to_string();
    let mut extra_keys = Vec::new();
    // Byte offset where the next search begins. Always sits on a char
    // boundary: it is either 0 or the end of a just-inserted replacement.
    let mut cursor = 0;
    while let Some(offset) = result[cursor..].find("{{") {
        let start = cursor + offset;
        let Some(len) = result[start + 2..].find("}}") else { break };
        let end = start + 2 + len;
        let name = result[start + 2..end].trim().to_lowercase();
        let replacement = match resolve(&name, store, graph, keys, count) {
            Some(resolved) => {
                extra_keys.extend(resolved.keys);
                resolved.text
            }
            None => format!("(unknown: {})", name),
        };
        result.replace_range(start..end + 2, &replacement);
        // Skip over what was just inserted
        cursor = start + replacement.len();
    }
    (result, extra_keys)
}

/// Run a config-driven agent: query → resolve placeholders → prompt.
///
/// When `def.query` is non-empty it is parsed and run, keys present in
/// `exclude` (nodes other agents are already working on) are dropped, and
/// at most `count` keys are kept. The prompt template is then expanded and
/// the query keys are merged with any keys the placeholders produced.
///
/// # Errors
/// Propagates a pipeline parse error. Also fails when a non-empty query
/// leaves no keys after exclusion.
pub fn run_agent(
    store: &Store,
    def: &AgentDef,
    count: usize,
    exclude: &std::collections::HashSet<String>,
) -> Result<super::prompts::AgentBatch, String> {
    let graph = store.build_graph();

    let query_keys: Vec<String> = if def.query.is_empty() {
        Vec::new()
    } else {
        let mut stages = search::Stage::parse_pipeline(&def.query)?;
        // Ask for extra results to make up for what exclusion removes
        let padded = count + exclude.len().min(100);
        let already_limited = stages.iter().any(|stage| {
            matches!(stage, search::Stage::Transform(search::Transform::Limit(_)))
        });
        if !already_limited {
            stages.push(search::Stage::Transform(search::Transform::Limit(padded)));
        }

        let selected: Vec<String> =
            search::run_query(&stages, vec![], &graph, store, false, padded)
                .into_iter()
                .map(|(key, _)| key)
                .filter(|key| !exclude.contains(key))
                .take(count)
                .collect();
        if selected.is_empty() {
            return Err(format!("{}: query returned no results (after exclusion)", def.agent));
        }
        selected
    };

    // {agent_name} is substituted first so it can appear inside a
    // placeholder, e.g. {{node:subconscious-notes-{agent_name}}}.
    // Identity and instructions also arrive through {{node:KEY}}
    // placeholders in the template and are expanded by the call below.
    let template = def.prompt.replace("{agent_name}", &def.agent);
    let (prompt, placeholder_keys) =
        resolve_placeholders(&template, store, &graph, &query_keys, count);

    // Query keys first, then keys contributed by placeholders
    let mut node_keys = query_keys;
    node_keys.extend(placeholder_keys);
    Ok(super::prompts::AgentBatch { prompt, node_keys })
}

/// Convert a list of keys to ReplayItems with priority and graph metrics.
///
/// Keys that are not present in the store are skipped. Every item gets the
/// fixed classification "unknown" and an outlier score of 0.0.
pub fn keys_to_replay_items(
    store: &Store,
    keys: &[String],
    graph: &Graph,
) -> Vec<ReplayItem> {
    let mut items = Vec::new();
    for key in keys {
        let Some(node) = store.nodes.get(key) else { continue };
        let priority = consolidation_priority(store, key, graph, None);
        let cc = graph.clustering_coefficient(key);
        items.push(ReplayItem {
            key: key.clone(),
            priority,
            interval_days: node.spaced_repetition_interval,
            emotion: node.emotion,
            cc,
            classification: "unknown",
            outlier_score: 0.0,
        });
    }
    items
}