consciousness/poc-memory/src/agents/prompts.rs
Kent Overstreet b4e674806d agents: self-contained agent files with embedded prompts
Each agent is a .agent file: JSON config on the first line, blank line,
then the raw prompt markdown. Fully self-contained, fully readable.
No separate template files needed.

Agents dir: checked into repo at poc-memory/agents/. Code looks there
first (via CARGO_MANIFEST_DIR), falls back to ~/.claude/memory/agents/.

Three agents migrated: replay, linker, transfer.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
2026-03-10 15:29:55 -04:00

// Agent prompt generation and formatting. Presentation logic —
// builds text prompts from store data for consolidation agents.
use crate::store::Store;
use crate::graph::Graph;
use crate::similarity;
use crate::spectral;
use crate::neuro::{
    ReplayItem, consolidation_priority,
    replay_queue, replay_queue_with_graph, detect_interference,
};
/// Result of building an agent prompt — includes both the prompt text
/// and the keys of nodes selected for processing, so the caller can
/// record visits after successful completion.
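///
/// Illustrative caller flow (`record_visit` is a hypothetical helper, not
/// defined in this module):
/// ```ignore
/// let batch = agent_prompt(&store, "replay", 10)?;
/// println!("{}", batch.prompt);
/// for key in &batch.node_keys {
///     record_visit(&mut store, key); // only after the agent run succeeds
/// }
/// ```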
pub struct AgentBatch {
    pub prompt: String,
    pub node_keys: Vec<String>,
}
/// Load a prompt template, replacing {{PLACEHOLDER}} with data
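///
/// Minimal sketch (assumes a `replay.md` template exists in the configured
/// prompts directory and contains a `{{NODES}}` placeholder):
/// ```ignore
/// let prompt = load_prompt("replay", &[("{{NODES}}", "## example-node\n...")])?;
/// ```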
pub fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
    let path = crate::config::get().prompts_dir.join(format!("{}.md", name));
    let mut content = std::fs::read_to_string(&path)
        .map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
    for (placeholder, data) in replacements {
        content = content.replace(placeholder, data);
    }
    Ok(content)
}
/// Format topology header for agent prompts — current graph health metrics.
/// Public alias for use from defs.rs (config-driven agents).
pub fn format_topology_header_pub(graph: &Graph) -> String {
    format_topology_header(graph)
}
fn format_topology_header(graph: &Graph) -> String {
    let sigma = graph.small_world_sigma();
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let avg_cc = graph.avg_clustering_coefficient();
    let n = graph.nodes().len();
    let e = graph.edge_count();
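    // Rules of thumb for reading these (illustrative, not enforced here):
    // σ > 1 suggests small-world structure, α around 2-3 is typical of
    // scale-free degree distributions, and a Gini near 1 means edges are
    // concentrated on a few hubs.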
    // Identify saturated hubs — nodes with degree well above threshold
    let threshold = graph.hub_threshold();
    let mut hubs: Vec<_> = graph.nodes().iter()
        .map(|k| (k.clone(), graph.degree(k)))
        .filter(|(_, d)| *d >= threshold)
        .collect();
    hubs.sort_by(|a, b| b.1.cmp(&a.1));
    hubs.truncate(15);
    let hub_list = if hubs.is_empty() {
        String::new()
    } else {
        let lines: Vec<String> = hubs.iter()
            .map(|(k, d)| format!(" - {} (degree {})", k, d))
            .collect();
        format!(
            "### SATURATED HUBS — DO NOT LINK TO THESE\n\
            The following nodes are already over-connected. Adding more links\n\
            to them makes the graph worse (star topology). Find lateral\n\
            connections between peripheral nodes instead.\n\n{}\n\n\
            Only link to a hub if it is genuinely the ONLY reasonable target.\n\n",
            lines.join("\n"))
    };
    format!(
        "## Current graph topology\n\
        Nodes: {} Edges: {} Communities: {}\n\
        Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
        Avg clustering coefficient: {:.4}\n\n\
        {}\
        Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
        Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
        n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
}
/// Public alias for use from defs.rs (config-driven agents).
pub fn format_nodes_section_pub(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
    format_nodes_section(store, items, graph)
}
/// Format node data section for prompt templates
fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
    let hub_thresh = graph.hub_threshold();
    let mut out = String::new();
    for item in items {
        let node = match store.nodes.get(&item.key) {
            Some(n) => n,
            None => continue,
        };
        out.push_str(&format!("## {}\n", item.key));
        out.push_str(&format!("Priority: {:.3} CC: {:.3} Emotion: {:.1} ",
            item.priority, item.cc, item.emotion));
        out.push_str(&format!("Interval: {}d\n",
            node.spaced_repetition_interval));
        if item.outlier_score > 0.0 {
            out.push_str(&format!("Spectral: {} (outlier={:.1})\n",
                item.classification, item.outlier_score));
        }
        if let Some(community) = node.community_id {
            out.push_str(&format!("Community: {} ", community));
        }
        let deg = graph.degree(&item.key);
        let cc = graph.clustering_coefficient(&item.key);
        // Hub-link ratio: what fraction of this node's edges go to hubs?
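        // e.g. degree 4 with 3 edges to hubs gives ratio 0.75, which trips
        // the "mostly hub-connected" nudge below (threshold 0.6).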
        let neighbors = graph.neighbors(&item.key);
        let hub_links = neighbors.iter()
            .filter(|(n, _)| graph.degree(n) >= hub_thresh)
            .count();
        let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
        let is_hub = deg >= hub_thresh;
        out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
            deg, cc, hub_ratio * 100.0, hub_links, deg));
        if is_hub {
            out.push_str(" ← THIS IS A HUB");
        } else if hub_ratio > 0.6 {
            out.push_str(" ← mostly hub-connected, needs lateral links");
        }
        out.push('\n');
        // Content (truncated for large nodes)
        let content = &node.content;
        if content.len() > 1500 {
            let truncated = crate::util::truncate(content, 1500, "\n[...]");
            out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
                content.len(), truncated));
        } else {
            out.push_str(&format!("\nContent:\n{}\n\n", content));
        }
        // Neighbors
        let neighbors = graph.neighbors(&item.key);
        if !neighbors.is_empty() {
            out.push_str("Neighbors:\n");
            for (n, strength) in neighbors.iter().take(15) {
                let n_cc = graph.clustering_coefficient(n);
                let n_community = store.nodes.get(n.as_str())
                    .and_then(|n| n.community_id);
                out.push_str(&format!(" - {} (str={:.2}, cc={:.3}",
                    n, strength, n_cc));
                if let Some(c) = n_community {
                    out.push_str(&format!(", c{}", c));
                }
                out.push_str(")\n");
            }
        }
        // Suggested link targets: text-similar semantic nodes not already neighbors
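        // e.g. a node whose content clears the 0.1 cosine floor against this
        // one, and is not already a neighbor, shows up as a lateral-link
        // candidate.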
        let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
            .map(|(k, _)| k.as_str()).collect();
        let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
            .filter(|(k, _)| {
                *k != &item.key
                    && !neighbor_keys.contains(k.as_str())
            })
            .map(|(k, n)| {
                let sim = similarity::cosine_similarity(content, &n.content);
                (k.as_str(), sim)
            })
            .filter(|(_, sim)| *sim > 0.1)
            .collect();
        candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
        candidates.truncate(8);
        if !candidates.is_empty() {
            out.push_str("\nSuggested link targets (by text similarity, not yet linked):\n");
            for (k, sim) in &candidates {
                let is_hub = graph.degree(k) >= hub_thresh;
                out.push_str(&format!(" - {} (sim={:.3}{})\n",
                    k, sim, if is_hub { ", HUB" } else { "" }));
            }
        }
        out.push_str("\n---\n\n");
    }
    out
}
/// Format health data for the health agent prompt
fn format_health_section(store: &Store, graph: &Graph) -> String {
    use crate::graph;
    let health = graph::health_report(graph, store);
    let mut out = health;
    out.push_str("\n\n## Weight distribution\n");
    // Weight histogram
    let mut buckets = [0u32; 10]; // 0.0-0.1, 0.1-0.2, ..., 0.9-1.0
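    // e.g. weight 0.37 lands in bucket 3 (0.3-0.4); weight 1.0 would index
    // bucket 10, so .min(9) clamps it into the top bucket.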
    for node in store.nodes.values() {
        let bucket = ((node.weight * 10.0) as usize).min(9);
        buckets[bucket] += 1;
    }
    for (i, &count) in buckets.iter().enumerate() {
        let lo = i as f32 / 10.0;
        let hi = (i + 1) as f32 / 10.0;
        let bar = "#".repeat((count as usize) / 10); // one '#' per 10 nodes in the bucket
        out.push_str(&format!(" {:.1}-{:.1}: {:4} {}\n", lo, hi, count, bar));
    }
    // Near-prune nodes
    let near_prune: Vec<_> = store.nodes.iter()
        .filter(|(_, n)| n.weight < 0.15)
        .map(|(k, n)| (k.clone(), n.weight))
        .collect();
    if !near_prune.is_empty() {
        out.push_str(&format!("\n## Near-prune nodes ({} total)\n", near_prune.len()));
        for (k, w) in near_prune.iter().take(20) {
            out.push_str(&format!(" [{:.3}] {}\n", w, k));
        }
    }
    // Community sizes
    let communities = graph.communities();
    let mut comm_sizes: std::collections::HashMap<u32, Vec<String>> = std::collections::HashMap::new();
    for (key, &label) in communities {
        comm_sizes.entry(label).or_default().push(key.clone());
    }
    let mut sizes: Vec<_> = comm_sizes.iter()
        .map(|(id, members)| (*id, members.len(), members.clone()))
        .collect();
    sizes.sort_by(|a, b| b.1.cmp(&a.1));
    out.push_str("\n## Largest communities\n");
    for (id, size, members) in sizes.iter().take(10) {
        out.push_str(&format!(" Community {} ({} nodes): ", id, size));
        let sample: Vec<_> = members.iter().take(5).map(|s| s.as_str()).collect();
        out.push_str(&sample.join(", "));
        if *size > 5 { out.push_str(", ..."); }
        out.push('\n');
    }
    out
}
/// Format interference pairs for the separator agent prompt
fn format_pairs_section(
    pairs: &[(String, String, f32)],
    store: &Store,
    graph: &Graph,
) -> String {
    let mut out = String::new();
    let communities = graph.communities();
    for (a, b, sim) in pairs {
        out.push_str(&format!("## Pair: similarity={:.3}\n", sim));
        let ca = communities.get(a).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
        let cb = communities.get(b).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
        // Node A
        out.push_str(&format!("\n### {} ({})\n", a, ca));
        if let Some(node) = store.nodes.get(a) {
            let content = crate::util::truncate(&node.content, 500, "...");
            out.push_str(&format!("Weight: {:.2}\n{}\n",
                node.weight, content));
        }
        // Node B
        out.push_str(&format!("\n### {} ({})\n", b, cb));
        if let Some(node) = store.nodes.get(b) {
            let content = crate::util::truncate(&node.content, 500, "...");
            out.push_str(&format!("Weight: {:.2}\n{}\n",
                node.weight, content));
        }
        out.push_str("\n---\n\n");
    }
    out
}
/// Format rename candidates, returning both keys and formatted section
fn format_rename_candidates_with_keys(store: &Store, count: usize) -> (Vec<String>, String) {
    let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter()
        .filter(|(key, _)| {
            if key.len() < 60 { return false; }
            if key.starts_with("journal#j-") { return true; }
            if key.starts_with("_mined-transcripts#f-") { return true; }
            false
        })
        .map(|(k, n)| (k.as_str(), n))
        .collect();
    candidates.sort_by(|a, b| b.1.timestamp.cmp(&a.1.timestamp));
    candidates.truncate(count);
    let keys: Vec<String> = candidates.iter().map(|(k, _)| k.to_string()).collect();
    let mut out = String::new();
    out.push_str(&format!("## Nodes to rename ({} of {} candidates)\n\n",
        candidates.len(),
        store.nodes.keys().filter(|k| k.len() >= 60 &&
            (k.starts_with("journal#j-") || k.starts_with("_mined-transcripts#f-"))).count()));
    for (key, node) in &candidates {
        out.push_str(&format!("### {}\n", key));
        let created = if node.timestamp > 0 {
            crate::store::format_datetime(node.timestamp)
        } else {
            "unknown".to_string()
        };
        out.push_str(&format!("Created: {}\n", created));
        let content = &node.content;
        if content.len() > 800 {
            let truncated = crate::util::truncate(content, 800, "\n[...]");
            out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
                content.len(), truncated));
        } else {
            out.push_str(&format!("\nContent:\n{}\n\n", content));
        }
        out.push_str("---\n\n");
    }
    (keys, out)
}
/// Get split candidates sorted by size (largest first)
pub fn split_candidates(store: &Store) -> Vec<String> {
    let mut candidates: Vec<(&str, usize)> = store.nodes.iter()
        .filter(|(key, node)| {
            !key.starts_with('_')
                && !node.deleted
                && matches!(node.node_type, crate::store::NodeType::Semantic)
        })
        .map(|(k, n)| (k.as_str(), n.content.len()))
        .collect();
    candidates.sort_by(|a, b| b.1.cmp(&a.1));
    candidates.into_iter().map(|(k, _)| k.to_string()).collect()
}
/// Format a single node for split-plan prompt (phase 1)
fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String {
    let communities = graph.communities();
    let node = match store.nodes.get(key) {
        Some(n) => n,
        None => return format!("Node '{}' not found\n", key),
    };
    let mut out = String::new();
    out.push_str(&format!("### {} ({} chars)\n", key, node.content.len()));
    // Show neighbors grouped by community
    let neighbors = graph.neighbors(key);
    if !neighbors.is_empty() {
        let mut by_community: std::collections::BTreeMap<String, Vec<(&str, f32)>> =
            std::collections::BTreeMap::new();
        for (nkey, strength) in &neighbors {
            let comm = communities.get(nkey.as_str())
                .map(|c| format!("c{}", c))
                .unwrap_or_else(|| "unclustered".into());
            by_community.entry(comm)
                .or_default()
                .push((nkey.as_str(), *strength));
        }
        out.push_str("\nNeighbors by community:\n");
        for (comm, members) in &by_community {
            out.push_str(&format!(" {} ({}):", comm, members.len()));
            for (nkey, strength) in members.iter().take(5) {
                out.push_str(&format!(" {}({:.2})", nkey, strength));
            }
            if members.len() > 5 {
                out.push_str(&format!(" +{} more", members.len() - 5));
            }
            out.push('\n');
        }
    }
    // Full content
    out.push_str(&format!("\nContent:\n{}\n\n", node.content));
    out.push_str("---\n\n");
    out
}
/// Build split-plan prompt for a single node (phase 1)
pub fn split_plan_prompt(store: &Store, key: &str) -> Result<String, String> {
    let graph = store.build_graph();
    let topology = format_topology_header(&graph);
    let node_section = format_split_plan_node(store, &graph, key);
    load_prompt("split-plan", &[("{{TOPOLOGY}}", &topology), ("{{NODE}}", &node_section)])
}
/// Build split-extract prompt for one child (phase 2)
pub fn split_extract_prompt(store: &Store, parent_key: &str, child_key: &str, child_desc: &str, child_sections: &str) -> Result<String, String> {
    let parent_content = store.nodes.get(parent_key)
        .map(|n| n.content.as_str())
        .ok_or_else(|| format!("No node '{}'", parent_key))?;
    load_prompt("split-extract", &[
        ("{{CHILD_KEY}}", child_key),
        ("{{CHILD_DESC}}", child_desc),
        ("{{CHILD_SECTIONS}}", child_sections),
        ("{{PARENT_CONTENT}}", parent_content),
    ])
}
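//
// Illustrative two-phase split flow (node keys and descriptions hypothetical):
//
//     let plan = split_plan_prompt(&store, "big-node")?;        // phase 1
//     // ...agent returns a plan listing children, then per child:
//     let extract = split_extract_prompt(&store, "big-node",
//         "big-node-auth", "authentication notes", "## Auth\n...")?;  // phase 2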
/// Run agent consolidation on top-priority nodes
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
    let graph = store.build_graph();
    let items = replay_queue(store, count);
    if items.is_empty() {
        println!("No nodes to consolidate.");
        return Ok(());
    }
    let nodes_section = format_nodes_section(store, &items, &graph);
    if auto {
        let prompt = load_prompt("replay", &[("{{NODES}}", &nodes_section)])?;
        println!("{}", prompt);
    } else {
        // Interactive: show what needs attention and available agent types
        println!("Consolidation batch ({} nodes):\n", items.len());
        for item in &items {
            let node_type = store.nodes.get(&item.key)
                .map(|n| if matches!(n.node_type, crate::store::NodeType::EpisodicSession) { "episodic" } else { "semantic" })
                .unwrap_or("?");
            println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
                item.priority, item.key, item.cc, item.interval_days, node_type);
        }
        // Also show interference pairs
        let pairs = detect_interference(store, &graph, 0.6);
        if !pairs.is_empty() {
            println!("\nInterfering pairs ({}):", pairs.len());
            for (a, b, sim) in pairs.iter().take(5) {
                println!(" [{:.3}] {} ↔ {}", sim, a, b);
            }
        }
        println!("\nAgent prompts:");
        println!("  --auto             Generate replay agent prompt");
        println!("  --agent replay     Replay agent (schema assimilation)");
        println!("  --agent linker     Linker agent (relational binding)");
        println!("  --agent separator  Separator agent (pattern separation)");
        println!("  --agent transfer   Transfer agent (CLS episodic→semantic)");
        println!("  --agent health     Health agent (synaptic homeostasis)");
    }
    Ok(())
}
/// Generate a specific agent prompt with filled-in data.
/// Returns an AgentBatch with the prompt text and the keys of nodes
/// selected for processing (for visit tracking on success).
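///
/// Minimal sketch of driving one agent (names as matched below):
/// ```ignore
/// let batch = agent_prompt(&store, "linker", 10)?;
/// println!("{}", batch.prompt); // hand off to the agent runner
/// ```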
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<AgentBatch, String> {
    // Config-driven agents take priority over hardcoded ones
    if let Some(def) = super::defs::get_def(agent) {
        return super::defs::run_agent(store, &def, count);
    }
    let graph = store.build_graph();
    let topology = format_topology_header(&graph);
    let emb = spectral::load_embedding().ok();
    match agent {
        "replay" => {
            let items = replay_queue_with_graph(store, count, &graph, emb.as_ref());
            let keys: Vec<String> = items.iter().map(|i| i.key.clone()).collect();
            let nodes_section = format_nodes_section(store, &items, &graph);
            let prompt = load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])?;
            Ok(AgentBatch { prompt, node_keys: keys })
        }
        "linker" => {
            // Filter to episodic entries
            let mut items = replay_queue_with_graph(store, count * 2, &graph, emb.as_ref());
            items.retain(|item| {
                store.nodes.get(&item.key)
                    .map(|n| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
                    .unwrap_or(false)
            });
            items.truncate(count);
            let keys: Vec<String> = items.iter().map(|i| i.key.clone()).collect();
            let nodes_section = format_nodes_section(store, &items, &graph);
            let prompt = load_prompt("linker", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])?;
            Ok(AgentBatch { prompt, node_keys: keys })
        }
        "separator" => {
            let mut pairs = detect_interference(store, &graph, 0.5);
            pairs.truncate(count);
            // Both nodes in each pair count as visited
            let keys: Vec<String> = pairs.iter()
                .flat_map(|(a, b, _)| vec![a.clone(), b.clone()])
                .collect();
            let pairs_section = format_pairs_section(&pairs, store, &graph);
            let prompt = load_prompt("separator", &[("{{TOPOLOGY}}", &topology), ("{{PAIRS}}", &pairs_section)])?;
            Ok(AgentBatch { prompt, node_keys: keys })
        }
        "transfer" => {
            // Recent episodic entries
            let mut episodes: Vec<_> = store.nodes.iter()
                .filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
                .map(|(k, n)| (k.clone(), n.timestamp))
                .collect();
            episodes.sort_by(|a, b| b.1.cmp(&a.1));
            episodes.truncate(count);
            let episode_keys: Vec<_> = episodes.iter().map(|(k, _)| k.clone()).collect();
            let items: Vec<ReplayItem> = episode_keys.iter()
                .filter_map(|k| {
                    let node = store.nodes.get(k)?;
                    Some(ReplayItem {
                        key: k.clone(),
                        priority: consolidation_priority(store, k, &graph, None),
                        interval_days: node.spaced_repetition_interval,
                        emotion: node.emotion,
                        cc: graph.clustering_coefficient(k),
                        classification: "unknown",
                        outlier_score: 0.0,
                    })
                })
                .collect();
            let episodes_section = format_nodes_section(store, &items, &graph);
            let prompt = load_prompt("transfer", &[("{{TOPOLOGY}}", &topology), ("{{EPISODES}}", &episodes_section)])?;
            Ok(AgentBatch { prompt, node_keys: episode_keys })
        }
        "health" => {
            // Health agent analyzes the whole graph, no specific nodes
            let health_section = format_health_section(store, &graph);
            let prompt = load_prompt("health", &[("{{TOPOLOGY}}", &topology), ("{{HEALTH}}", &health_section)])?;
            Ok(AgentBatch { prompt, node_keys: vec![] })
        }
        "rename" => {
            let (keys, nodes_section) = format_rename_candidates_with_keys(store, count);
            let prompt = load_prompt("rename", &[("{{NODES}}", &nodes_section)])?;
            Ok(AgentBatch { prompt, node_keys: keys })
        }
        "split" => {
            // Phase 1: plan prompt for the largest candidate
            let candidates = split_candidates(store);
            if candidates.is_empty() {
                return Err("No nodes large enough to split".to_string());
            }
            let key = candidates[0].clone();
            let node_section = format_split_plan_node(store, &graph, &key);
            let prompt = load_prompt("split-plan", &[("{{TOPOLOGY}}", &topology), ("{{NODE}}", &node_section)])?;
            Ok(AgentBatch { prompt, node_keys: vec![key] })
        }
        _ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health, rename, split", agent)),
    }
}