// consciousness/src/agent/oneshot.rs

// oneshot.rs — Autonomous agent execution
//
// AutoAgent: wraps an Agent with a multi-step prompt sequence and an
// async run() method. Used for both oneshot CLI agents (from .agent
// files) and subconscious agents forked from the conscious agent.
//
// Also contains the legacy run_one_agent() pipeline and process
// management for spawned agent subprocesses.

use crate::store::{self, Store};
use crate::subconscious::{defs, prompts};

use std::fs;
use std::path::PathBuf;
use std::sync::OnceLock;

use super::api::ApiClient;
use super::api::types::*;
use super::tools::{self as agent_tools};
use super::Agent;

// ---------------------------------------------------------------------------
// API client — shared across oneshot agent runs
// ---------------------------------------------------------------------------

/// Process-wide API client. Built lazily on first use by `get_client()` from
/// the global config; `AutoAgent::new` clones it for each standalone agent.
static API_CLIENT: OnceLock<ApiClient> = OnceLock::new();

/// Return the shared API client, constructing it from the global config the
/// first time it is requested.
///
/// Missing base URL / API key fall back to the empty string and a missing
/// model falls back to `"qwen-2.5-27b"`. The current implementation never
/// returns `Err`; the `Result` is part of the signature callers rely on.
fn get_client() -> Result<&'static ApiClient, String> {
    let shared = API_CLIENT.get_or_init(|| {
        let cfg = crate::config::get();
        ApiClient::new(
            cfg.api_base_url.as_deref().unwrap_or(""),
            cfg.api_key.as_deref().unwrap_or(""),
            cfg.api_model.as_deref().unwrap_or("qwen-2.5-27b"),
        )
    });
    Ok(shared)
}

// ---------------------------------------------------------------------------
// AutoAgent — multi-step autonomous agent
// ---------------------------------------------------------------------------

/// One step of an [`AutoAgent`] run: the prompt injected as a user message
/// and the phase label reported while that step is active.
#[derive(Debug, Clone)]
pub struct AutoStep {
    /// Text sent to the model as the user message for this step.
    pub prompt: String,
    /// Phase label for this step (may be empty).
    pub phase: String,
}

/// An autonomous agent that runs a sequence of prompts with tool dispatch.
///
/// Two backends:
/// - Standalone: bare message list + global API client (oneshot CLI agents)
/// - Agent-backed: forked Agent whose ContextState is the conversation
///   (subconscious agents, KV cache sharing with conscious agent)
pub struct AutoAgent {
    /// Agent name; used as the prefix of the error messages built by `run()`
    /// and `api_call_with_retry()`.
    pub name: String,
    /// Holds the conversation and supplies the API client and tool list.
    backend: Backend,
    /// Prompt sequence; `run()` injects these one at a time.
    steps: Vec<AutoStep>,
    /// Index into `steps` of the next prompt that has not been injected yet.
    next_step: usize,
    /// Sampling parameters passed to every API call.
    sampling: super::api::SamplingParams,
    /// Priority value passed to every API call.
    priority: i32,
    // Observable status
    /// Phase label of the most recently injected step (initially the first
    /// step's phase, or empty when there are no steps).
    pub current_phase: String,
    /// Turn counter; incremented at the start of every loop iteration in `run()`.
    pub turn: usize,
}

/// Where an `AutoAgent` keeps its conversation and which client/tools it uses.
enum Backend {
    /// Standalone: raw message list, no Agent context.
    Standalone {
        /// API client used for every request of this run.
        client: ApiClient,
        /// Tools offered to the model.
        tools: Vec<agent_tools::Tool>,
        /// The conversation so far, in the order messages were pushed.
        messages: Vec<Message>,
    },
    /// Backed by a forked Agent — conversation lives in ContextState.
    Forked(Agent),
}

impl Backend {
    /// API client to send requests through: the stored client for a
    /// standalone backend, the forked agent's own client otherwise.
    fn client(&self) -> &ApiClient {
        match self {
            Self::Forked(forked) => &forked.client,
            Self::Standalone { client: own, .. } => own,
        }
    }

    /// Tools advertised to the model for this backend.
    fn tools(&self) -> &[agent_tools::Tool] {
        match self {
            Self::Forked(forked) => &forked.tools,
            Self::Standalone { tools: own, .. } => own,
        }
    }

    /// Snapshot of the messages to send on the next API call. Standalone
    /// backends clone their list; forked backends ask the agent to assemble it.
    fn messages(&self) -> Vec<Message> {
        match self {
            Self::Forked(forked) => forked.assemble_api_messages(),
            Self::Standalone { messages: history, .. } => history.clone(),
        }
    }

    /// Append a message via the backend's regular path (`Agent::push_message`
    /// for a forked agent).
    fn push_message(&mut self, msg: Message) {
        match self {
            Self::Forked(forked) => forked.push_message(msg),
            Self::Standalone { messages: history, .. } => history.push(msg),
        }
    }

    /// Append a message directly. For a forked agent this pushes a
    /// `ConversationEntry::Message` straight onto the context entries instead
    /// of going through `Agent::push_message`.
    fn push_raw(&mut self, msg: Message) {
        match self {
            Self::Forked(forked) => {
                let entry = super::context::ConversationEntry::Message(msg);
                forked.context.entries.push(entry);
            }
            Self::Standalone { messages: history, .. } => history.push(msg),
        }
    }
}

impl AutoAgent {
    /// Build a standalone agent on top of a clone of the global API client,
    /// starting from an empty conversation. Used by oneshot CLI agents.
    ///
    /// `temperature` is the only caller-chosen sampling parameter; `top_p`
    /// and `top_k` are fixed at 0.95 and 20. Fails only if the global client
    /// cannot be obtained.
    pub fn new(
        name: String,
        tools: Vec<agent_tools::Tool>,
        steps: Vec<AutoStep>,
        temperature: f32,
        priority: i32,
    ) -> Result<Self, String> {
        let backend = Backend::Standalone {
            client: get_client()?.clone(),
            tools,
            messages: Vec::new(),
        };
        let sampling = super::api::SamplingParams {
            temperature,
            top_p: 0.95,
            top_k: 20,
        };
        // Status starts at the first step's phase, or empty with no steps.
        let current_phase = match steps.first() {
            Some(first) => first.phase.clone(),
            None => String::new(),
        };
        Ok(Self {
            name,
            backend,
            steps,
            next_step: 0,
            sampling,
            priority,
            current_phase,
            turn: 0,
        })
    }

    /// Build an agent-backed `AutoAgent` by forking `agent` with the given
    /// tools (subconscious use). The fork's ContextState is the conversation:
    /// step prompts and tool results are appended to it directly. Sampling
    /// parameters are copied from the fork.
    pub fn from_agent(
        name: String,
        agent: &Agent,
        tools: Vec<agent_tools::Tool>,
        steps: Vec<AutoStep>,
        priority: i32,
    ) -> Self {
        let forked = agent.fork(tools);
        // Read the sampling settings before the fork is moved into the backend.
        let sampling = super::api::SamplingParams {
            temperature: forked.temperature,
            top_p: forked.top_p,
            top_k: forked.top_k,
        };
        let current_phase = match steps.first() {
            Some(first) => first.phase.clone(),
            None => String::new(),
        };
        Self {
            name,
            backend: Backend::Forked(forked),
            steps,
            next_step: 0,
            sampling,
            priority,
            current_phase,
            turn: 0,
        }
    }

    /// Run all steps to completion. Returns the final text response.
    pub async fn run(
        &mut self,
        bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
        log: &dyn Fn(&str),
    ) -> Result<String, String> {
        // Inject first step prompt
        if self.next_step < self.steps.len() {
            self.backend.push_message(
                Message::user(&self.steps[self.next_step].prompt));
            self.next_step += 1;
        }

        let reasoning = crate::config::get().api_reasoning.clone();
        let max_turns = 50 * self.steps.len().max(1);

        for _ in 0..max_turns {
            self.turn += 1;
            let messages = self.backend.messages();
            log(&format!("\n=== TURN {} ({} messages) ===\n",
                self.turn, messages.len()));

            // API call with retries
            let (msg, usage_opt) = self.api_call_with_retry(
                &messages, &reasoning, log).await?;

            if let Some(u) = &usage_opt {
                log(&format!("tokens: {} prompt + {} completion",
                    u.prompt_tokens, u.completion_tokens));
            }

            let has_content = msg.content.is_some();
            let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty());

            if has_tools {
                self.dispatch_tools(&msg, log).await;
                continue;
            }

            // Text-only response — step complete
            let text = msg.content_text().to_string();
            if text.is_empty() && !has_content {
                log("empty response, retrying");
                self.backend.push_message(Message::user(
                    "[system] Your previous response was empty. \
                     Please respond with text or use a tool."
                ));
                continue;
            }

            log(&format!("\n=== RESPONSE ===\n\n{}", text));

            // More steps? Check bail, inject next prompt.
            if self.next_step < self.steps.len() {
                if let Some(ref check) = bail_fn {
                    check(self.next_step)?;
                }
                self.current_phase = self.steps[self.next_step].phase.clone();
                self.backend.push_message(Message::assistant(&text));
                self.backend.push_message(
                    Message::user(&self.steps[self.next_step].prompt));
                self.next_step += 1;
                log(&format!("\n=== STEP {}/{} ===\n",
                    self.next_step, self.steps.len()));
                continue;
            }

            return Ok(text);
        }

        Err(format!("{}: exceeded {} tool turns", self.name, max_turns))
    }

    /// Send `messages` to the API, retrying transient failures.
    ///
    /// Up to five attempts are made. An error counts as transient when its
    /// text contains one of a fixed set of connection/timeout markers; the
    /// waits between attempts are 2, 4, 8 and 16 seconds. Any other error,
    /// or a transient error on the fifth attempt, is returned with the agent
    /// name, turn, approximate payload size and attempt count.
    async fn api_call_with_retry(
        &self,
        messages: &[Message],
        reasoning: &str,
        log: &dyn Fn(&str),
    ) -> Result<(Message, Option<Usage>), String> {
        const MAX_ATTEMPTS: u32 = 5;
        const TRANSIENT_MARKERS: [&str; 5] = [
            "IncompleteMessage",
            "connection closed",
            "connection reset",
            "timed out",
            "Connection refused",
        ];

        let client = self.backend.client();
        let tools = self.backend.tools();
        let mut previous_failure = None;
        let mut attempt: u32 = 0;

        loop {
            attempt += 1;
            let failure = match client.chat_completion_stream_temp(
                messages,
                tools,
                reasoning,
                self.sampling,
                Some(self.priority),
            ).await {
                Ok((msg, usage)) => {
                    if let Some(ref earlier) = previous_failure {
                        log(&format!("succeeded after retry (previous error: {})", earlier));
                    }
                    return Ok((msg, usage));
                }
                Err(e) => e,
            };

            let description = failure.to_string();
            let transient = TRANSIENT_MARKERS.iter()
                .any(|marker| description.contains(*marker));

            if transient && attempt < MAX_ATTEMPTS {
                log(&format!("transient error (attempt {}): {}, retrying...",
                    attempt, description));
                // Exponential backoff: 2s, 4s, 8s, 16s.
                let backoff = std::time::Duration::from_secs(1u64 << attempt);
                tokio::time::sleep(backoff).await;
                previous_failure = Some(failure);
                continue;
            }

            // Give up: report how much text was in flight to aid debugging.
            let payload_bytes: usize = messages.iter()
                .map(|m| m.content_text().len())
                .sum();
            return Err(format!(
                "{}: API error on turn {} (~{}KB, {} messages, {} attempts): {}",
                self.name, self.turn, payload_bytes / 1024,
                messages.len(), attempt, failure));
        }
    }

    /// Record an assistant message that contains tool calls, run each call,
    /// and append one tool-result message per call.
    ///
    /// The assistant message stored in the conversation is a sanitized copy:
    /// any call whose arguments are not valid JSON has them replaced with
    /// `{}`. Such calls are not dispatched; the model instead receives an
    /// error tool result asking it to retry with valid JSON.
    ///
    /// Arguments are parsed once and reused for dispatch. A message without
    /// tool calls is stored as-is and nothing is dispatched.
    async fn dispatch_tools(&mut self, msg: &Message, log: &dyn Fn(&str)) {
        // Push sanitized assistant message with tool calls, remembering the
        // parse result of every call (None = malformed arguments).
        let mut sanitized = msg.clone();
        let mut parsed_args: Vec<Option<serde_json::Value>> = Vec::new();
        if let Some(ref mut calls) = sanitized.tool_calls {
            for call in calls {
                let parsed =
                    serde_json::from_str::<serde_json::Value>(&call.function.arguments).ok();
                if parsed.is_none() {
                    log(&format!("sanitizing malformed args for {}: {}",
                        call.function.name, &call.function.arguments));
                    call.function.arguments = "{}".to_string();
                }
                parsed_args.push(parsed);
            }
        }
        self.backend.push_raw(sanitized);

        let Some(calls) = msg.tool_calls.as_ref() else { return };

        // Read once; the setting is not expected to change mid-dispatch.
        let verbose = std::env::var("POC_AGENT_VERBOSE").is_ok();

        // `parsed_args` was built from a clone of `msg`, so it lines up
        // one-to-one with the original (unsanitized) calls.
        for (call, args) in calls.iter().zip(parsed_args) {
            log(&format!("\nTOOL CALL: {}({})",
                call.function.name, &call.function.arguments));

            let Some(args) = args else {
                log(&format!("malformed tool call args: {}", &call.function.arguments));
                self.backend.push_raw(Message::tool_result(
                    &call.id,
                    "Error: your tool call had malformed JSON arguments. \
                     Please retry with valid JSON.",
                ));
                continue;
            };

            let output = agent_tools::dispatch(&call.function.name, &args).await;

            if verbose {
                log(&format!("TOOL RESULT ({} chars):\n{}", output.len(), output));
            } else {
                // Non-verbose: only the first 100 chars of the first line.
                let preview: String = output.lines().next()
                    .unwrap_or("").chars().take(100).collect();
                log(&format!("Result: {}", preview));
            }

            self.backend.push_raw(Message::tool_result(&call.id, &output));
        }
    }
}

// ---------------------------------------------------------------------------
// Agent execution
// ---------------------------------------------------------------------------

/// Result of running a single agent.
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Final text response produced by the agent.
    pub output: String,
    /// Keys of the nodes the agent's prompt batch was built for.
    pub node_keys: Vec<String>,
    /// Directory containing output() files from the agent run.
    pub state_dir: PathBuf,
}

/// Run an agent. If keys are provided, use them directly (bypassing the
/// agent's query). Otherwise, run the query to select target nodes.
///
/// The agent's output directory is taken from `POC_AGENT_OUTPUT_DIR` when
/// set, else `<memory dir>/agent-output/<agent_name>`; it is created and
/// exported back into `POC_AGENT_OUTPUT_DIR` for the duration of the process.
///
/// # Errors
/// Returns `Err` when there is no definition for `agent_name`, the state
/// directory cannot be created, the prompt batch cannot be built or has no
/// steps, the first prompt exceeds the size limit (it is then dumped under
/// `llm-logs/oversized`), the bail script fails, or the API run fails.
pub fn run_one_agent(
    store: &mut Store,
    agent_name: &str,
    count: usize,
    keys: Option<&[String]>,
    log: &(dyn Fn(&str) + Sync),
) -> Result<AgentResult, String> {
    /// Upper bound on the size of the first prompt, in bytes.
    const MAX_PROMPT_BYTES: usize = 800_000;

    let def = defs::get_def(agent_name)
        .ok_or_else(|| format!("no .agent file for {}", agent_name))?;

    // State dir for agent output files
    let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
        .map(PathBuf::from)
        .unwrap_or_else(|_| store::memory_dir().join("agent-output").join(agent_name));
    fs::create_dir_all(&state_dir)
        .map_err(|e| format!("create state dir: {}", e))?;
    // SAFETY: `set_var` is only sound while no other thread reads or writes
    // the process environment concurrently.
    // NOTE(review): nothing in this function enforces that — confirm callers
    // run this before other threads touch the environment.
    unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); }

    // Build prompt batch — either from explicit keys or the agent's query
    let agent_batch = if let Some(keys) = keys {
        log(&format!("targeting: {}", keys.join(", ")));
        let graph = store.build_graph();
        let mut resolved_steps = Vec::new();
        let mut all_keys: Vec<String> = keys.to_vec();
        for step in &def.steps {
            let (prompt, extra_keys) = defs::resolve_placeholders(
                &step.prompt, store, &graph, keys, count,
            );
            all_keys.extend(extra_keys);
            resolved_steps.push(prompts::ResolvedStep {
                prompt,
                phase: step.phase.clone(),
            });
        }
        let batch = prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys };
        if !batch.node_keys.is_empty() {
            // Best effort: a failure to record visits must not abort the run.
            store.record_agent_visits(&batch.node_keys, agent_name).ok();
        }
        batch
    } else {
        log("building prompt");
        let effective_count = def.count.unwrap_or(count);
        defs::run_agent(store, &def, effective_count, &Default::default())?
    };

    // Filter tools based on agent def (an empty list means "all tools")
    let all_tools = super::tools::memory_and_journal_tools();
    let effective_tools: Vec<super::tools::Tool> = if def.tools.is_empty() {
        all_tools.to_vec()
    } else {
        all_tools.into_iter()
            .filter(|t| def.tools.iter().any(|w| w == &t.name))
            .collect()
    };
    let tools_desc = effective_tools.iter().map(|t| t.name).collect::<Vec<_>>().join(", ");
    let n_steps = agent_batch.steps.len();

    for key in &agent_batch.node_keys {
        log(&format!("  node: {}", key));
    }

    // An agent without steps has nothing to send; fail cleanly instead of
    // panicking on the first-step lookup below.
    let Some(first_step) = agent_batch.steps.first() else {
        return Err(format!("agent {} has no steps", agent_name));
    };

    // Guard: reject oversized first prompt
    let first_len = first_step.prompt.len();
    if first_len > MAX_PROMPT_BYTES {
        let prompt_kb = first_len / 1024;
        let oversize_dir = store::memory_dir().join("llm-logs").join("oversized");
        let oversize_path = oversize_dir.join(format!("{}-{}.txt",
            agent_name, store::compact_timestamp()));
        let header = format!("=== OVERSIZED PROMPT ===\nagent: {}\nsize: {}KB (max {}KB)\nnodes: {:?}\n\n",
            agent_name, prompt_kb, MAX_PROMPT_BYTES / 1024, agent_batch.node_keys);
        // Dumping the prompt is best effort, but only claim success when the
        // file was actually written.
        let dumped = fs::create_dir_all(&oversize_dir)
            .and_then(|()| fs::write(&oversize_path, format!("{}{}", header, &first_step.prompt)));
        match dumped {
            Ok(()) => log(&format!("oversized prompt logged to {}", oversize_path.display())),
            Err(e) => log(&format!("failed to log oversized prompt to {}: {}",
                oversize_path.display(), e)),
        }
        return Err(format!(
            "prompt too large: {}KB (max {}KB) — seed nodes may be oversized",
            prompt_kb, MAX_PROMPT_BYTES / 1024,
        ));
    }

    let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
    log(&format!("{} step(s) {:?}, {}KB initial, {}, {} nodes, output={}",
        n_steps, phases, first_len / 1024, tools_desc,
        agent_batch.node_keys.len(), state_dir.display()));

    let step_prompts: Vec<String> = agent_batch.steps.iter()
        .map(|s| s.prompt.clone()).collect();
    let step_phases: Vec<String> = agent_batch.steps.iter()
        .map(|s| s.phase.clone()).collect();

    if std::env::var("POC_AGENT_VERBOSE").is_ok() {
        for (i, s) in agent_batch.steps.iter().enumerate() {
            log(&format!("=== PROMPT {}/{} ({}) ===\n\n{}", i + 1, n_steps, s.phase, s.prompt));
        }
    }
    log("\n=== CALLING LLM ===");

    // Bail check: if the agent defines a bail script, run it between steps.
    // The script runs with the state dir as its working directory; a
    // non-zero exit aborts the run.
    let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name));
    let state_dir_for_bail = state_dir.clone();
    let bail_fn = move |step_idx: usize| -> Result<(), String> {
        if let Some(ref script) = bail_script {
            let status = std::process::Command::new(script)
                .current_dir(&state_dir_for_bail)
                .status()
                .map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
            if !status.success() {
                return Err(format!("bailed at step {}: {:?} exited {}",
                    step_idx + 1, script.file_name().unwrap_or_default(),
                    status.code().unwrap_or(-1)));
            }
        }
        Ok(())
    };

    let output = call_api_with_tools_sync(
        agent_name, &step_prompts, &step_phases, def.temperature, def.priority,
        &effective_tools, Some(&bail_fn), log)?;

    Ok(AgentResult {
        output,
        node_keys: agent_batch.node_keys,
        state_dir,
    })
}

// ---------------------------------------------------------------------------
// Compatibility wrappers — delegate to AutoAgent
// ---------------------------------------------------------------------------

/// Run a prompt sequence through the API with tool support.
///
/// Thin convenience wrapper for existing callers: pairs each prompt with the
/// phase at the same index (empty when `phases` is shorter), builds a
/// standalone [`AutoAgent`] and runs it. `temperature` defaults to 0.6.
pub async fn call_api_with_tools(
    agent: &str,
    prompts: &[String],
    phases: &[String],
    temperature: Option<f32>,
    priority: i32,
    tools: &[agent_tools::Tool],
    bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
    log: &dyn Fn(&str),
) -> Result<String, String> {
    // Walk the phase list alongside the prompts; once it runs out, remaining
    // prompts get an empty phase.
    let mut phase_names = phases.iter();
    let steps: Vec<AutoStep> = prompts.iter()
        .map(|prompt| AutoStep {
            prompt: prompt.clone(),
            phase: phase_names.next().cloned().unwrap_or_default(),
        })
        .collect();

    let mut runner = AutoAgent::new(
        agent.to_string(),
        tools.to_vec(),
        steps,
        temperature.unwrap_or(0.6),
        priority,
    )?;
    runner.run(bail_fn, log).await
}

/// Synchronous wrapper — runs on a dedicated thread with its own
/// tokio runtime. Safe to call from any context.
///
/// # Errors
/// Returns `Err` when the runtime cannot be built, when the agent run itself
/// fails, or when the worker thread panics (the panic message is included
/// instead of being re-raised in the caller).
pub fn call_api_with_tools_sync(
    agent: &str,
    prompts: &[String],
    phases: &[String],
    temperature: Option<f32>,
    priority: i32,
    tools: &[agent_tools::Tool],
    bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
    log: &(dyn Fn(&str) + Sync),
) -> Result<String, String> {
    std::thread::scope(|s| {
        let worker = s.spawn(|| {
            let rt = tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
                .map_err(|e| format!("tokio runtime: {}", e))?;
            rt.block_on(
                call_api_with_tools(agent, prompts, phases, temperature, priority, tools, bail_fn, log)
            )
        });

        match worker.join() {
            Ok(result) => result,
            Err(payload) => {
                // Panic payloads are usually `&str` or `String`; anything
                // else is reported generically.
                let reason = payload.downcast_ref::<&str>()
                    .map(|msg| msg.to_string())
                    .or_else(|| payload.downcast_ref::<String>().cloned())
                    .unwrap_or_else(|| "unknown panic".to_string());
                Err(format!("{}: agent worker thread panicked: {}", agent, reason))
            }
        }
    })
}

// ---------------------------------------------------------------------------
// Process management — PID tracking and subprocess spawning
// ---------------------------------------------------------------------------

/// Check for live agent processes in a state dir. Returns (phase, pid) pairs.
/// Cleans up stale pid files and kills timed-out processes.
///
/// Pid files are named `pid-<pid>` and contain the phase label. A file is
/// removed when its process no longer responds to signal 0, when its pid is
/// not a valid positive process id, or — if `timeout_secs > 0` — when the
/// file's modification time is older than `timeout_secs` (the process is
/// sent `SIGTERM` first). An unreadable directory yields an empty list.
pub fn scan_pid_files(state_dir: &std::path::Path, timeout_secs: u64) -> Vec<(String, u32)> {
    let mut live = Vec::new();
    let Ok(entries) = fs::read_dir(state_dir) else { return live };
    for entry in entries.flatten() {
        let name = entry.file_name();
        let name_str = name.to_string_lossy();
        let Some(pid_str) = name_str.strip_prefix("pid-") else { continue };
        let Ok(pid) = pid_str.parse::<u32>() else { continue };
        let path = entry.path();

        // kill(2) treats pid 0 as "my process group" and negative pids as
        // process groups / "everything". Never let a pid file name reach
        // those cases via a wrapping cast: only strictly positive pids that
        // fit in pid_t identify a single process.
        let os_pid = match libc::pid_t::try_from(pid) {
            Ok(p) if p > 0 => p,
            _ => {
                fs::remove_file(&path).ok();
                continue;
            }
        };

        // SAFETY: kill(2) with signal 0 only performs the existence and
        // permission check; `os_pid` is a positive single-process id.
        if unsafe { libc::kill(os_pid, 0) } != 0 {
            fs::remove_file(&path).ok();
            continue;
        }

        if timeout_secs > 0 {
            // Age is measured from the pid file's mtime; if it cannot be
            // read the process is left alone.
            let age_secs = entry.metadata()
                .and_then(|meta| meta.modified())
                .ok()
                .map(|modified| modified.elapsed().unwrap_or_default().as_secs());
            if age_secs.is_some_and(|secs| secs > timeout_secs) {
                // SAFETY: `os_pid` is a positive single-process id, so the
                // signal targets exactly one process.
                unsafe { libc::kill(os_pid, libc::SIGTERM); }
                fs::remove_file(&path).ok();
                continue;
            }
        }

        let phase = fs::read_to_string(&path)
            .unwrap_or_default()
            .trim().to_string();
        live.push((phase, pid));
    }
    live
}

/// Handle to an agent subprocess started by `spawn_agent`.
pub struct SpawnResult {
    /// The spawned child process.
    pub child: std::process::Child,
    /// Path of the log file the child's stdout and stderr are directed to.
    pub log_path: PathBuf,
}

/// Spawn `poc-memory agent run <agent_name>` as a subprocess.
///
/// The child's stdout and stderr go to a fresh timestamped log file under
/// `~/.consciousness/logs/<agent_name>/`; if the log file (or a second handle
/// to it) cannot be opened, the affected stream is discarded instead. After
/// spawning, a `pid-<pid>` file containing the agent's first phase (or
/// `step-0` when it has no steps) is written into `state_dir` on a
/// best-effort basis.
///
/// Returns `None` when there is no definition for `agent_name` or the
/// process cannot be started.
pub fn spawn_agent(
    agent_name: &str,
    state_dir: &std::path::Path,
    session_id: &str,
) -> Option<SpawnResult> {
    let def = defs::get_def(agent_name)?;
    let first_phase = def.steps.first()
        .map(|s| s.phase.as_str())
        .unwrap_or("step-0");

    let log_dir = dirs::home_dir().unwrap_or_default()
        .join(format!(".consciousness/logs/{}", agent_name));
    fs::create_dir_all(&log_dir).ok();
    let log_path = log_dir.join(format!("{}.log", store::compact_timestamp()));

    // Logging is best effort: fall back to the null device rather than
    // failing (or panicking) when the log file is unavailable.
    let (child_stdout, child_stderr) = match fs::File::create(&log_path) {
        Ok(log_file) => {
            let out = log_file.try_clone()
                .map(std::process::Stdio::from)
                .unwrap_or_else(|_| std::process::Stdio::null());
            (out, std::process::Stdio::from(log_file))
        }
        Err(_) => (std::process::Stdio::null(), std::process::Stdio::null()),
    };

    let child = std::process::Command::new("poc-memory")
        .args(["agent", "run", agent_name, "--count", "1", "--local", "--state-dir"])
        // Pass the path as an OsStr so non-UTF-8 directories survive intact.
        .arg(state_dir)
        .env("POC_SESSION_ID", session_id)
        .stdout(child_stdout)
        .stderr(child_stderr)
        .spawn()
        .ok()?;

    let pid = child.id();
    let pid_path = state_dir.join(format!("pid-{}", pid));
    fs::write(&pid_path, first_phase).ok();
    Some(SpawnResult { child, log_path })
}