call_api_with_tools_sync() -> src/agent/oneshot.rs

2026-04-07 00:57:35 -04:00 · 2026-04-07 00:57:35 -04:00 · cbf7653cdf
commit cbf7653cdf
parent da24e02159
9 changed files with 638 additions and 234 deletions
--- a/src/subconscious/api.rs
+++ b/src/subconscious/api.rs
@ -1,228 +0,0 @@
-// agents/api.rs — Direct API backend for agent execution
-//
-// Uses poc-agent's OpenAI-compatible API client to call models directly
-// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI.
-// Implements the tool loop: send prompt → if tool_calls, execute them →
-// send results back → repeat until text response.
-//
-// Activated when config has api_base_url set.
-
-use crate::agent::api::ApiClient;
-use crate::agent::api::types::*;
-use crate::agent::tools::{self as agent_tools};
-
-use std::sync::OnceLock;
-
-static API_CLIENT: OnceLock<ApiClient> = OnceLock::new();
-
-fn get_client() -> Result<&'static ApiClient, String> {
-    Ok(API_CLIENT.get_or_init(|| {
-        let config = crate::config::get();
-        let base_url = config.api_base_url.as_deref().unwrap_or("");
-        let api_key = config.api_key.as_deref().unwrap_or("");
-        let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b");
-        ApiClient::new(base_url, api_key, model)
-    }))
-}
-
-/// Run agent prompts through the direct API with tool support.
-/// For multi-step agents, each prompt is injected as a new user message
-/// after the previous step's tool loop completes. The conversation
-/// context carries forward naturally between steps.
-/// Returns the final text response after all steps complete.
-pub async fn call_api_with_tools(
-    agent: &str,
-    prompts: &[String],
-    phases: &[String],
-    temperature: Option<f32>,
-    priority: i32,
-    tools: &[agent_tools::Tool],
-    bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
-    log: &dyn Fn(&str),
-) -> Result<String, String> {
-    let client = get_client()?;
-
-    // Tools are already filtered by the caller
-    // Provenance tracks which agent:phase is making writes.
-    // Updated between steps by the bail function via set_provenance().
-    let first_phase = phases.first().map(|s| s.as_str()).unwrap_or("");
-    let provenance = std::cell::RefCell::new(
-        if first_phase.is_empty() { format!("agent:{}", agent) }
-        else { format!("agent:{}:{}", agent, first_phase) }
-    );
-
-    // Start with the first prompt as a user message
-    let mut messages = vec![Message::user(&prompts[0])];
-    let mut next_prompt_idx = 1; // index of next prompt to inject
-    let reasoning = crate::config::get().api_reasoning.clone();
-
-    let max_turns = 50 * prompts.len();
-    for turn in 0..max_turns {
-        log(&format!("\n=== TURN {} ({} messages) ===\n", turn, messages.len()));
-
-        let mut last_err = None;
-        let mut msg_opt = None;
-        let mut usage_opt = None;
-        for attempt in 0..5 {
-            let sampling = crate::agent::api::SamplingParams {
-                temperature: temperature.unwrap_or(0.6),
-                top_p: 0.95,
-                top_k: 20,
-            };
-            match client.chat_completion_stream_temp(
-                &messages,
-                tools,
-                &reasoning,
-                sampling,
-                Some(priority),
-            ).await {
-                Ok((msg, usage)) => {
-                    msg_opt = Some(msg);
-                    usage_opt = usage;
-                    break;
-                }
-                Err(e) => {
-                    let err_str = e.to_string();
-                    let is_transient = err_str.contains("IncompleteMessage")
-                        || err_str.contains("connection closed")
-                        || err_str.contains("connection reset")
-                        || err_str.contains("timed out")
-                        || err_str.contains("Connection refused");
-                    if is_transient && attempt < 4 {
-                        log(&format!("transient error (attempt {}): {}, retrying...",
-                            attempt + 1, err_str));
-                        tokio::time::sleep(std::time::Duration::from_secs(2 << attempt)).await;
-                        last_err = Some(e);
-                        continue;
-                    }
-                    let msg_bytes: usize = messages.iter()
-                        .map(|m| m.content_text().len())
-                        .sum();
-                    return Err(format!(
-                        "API error on turn {} (~{}KB payload, {} messages, {} attempts): {}",
-                        turn, msg_bytes / 1024, messages.len(), attempt + 1, e));
-                }
-            }
-        }
-        let msg = msg_opt.unwrap();
-        if let Some(ref e) = last_err {
-            log(&format!("succeeded after retry (previous error: {})", e));
-        }
-
-        if let Some(u) = &usage_opt {
-            log(&format!("tokens: {} prompt + {} completion",
-                u.prompt_tokens, u.completion_tokens));
-        }
-
-        let has_content = msg.content.is_some();
-        let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty());
-
-        if has_tools {
-            // Push the assistant message with tool calls.
-            // Sanitize arguments: vllm re-parses them as JSON when
-            // preprocessing the conversation, so invalid JSON from the
-            // model crashes the next request.
-            let mut sanitized = msg.clone();
-            if let Some(ref mut calls) = sanitized.tool_calls {
-                for call in calls {
-                    if serde_json::from_str::<serde_json::Value>(&call.function.arguments).is_err() {
-                        log(&format!("sanitizing malformed args for {}: {}",
-                            call.function.name, &call.function.arguments));
-                        call.function.arguments = "{}".to_string();
-                    }
-                }
-            }
-            messages.push(sanitized);
-
-            // Execute each tool call
-            for call in msg.tool_calls.as_ref().unwrap() {
-                log(&format!("\nTOOL CALL: {}({})",
-                    call.function.name,
-                    &call.function.arguments));
-
-                let args: serde_json::Value = match serde_json::from_str(&call.function.arguments) {
-                    Ok(v) => v,
-                    Err(_) => {
-                        log(&format!("malformed tool call args: {}", &call.function.arguments));
-                        messages.push(Message::tool_result(
-                            &call.id,
-                            "Error: your tool call had malformed JSON arguments. Please retry with valid JSON.",
-                        ));
-                        continue;
-                    }
-                };
-
-                let output = agent_tools::dispatch(&call.function.name, &args).await;
-
-                if std::env::var("POC_AGENT_VERBOSE").is_ok() {
-                    log(&format!("TOOL RESULT ({} chars):\n{}", output.len(), output));
-                } else {
-                    let preview: String = output.lines().next().unwrap_or("").chars().take(100).collect();
-                    log(&format!("Result: {}", preview));
-                }
-
-                messages.push(Message::tool_result(&call.id, &output));
-            }
-            continue;
-        }
-
-        // Text-only response — step complete
-        let text = msg.content_text().to_string();
-        if text.is_empty() && !has_content {
-            log("empty response, retrying");
-            messages.push(Message::user(
-                "[system] Your previous response was empty. Please respond with text or use a tool."
-            ));
-            continue;
-        }
-
-        log(&format!("\n=== RESPONSE ===\n\n{}", text));
-
-        // If there are more prompts, check bail condition and inject the next one
-        if next_prompt_idx < prompts.len() {
-            // Run bail check before continuing to next step
-            if let Some(ref check) = bail_fn {
-                check(next_prompt_idx)?;
-            }
-            // Update provenance for the new phase
-            if let Some(phase) = phases.get(next_prompt_idx) {
-                *provenance.borrow_mut() = format!("agent:{}:{}", agent, phase);
-            }
-            messages.push(Message::assistant(&text));
-            let next = &prompts[next_prompt_idx];
-            next_prompt_idx += 1;
-            log(&format!("\n=== STEP {}/{} ===\n", next_prompt_idx, prompts.len()));
-            messages.push(Message::user(next));
-            continue;
-        }
-
-        return Ok(text);
-    }
-
-    Err(format!("agent exceeded {} tool turns", max_turns))
-}
-
-/// Synchronous wrapper — runs the async function on a dedicated thread
-/// with its own tokio runtime. Safe to call from any context.
-pub fn call_api_with_tools_sync(
-    agent: &str,
-    prompts: &[String],
-    phases: &[String],
-    temperature: Option<f32>,
-    priority: i32,
-    tools: &[agent_tools::Tool],
-    bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
-    log: &(dyn Fn(&str) + Sync),
-) -> Result<String, String> {
-    std::thread::scope(|s| {
-        s.spawn(|| {
-            let rt = tokio::runtime::Builder::new_current_thread()
-                .enable_all()
-                .build()
-                .map_err(|e| format!("tokio runtime: {}", e))?;
-            rt.block_on(
-                    call_api_with_tools(agent, prompts, phases, temperature, priority, tools, bail_fn, log)
-            )
-        }).join().unwrap()
-    })
-}
--- a/src/subconscious/audit.rs
+++ b/src/subconscious/audit.rs
@ -210,7 +210,7 @@ pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String>
    // Run batches in parallel via rayon
    let batch_results: Vec<_> = batch_data.par_iter()
        .map(|(batch_idx, batch_infos, prompt)| {
-            let response = super::api::call_api_with_tools_sync(
+            let response = crate::agent::oneshot::call_api_with_tools_sync(
                "audit", &[prompt.clone()], &[], None, 10, &[], None, &|_| {});
            let completed = done.fetch_add(1, Ordering::Relaxed) + 1;
            eprint!("\r  Batches: {}/{} done", completed, total_batches);
--- a/src/subconscious/digest.rs
+++ b/src/subconscious/digest.rs
@ -284,7 +284,7 @@ fn generate_digest(
            .filter(|t| def.tools.iter().any(|w| w == &t.name))
            .collect()
    };
-    let digest = super::api::call_api_with_tools_sync(
+    let digest = crate::agent::oneshot::call_api_with_tools_sync(
        &def.agent, &prompts, &phases, def.temperature, def.priority,
        &tools, None, &log)?;

--- a/src/subconscious/mod.rs
+++ b/src/subconscious/mod.rs
@ -17,7 +17,6 @@
 //
 // The session hook (context injection, agent orchestration) moved to claude/hook.

-pub mod api;
 pub mod audit;
 pub mod consolidate;
 pub mod daemon;