// agents/api.rs — Direct API backend for agent execution // // Uses poc-agent's OpenAI-compatible API client to call models directly // (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI. // Implements the tool loop: send prompt → if tool_calls, execute them → // send results back → repeat until text response. // // Activated when config has api_base_url set. use poc_agent::api::ApiClient; use poc_agent::types::*; use poc_agent::tools::{self, ProcessTracker}; use poc_agent::ui_channel::StreamTarget; use std::sync::OnceLock; static API_CLIENT: OnceLock = OnceLock::new(); fn get_client() -> Result<&'static ApiClient, String> { Ok(API_CLIENT.get_or_init(|| { let config = crate::config::get(); let base_url = config.api_base_url.as_deref().unwrap_or(""); let api_key = config.api_key.as_deref().unwrap_or(""); let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b"); ApiClient::new(base_url, api_key, model) })) } /// Run an agent prompt through the direct API with tool support. /// Returns the final text response after all tool calls are resolved. pub async fn call_api_with_tools( agent: &str, prompt: &str, log: &dyn Fn(&str), ) -> Result { let client = get_client()?; // Set up a UI channel — we drain reasoning tokens into the log let (ui_tx, mut ui_rx) = poc_agent::ui_channel::channel(); // Build tool definitions — memory tools for graph operations let all_defs = tools::definitions(); let tool_defs: Vec = all_defs.into_iter() .filter(|d| d.function.name.starts_with("memory_")) .collect(); let tracker = ProcessTracker::new(); // Start with the prompt as a user message let mut messages = vec![Message::user(prompt)]; let reasoning = crate::config::get().api_reasoning.clone(); let max_turns = 50; for turn in 0..max_turns { log(&format!("API turn {} ({} messages)", turn, messages.len())); let (msg, usage) = client.chat_completion_stream( &messages, Some(&tool_defs), &ui_tx, StreamTarget::Autonomous, &reasoning, ).await.map_err(|e| format!("API error: {}", e))?; if let Some(u) = &usage { log(&format!("tokens: {} prompt + {} completion", u.prompt_tokens, u.completion_tokens)); } // Drain reasoning tokens from the UI channel into the log { let mut reasoning_buf = String::new(); while let Ok(ui_msg) = ui_rx.try_recv() { if let poc_agent::ui_channel::UiMessage::Reasoning(r) = ui_msg { reasoning_buf.push_str(&r); } } if !reasoning_buf.is_empty() { log(&format!("\n{}\n", reasoning_buf.trim())); } } let has_content = msg.content.is_some(); let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty()); if has_tools { // Push the assistant message with tool calls. // Sanitize arguments: vllm re-parses them as JSON when // preprocessing the conversation, so invalid JSON from the // model crashes the next request. let mut sanitized = msg.clone(); if let Some(ref mut calls) = sanitized.tool_calls { for call in calls { if serde_json::from_str::(&call.function.arguments).is_err() { log(&format!("sanitizing malformed args for {}: {}", call.function.name, &call.function.arguments)); call.function.arguments = "{}".to_string(); } } } messages.push(sanitized); // Execute each tool call for call in msg.tool_calls.as_ref().unwrap() { log(&format!("tool: {}({})", call.function.name, &call.function.arguments)); let args: serde_json::Value = match serde_json::from_str(&call.function.arguments) { Ok(v) => v, Err(_) => { log(&format!("malformed tool call args: {}", &call.function.arguments)); messages.push(Message::tool_result( &call.id, "Error: your tool call had malformed JSON arguments. Please retry with valid JSON.", )); continue; } }; let output = if call.function.name.starts_with("memory_") { let prov = format!("agent:{}", agent); match poc_agent::tools::memory::dispatch( &call.function.name, &args, Some(&prov), ) { Ok(text) => poc_agent::tools::ToolOutput { text, is_yield: false, images: Vec::new(), model_switch: None, dmn_pause: false, }, Err(e) => poc_agent::tools::ToolOutput { text: format!("Error: {}", e), is_yield: false, images: Vec::new(), model_switch: None, dmn_pause: false, }, } } else { tools::dispatch(&call.function.name, &args, &tracker).await }; log(&format!("tool result: {} chars", output.text.len())); messages.push(Message::tool_result(&call.id, &output.text)); } continue; } // Text-only response — we're done let text = msg.content_text().to_string(); if text.is_empty() && !has_content { log("empty response, retrying"); messages.push(Message::user( "[system] Your previous response was empty. Please respond with text or use a tool." )); continue; } return Ok(text); } Err(format!("agent exceeded {} tool turns", max_turns)) } /// Synchronous wrapper — runs the async function on a dedicated thread /// with its own tokio runtime. Safe to call from any context. pub fn call_api_with_tools_sync( agent: &str, prompt: &str, log: &(dyn Fn(&str) + Sync), ) -> Result { std::thread::scope(|s| { s.spawn(|| { let rt = tokio::runtime::Builder::new_current_thread() .enable_all() .build() .map_err(|e| format!("tokio runtime: {}", e))?; rt.block_on(call_api_with_tools(agent, prompt, log)) }).join().unwrap() }) }