// consciousness/poc-memory/src/agents/api.rs

// agents/api.rs — Direct API backend for agent execution
//
// Uses poc-agent's OpenAI-compatible API client to call models directly
// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI.
// Implements the tool loop: send prompt → if tool_calls, execute them →
// send results back → repeat until text response.
//
// Activated when config has api_base_url set.

use poc_agent::api::ApiClient;
use poc_agent::types::*;
use poc_agent::tools::{self, ProcessTracker};
use poc_agent::ui_channel::StreamTarget;

use std::sync::OnceLock;

/// Process-wide API client, created lazily on the first `get_client()` call
/// and reused for every call after that.
static API_CLIENT: OnceLock<ApiClient> = OnceLock::new();

/// Return the shared API client, building it from the config on first use.
///
/// The API key defaults to an empty string and the model to `qwen-2.5-27b`
/// when the config does not set them.
///
/// # Errors
///
/// Returns an error if `api_base_url` is unset or blank. Nothing is cached in
/// that case, so a later call can still succeed once the config provides a
/// URL (previously an unusable client with an empty base URL was cached for
/// the lifetime of the process).
fn get_client() -> Result<&'static ApiClient, String> {
    // Fast path: already initialised, no need to touch the config.
    if let Some(client) = API_CLIENT.get() {
        return Ok(client);
    }

    let config = crate::config::get();
    let base_url = config
        .api_base_url
        .as_deref()
        .filter(|url| !url.trim().is_empty())
        .ok_or_else(|| "api_base_url is not configured".to_string())?;
    let api_key = config.api_key.as_deref().unwrap_or("");
    let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b");

    // get_or_init keeps initialisation race-free if two threads get here at once.
    Ok(API_CLIENT.get_or_init(|| ApiClient::new(base_url, api_key, model)))
}

/// Run an agent prompt through the direct API with tool support.
///
/// `prompt` is sent as the initial user message. The loop then runs for at
/// most 50 turns:
///
/// * If the model replies with tool calls, each one is executed and its
///   output is appended as a tool-result message, then the model is called
///   again.
/// * If the model replies with no content at all, a nudge message is appended
///   and the model is called again (this consumes a turn).
/// * Otherwise the reply text is returned.
///
/// Only tool definitions whose name starts with `memory_` are offered to the
/// model. `memory_*` calls are dispatched with provenance `agent:<agent>`;
/// calls to any other tool name go through the generic `tools::dispatch`.
///
/// Tool-call arguments that are not valid JSON are not executed; the parse
/// error is returned to the model as the tool result so it can correct
/// itself. Empty arguments are treated as "no arguments" (JSON null).
///
/// `log` receives one progress line per turn, per token-usage report and per
/// tool call / result.
///
/// # Errors
///
/// Returns an error if the client cannot be obtained, if a completion request
/// fails (`"API error: ..."`), or if the turn limit is reached without a
/// final text response.
pub async fn call_api_with_tools(
    agent: &str,
    prompt: &str,
    log: &dyn Fn(&str),
) -> Result<String, String> {
    let client = get_client()?;

    // Set up a minimal UI channel (we just collect messages, no TUI)
    let (ui_tx, _ui_rx) = poc_agent::ui_channel::channel();

    // Build tool definitions — memory tools for graph operations
    let tool_defs: Vec<ToolDef> = tools::definitions()
        .into_iter()
        .filter(|d| d.function.name.starts_with("memory_"))
        .collect();
    let tracker = ProcessTracker::new();

    // Start with the prompt as a user message
    let mut messages = vec![Message::user(prompt)];
    let reasoning = crate::config::get().api_reasoning.clone();

    // Provenance tag attached to every memory operation; constant per call.
    let provenance = format!("agent:{}", agent);

    let max_turns = 50;
    for turn in 0..max_turns {
        log(&format!("API turn {} ({} messages)", turn, messages.len()));

        let (msg, usage) = client.chat_completion_stream(
            &messages,
            Some(&tool_defs),
            &ui_tx,
            StreamTarget::Autonomous,
            &reasoning,
        ).await.map_err(|e| format!("API error: {}", e))?;

        if let Some(u) = &usage {
            log(&format!("tokens: {} prompt + {} completion",
                u.prompt_tokens, u.completion_tokens));
        }

        if let Some(calls) = msg.tool_calls.as_ref().filter(|tc| !tc.is_empty()) {
            // The assistant message carrying the tool calls must precede
            // the tool results in the conversation.
            messages.push(msg.clone());

            for call in calls {
                log(&format!("tool: {}({})",
                    call.function.name,
                    &call.function.arguments));

                // Empty arguments mean "no arguments"; anything else must
                // parse as JSON or the call is rejected.
                let parsed_args: Result<serde_json::Value, _> =
                    if call.function.arguments.trim().is_empty() {
                        Ok(serde_json::Value::Null)
                    } else {
                        serde_json::from_str(&call.function.arguments)
                    };

                let output = match parsed_args {
                    Ok(args) => {
                        if call.function.name.starts_with("memory_") {
                            match poc_agent::tools::memory::dispatch(
                                &call.function.name, &args, Some(&provenance),
                            ) {
                                Ok(text) => text_tool_output(text),
                                Err(e) => text_tool_output(format!("Error: {}", e)),
                            }
                        } else {
                            tools::dispatch(&call.function.name, &args, &tracker).await
                        }
                    }
                    Err(e) => {
                        // Report the problem to the model instead of running
                        // the tool with silently-defaulted arguments.
                        log(&format!("tool arguments are not valid JSON: {}", e));
                        text_tool_output(
                            format!("Error: tool arguments are not valid JSON: {}", e))
                    }
                };

                log(&format!("tool result: {} chars", output.text.len()));

                messages.push(Message::tool_result(&call.id, &output.text));
            }
            continue;
        }

        // Text-only response — we're done, unless there was no content at all.
        let text = msg.content_text().to_string();
        if text.is_empty() && msg.content.is_none() {
            log("empty response, retrying");
            messages.push(Message::user(
                "[system] Your previous response was empty. Please respond with text or use a tool."
            ));
            continue;
        }

        return Ok(text);
    }

    Err(format!("agent exceeded {} tool turns", max_turns))
}

/// Wrap plain text in a `ToolOutput` with all other fields left at their
/// inactive values (no yield, no images, no model switch, no pause).
fn text_tool_output(text: String) -> poc_agent::tools::ToolOutput {
    poc_agent::tools::ToolOutput {
        text,
        is_yield: false,
        images: Vec::new(),
        model_switch: None,
        dmn_pause: false,
    }
}

/// Blocking entry point for `call_api_with_tools`.
///
/// Spawns a scoped thread, builds a single-threaded tokio runtime on it and
/// drives the async call to completion there, so the caller does not need to
/// be inside (or outside) any particular runtime. A failure to build the
/// runtime is reported as `Err("tokio runtime: ...")`; a panic on the worker
/// thread is propagated to the caller as a panic.
pub fn call_api_with_tools_sync(
    agent: &str,
    prompt: &str,
    log: &(dyn Fn(&str) + Sync),
) -> Result<String, String> {
    std::thread::scope(|scope| {
        let worker = scope.spawn(|| {
            let runtime = match tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
            {
                Ok(runtime) => runtime,
                Err(e) => return Err(format!("tokio runtime: {}", e)),
            };
            runtime.block_on(call_api_with_tools(agent, prompt, log))
        });
        worker.join().unwrap()
    })
}