// consciousness/poc-memory/src/agents/api.rs

// agents/api.rs — Direct API backend for agent execution
//
// Uses poc-agent's OpenAI-compatible API client to call models directly
// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI.
// Implements the tool loop: send prompt → if tool_calls, execute them →
// send results back → repeat until text response.
//
// Activated when config has api_base_url set.
use poc_agent::api::ApiClient;
use poc_agent::types::*;
use poc_agent::tools::{self, ProcessTracker};
use poc_agent::ui_channel::StreamTarget;
/// Run an agent prompt through the direct API with tool support.
/// Returns the final text response after all tool calls are resolved.
///
/// * `agent` — logical agent name; used to tag log lines for provenance.
/// * `prompt` — the user prompt that seeds the conversation.
/// * `log` — sink for progress/diagnostic messages.
///
/// # Errors
/// Returns a human-readable `String` error if `api_base_url` is not
/// configured, the API call fails, or the tool loop exceeds its turn budget.
pub async fn call_api_with_tools(
    agent: &str,
    prompt: &str,
    log: &dyn Fn(&str),
) -> Result<String, String> {
    let config = crate::config::get();
    let base_url = config.api_base_url.as_deref()
        .ok_or("api_base_url not configured")?;
    let api_key = config.api_key.as_deref().unwrap_or("");
    let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b");
    let client = ApiClient::new(base_url, api_key, model);

    // Minimal UI channel: we never read from it, but the streaming client
    // needs a sink for incremental output (no TUI in this path).
    let (ui_tx, _ui_rx) = poc_agent::ui_channel::channel();

    // Expose only the `bash` tool — poc-memory commands run through it.
    let tool_defs: Vec<ToolDef> = tools::definitions()
        .into_iter()
        .filter(|d| d.function.name == "bash")
        .collect();
    let tracker = ProcessTracker::new();

    // Conversation starts with the prompt as a single user message.
    let mut messages = vec![Message::user(prompt)];
    let max_turns = 50;
    for turn in 0..max_turns {
        log(&format!("[{}] API turn {} ({} messages)", agent, turn, messages.len()));
        let (msg, usage) = client.chat_completion_stream(
            &messages,
            Some(&tool_defs),
            &ui_tx,
            StreamTarget::Autonomous,
            "none",
        ).await.map_err(|e| format!("API error: {}", e))?;
        if let Some(u) = &usage {
            log(&format!("tokens: {} prompt + {} completion",
                u.prompt_tokens, u.completion_tokens));
        }
        let has_content = msg.content.is_some();

        // Tool-call turn: record the assistant message in history, execute
        // each requested tool, append the results, and loop again.
        // Cloning just the calls (not the whole message) lets `msg` move
        // into `messages` without a redundant full clone or an unwrap().
        let tool_calls = msg.tool_calls.clone().unwrap_or_default();
        if !tool_calls.is_empty() {
            messages.push(msg);
            for call in &tool_calls {
                log(&format!("tool: {}({})",
                    call.function.name,
                    crate::util::first_n_chars(&call.function.arguments, 80)));
                // Malformed argument JSON degrades to Value::Null instead of
                // aborting the run; dispatch decides how to report that.
                let args: serde_json::Value =
                    serde_json::from_str(&call.function.arguments).unwrap_or_default();
                let output = tools::dispatch(&call.function.name, &args, &tracker).await;
                log(&format!("tool result: {} chars", output.text.len()));
                messages.push(Message::tool_result(&call.id, &output.text));
            }
            continue;
        }

        // Text-only response — we're done.
        let text = msg.content_text().to_string();
        if text.is_empty() && !has_content {
            // Model produced neither text nor tool calls: nudge it and retry
            // (still bounded by max_turns). NOTE(review): a Some("") content
            // deliberately falls through and returns Ok("") — confirm that is
            // the intended contract for structured-but-empty content.
            log("empty response, retrying");
            messages.push(Message::user(
                "[system] Your previous response was empty. Please respond with text or use a tool."
            ));
            continue;
        }
        return Ok(text);
    }
    Err(format!("agent exceeded {} tool turns", max_turns))
}
/// Synchronous wrapper — creates a tokio runtime and blocks.
/// Used by the existing sync call path in knowledge.rs.
///
/// Builds a fresh single-threaded runtime per call and drives
/// [`call_api_with_tools`] to completion on it; runtime construction
/// failures are surfaced as the same `String` error type.
pub fn call_api_with_tools_sync(
    agent: &str,
    prompt: &str,
    log: &dyn Fn(&str),
) -> Result<String, String> {
    let runtime = match tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
    {
        Ok(rt) => rt,
        Err(e) => return Err(format!("tokio runtime: {}", e)),
    };
    runtime.block_on(call_api_with_tools(agent, prompt, log))
}