Add direct API backend for agent execution
When api_base_url is configured, agents call the LLM directly via an OpenAI-compatible API (vLLM, llama.cpp, etc.) instead of shelling out to the claude CLI. Implements the full tool loop: send the prompt; if the response contains tool_calls, execute them and send the results back; repeat until a text response arrives. This enables running agents against local or remote models (e.g. Qwen-27B on a RunPod B200) with no dependency on the claude CLI. New config fields: api_base_url, api_key, api_model. Falls back to the claude CLI when api_base_url is not set. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1b48e57f34
commit
a29b6d4c5d
6 changed files with 145 additions and 1 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -2709,6 +2709,7 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"skillratings",
|
"skillratings",
|
||||||
|
"tokio",
|
||||||
"uuid",
|
"uuid",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ paste = "1"
|
||||||
jobkit = { git = "https://evilpiepirate.org/git/jobkit.git/" }
|
jobkit = { git = "https://evilpiepirate.org/git/jobkit.git/" }
|
||||||
jobkit-daemon = { git = "https://evilpiepirate.org/git/jobkit-daemon.git/" }
|
jobkit-daemon = { git = "https://evilpiepirate.org/git/jobkit-daemon.git/" }
|
||||||
poc-agent = { path = "../poc-agent" }
|
poc-agent = { path = "../poc-agent" }
|
||||||
|
tokio = { version = "1", features = ["rt-multi-thread"] }
|
||||||
redb = "2"
|
redb = "2"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
ratatui = "0.29"
|
ratatui = "0.29"
|
||||||
|
|
|
||||||
115
poc-memory/src/agents/api.rs
Normal file
115
poc-memory/src/agents/api.rs
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
// agents/api.rs — Direct API backend for agent execution
|
||||||
|
//
|
||||||
|
// Uses poc-agent's OpenAI-compatible API client to call models directly
|
||||||
|
// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI.
|
||||||
|
// Implements the tool loop: send prompt → if tool_calls, execute them →
|
||||||
|
// send results back → repeat until text response.
|
||||||
|
//
|
||||||
|
// Activated when config has api_base_url set.
|
||||||
|
|
||||||
|
use poc_agent::api::ApiClient;
|
||||||
|
use poc_agent::types::*;
|
||||||
|
use poc_agent::tools::{self, ProcessTracker};
|
||||||
|
use poc_agent::ui_channel::StreamTarget;
|
||||||
|
|
||||||
|
/// Run an agent prompt through the direct API with tool support.
|
||||||
|
/// Returns the final text response after all tool calls are resolved.
|
||||||
|
pub async fn call_api_with_tools(
|
||||||
|
agent: &str,
|
||||||
|
prompt: &str,
|
||||||
|
log: &dyn Fn(&str),
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let config = crate::config::get();
|
||||||
|
|
||||||
|
let base_url = config.api_base_url.as_deref()
|
||||||
|
.ok_or("api_base_url not configured")?;
|
||||||
|
let api_key = config.api_key.as_deref().unwrap_or("");
|
||||||
|
let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b");
|
||||||
|
|
||||||
|
let client = ApiClient::new(base_url, api_key, model);
|
||||||
|
|
||||||
|
// Set up a minimal UI channel (we just collect messages, no TUI)
|
||||||
|
let (ui_tx, _ui_rx) = poc_agent::ui_channel::channel();
|
||||||
|
|
||||||
|
// Build tool definitions — just bash for poc-memory commands
|
||||||
|
let all_defs = tools::definitions();
|
||||||
|
let tool_defs: Vec<ToolDef> = all_defs.into_iter()
|
||||||
|
.filter(|d| d.function.name == "bash")
|
||||||
|
.collect();
|
||||||
|
let tracker = ProcessTracker::new();
|
||||||
|
|
||||||
|
// Start with the prompt as a user message
|
||||||
|
let mut messages = vec![Message::user(prompt)];
|
||||||
|
|
||||||
|
let max_turns = 50;
|
||||||
|
for turn in 0..max_turns {
|
||||||
|
log(&format!("API turn {} ({} messages)", turn, messages.len()));
|
||||||
|
|
||||||
|
let (msg, usage) = client.chat_completion_stream(
|
||||||
|
&messages,
|
||||||
|
Some(&tool_defs),
|
||||||
|
&ui_tx,
|
||||||
|
StreamTarget::Autonomous,
|
||||||
|
"none",
|
||||||
|
).await.map_err(|e| format!("API error: {}", e))?;
|
||||||
|
|
||||||
|
if let Some(u) = &usage {
|
||||||
|
log(&format!("tokens: {} prompt + {} completion",
|
||||||
|
u.prompt_tokens, u.completion_tokens));
|
||||||
|
}
|
||||||
|
|
||||||
|
let has_content = msg.content.is_some();
|
||||||
|
let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty());
|
||||||
|
|
||||||
|
if has_tools {
|
||||||
|
// Push the assistant message with tool calls
|
||||||
|
messages.push(msg.clone());
|
||||||
|
|
||||||
|
// Execute each tool call
|
||||||
|
for call in msg.tool_calls.as_ref().unwrap() {
|
||||||
|
log(&format!("tool: {}({})",
|
||||||
|
call.function.name,
|
||||||
|
crate::util::first_n_chars(&call.function.arguments, 80)));
|
||||||
|
|
||||||
|
let args: serde_json::Value = serde_json::from_str(&call.function.arguments)
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let output = tools::dispatch(&call.function.name, &args, &tracker).await;
|
||||||
|
|
||||||
|
log(&format!("tool result: {} chars", output.text.len()));
|
||||||
|
|
||||||
|
messages.push(Message::tool_result(&call.id, &output.text));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Text-only response — we're done
|
||||||
|
let text = msg.content_text().to_string();
|
||||||
|
if text.is_empty() && !has_content {
|
||||||
|
log("empty response, retrying");
|
||||||
|
messages.push(Message::user(
|
||||||
|
"[system] Your previous response was empty. Please respond with text or use a tool."
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!("agent exceeded {} tool turns", max_turns))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Synchronous wrapper — creates a tokio runtime and blocks.
|
||||||
|
/// Used by the existing sync call path in knowledge.rs.
|
||||||
|
pub fn call_api_with_tools_sync(
|
||||||
|
agent: &str,
|
||||||
|
prompt: &str,
|
||||||
|
log: &dyn Fn(&str),
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let rt = tokio::runtime::Builder::new_current_thread()
|
||||||
|
.enable_all()
|
||||||
|
.build()
|
||||||
|
.map_err(|e| format!("tokio runtime: {}", e))?;
|
||||||
|
|
||||||
|
rt.block_on(call_api_with_tools(agent, prompt, log))
|
||||||
|
}
|
||||||
|
|
@ -184,9 +184,16 @@ pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result<String, String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Call a model using an agent definition's model and tool configuration.
|
/// Call a model using an agent definition's model and tool configuration.
|
||||||
|
/// Uses the direct API backend when api_base_url is configured,
|
||||||
|
/// otherwise falls back to claude CLI subprocess.
|
||||||
pub(crate) fn call_for_def(def: &super::defs::AgentDef, prompt: &str) -> Result<String, String> {
|
pub(crate) fn call_for_def(def: &super::defs::AgentDef, prompt: &str) -> Result<String, String> {
|
||||||
|
if crate::config::get().api_base_url.is_some() && !def.tools.is_empty() {
|
||||||
|
let log = |msg: &str| eprintln!("[{}] {}", def.agent, msg);
|
||||||
|
super::api::call_api_with_tools_sync(&def.agent, prompt, &log)
|
||||||
|
} else {
|
||||||
call_model_with_tools(&def.agent, &def.model, prompt, &def.tools)
|
call_model_with_tools(&def.agent, &def.model, prompt, &def.tools)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a JSON response, handling markdown fences.
|
/// Parse a JSON response, handling markdown fences.
|
||||||
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
|
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@
|
||||||
// transcript — shared JSONL transcript parsing
|
// transcript — shared JSONL transcript parsing
|
||||||
|
|
||||||
pub mod transcript;
|
pub mod transcript;
|
||||||
|
pub mod api;
|
||||||
pub mod llm;
|
pub mod llm;
|
||||||
pub mod prompts;
|
pub mod prompts;
|
||||||
pub mod defs;
|
pub mod defs;
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,13 @@ pub struct Config {
|
||||||
/// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
|
/// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
|
||||||
/// with different OAuth credentials than the interactive session.
|
/// with different OAuth credentials than the interactive session.
|
||||||
pub agent_config_dir: Option<PathBuf>,
|
pub agent_config_dir: Option<PathBuf>,
|
||||||
|
/// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm).
|
||||||
|
/// When set, agents use this instead of shelling out to claude CLI.
|
||||||
|
pub api_base_url: Option<String>,
|
||||||
|
/// API key for the direct API endpoint.
|
||||||
|
pub api_key: Option<String>,
|
||||||
|
/// Model name to use with the direct API endpoint.
|
||||||
|
pub api_model: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Config {
|
impl Default for Config {
|
||||||
|
|
@ -88,6 +95,9 @@ impl Default for Config {
|
||||||
agent_budget: 1000,
|
agent_budget: 1000,
|
||||||
prompts_dir: home.join("poc/memory/prompts"),
|
prompts_dir: home.join("poc/memory/prompts"),
|
||||||
agent_config_dir: None,
|
agent_config_dir: None,
|
||||||
|
api_base_url: None,
|
||||||
|
api_key: None,
|
||||||
|
api_model: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -153,6 +163,15 @@ impl Config {
|
||||||
if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) {
|
if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) {
|
||||||
config.agent_config_dir = Some(expand_home(s));
|
config.agent_config_dir = Some(expand_home(s));
|
||||||
}
|
}
|
||||||
|
if let Some(s) = cfg.get("api_base_url").and_then(|v| v.as_str()) {
|
||||||
|
config.api_base_url = Some(s.to_string());
|
||||||
|
}
|
||||||
|
if let Some(s) = cfg.get("api_key").and_then(|v| v.as_str()) {
|
||||||
|
config.api_key = Some(s.to_string());
|
||||||
|
}
|
||||||
|
if let Some(s) = cfg.get("api_model").and_then(|v| v.as_str()) {
|
||||||
|
config.api_model = Some(s.to_string());
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue