diff --git a/Cargo.lock b/Cargo.lock
index ea4c003..c3b197f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2709,6 +2709,7 @@ dependencies = [
  "serde",
  "serde_json",
  "skillratings",
+ "tokio",
  "uuid",
 ]
 
diff --git a/poc-memory/Cargo.toml b/poc-memory/Cargo.toml
index fa814c1..13411af 100644
--- a/poc-memory/Cargo.toml
+++ b/poc-memory/Cargo.toml
@@ -22,6 +22,7 @@ paste = "1"
 jobkit = { git = "https://evilpiepirate.org/git/jobkit.git/" }
 jobkit-daemon = { git = "https://evilpiepirate.org/git/jobkit-daemon.git/" }
 poc-agent = { path = "../poc-agent" }
+tokio = { version = "1", features = ["rt-multi-thread"] }
 redb = "2"
 log = "0.4"
 ratatui = "0.29"
diff --git a/poc-memory/src/agents/api.rs b/poc-memory/src/agents/api.rs
new file mode 100644
index 0000000..73dab21
--- /dev/null
+++ b/poc-memory/src/agents/api.rs
@@ -0,0 +1,115 @@
+// agents/api.rs — Direct API backend for agent execution
+//
+// Uses poc-agent's OpenAI-compatible API client to call models directly
+// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI.
+// Implements the tool loop: send prompt → if tool_calls, execute them →
+// send results back → repeat until text response.
+//
+// Activated when config has api_base_url set and the agent definition has tools.
+
+use poc_agent::api::ApiClient;
+use poc_agent::types::*;
+use poc_agent::tools::{self, ProcessTracker};
+use poc_agent::ui_channel::StreamTarget;
+
+/// Run an agent prompt through the direct API, executing tool calls (only
+/// `bash` is offered) until the model replies with text, which is returned.
+/// Errors if `api_base_url` is unset, a request fails, or the turn limit is hit.
+pub async fn call_api_with_tools(
+    agent: &str,
+    prompt: &str,
+    log: &dyn Fn(&str),
+) -> Result<String, String> {
+    let config = crate::config::get();
+    let base_url = config.api_base_url.as_deref()
+        .ok_or("api_base_url not configured")?;
+    let api_key = config.api_key.as_deref().unwrap_or("");
+    let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b");
+    let client = ApiClient::new(base_url, api_key, model);
+
+    // The streaming call needs a UI channel; the receiver is held but never read.
+    let (ui_tx, _ui_rx) = poc_agent::ui_channel::channel();
+
+    // Build tool definitions — just bash for poc-memory commands
+    let tool_defs: Vec<ToolDef> = tools::definitions().into_iter()
+        .filter(|d| d.function.name == "bash")
+        .collect();
+    let tracker = ProcessTracker::new();
+
+    // Start with the prompt as a user message
+    let mut messages = vec![Message::user(prompt)];
+
+    let max_turns = 50;
+    for turn in 0..max_turns {
+        log(&format!("API turn {} ({} messages)", turn, messages.len()));
+
+        let (msg, usage) = client.chat_completion_stream(
+            &messages,
+            Some(&tool_defs),
+            &ui_tx,
+            StreamTarget::Autonomous,
+            "none",
+        ).await.map_err(|e| format!("API error: {}", e))?;
+
+        if let Some(u) = &usage {
+            log(&format!("tokens: {} prompt + {} completion",
+                u.prompt_tokens, u.completion_tokens));
+        }
+
+        let has_content = msg.content.is_some();
+        let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty());
+
+        if has_tools {
+            // Push the assistant message first, then one result per tool call.
+            messages.push(msg.clone());
+            for call in msg.tool_calls.as_ref().unwrap() {
+                log(&format!("tool: {}({})",
+                    call.function.name,
+                    crate::util::first_n_chars(&call.function.arguments, 80)));
+                // Report invalid JSON arguments to the model rather than dispatching null.
+                let args: serde_json::Value = match serde_json::from_str(&call.function.arguments) {
+                    Ok(parsed) => parsed,
+                    Err(e) => {
+                        let err = format!("invalid tool arguments (not valid JSON): {}", e);
+                        log(&format!("tool error: {}", err));
+                        messages.push(Message::tool_result(&call.id, &err));
+                        continue;
+                    }
+                };
+                let output = tools::dispatch(&call.function.name, &args, &tracker).await;
+                log(&format!("tool result: {} chars", output.text.len()));
+                messages.push(Message::tool_result(&call.id, &output.text));
+            }
+            continue;
+        }
+
+        // Text-only response — we're done
+        let text = msg.content_text().to_string();
+        if text.is_empty() && !has_content {
+            log("empty response, retrying");
+            messages.push(Message::user(
+                "[system] Your previous response was empty. Please respond with text or use a tool."
+            ));
+            continue;
+        }
+
+        return Ok(text);
+    }
+
+    Err(format!("agent exceeded {} tool turns", max_turns))
+}
+
+/// Blocking wrapper for the sync call path (knowledge.rs): builds a
+/// current-thread tokio runtime and drives `call_api_with_tools` on it.
+pub fn call_api_with_tools_sync(
+    agent: &str,
+    prompt: &str,
+    log: &dyn Fn(&str),
+) -> Result<String, String> {
+    let mut builder = tokio::runtime::Builder::new_current_thread();
+    builder.enable_all();
+    let runtime = builder.build()
+        .map_err(|e| format!("tokio runtime: {}", e))?;
+    // block_on panics if invoked from inside another tokio runtime.
+    runtime.block_on(call_api_with_tools(agent, prompt, log))
+}
diff --git a/poc-memory/src/agents/llm.rs b/poc-memory/src/agents/llm.rs
index d920876..9dee69d 100644
--- a/poc-memory/src/agents/llm.rs
+++ b/poc-memory/src/agents/llm.rs
@@ -184,8 +184,15 @@ pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result<String, String> {
 }
 
 /// Call a model using an agent definition's model and tool configuration.
+/// Uses the direct API backend when api_base_url is configured and the agent
+/// defines tools; otherwise falls back to the claude CLI subprocess.
 pub(crate) fn call_for_def(def: &super::defs::AgentDef, prompt: &str) -> Result<String, String> {
-    call_model_with_tools(&def.agent, &def.model, prompt, &def.tools)
+    if crate::config::get().api_base_url.is_some() && !def.tools.is_empty() {
+        let log = |msg: &str| eprintln!("[{}] {}", def.agent, msg);
+        super::api::call_api_with_tools_sync(&def.agent, prompt, &log)
+    } else {
+        call_model_with_tools(&def.agent, &def.model, prompt, &def.tools)
+    }
 }
 
 /// Parse a JSON response, handling markdown fences.
diff --git a/poc-memory/src/agents/mod.rs b/poc-memory/src/agents/mod.rs
index 7d81914..1f889bd 100644
--- a/poc-memory/src/agents/mod.rs
+++ b/poc-memory/src/agents/mod.rs
@@ -16,6 +16,7 @@
 //   transcript   — shared JSONL transcript parsing
 
 pub mod transcript;
+pub mod api;
 pub mod llm;
 pub mod prompts;
 pub mod defs;
diff --git a/poc-memory/src/config.rs b/poc-memory/src/config.rs
index 13258e4..3c13c60 100644
--- a/poc-memory/src/config.rs
+++ b/poc-memory/src/config.rs
@@ -59,6 +59,13 @@ pub struct Config {
     /// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
     /// with different OAuth credentials than the interactive session.
     pub agent_config_dir: Option<PathBuf>,
+    /// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm).
+    /// When set, agents with tools use this instead of shelling out to claude CLI.
+    pub api_base_url: Option<String>,
+    /// API key for the direct API endpoint.
+    pub api_key: Option<String>,
+    /// Model name to use with the direct API endpoint.
+    pub api_model: Option<String>,
 }
 
 impl Default for Config {
@@ -88,6 +95,9 @@ impl Default for Config {
             agent_budget: 1000,
             prompts_dir: home.join("poc/memory/prompts"),
             agent_config_dir: None,
+            api_base_url: None,
+            api_key: None,
+            api_model: None,
         }
     }
 }
@@ -153,6 +163,15 @@ impl Config {
                 if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) {
                     config.agent_config_dir = Some(expand_home(s));
                 }
+                if let Some(s) = cfg.get("api_base_url").and_then(|v| v.as_str()) {
+                    config.api_base_url = Some(s.to_string());
+                }
+                if let Some(s) = cfg.get("api_key").and_then(|v| v.as_str()) {
+                    config.api_key = Some(s.to_string());
+                }
+                if let Some(s) = cfg.get("api_model").and_then(|v| v.as_str()) {
+                    config.api_model = Some(s.to_string());
+                }
                 continue;
             }