diff --git a/Cargo.lock b/Cargo.lock index ea4c003..c3b197f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2709,6 +2709,7 @@ dependencies = [ "serde", "serde_json", "skillratings", + "tokio", "uuid", ] diff --git a/poc-memory/Cargo.toml b/poc-memory/Cargo.toml index fa814c1..13411af 100644 --- a/poc-memory/Cargo.toml +++ b/poc-memory/Cargo.toml @@ -22,6 +22,7 @@ paste = "1" jobkit = { git = "https://evilpiepirate.org/git/jobkit.git/" } jobkit-daemon = { git = "https://evilpiepirate.org/git/jobkit-daemon.git/" } poc-agent = { path = "../poc-agent" } +tokio = { version = "1", features = ["rt-multi-thread"] } redb = "2" log = "0.4" ratatui = "0.29" diff --git a/poc-memory/src/agents/api.rs b/poc-memory/src/agents/api.rs new file mode 100644 index 0000000..73dab21 --- /dev/null +++ b/poc-memory/src/agents/api.rs @@ -0,0 +1,115 @@ +// agents/api.rs — Direct API backend for agent execution +// +// Uses poc-agent's OpenAI-compatible API client to call models directly +// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI. +// Implements the tool loop: send prompt → if tool_calls, execute them → +// send results back → repeat until text response. +// +// Activated when config has api_base_url set. + +use poc_agent::api::ApiClient; +use poc_agent::types::*; +use poc_agent::tools::{self, ProcessTracker}; +use poc_agent::ui_channel::StreamTarget; + +/// Run an agent prompt through the direct API with tool support. +/// Returns the final text response after all tool calls are resolved. +pub async fn call_api_with_tools( + agent: &str, + prompt: &str, + log: &dyn Fn(&str), +) -> Result { + let config = crate::config::get(); + + let base_url = config.api_base_url.as_deref() + .ok_or("api_base_url not configured")?; + let api_key = config.api_key.as_deref().unwrap_or(""); + let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b"); + + let client = ApiClient::new(base_url, api_key, model); + + // Set up a minimal UI channel (we just collect messages, no TUI) + let (ui_tx, _ui_rx) = poc_agent::ui_channel::channel(); + + // Build tool definitions — just bash for poc-memory commands + let all_defs = tools::definitions(); + let tool_defs: Vec = all_defs.into_iter() + .filter(|d| d.function.name == "bash") + .collect(); + let tracker = ProcessTracker::new(); + + // Start with the prompt as a user message + let mut messages = vec![Message::user(prompt)]; + + let max_turns = 50; + for turn in 0..max_turns { + log(&format!("API turn {} ({} messages)", turn, messages.len())); + + let (msg, usage) = client.chat_completion_stream( + &messages, + Some(&tool_defs), + &ui_tx, + StreamTarget::Autonomous, + "none", + ).await.map_err(|e| format!("API error: {}", e))?; + + if let Some(u) = &usage { + log(&format!("tokens: {} prompt + {} completion", + u.prompt_tokens, u.completion_tokens)); + } + + let has_content = msg.content.is_some(); + let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty()); + + if has_tools { + // Push the assistant message with tool calls + messages.push(msg.clone()); + + // Execute each tool call + for call in msg.tool_calls.as_ref().unwrap() { + log(&format!("tool: {}({})", + call.function.name, + crate::util::first_n_chars(&call.function.arguments, 80))); + + let args: serde_json::Value = serde_json::from_str(&call.function.arguments) + .unwrap_or_default(); + + let output = tools::dispatch(&call.function.name, &args, &tracker).await; + + log(&format!("tool result: {} chars", output.text.len())); + + messages.push(Message::tool_result(&call.id, &output.text)); + } + continue; + } + + // Text-only response — we're done + let text = msg.content_text().to_string(); + if text.is_empty() && !has_content { + log("empty response, retrying"); + messages.push(Message::user( + "[system] Your previous response was empty. Please respond with text or use a tool." + )); + continue; + } + + return Ok(text); + } + + Err(format!("agent exceeded {} tool turns", max_turns)) +} + +/// Synchronous wrapper — creates a tokio runtime and blocks. +/// Used by the existing sync call path in knowledge.rs. +pub fn call_api_with_tools_sync( + agent: &str, + prompt: &str, + log: &dyn Fn(&str), +) -> Result { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .map_err(|e| format!("tokio runtime: {}", e))?; + + rt.block_on(call_api_with_tools(agent, prompt, log)) +} diff --git a/poc-memory/src/agents/llm.rs b/poc-memory/src/agents/llm.rs index d920876..9dee69d 100644 --- a/poc-memory/src/agents/llm.rs +++ b/poc-memory/src/agents/llm.rs @@ -184,8 +184,15 @@ pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result { } /// Call a model using an agent definition's model and tool configuration. +/// Uses the direct API backend when api_base_url is configured, +/// otherwise falls back to claude CLI subprocess. pub(crate) fn call_for_def(def: &super::defs::AgentDef, prompt: &str) -> Result { - call_model_with_tools(&def.agent, &def.model, prompt, &def.tools) + if crate::config::get().api_base_url.is_some() && !def.tools.is_empty() { + let log = |msg: &str| eprintln!("[{}] {}", def.agent, msg); + super::api::call_api_with_tools_sync(&def.agent, prompt, &log) + } else { + call_model_with_tools(&def.agent, &def.model, prompt, &def.tools) + } } /// Parse a JSON response, handling markdown fences. diff --git a/poc-memory/src/agents/mod.rs b/poc-memory/src/agents/mod.rs index 7d81914..1f889bd 100644 --- a/poc-memory/src/agents/mod.rs +++ b/poc-memory/src/agents/mod.rs @@ -16,6 +16,7 @@ // transcript — shared JSONL transcript parsing pub mod transcript; +pub mod api; pub mod llm; pub mod prompts; pub mod defs; diff --git a/poc-memory/src/config.rs b/poc-memory/src/config.rs index 13258e4..3c13c60 100644 --- a/poc-memory/src/config.rs +++ b/poc-memory/src/config.rs @@ -59,6 +59,13 @@ pub struct Config { /// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates /// with different OAuth credentials than the interactive session. pub agent_config_dir: Option, + /// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm). + /// When set, agents use this instead of shelling out to claude CLI. + pub api_base_url: Option, + /// API key for the direct API endpoint. + pub api_key: Option, + /// Model name to use with the direct API endpoint. + pub api_model: Option, } impl Default for Config { @@ -88,6 +95,9 @@ impl Default for Config { agent_budget: 1000, prompts_dir: home.join("poc/memory/prompts"), agent_config_dir: None, + api_base_url: None, + api_key: None, + api_model: None, } } } @@ -153,6 +163,15 @@ impl Config { if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) { config.agent_config_dir = Some(expand_home(s)); } + if let Some(s) = cfg.get("api_base_url").and_then(|v| v.as_str()) { + config.api_base_url = Some(s.to_string()); + } + if let Some(s) = cfg.get("api_key").and_then(|v| v.as_str()) { + config.api_key = Some(s.to_string()); + } + if let Some(s) = cfg.get("api_model").and_then(|v| v.as_str()) { + config.api_model = Some(s.to_string()); + } continue; }