Add direct API backend for agent execution
When api_base_url is configured, agents call the LLM directly via an OpenAI-compatible API (vLLM, llama.cpp, etc.) instead of shelling out to the claude CLI. Implements the full tool loop: send the prompt; if the response contains tool_calls, execute them and send the results back; repeat until a text response arrives. This enables running agents against local or remote models (e.g. Qwen-27B on a RunPod B200) with no dependency on the claude CLI. New config fields: api_base_url, api_key, api_model. Falls back to the claude CLI when api_base_url is not set. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1b48e57f34
commit
a29b6d4c5d
6 changed files with 145 additions and 1 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -2709,6 +2709,7 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"skillratings",
|
"skillratings",
|
||||||
|
"tokio",
|
||||||
"uuid",
|
"uuid",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ paste = "1"
|
||||||
jobkit = { git = "https://evilpiepirate.org/git/jobkit.git/" }
|
jobkit = { git = "https://evilpiepirate.org/git/jobkit.git/" }
|
||||||
jobkit-daemon = { git = "https://evilpiepirate.org/git/jobkit-daemon.git/" }
|
jobkit-daemon = { git = "https://evilpiepirate.org/git/jobkit-daemon.git/" }
|
||||||
poc-agent = { path = "../poc-agent" }
|
poc-agent = { path = "../poc-agent" }
|
||||||
|
tokio = { version = "1", features = ["rt-multi-thread"] }
|
||||||
redb = "2"
|
redb = "2"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
ratatui = "0.29"
|
ratatui = "0.29"
|
||||||
|
|
|
||||||
115
poc-memory/src/agents/api.rs
Normal file
115
poc-memory/src/agents/api.rs
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
// agents/api.rs — Direct API backend for agent execution
|
||||||
|
//
|
||||||
|
// Uses poc-agent's OpenAI-compatible API client to call models directly
|
||||||
|
// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI.
|
||||||
|
// Implements the tool loop: send prompt → if tool_calls, execute them →
|
||||||
|
// send results back → repeat until text response.
|
||||||
|
//
|
||||||
|
// Activated when config has api_base_url set.
|
||||||
|
|
||||||
|
use poc_agent::api::ApiClient;
|
||||||
|
use poc_agent::types::*;
|
||||||
|
use poc_agent::tools::{self, ProcessTracker};
|
||||||
|
use poc_agent::ui_channel::StreamTarget;
|
||||||
|
|
||||||
|
/// Run an agent prompt through the direct API with tool support.
|
||||||
|
/// Returns the final text response after all tool calls are resolved.
|
||||||
|
pub async fn call_api_with_tools(
|
||||||
|
agent: &str,
|
||||||
|
prompt: &str,
|
||||||
|
log: &dyn Fn(&str),
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let config = crate::config::get();
|
||||||
|
|
||||||
|
let base_url = config.api_base_url.as_deref()
|
||||||
|
.ok_or("api_base_url not configured")?;
|
||||||
|
let api_key = config.api_key.as_deref().unwrap_or("");
|
||||||
|
let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b");
|
||||||
|
|
||||||
|
let client = ApiClient::new(base_url, api_key, model);
|
||||||
|
|
||||||
|
// Set up a minimal UI channel (we just collect messages, no TUI)
|
||||||
|
let (ui_tx, _ui_rx) = poc_agent::ui_channel::channel();
|
||||||
|
|
||||||
|
// Build tool definitions — just bash for poc-memory commands
|
||||||
|
let all_defs = tools::definitions();
|
||||||
|
let tool_defs: Vec<ToolDef> = all_defs.into_iter()
|
||||||
|
.filter(|d| d.function.name == "bash")
|
||||||
|
.collect();
|
||||||
|
let tracker = ProcessTracker::new();
|
||||||
|
|
||||||
|
// Start with the prompt as a user message
|
||||||
|
let mut messages = vec![Message::user(prompt)];
|
||||||
|
|
||||||
|
let max_turns = 50;
|
||||||
|
for turn in 0..max_turns {
|
||||||
|
log(&format!("API turn {} ({} messages)", turn, messages.len()));
|
||||||
|
|
||||||
|
let (msg, usage) = client.chat_completion_stream(
|
||||||
|
&messages,
|
||||||
|
Some(&tool_defs),
|
||||||
|
&ui_tx,
|
||||||
|
StreamTarget::Autonomous,
|
||||||
|
"none",
|
||||||
|
).await.map_err(|e| format!("API error: {}", e))?;
|
||||||
|
|
||||||
|
if let Some(u) = &usage {
|
||||||
|
log(&format!("tokens: {} prompt + {} completion",
|
||||||
|
u.prompt_tokens, u.completion_tokens));
|
||||||
|
}
|
||||||
|
|
||||||
|
let has_content = msg.content.is_some();
|
||||||
|
let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty());
|
||||||
|
|
||||||
|
if has_tools {
|
||||||
|
// Push the assistant message with tool calls
|
||||||
|
messages.push(msg.clone());
|
||||||
|
|
||||||
|
// Execute each tool call
|
||||||
|
for call in msg.tool_calls.as_ref().unwrap() {
|
||||||
|
log(&format!("tool: {}({})",
|
||||||
|
call.function.name,
|
||||||
|
crate::util::first_n_chars(&call.function.arguments, 80)));
|
||||||
|
|
||||||
|
let args: serde_json::Value = serde_json::from_str(&call.function.arguments)
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let output = tools::dispatch(&call.function.name, &args, &tracker).await;
|
||||||
|
|
||||||
|
log(&format!("tool result: {} chars", output.text.len()));
|
||||||
|
|
||||||
|
messages.push(Message::tool_result(&call.id, &output.text));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Text-only response — we're done
|
||||||
|
let text = msg.content_text().to_string();
|
||||||
|
if text.is_empty() && !has_content {
|
||||||
|
log("empty response, retrying");
|
||||||
|
messages.push(Message::user(
|
||||||
|
"[system] Your previous response was empty. Please respond with text or use a tool."
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!("agent exceeded {} tool turns", max_turns))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Synchronous wrapper — creates a tokio runtime and blocks.
|
||||||
|
/// Used by the existing sync call path in knowledge.rs.
|
||||||
|
pub fn call_api_with_tools_sync(
|
||||||
|
agent: &str,
|
||||||
|
prompt: &str,
|
||||||
|
log: &dyn Fn(&str),
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let rt = tokio::runtime::Builder::new_current_thread()
|
||||||
|
.enable_all()
|
||||||
|
.build()
|
||||||
|
.map_err(|e| format!("tokio runtime: {}", e))?;
|
||||||
|
|
||||||
|
rt.block_on(call_api_with_tools(agent, prompt, log))
|
||||||
|
}
|
||||||
|
|
@ -184,9 +184,16 @@ pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result<String, String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Call a model using an agent definition's model and tool configuration.
|
/// Call a model using an agent definition's model and tool configuration.
|
||||||
|
/// Uses the direct API backend when api_base_url is configured,
|
||||||
|
/// otherwise falls back to claude CLI subprocess.
|
||||||
pub(crate) fn call_for_def(def: &super::defs::AgentDef, prompt: &str) -> Result<String, String> {
|
pub(crate) fn call_for_def(def: &super::defs::AgentDef, prompt: &str) -> Result<String, String> {
|
||||||
|
if crate::config::get().api_base_url.is_some() && !def.tools.is_empty() {
|
||||||
|
let log = |msg: &str| eprintln!("[{}] {}", def.agent, msg);
|
||||||
|
super::api::call_api_with_tools_sync(&def.agent, prompt, &log)
|
||||||
|
} else {
|
||||||
call_model_with_tools(&def.agent, &def.model, prompt, &def.tools)
|
call_model_with_tools(&def.agent, &def.model, prompt, &def.tools)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a JSON response, handling markdown fences.
|
/// Parse a JSON response, handling markdown fences.
|
||||||
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
|
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@
|
||||||
// transcript — shared JSONL transcript parsing
|
// transcript — shared JSONL transcript parsing
|
||||||
|
|
||||||
pub mod transcript;
|
pub mod transcript;
|
||||||
|
pub mod api;
|
||||||
pub mod llm;
|
pub mod llm;
|
||||||
pub mod prompts;
|
pub mod prompts;
|
||||||
pub mod defs;
|
pub mod defs;
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,13 @@ pub struct Config {
|
||||||
/// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
|
/// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
|
||||||
/// with different OAuth credentials than the interactive session.
|
/// with different OAuth credentials than the interactive session.
|
||||||
pub agent_config_dir: Option<PathBuf>,
|
pub agent_config_dir: Option<PathBuf>,
|
||||||
|
/// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm).
|
||||||
|
/// When set, agents use this instead of shelling out to claude CLI.
|
||||||
|
pub api_base_url: Option<String>,
|
||||||
|
/// API key for the direct API endpoint.
|
||||||
|
pub api_key: Option<String>,
|
||||||
|
/// Model name to use with the direct API endpoint.
|
||||||
|
pub api_model: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Config {
|
impl Default for Config {
|
||||||
|
|
@ -88,6 +95,9 @@ impl Default for Config {
|
||||||
agent_budget: 1000,
|
agent_budget: 1000,
|
||||||
prompts_dir: home.join("poc/memory/prompts"),
|
prompts_dir: home.join("poc/memory/prompts"),
|
||||||
agent_config_dir: None,
|
agent_config_dir: None,
|
||||||
|
api_base_url: None,
|
||||||
|
api_key: None,
|
||||||
|
api_model: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -153,6 +163,15 @@ impl Config {
|
||||||
if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) {
|
if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) {
|
||||||
config.agent_config_dir = Some(expand_home(s));
|
config.agent_config_dir = Some(expand_home(s));
|
||||||
}
|
}
|
||||||
|
if let Some(s) = cfg.get("api_base_url").and_then(|v| v.as_str()) {
|
||||||
|
config.api_base_url = Some(s.to_string());
|
||||||
|
}
|
||||||
|
if let Some(s) = cfg.get("api_key").and_then(|v| v.as_str()) {
|
||||||
|
config.api_key = Some(s.to_string());
|
||||||
|
}
|
||||||
|
if let Some(s) = cfg.get("api_model").and_then(|v| v.as_str()) {
|
||||||
|
config.api_model = Some(s.to_string());
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue