2026-03-18 23:05:14 -04:00
|
|
|
// agents/api.rs — Direct API backend for agent execution
|
|
|
|
|
//
|
|
|
|
|
// Uses poc-agent's OpenAI-compatible API client to call models directly
|
|
|
|
|
// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI.
|
|
|
|
|
// Implements the tool loop: send prompt → if tool_calls, execute them →
|
|
|
|
|
// send results back → repeat until text response.
|
|
|
|
|
//
|
|
|
|
|
// Activated when config has api_base_url set.
|
|
|
|
|
|
2026-03-25 00:52:41 -04:00
|
|
|
use crate::agent::api::ApiClient;
|
|
|
|
|
use crate::agent::types::*;
|
thought: wire up agent and subconscious to use shared tools
- agent/tools/mod.rs: remove duplicated tool implementations, delegate
to thought::dispatch for shared tools, keep only agent-specific
tools (control, vision, working_stack)
- subconscious/api.rs: replace duplicated memory/tool dispatch with
thought::dispatch, use thought::all_definitions() for tool schemas
- Delete agent/tools/{bash,read,write,edit,grep,glob_tool,journal,memory}.rs
(now live in thought/)
Both poc-agent and subconscious agents now use the same tool
implementations through the thought layer. Agent-specific behavior
(node tracking in runner.rs, control tools) stays in agent/.
2026-03-27 15:27:33 -04:00
|
|
|
use crate::thought::{self, ProcessTracker};
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-18 23:09:11 -04:00
|
|
|
use std::sync::OnceLock;
|
|
|
|
|
|
|
|
|
|
static API_CLIENT: OnceLock<ApiClient> = OnceLock::new();
|
|
|
|
|
|
|
|
|
|
fn get_client() -> Result<&'static ApiClient, String> {
|
|
|
|
|
Ok(API_CLIENT.get_or_init(|| {
|
|
|
|
|
let config = crate::config::get();
|
|
|
|
|
let base_url = config.api_base_url.as_deref().unwrap_or("");
|
|
|
|
|
let api_key = config.api_key.as_deref().unwrap_or("");
|
|
|
|
|
let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b");
|
|
|
|
|
ApiClient::new(base_url, api_key, model)
|
|
|
|
|
}))
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-26 14:21:43 -04:00
|
|
|
/// Run agent prompts through the direct API with tool support.
|
|
|
|
|
/// For multi-step agents, each prompt is injected as a new user message
|
|
|
|
|
/// after the previous step's tool loop completes. The conversation
|
|
|
|
|
/// context carries forward naturally between steps.
|
|
|
|
|
/// Returns the final text response after all steps complete.
|
2026-03-18 23:05:14 -04:00
|
|
|
pub async fn call_api_with_tools(
|
2026-03-27 15:44:39 -04:00
|
|
|
agent: &str,
|
2026-03-26 14:21:43 -04:00
|
|
|
prompts: &[String],
|
2026-03-27 15:44:39 -04:00
|
|
|
phases: &[String],
|
2026-03-24 20:29:17 -04:00
|
|
|
temperature: Option<f32>,
|
2026-03-26 14:48:42 -04:00
|
|
|
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
2026-03-18 23:05:14 -04:00
|
|
|
log: &dyn Fn(&str),
|
|
|
|
|
) -> Result<String, String> {
|
2026-03-18 23:09:11 -04:00
|
|
|
let client = get_client()?;
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-21 11:33:36 -04:00
|
|
|
// Set up a UI channel — we drain reasoning tokens into the log
|
2026-03-25 00:52:41 -04:00
|
|
|
let (ui_tx, mut ui_rx) = crate::agent::ui_channel::channel();
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-28 19:49:13 -04:00
|
|
|
// Subconscious agents only get memory tools — no filesystem access.
|
|
|
|
|
let tool_defs = thought::memory_definitions();
|
2026-03-18 23:05:14 -04:00
|
|
|
let tracker = ProcessTracker::new();
|
2026-03-27 15:44:39 -04:00
|
|
|
// Provenance tracks which agent:phase is making writes.
|
|
|
|
|
// Updated between steps by the bail function via set_provenance().
|
|
|
|
|
let first_phase = phases.first().map(|s| s.as_str()).unwrap_or("");
|
|
|
|
|
let provenance = std::cell::RefCell::new(
|
|
|
|
|
if first_phase.is_empty() { format!("agent:{}", agent) }
|
|
|
|
|
else { format!("agent:{}:{}", agent, first_phase) }
|
|
|
|
|
);
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-26 14:21:43 -04:00
|
|
|
// Start with the first prompt as a user message
|
|
|
|
|
let mut messages = vec![Message::user(&prompts[0])];
|
|
|
|
|
let mut next_prompt_idx = 1; // index of next prompt to inject
|
2026-03-19 22:58:54 -04:00
|
|
|
let reasoning = crate::config::get().api_reasoning.clone();
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-26 18:41:10 -04:00
|
|
|
let max_turns = 50 * prompts.len();
|
2026-03-18 23:05:14 -04:00
|
|
|
for turn in 0..max_turns {
|
2026-03-22 01:57:47 -04:00
|
|
|
log(&format!("\n=== TURN {} ({} messages) ===\n", turn, messages.len()));
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-26 17:48:44 -04:00
|
|
|
let mut last_err = None;
|
|
|
|
|
let mut msg_opt = None;
|
|
|
|
|
let mut usage_opt = None;
|
|
|
|
|
for attempt in 0..5 {
|
|
|
|
|
match client.chat_completion_stream_temp(
|
|
|
|
|
&messages,
|
|
|
|
|
Some(&tool_defs),
|
|
|
|
|
&ui_tx,
|
|
|
|
|
&reasoning,
|
|
|
|
|
temperature,
|
|
|
|
|
).await {
|
|
|
|
|
Ok((msg, usage)) => {
|
|
|
|
|
msg_opt = Some(msg);
|
|
|
|
|
usage_opt = usage;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
Err(e) => {
|
|
|
|
|
let err_str = e.to_string();
|
|
|
|
|
let is_transient = err_str.contains("IncompleteMessage")
|
|
|
|
|
|| err_str.contains("connection closed")
|
|
|
|
|
|| err_str.contains("connection reset")
|
|
|
|
|
|| err_str.contains("timed out")
|
|
|
|
|
|| err_str.contains("Connection refused");
|
|
|
|
|
if is_transient && attempt < 4 {
|
|
|
|
|
log(&format!("transient error (attempt {}): {}, retrying...",
|
|
|
|
|
attempt + 1, err_str));
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(2 << attempt)).await;
|
|
|
|
|
last_err = Some(e);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
let msg_bytes: usize = messages.iter()
|
|
|
|
|
.map(|m| m.content_text().len())
|
|
|
|
|
.sum();
|
|
|
|
|
return Err(format!(
|
|
|
|
|
"API error on turn {} (~{}KB payload, {} messages, {} attempts): {}",
|
|
|
|
|
turn, msg_bytes / 1024, messages.len(), attempt + 1, e));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
let msg = msg_opt.unwrap();
|
|
|
|
|
if let Some(ref e) = last_err {
|
|
|
|
|
log(&format!("succeeded after retry (previous error: {})", e));
|
|
|
|
|
}
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-26 17:48:44 -04:00
|
|
|
if let Some(u) = &usage_opt {
|
2026-03-18 23:05:14 -04:00
|
|
|
log(&format!("tokens: {} prompt + {} completion",
|
|
|
|
|
u.prompt_tokens, u.completion_tokens));
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-21 11:33:36 -04:00
|
|
|
// Drain reasoning tokens from the UI channel into the log
|
|
|
|
|
{
|
|
|
|
|
let mut reasoning_buf = String::new();
|
|
|
|
|
while let Ok(ui_msg) = ui_rx.try_recv() {
|
2026-03-25 00:52:41 -04:00
|
|
|
if let crate::agent::ui_channel::UiMessage::Reasoning(r) = ui_msg {
|
2026-03-21 11:33:36 -04:00
|
|
|
reasoning_buf.push_str(&r);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if !reasoning_buf.is_empty() {
|
|
|
|
|
log(&format!("<think>\n{}\n</think>", reasoning_buf.trim()));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-18 23:05:14 -04:00
|
|
|
let has_content = msg.content.is_some();
|
|
|
|
|
let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty());
|
|
|
|
|
|
|
|
|
|
if has_tools {
|
2026-03-20 14:33:36 -04:00
|
|
|
// Push the assistant message with tool calls.
|
|
|
|
|
// Sanitize arguments: vllm re-parses them as JSON when
|
|
|
|
|
// preprocessing the conversation, so invalid JSON from the
|
|
|
|
|
// model crashes the next request.
|
|
|
|
|
let mut sanitized = msg.clone();
|
|
|
|
|
if let Some(ref mut calls) = sanitized.tool_calls {
|
|
|
|
|
for call in calls {
|
|
|
|
|
if serde_json::from_str::<serde_json::Value>(&call.function.arguments).is_err() {
|
|
|
|
|
log(&format!("sanitizing malformed args for {}: {}",
|
|
|
|
|
call.function.name, &call.function.arguments));
|
|
|
|
|
call.function.arguments = "{}".to_string();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
messages.push(sanitized);
|
2026-03-18 23:05:14 -04:00
|
|
|
|
|
|
|
|
// Execute each tool call
|
|
|
|
|
for call in msg.tool_calls.as_ref().unwrap() {
|
2026-03-22 01:57:47 -04:00
|
|
|
log(&format!("\nTOOL CALL: {}({})",
|
2026-03-18 23:05:14 -04:00
|
|
|
call.function.name,
|
2026-03-19 22:58:54 -04:00
|
|
|
&call.function.arguments));
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-20 14:33:36 -04:00
|
|
|
let args: serde_json::Value = match serde_json::from_str(&call.function.arguments) {
|
|
|
|
|
Ok(v) => v,
|
|
|
|
|
Err(_) => {
|
|
|
|
|
log(&format!("malformed tool call args: {}", &call.function.arguments));
|
|
|
|
|
messages.push(Message::tool_result(
|
|
|
|
|
&call.id,
|
|
|
|
|
"Error: your tool call had malformed JSON arguments. Please retry with valid JSON.",
|
|
|
|
|
));
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
};
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-27 15:44:39 -04:00
|
|
|
let prov = provenance.borrow().clone();
|
|
|
|
|
let output = match thought::dispatch(&call.function.name, &args, &tracker, Some(&prov)).await {
|
thought: wire up agent and subconscious to use shared tools
- agent/tools/mod.rs: remove duplicated tool implementations, delegate
to thought::dispatch for shared tools, keep only agent-specific
tools (control, vision, working_stack)
- subconscious/api.rs: replace duplicated memory/tool dispatch with
thought::dispatch, use thought::all_definitions() for tool schemas
- Delete agent/tools/{bash,read,write,edit,grep,glob_tool,journal,memory}.rs
(now live in thought/)
Both poc-agent and subconscious agents now use the same tool
implementations through the thought layer. Agent-specific behavior
(node tracking in runner.rs, control tools) stays in agent/.
2026-03-27 15:27:33 -04:00
|
|
|
Some(out) => out,
|
|
|
|
|
None => thought::ToolOutput::error(format!("Unknown tool: {}", call.function.name)),
|
2026-03-20 12:16:45 -04:00
|
|
|
};
|
2026-03-18 23:05:14 -04:00
|
|
|
|
2026-03-22 01:57:47 -04:00
|
|
|
log(&format!("TOOL RESULT ({} chars):\n{}", output.text.len(), output.text));
|
2026-03-18 23:05:14 -04:00
|
|
|
|
|
|
|
|
messages.push(Message::tool_result(&call.id, &output.text));
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-26 14:21:43 -04:00
|
|
|
// Text-only response — step complete
|
2026-03-18 23:05:14 -04:00
|
|
|
let text = msg.content_text().to_string();
|
|
|
|
|
if text.is_empty() && !has_content {
|
|
|
|
|
log("empty response, retrying");
|
|
|
|
|
messages.push(Message::user(
|
|
|
|
|
"[system] Your previous response was empty. Please respond with text or use a tool."
|
|
|
|
|
));
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-22 01:57:47 -04:00
|
|
|
log(&format!("\n=== RESPONSE ===\n\n{}", text));
|
2026-03-26 14:21:43 -04:00
|
|
|
|
2026-03-26 14:48:42 -04:00
|
|
|
// If there are more prompts, check bail condition and inject the next one
|
2026-03-26 14:21:43 -04:00
|
|
|
if next_prompt_idx < prompts.len() {
|
2026-03-26 14:48:42 -04:00
|
|
|
// Run bail check before continuing to next step
|
|
|
|
|
if let Some(ref check) = bail_fn {
|
|
|
|
|
check(next_prompt_idx)?;
|
|
|
|
|
}
|
2026-03-27 15:44:39 -04:00
|
|
|
// Update provenance for the new phase
|
|
|
|
|
if let Some(phase) = phases.get(next_prompt_idx) {
|
|
|
|
|
*provenance.borrow_mut() = format!("agent:{}:{}", agent, phase);
|
|
|
|
|
}
|
2026-03-26 14:21:43 -04:00
|
|
|
messages.push(Message::assistant(&text));
|
|
|
|
|
let next = &prompts[next_prompt_idx];
|
|
|
|
|
next_prompt_idx += 1;
|
|
|
|
|
log(&format!("\n=== STEP {}/{} ===\n", next_prompt_idx, prompts.len()));
|
|
|
|
|
messages.push(Message::user(next));
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-18 23:05:14 -04:00
|
|
|
return Ok(text);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Err(format!("agent exceeded {} tool turns", max_turns))
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-18 23:07:49 -04:00
|
|
|
/// Synchronous wrapper — runs the async function on a dedicated thread
|
|
|
|
|
/// with its own tokio runtime. Safe to call from any context.
|
2026-03-18 23:05:14 -04:00
|
|
|
pub fn call_api_with_tools_sync(
|
|
|
|
|
agent: &str,
|
2026-03-26 14:21:43 -04:00
|
|
|
prompts: &[String],
|
2026-03-27 15:44:39 -04:00
|
|
|
phases: &[String],
|
2026-03-24 20:29:17 -04:00
|
|
|
temperature: Option<f32>,
|
2026-03-26 14:48:42 -04:00
|
|
|
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
2026-03-18 23:09:11 -04:00
|
|
|
log: &(dyn Fn(&str) + Sync),
|
2026-03-18 23:05:14 -04:00
|
|
|
) -> Result<String, String> {
|
2026-03-18 23:07:49 -04:00
|
|
|
std::thread::scope(|s| {
|
|
|
|
|
s.spawn(|| {
|
|
|
|
|
let rt = tokio::runtime::Builder::new_current_thread()
|
|
|
|
|
.enable_all()
|
|
|
|
|
.build()
|
|
|
|
|
.map_err(|e| format!("tokio runtime: {}", e))?;
|
2026-03-24 01:53:28 -04:00
|
|
|
rt.block_on(
|
2026-03-27 15:44:39 -04:00
|
|
|
call_api_with_tools(agent, prompts, phases, temperature, bail_fn, log)
|
2026-03-24 01:53:28 -04:00
|
|
|
)
|
2026-03-18 23:07:49 -04:00
|
|
|
}).join().unwrap()
|
|
|
|
|
})
|
2026-03-18 23:05:14 -04:00
|
|
|
}
|