refactor: runner owns stream routing, suppress tool call XML from display
Split the streaming pipeline: API backends yield StreamEvents through a channel, the runner reads them and routes to the appropriate UI pane.

- Add StreamEvent enum (Content, Reasoning, ToolCallDelta, etc.)
- API start_stream() spawns backend as a task, returns event receiver
- Runner loops over events, sends content to conversation pane but suppresses <tool_call> XML with a buffered tail for partial tags
- OpenAI backend refactored to stream_events() — no more UI coupling
- Anthropic backend gets a wrapper that synthesizes events from the existing stream() (TODO: native event streaming)
- chat_completion_stream() kept for subconscious agents, reimplemented on top of the event stream
- Usage derives Clone

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
912626c5f0
commit
13453606ae
6 changed files with 338 additions and 114 deletions
|
|
@ -18,8 +18,41 @@ use anyhow::Result;
|
|||
use reqwest::Client;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::agent::types::*;
|
||||
use crate::agent::ui_channel::{StreamTarget, UiMessage, UiSender};
|
||||
use crate::agent::ui_channel::{UiMessage, UiSender};
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
// Stream events — yielded by backends, consumed by the runner
// ─────────────────────────────────────────────────────────────

/// Events produced by the streaming API backends.
/// The runner reads these and decides what to display where.
///
/// Backends push these through an unbounded channel (see
/// `ApiClient::start_stream`); the consumer owns all UI routing,
/// so backends stay free of display concerns.
pub enum StreamEvent {
    /// Content token from the model's response.
    Content(String),
    /// Reasoning/thinking token (internal monologue).
    Reasoning(String),
    /// Incremental tool call delta (structured, from APIs that support it).
    ///
    /// Fields mirror the OpenAI-style delta format: every field except
    /// `index` is optional, and `arguments` fragments are meant to be
    /// concatenated by the consumer as they arrive.
    ToolCallDelta {
        // Position of the tool call this delta belongs to; deltas for a
        // given index may arrive before earlier indices are complete.
        index: usize,
        id: Option<String>,
        call_type: Option<String>,
        name: Option<String>,
        // Partial JSON argument text; append to the accumulated string.
        arguments: Option<String>,
    },
    /// Token usage stats.
    Usage(Usage),
    /// Stream finished.
    Finished {
        // Backend-reported finish reason (e.g. "stop", "tool_calls", "error").
        reason: String,
        prompt_tokens: u32,
        completion_tokens: u32,
    },
    /// Error from the stream.
    Error(String),
}
|
||||
|
||||
enum Backend {
|
||||
OpenAi {
|
||||
|
|
@ -58,20 +91,71 @@ impl ApiClient {
|
|||
}
|
||||
}
|
||||
|
||||
/// Start a streaming chat completion. Returns a receiver of StreamEvents.
|
||||
/// The caller (runner) reads events and handles routing to the UI.
|
||||
///
|
||||
/// The old `chat_completion_stream` method is kept for the subconscious
|
||||
/// agents which don't need fine-grained stream control.
|
||||
pub fn start_stream(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
tools: Option<&[ToolDef]>,
|
||||
ui_tx: &UiSender,
|
||||
reasoning_effort: &str,
|
||||
temperature: Option<f32>,
|
||||
) -> mpsc::UnboundedReceiver<StreamEvent> {
|
||||
let (tx, rx) = mpsc::unbounded_channel();
|
||||
let client = self.client.clone();
|
||||
let api_key = self.api_key.clone();
|
||||
let model = self.model.clone();
|
||||
let messages = messages.to_vec();
|
||||
let tools = tools.map(|t| t.to_vec());
|
||||
let ui_tx = ui_tx.clone();
|
||||
let reasoning_effort = reasoning_effort.to_string();
|
||||
let backend = match &self.backend {
|
||||
Backend::OpenAi { base_url } => Backend::OpenAi { base_url: base_url.clone() },
|
||||
Backend::Anthropic => Backend::Anthropic,
|
||||
};
|
||||
|
||||
tokio::spawn(async move {
|
||||
let result = match &backend {
|
||||
Backend::OpenAi { base_url } => {
|
||||
openai::stream_events(
|
||||
&client, base_url, &api_key, &model,
|
||||
&messages, tools.as_deref(), &tx, &ui_tx,
|
||||
&reasoning_effort, temperature,
|
||||
).await
|
||||
}
|
||||
Backend::Anthropic => {
|
||||
// Anthropic backend still uses the old path for now —
|
||||
// wrap it by calling the old stream() and synthesizing events.
|
||||
anthropic::stream_events(
|
||||
&client, &api_key, &model,
|
||||
&messages, tools.as_deref(), &tx, &ui_tx,
|
||||
&reasoning_effort,
|
||||
).await
|
||||
}
|
||||
};
|
||||
if let Err(e) = result {
|
||||
let _ = tx.send(StreamEvent::Error(e.to_string()));
|
||||
}
|
||||
});
|
||||
|
||||
rx
|
||||
}
|
||||
|
||||
    /// Streaming chat completion. Returns the assembled response message
    /// plus optional usage stats. Text tokens stream through the UI channel.
    ///
    /// Empty response handling is done at the agent level (agent.rs)
    /// where the conversation can be modified between retries.
    /// Used by subconscious agents that don't need per-token routing.
    pub async fn chat_completion_stream(
        &self,
        messages: &[Message],
        tools: Option<&[ToolDef]>,
        ui_tx: &UiSender,
        reasoning_effort: &str,
    ) -> Result<(Message, Option<Usage>)> {
        // Thin wrapper: delegate with no temperature override.
        // NOTE(review): reconstructed from a diff view — the `target:
        // StreamTarget` parameter appears to have been removed along with
        // the StreamTarget import; confirm against callers.
        self.chat_completion_stream_temp(messages, tools, ui_tx, reasoning_effort, None).await
    }
|
||||
|
||||
    /// Like `chat_completion_stream`, but with an explicit temperature
    /// override. Consumes the event stream from `start_stream` and
    /// accumulates it into a single response message.
    ///
    /// Returns the assembled message plus usage stats when the backend
    /// reported them. Bails on an in-stream error event or an "error"
    /// finish reason.
    pub async fn chat_completion_stream_temp(
        &self,
        messages: &[Message],
        tools: Option<&[ToolDef]>,
        ui_tx: &UiSender,
        reasoning_effort: &str,
        temperature: Option<f32>,
    ) -> Result<(Message, Option<Usage>)> {
        // Use the event stream and accumulate into a message.
        let mut rx = self.start_stream(messages, tools, ui_tx, reasoning_effort, temperature);
        let mut content = String::new();
        let mut tool_calls: Vec<ToolCall> = Vec::new();
        let mut usage = None;
        let mut finish_reason = None;

        while let Some(event) = rx.recv().await {
            match event {
                StreamEvent::Content(text) => content.push_str(&text),
                // Reasoning tokens are intentionally dropped here — callers
                // of this method don't display the model's internal monologue.
                StreamEvent::Reasoning(_) => {}
                StreamEvent::ToolCallDelta { index, id, call_type, name, arguments } => {
                    // Deltas can reference an index we haven't seen yet;
                    // grow the vec with empty placeholders up to that index.
                    while tool_calls.len() <= index {
                        tool_calls.push(ToolCall {
                            id: String::new(),
                            call_type: "function".to_string(),
                            function: FunctionCall { name: String::new(), arguments: String::new() },
                        });
                    }
                    // id/type/name arrive whole and replace; argument text
                    // arrives fragmented and is appended.
                    if let Some(id) = id { tool_calls[index].id = id; }
                    if let Some(ct) = call_type { tool_calls[index].call_type = ct; }
                    if let Some(n) = name { tool_calls[index].function.name = n; }
                    if let Some(a) = arguments { tool_calls[index].function.arguments.push_str(&a); }
                }
                StreamEvent::Usage(u) => usage = Some(u),
                // Token counts on Finished are ignored here; Usage events
                // carry the stats this method reports.
                StreamEvent::Finished { reason, .. } => {
                    finish_reason = Some(reason);
                    break;
                }
                StreamEvent::Error(e) => anyhow::bail!("{}", e),
            }
        }

        // A "error" finish reason means the stream ended abnormally; use any
        // accumulated content as the error detail.
        if finish_reason.as_deref() == Some("error") {
            let detail = if content.is_empty() { "no details".into() } else { content };
            anyhow::bail!("model stream error: {}", detail);
        }

        Ok((build_response_message(content, tool_calls), usage))
    }
|
||||
|
||||
/// Return a label for the active backend, used in startup info.
|
||||
|
|
@ -325,7 +433,7 @@ impl SseReader {
|
|||
/// from models that emit tool calls as text), parse them out and
|
||||
/// promote them to structured tool_calls. This way all consumers
|
||||
/// see tool calls uniformly regardless of backend.
|
||||
pub(crate) fn build_response_message(
|
||||
pub fn build_response_message(
|
||||
content: String,
|
||||
tool_calls: Vec<ToolCall>,
|
||||
) -> Message {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue