refactor: runner owns stream routing, suppress tool call XML from display

Split the streaming pipeline: API backends yield StreamEvents through
a channel, the runner reads them and routes to the appropriate UI pane.

- Add StreamEvent enum (Content, Reasoning, ToolCallDelta, etc.)
- API start_stream() spawns backend as a task, returns event receiver
- Runner loops over events, sends content to conversation pane but
  suppresses <tool_call> XML with a buffered tail for partial tags
- OpenAI backend refactored to stream_events() — no more UI coupling
- Anthropic backend gets a wrapper that synthesizes events from the
  existing stream() (TODO: native event streaming)
- chat_completion_stream() kept for subconscious agents, reimplemented
  on top of the event stream
- Usage derives Clone

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Commit: 13453606ae (parent 912626c5f0)
Author: ProofOfConcept — 2026-03-29 21:22:42 -04:00
6 changed files with 338 additions and 114 deletions

View file

@ -18,8 +18,41 @@ use anyhow::Result;
use reqwest::Client;
use std::time::{Duration, Instant};
use tokio::sync::mpsc;
use crate::agent::types::*;
use crate::agent::ui_channel::{StreamTarget, UiMessage, UiSender};
use crate::agent::ui_channel::{UiMessage, UiSender};
// ─────────────────────────────────────────────────────────────
// Stream events — yielded by backends, consumed by the runner
// ─────────────────────────────────────────────────────────────
/// Events produced by the streaming API backends.
/// The runner reads these and decides what to display where.
pub enum StreamEvent {
    /// Content token from the model's response.
    Content(String),
    /// Reasoning/thinking token (internal monologue).
    Reasoning(String),
    /// Incremental tool call delta (structured, from APIs that support it).
    /// Fields are `Option` because providers send them piecemeal across
    /// successive deltas; `index` identifies which in-progress call a
    /// delta extends, and `arguments` fragments are appended by consumers.
    ToolCallDelta {
        // Position of the tool call this delta belongs to.
        index: usize,
        id: Option<String>,
        call_type: Option<String>,
        name: Option<String>,
        arguments: Option<String>,
    },
    /// Token usage stats.
    Usage(Usage),
    /// Stream finished.
    Finished {
        // Provider-reported finish reason, e.g. "stop" — "error" is
        // treated specially by chat_completion_stream_temp.
        reason: String,
        prompt_tokens: u32,
        completion_tokens: u32,
    },
    /// Error from the stream.
    Error(String),
}
enum Backend {
OpenAi {
@ -58,20 +91,71 @@ impl ApiClient {
}
}
/// Start a streaming chat completion. Returns a receiver of StreamEvents.
/// The caller (runner) reads events and handles routing to the UI.
///
/// The old `chat_completion_stream` method is kept for the subconscious
/// agents which don't need fine-grained stream control.
pub fn start_stream(
&self,
messages: &[Message],
tools: Option<&[ToolDef]>,
ui_tx: &UiSender,
reasoning_effort: &str,
temperature: Option<f32>,
) -> mpsc::UnboundedReceiver<StreamEvent> {
let (tx, rx) = mpsc::unbounded_channel();
let client = self.client.clone();
let api_key = self.api_key.clone();
let model = self.model.clone();
let messages = messages.to_vec();
let tools = tools.map(|t| t.to_vec());
let ui_tx = ui_tx.clone();
let reasoning_effort = reasoning_effort.to_string();
let backend = match &self.backend {
Backend::OpenAi { base_url } => Backend::OpenAi { base_url: base_url.clone() },
Backend::Anthropic => Backend::Anthropic,
};
tokio::spawn(async move {
let result = match &backend {
Backend::OpenAi { base_url } => {
openai::stream_events(
&client, base_url, &api_key, &model,
&messages, tools.as_deref(), &tx, &ui_tx,
&reasoning_effort, temperature,
).await
}
Backend::Anthropic => {
// Anthropic backend still uses the old path for now —
// wrap it by calling the old stream() and synthesizing events.
anthropic::stream_events(
&client, &api_key, &model,
&messages, tools.as_deref(), &tx, &ui_tx,
&reasoning_effort,
).await
}
};
if let Err(e) = result {
let _ = tx.send(StreamEvent::Error(e.to_string()));
}
});
rx
}
/// Streaming chat completion. Returns the assembled response message
/// plus optional usage stats. Text tokens stream through the UI channel.
///
/// Used by subconscious agents that don't need per-token routing;
/// interactive callers should prefer `start_stream` and route events
/// themselves.
pub async fn chat_completion_stream(
    &self,
    messages: &[Message],
    tools: Option<&[ToolDef]>,
    ui_tx: &UiSender,
    reasoning_effort: &str,
) -> Result<(Message, Option<Usage>)> {
    // Delegate to the temperature-aware variant with the default (None).
    self.chat_completion_stream_temp(messages, tools, ui_tx, reasoning_effort, None).await
}
pub async fn chat_completion_stream_temp(
@ -79,24 +163,48 @@ impl ApiClient {
messages: &[Message],
tools: Option<&[ToolDef]>,
ui_tx: &UiSender,
target: StreamTarget,
reasoning_effort: &str,
temperature: Option<f32>,
) -> Result<(Message, Option<Usage>)> {
match &self.backend {
Backend::OpenAi { base_url } => {
openai::stream(
&self.client, base_url, &self.api_key, &self.model,
messages, tools, ui_tx, target, reasoning_effort, temperature,
).await
}
Backend::Anthropic => {
anthropic::stream(
&self.client, &self.api_key, &self.model,
messages, tools, ui_tx, target, reasoning_effort,
).await
// Use the event stream and accumulate into a message.
let mut rx = self.start_stream(messages, tools, ui_tx, reasoning_effort, temperature);
let mut content = String::new();
let mut tool_calls: Vec<ToolCall> = Vec::new();
let mut usage = None;
let mut finish_reason = None;
while let Some(event) = rx.recv().await {
match event {
StreamEvent::Content(text) => content.push_str(&text),
StreamEvent::Reasoning(_) => {}
StreamEvent::ToolCallDelta { index, id, call_type, name, arguments } => {
while tool_calls.len() <= index {
tool_calls.push(ToolCall {
id: String::new(),
call_type: "function".to_string(),
function: FunctionCall { name: String::new(), arguments: String::new() },
});
}
if let Some(id) = id { tool_calls[index].id = id; }
if let Some(ct) = call_type { tool_calls[index].call_type = ct; }
if let Some(n) = name { tool_calls[index].function.name = n; }
if let Some(a) = arguments { tool_calls[index].function.arguments.push_str(&a); }
}
StreamEvent::Usage(u) => usage = Some(u),
StreamEvent::Finished { reason, .. } => {
finish_reason = Some(reason);
break;
}
StreamEvent::Error(e) => anyhow::bail!("{}", e),
}
}
if finish_reason.as_deref() == Some("error") {
let detail = if content.is_empty() { "no details".into() } else { content };
anyhow::bail!("model stream error: {}", detail);
}
Ok((build_response_message(content, tool_calls), usage))
}
/// Return a label for the active backend, used in startup info.
@ -325,7 +433,7 @@ impl SseReader {
/// from models that emit tool calls as text), parse them out and
/// promote them to structured tool_calls. This way all consumers
/// see tool calls uniformly regardless of backend.
pub(crate) fn build_response_message(
pub fn build_response_message(
content: String,
tool_calls: Vec<ToolCall>,
) -> Message {