refactor: runner owns stream routing, suppress tool call XML from display

Split the streaming pipeline: API backends yield StreamEvents through
a channel, the runner reads them and routes to the appropriate UI pane.

- Add StreamEvent enum (Content, Reasoning, ToolCallDelta, etc.)
- API start_stream() spawns backend as a task, returns event receiver
- Runner loops over events, sends content to conversation pane but
  suppresses <tool_call> XML with a buffered tail for partial tags
- OpenAI backend refactored to stream_events() — no more UI coupling
- Anthropic backend gets a wrapper that synthesizes events from the
  existing stream() (TODO: native event streaming)
- chat_completion_stream() kept for subconscious agents, reimplemented
  on top of the event stream
- Usage derives Clone

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-03-29 21:22:42 -04:00 · 2026-03-29 21:22:42 -04:00 · 13453606ae
commit 13453606ae
parent 912626c5f0
6 changed files with 338 additions and 114 deletions
--- a/src/agent/api/openai.rs
+++ b/src/agent/api/openai.rs
@@ -6,23 +6,27 @@

 use anyhow::Result;
 use reqwest::Client;
-use std::time::Duration;
+use tokio::sync::mpsc;

 use crate::agent::types::*;
-use crate::agent::ui_channel::{StreamTarget, UiMessage, UiSender};
+use crate::agent::ui_channel::{UiMessage, UiSender};
+use super::StreamEvent;

-pub async fn stream(
+/// Stream SSE events from an OpenAI-compatible endpoint, sending
+/// parsed StreamEvents through the channel. The caller (runner)
+/// handles routing to the UI.
+pub async fn stream_events(
    client: &Client,
    base_url: &str,
    api_key: &str,
    model: &str,
    messages: &[Message],
    tools: Option<&[ToolDef]>,
+    tx: &mpsc::UnboundedSender<StreamEvent>,
    ui_tx: &UiSender,
-    target: StreamTarget,
    reasoning_effort: &str,
    temperature: Option<f32>,
-) -> Result<(Message, Option<Usage>)> {
+) -> Result<()> {
    let request = ChatRequest {
        model: model.to_string(),
        messages: messages.to_vec(),
@@ -59,23 +63,19 @@ pub async fn stream(

    let mut reader = super::SseReader::new(ui_tx);

-    let mut content = String::new();
-    let mut tool_calls: Vec<ToolCall> = Vec::new();
-    let mut usage = None;
-    let mut finish_reason = None;
+    let mut content_len: usize = 0;
    let mut reasoning_chars: usize = 0;
+    let mut tool_call_count: usize = 0;
    let mut empty_deltas: u64 = 0;
-    let mut first_content_at: Option<Duration> = None;
-
-    let _reasoning_enabled = reasoning_effort != "none";
+    let mut first_content_at = None;
+    let mut finish_reason = None;
+    let mut usage = None;

    while let Some(event) = reader.next_event(&mut response).await? {
-        // OpenRouter sometimes embeds error objects in the stream
        if let Some(err_msg) = event["error"]["message"].as_str() {
            let raw = event["error"]["metadata"]["raw"].as_str().unwrap_or("");
            let _ = ui_tx.send(UiMessage::Debug(format!(
-                "API error in stream: {}",
-                err_msg
+                "API error in stream: {}", err_msg
            )));
            anyhow::bail!("API error in stream: {} {}", err_msg, raw);
        }
@@ -83,7 +83,6 @@ pub async fn stream(
        let chunk: ChatCompletionChunk = match serde_json::from_value(event.clone()) {
            Ok(c) => c,
            Err(e) => {
-                // Log unparseable events — they may contain error info
                let preview = event.to_string();
                let _ = ui_tx.send(UiMessage::Debug(format!(
                    "unparseable SSE event ({}): {}",
@@ -93,7 +92,8 @@ pub async fn stream(
            }
        };

-        if chunk.usage.is_some() {
+        if let Some(ref u) = chunk.usage {
+            let _ = tx.send(StreamEvent::Usage(u.clone()));
            usage = chunk.usage;
        }

@@ -107,18 +107,14 @@ pub async fn stream(

            // Reasoning tokens — multiple field names across providers
            let mut has_reasoning = false;
-            if let Some(ref r) = choice.delta.reasoning_content {
+            for r in [
+                choice.delta.reasoning_content.as_ref(),
+                choice.delta.reasoning.as_ref(),
+            ].into_iter().flatten() {
                reasoning_chars += r.len();
                has_reasoning = true;
                if !r.is_empty() {
-                    let _ = ui_tx.send(UiMessage::Reasoning(r.clone()));
-                }
-            }
-            if let Some(ref r) = choice.delta.reasoning {
-                reasoning_chars += r.len();
-                has_reasoning = true;
-                if !r.is_empty() {
-                    let _ = ui_tx.send(UiMessage::Reasoning(r.clone()));
+                    let _ = tx.send(StreamEvent::Reasoning(r.clone()));
                }
            }
            if let Some(ref r) = choice.delta.reasoning_details {
@@ -126,46 +122,28 @@ pub async fn stream(
                reasoning_chars += s.len();
                has_reasoning = true;
                if !s.is_empty() && s != "null" {
-                    let _ = ui_tx.send(UiMessage::Reasoning(s));
+                    let _ = tx.send(StreamEvent::Reasoning(s));
                }
            }

            if let Some(ref text_delta) = choice.delta.content {
                if first_content_at.is_none() && !text_delta.is_empty() {
                    first_content_at = Some(reader.stream_start.elapsed());
-                    let _ = ui_tx.send(UiMessage::Activity("streaming...".into()));
                }
-                content.push_str(text_delta);
-                let _ = ui_tx.send(UiMessage::TextDelta(text_delta.clone(), target));
+                content_len += text_delta.len();
+                let _ = tx.send(StreamEvent::Content(text_delta.clone()));
            }

            if let Some(ref tc_deltas) = choice.delta.tool_calls {
                for tc_delta in tc_deltas {
-                    let idx = tc_delta.index;
-                    while tool_calls.len() <= idx {
-                        tool_calls.push(ToolCall {
-                            id: String::new(),
-                            call_type: "function".to_string(),
-                            function: FunctionCall {
-                                name: String::new(),
-                                arguments: String::new(),
-                            },
-                        });
-                    }
-                    if let Some(ref id) = tc_delta.id {
-                        tool_calls[idx].id = id.clone();
-                    }
-                    if let Some(ref ct) = tc_delta.call_type {
-                        tool_calls[idx].call_type = ct.clone();
-                    }
-                    if let Some(ref func) = tc_delta.function {
-                        if let Some(ref name) = func.name {
-                            tool_calls[idx].function.name = name.clone();
-                        }
-                        if let Some(ref args) = func.arguments {
-                            tool_calls[idx].function.arguments.push_str(args);
-                        }
-                    }
+                    tool_call_count = tool_call_count.max(tc_delta.index + 1);
+                    let _ = tx.send(StreamEvent::ToolCallDelta {
+                        index: tc_delta.index,
+                        id: tc_delta.id.clone(),
+                        call_type: tc_delta.call_type.clone(),
+                        name: tc_delta.function.as_ref().and_then(|f| f.name.clone()),
+                        arguments: tc_delta.function.as_ref().and_then(|f| f.arguments.clone()),
+                    });
                }
            }

@@ -179,8 +157,8 @@ pub async fn stream(

    super::log_diagnostics(
        ui_tx,
-        content.len(),
-        tool_calls.len(),
+        content_len,
+        tool_call_count,
        reasoning_chars,
        reasoning_effort,
        &finish_reason,
@@ -191,25 +169,18 @@ pub async fn stream(
        total_elapsed,
        first_content_at,
        &usage,
-        &tool_calls,
+        &[], // tool_calls not accumulated here anymore
    );

-    // Model/provider error delivered inside the stream (HTTP 200 but
-    // finish_reason="error"). Surface whatever content came back as
-    // the error message so the caller can retry or display it.
-    // Don't append the trailing newline — this isn't real content.
-    if finish_reason.as_deref() == Some("error") {
-        let detail = if content.is_empty() {
-            "no details".to_string()
-        } else {
-            content
-        };
-        anyhow::bail!("model stream error: {}", detail);
-    }
+    let reason = finish_reason.unwrap_or_default();
+    let (pt, ct) = usage.as_ref()
+        .map(|u| (u.prompt_tokens, u.completion_tokens))
+        .unwrap_or((0, 0));
+    let _ = tx.send(StreamEvent::Finished {
+        reason,
+        prompt_tokens: pt,
+        completion_tokens: ct,
+    });

-    if !content.is_empty() {
-        let _ = ui_tx.send(UiMessage::TextDelta("\n".to_string(), target));
-    }
-
-    Ok((super::build_response_message(content, tool_calls), usage))
+    Ok(())
 }