Capture reasoning/thinking from API stream into Thinking entries

StreamResult now includes accumulated reasoning text. After each
stream completes, if reasoning was produced, a Thinking entry is
pushed to the conversation before the response message.

Reasoning content is visible in the context tree UI but not sent
back to the API and doesn't count against the token budget.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-07 22:49:35 -04:00
parent e0ee441aec
commit 7ecc50d2e4
2 changed files with 12 additions and 4 deletions

View file

@ -578,6 +578,8 @@ pub(crate) struct StreamResult {
pub(crate) error: Option<String>, pub(crate) error: Option<String>,
/// Remaining display buffer (caller should flush if not in a tool call). /// Remaining display buffer (caller should flush if not in a tool call).
pub(crate) display_buf: String, pub(crate) display_buf: String,
/// Accumulated reasoning/thinking content.
pub(crate) reasoning: String,
/// Whether we were mid-tool-call when the stream ended. /// Whether we were mid-tool-call when the stream ended.
pub(crate) in_tool_call: bool, pub(crate) in_tool_call: bool,
} }
@ -601,6 +603,7 @@ pub(crate) async fn collect_stream(
let mut error = None; let mut error = None;
let mut first_content = true; let mut first_content = true;
let mut display_buf = String::new(); let mut display_buf = String::new();
let mut reasoning_buf = String::new();
let mut _streaming_guard: Option<super::ActivityGuard> = None; let mut _streaming_guard: Option<super::ActivityGuard> = None;
while let Some(event) = rx.recv().await { while let Some(event) = rx.recv().await {
@ -668,8 +671,7 @@ pub(crate) async fn collect_stream(
} }
} }
StreamEvent::Reasoning(text) => { StreamEvent::Reasoning(text) => {
// TODO: reasoning tokens → context entries reasoning_buf.push_str(&text);
let _ = text;
} }
StreamEvent::ToolCallDelta { index, id, call_type, name, arguments } => { StreamEvent::ToolCallDelta { index, id, call_type, name, arguments } => {
while tool_calls.len() <= index { while tool_calls.len() <= index {
@ -696,5 +698,5 @@ pub(crate) async fn collect_stream(
} }
} }
StreamResult { content, tool_calls, usage, finish_reason, error, display_buf, in_tool_call } StreamResult { content, tool_calls, usage, finish_reason, error, display_buf, in_tool_call, reasoning: reasoning_buf }
} }

View file

@ -477,10 +477,16 @@ impl Agent {
).await; ).await;
let api::StreamResult { let api::StreamResult {
content, tool_calls, usage, finish_reason, content, tool_calls, usage, finish_reason,
error: stream_error, display_buf, in_tool_call, error: stream_error, display_buf, in_tool_call, reasoning,
} = sr; } = sr;
// --- Stream complete --- // --- Stream complete ---
// Push thinking entry if model produced reasoning
if !reasoning.is_empty() {
let mut me = agent.lock().await;
me.push_entry(context::ConversationEntry::Thinking(reasoning));
}
// --- Lock 3: process results --- // --- Lock 3: process results ---
let (msg, pending) = { let (msg, pending) = {
let mut me = agent.lock().await; let mut me = agent.lock().await;