WIP: Wiring context_new into agent — turn loop, StreamToken, dead code removal
Work in progress. The new turn loop uses ResponseParser and StreamToken. Removed StreamEvent, append_streaming, finalize_streaming, streaming_index, assemble_api_messages, and working_stack. Many methods still reference the old types; those will be fixed next. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
648356ae40
commit
9c79d7a037
4 changed files with 202 additions and 424 deletions
|
|
@ -50,28 +50,10 @@ fn tools_to_json_str(tools: &[agent_tools::Tool]) -> String {
|
|||
format!("[{}]", inner.join(","))
|
||||
}
|
||||
|
||||
/// Events produced by the streaming API backends.
|
||||
/// The runner reads these and decides what to display where.
|
||||
pub(crate) enum StreamEvent {
|
||||
/// Content token from the model's response.
|
||||
Content(String),
|
||||
/// Reasoning/thinking token (internal monologue).
|
||||
Reasoning(String),
|
||||
/// Incremental tool call delta (structured, from APIs that support it).
|
||||
ToolCallDelta {
|
||||
index: usize,
|
||||
id: Option<String>,
|
||||
call_type: Option<String>,
|
||||
name: Option<String>,
|
||||
arguments: Option<String>,
|
||||
},
|
||||
/// Token usage stats.
|
||||
Usage(Usage),
|
||||
/// Stream finished.
|
||||
Finished {
|
||||
reason: String,
|
||||
},
|
||||
/// Error from the stream.
|
||||
/// One token from the streaming completions API.
|
||||
pub(crate) enum StreamToken {
|
||||
Token { text: String, id: u32 },
|
||||
Done { usage: Option<Usage> },
|
||||
Error(String),
|
||||
}
|
||||
|
||||
|
|
@ -133,14 +115,14 @@ impl ApiClient {
|
|||
(rx, AbortOnDrop(handle))
|
||||
}
|
||||
|
||||
/// Start a streaming completion with raw token IDs.
|
||||
/// No message formatting — the caller provides the complete prompt as tokens.
|
||||
pub(crate) fn start_stream_completions(
|
||||
/// Stream a completion with raw token IDs.
|
||||
/// Returns (text, token_id) per token via channel.
|
||||
pub(crate) fn stream_completion(
|
||||
&self,
|
||||
prompt_tokens: &[u32],
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
) -> (mpsc::UnboundedReceiver<StreamEvent>, AbortOnDrop) {
|
||||
) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
|
||||
let (tx, rx) = mpsc::unbounded_channel();
|
||||
let client = self.client.clone();
|
||||
let api_key = self.api_key.clone();
|
||||
|
|
@ -154,7 +136,7 @@ impl ApiClient {
|
|||
&prompt_tokens, &tx, sampling, priority,
|
||||
).await;
|
||||
if let Err(e) = result {
|
||||
let _ = tx.send(StreamEvent::Error(e.to_string()));
|
||||
let _ = tx.send(StreamToken::Error(e.to_string()));
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ use tokio::sync::mpsc;
|
|||
|
||||
use super::http::HttpClient;
|
||||
use super::types::*;
|
||||
use super::StreamEvent;
|
||||
use super::StreamToken;
|
||||
|
||||
/// Stream SSE events from an OpenAI-compatible endpoint, sending
|
||||
/// parsed StreamEvents through the channel. The caller (runner)
|
||||
|
|
@ -186,16 +186,16 @@ pub(super) async fn stream_events(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Stream from the /v1/completions endpoint using raw token IDs.
|
||||
/// Tool calls come as text (<tool_call> tags) and are parsed by the caller.
|
||||
/// Thinking content comes as <think> tags and is split into Reasoning events.
|
||||
/// Stream from /v1/completions with raw token IDs in and out.
|
||||
/// Each SSE chunk yields one token (text + id). All parsing (think tags,
|
||||
/// tool calls) is handled by the ResponseParser, not here.
|
||||
pub(super) async fn stream_completions(
|
||||
client: &HttpClient,
|
||||
base_url: &str,
|
||||
api_key: &str,
|
||||
model: &str,
|
||||
prompt_tokens: &[u32],
|
||||
tx: &mpsc::UnboundedSender<StreamEvent>,
|
||||
tx: &mpsc::UnboundedSender<StreamToken>,
|
||||
sampling: super::SamplingParams,
|
||||
priority: Option<i32>,
|
||||
) -> Result<()> {
|
||||
|
|
@ -207,6 +207,8 @@ pub(super) async fn stream_completions(
|
|||
"top_p": sampling.top_p,
|
||||
"top_k": sampling.top_k,
|
||||
"stream": true,
|
||||
"return_token_ids": true,
|
||||
"skip_special_tokens": false,
|
||||
"stop_token_ids": [super::super::tokenizer::IM_END],
|
||||
});
|
||||
if let Some(p) = priority {
|
||||
|
|
@ -229,20 +231,15 @@ pub(super) async fn stream_completions(
|
|||
|
||||
let mut reader = super::SseReader::new();
|
||||
let mut content_len: usize = 0;
|
||||
let mut first_content_at = None;
|
||||
let mut finish_reason = None;
|
||||
let mut usage = None;
|
||||
let mut in_think = false;
|
||||
|
||||
while let Some(event) = reader.next_event(&mut response).await? {
|
||||
if let Some(err_msg) = event["error"]["message"].as_str() {
|
||||
anyhow::bail!("API error in stream: {}", err_msg);
|
||||
}
|
||||
|
||||
// Completions chunks have a simpler structure
|
||||
if let Some(u) = event["usage"].as_object() {
|
||||
if let Ok(u) = serde_json::from_value::<Usage>(serde_json::Value::Object(u.clone())) {
|
||||
let _ = tx.send(StreamEvent::Usage(u.clone()));
|
||||
usage = Some(u);
|
||||
}
|
||||
}
|
||||
|
|
@ -253,78 +250,27 @@ pub(super) async fn stream_completions(
|
|||
};
|
||||
|
||||
for choice in choices {
|
||||
if let Some(reason) = choice["finish_reason"].as_str() {
|
||||
if reason != "null" {
|
||||
finish_reason = Some(reason.to_string());
|
||||
}
|
||||
}
|
||||
let text = choice["text"].as_str().unwrap_or("");
|
||||
let token_ids = choice["token_ids"].as_array();
|
||||
|
||||
if let Some(text) = choice["text"].as_str() {
|
||||
if text.is_empty() { continue; }
|
||||
|
||||
// Handle <think> tags — split into Reasoning vs Content
|
||||
if text.contains("<think>") || in_think {
|
||||
// Simple state machine for think tags
|
||||
let mut remaining = text;
|
||||
while !remaining.is_empty() {
|
||||
if in_think {
|
||||
if let Some(end) = remaining.find("</think>") {
|
||||
let thinking = &remaining[..end];
|
||||
if !thinking.is_empty() {
|
||||
let _ = tx.send(StreamEvent::Reasoning(thinking.to_string()));
|
||||
}
|
||||
remaining = &remaining[end + 8..];
|
||||
in_think = false;
|
||||
} else {
|
||||
let _ = tx.send(StreamEvent::Reasoning(remaining.to_string()));
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if let Some(start) = remaining.find("<think>") {
|
||||
let content = &remaining[..start];
|
||||
if !content.is_empty() {
|
||||
content_len += content.len();
|
||||
if first_content_at.is_none() {
|
||||
first_content_at = Some(reader.stream_start.elapsed());
|
||||
}
|
||||
let _ = tx.send(StreamEvent::Content(content.to_string()));
|
||||
}
|
||||
remaining = &remaining[start + 7..];
|
||||
in_think = true;
|
||||
} else {
|
||||
content_len += remaining.len();
|
||||
if first_content_at.is_none() {
|
||||
first_content_at = Some(reader.stream_start.elapsed());
|
||||
}
|
||||
let _ = tx.send(StreamEvent::Content(remaining.to_string()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if let Some(ids) = token_ids {
|
||||
for (i, id_val) in ids.iter().enumerate() {
|
||||
if let Some(id) = id_val.as_u64() {
|
||||
content_len += text.len();
|
||||
let _ = tx.send(StreamToken::Token {
|
||||
text: if i == 0 { text.to_string() } else { String::new() },
|
||||
id: id as u32,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
content_len += text.len();
|
||||
if first_content_at.is_none() {
|
||||
first_content_at = Some(reader.stream_start.elapsed());
|
||||
}
|
||||
let _ = tx.send(StreamEvent::Content(text.to_string()));
|
||||
}
|
||||
} else if !text.is_empty() {
|
||||
// Fallback: text without token IDs (shouldn't happen with return_token_ids)
|
||||
content_len += text.len();
|
||||
let _ = tx.send(StreamToken::Token { text: text.to_string(), id: 0 });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let total_elapsed = reader.stream_start.elapsed();
|
||||
super::log_diagnostics(
|
||||
content_len, 0, 0, "none",
|
||||
&finish_reason,
|
||||
reader.chunks_received,
|
||||
reader.sse_lines_parsed,
|
||||
reader.sse_parse_errors,
|
||||
0, total_elapsed, first_content_at,
|
||||
&usage, &[],
|
||||
);
|
||||
|
||||
let reason = finish_reason.unwrap_or_default();
|
||||
let _ = tx.send(StreamEvent::Finished { reason });
|
||||
|
||||
let _ = tx.send(StreamToken::Done { usage });
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue