// api/openai.rs — OpenAI-compatible backend // // Works with any provider that implements the OpenAI chat completions // API: OpenRouter, vLLM, llama.cpp, Fireworks, Together, etc. // Also used for local models (Qwen, llama) via compatible servers. use anyhow::Result; use tokio::sync::mpsc; use super::http::HttpClient; use super::types::*; use super::StreamToken; /// Stream from /v1/completions with raw token IDs in and out. /// Each SSE chunk yields one token (text + id). All parsing (think tags, /// tool calls) is handled by the ResponseParser, not here. pub(super) async fn stream_completions( client: &HttpClient, base_url: &str, api_key: &str, model: &str, prompt_tokens: &[u32], tx: &mpsc::UnboundedSender, sampling: super::SamplingParams, priority: Option, ) -> Result<()> { let mut request = serde_json::json!({ "model": model, "prompt": prompt_tokens, "max_tokens": 16384, "temperature": sampling.temperature, "top_p": sampling.top_p, "top_k": sampling.top_k, "stream": true, "return_token_ids": true, "skip_special_tokens": false, "stop_token_ids": [super::super::tokenizer::IM_END], }); if let Some(p) = priority { request["priority"] = serde_json::json!(p); } let url = format!("{}/completions", base_url); let debug_label = format!("{} prompt tokens, model={}", prompt_tokens.len(), model); let mut response = super::send_and_check( client, &url, &request, ("Authorization", &format!("Bearer {}", api_key)), &[], &debug_label, None, ) .await?; let mut reader = super::SseReader::new(); let mut usage = None; while let Some(event) = reader.next_event(&mut response).await? { if let Some(err_msg) = event["error"]["message"].as_str() { anyhow::bail!("API error in stream: {}", err_msg); } if let Some(u) = event["usage"].as_object() { if let Ok(u) = serde_json::from_value::(serde_json::Value::Object(u.clone())) { usage = Some(u); } } let choices = match event["choices"].as_array() { Some(c) => c, None => continue, }; for choice in choices { let text = choice["text"].as_str().unwrap_or(""); let token_ids = choice["token_ids"].as_array(); if let Some(ids) = token_ids { for (i, id_val) in ids.iter().enumerate() { if let Some(id) = id_val.as_u64() { let _ = tx.send(StreamToken::Token { text: if i == 0 { text.to_string() } else { String::new() }, id: id as u32, }); } } } else if !text.is_empty() { let _ = tx.send(StreamToken::Token { text: text.to_string(), id: 0 }); } } } let _ = tx.send(StreamToken::Done { usage }); Ok(()) }