agent: send images as multi_modal_data on completion requests
Split the prompt assembly into two forms: the AST keeps the fully-expanded representation (N image_pads per image, for accurate context-budget accounting), while the request wire form collapses each image to a single <|image_pad|> bookended by vision_start/end and ships the raw bytes out-of-band as a base64 data URI in a new `multi_modal_data.image` field on /v1/completions. vLLM's Qwen3VL processor uses PromptReplacement with target = a single <|image_pad|> and replacement = N image_pads, so the wire form matches what the processor expects and is re-expanded to N pads server-side. The server still needs /v1/completions to accept multi_modal_data before images land end-to-end — that's the next piece. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
91106deaa1
commit
204ba5570a
3 changed files with 115 additions and 5 deletions
|
|
@ -78,18 +78,31 @@ impl ApiClient {
|
|||
prompt_tokens: &[u32],
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
|
||||
self.stream_completion_mm(prompt_tokens, &[], sampling, priority)
|
||||
}
|
||||
|
||||
pub(crate) fn stream_completion_mm(
|
||||
&self,
|
||||
prompt_tokens: &[u32],
|
||||
images: &[super::context::WireImage],
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
|
||||
let (tx, rx) = mpsc::unbounded_channel();
|
||||
let client = self.client.clone();
|
||||
let api_key = self.api_key.clone();
|
||||
let model = self.model.clone();
|
||||
let prompt_tokens = prompt_tokens.to_vec();
|
||||
let images: Vec<(Vec<u8>, String)> = images.iter()
|
||||
.map(|i| (i.bytes.clone(), i.mime.clone()))
|
||||
.collect();
|
||||
let base_url = self.base_url.clone();
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
let result = stream_completions(
|
||||
&client, &base_url, &api_key, &model,
|
||||
&prompt_tokens, &tx, sampling, priority,
|
||||
&prompt_tokens, &images, &tx, sampling, priority,
|
||||
).await;
|
||||
if let Err(e) = result {
|
||||
let _ = tx.send(StreamToken::Error(e.to_string()));
|
||||
|
|
@ -110,6 +123,7 @@ async fn stream_completions(
|
|||
api_key: &str,
|
||||
model: &str,
|
||||
prompt_tokens: &[u32],
|
||||
images: &[(Vec<u8>, String)],
|
||||
tx: &mpsc::UnboundedSender<StreamToken>,
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
|
|
@ -126,6 +140,14 @@ async fn stream_completions(
|
|||
"skip_special_tokens": false,
|
||||
"stop_token_ids": [super::tokenizer::IM_END],
|
||||
});
|
||||
if !images.is_empty() {
|
||||
use base64::Engine;
|
||||
let b64 = base64::engine::general_purpose::STANDARD;
|
||||
let uris: Vec<String> = images.iter()
|
||||
.map(|(bytes, mime)| format!("data:{};base64,{}", mime, b64.encode(bytes)))
|
||||
.collect();
|
||||
request["multi_modal_data"] = serde_json::json!({ "image": uris });
|
||||
}
|
||||
if let Some(p) = priority {
|
||||
request["priority"] = serde_json::json!(p);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue