agent: send images as multi_modal_data on completion requests
Split the prompt assembly into two forms: the AST keeps the fully-expanded
representation (N image_pads per image, for accurate context budget
accounting), while the request wire form collapses each image to a single
<|image_pad|> bookended by vision_start/end and ships the raw bytes
out-of-band as a base64 data URI in a new `multi_modal_data.image` field on
/v1/completions.

vLLM's Qwen3VL processor uses PromptReplacement with target = a single
<|image_pad|> and replacement = N image_pads, so the wire form matches what
the processor expects and is re-expanded to N pads server-side.

The server side still needs /v1/completions to accept multi_modal_data before
images land end-to-end; that is the next piece.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
parent 91106deaa1
commit 204ba5570a

3 changed files with 115 additions and 5 deletions
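As a rough illustration of the wire form described above, a minimal sketch of
the /v1/completions request body this produces, using serde_json. Only the
`multi_modal_data.image` field and the base64 data URI transport come from the
commit message; the `prompt` and `stream` field names and the helper itself
are assumptions, not this repo's actual api module.

// Hypothetical sketch only, not the repo's api module.
use serde_json::{json, Value};

fn wire_request_body(prompt_tokens: &[u32], image_data_uris: &[String]) -> Value {
    json!({
        // Token-id prompt: each image already collapsed to a single
        // <|image_pad|> bookended by <|vision_start|>/<|vision_end|>.
        "prompt": prompt_tokens,
        // Raw image bytes travel out-of-band as base64 data URIs.
        "multi_modal_data": {
            "image": image_data_uris
        },
        "stream": true
    })
}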
@@ -285,16 +285,23 @@ impl Agent {
     }
 
     pub async fn assemble_prompt_tokens(&self) -> Vec<u32> {
+        self.assemble_prompt().await.0
+    }
+
+    /// Assemble a ready-to-send prompt: token stream in wire form (each
+    /// image collapsed to a single `<|image_pad|>`) paired with the
+    /// images to attach as multi_modal_data.
+    pub async fn assemble_prompt(&self) -> (Vec<u32>, Vec<context::WireImage>) {
         let ctx = self.context.lock().await;
         let st = self.state.lock().await;
-        let mut tokens = ctx.token_ids();
+        let (mut tokens, images) = ctx.wire_prompt();
         tokens.push(tokenizer::IM_START);
         if st.think_native {
             tokens.extend(tokenizer::encode("assistant\n<think>\n"));
         } else {
             tokens.extend(tokenizer::encode("assistant\n"));
         }
-        tokens
+        (tokens, images)
     }
 
     /// Rebuild the tools section of the system prompt from the current tools list.
@@ -354,10 +361,11 @@ impl Agent {
         let _thinking = start_activity(&agent, "thinking...").await;
 
         let (rx, _stream_guard) = {
-            let prompt_tokens = agent.assemble_prompt_tokens().await;
+            let (prompt_tokens, images) = agent.assemble_prompt().await;
             let st = agent.state.lock().await;
-            agent.client.stream_completion(
+            agent.client.stream_completion_mm(
                 &prompt_tokens,
+                &images,
                 api::SamplingParams {
                     temperature: st.temperature,
                     top_p: st.top_p,
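The diff does not show `Context::wire_prompt` itself. As a rough mental model
of the collapse described in the commit message (N image_pads kept in the AST
for budgeting, a single <|image_pad|> per image on the wire, bytes shipped as
data URIs), here is a hedged sketch; the `Item` enum, the `data_uri` field on
`WireImage`, the token-id constants, and the function shape are all
assumptions rather than the actual context module API.

// Rough sketch only; names and placeholder token ids are assumptions.
pub struct WireImage {
    pub data_uri: String, // e.g. "data:image/png;base64,..."
}

pub enum Item {
    Text(Vec<u32>),                          // already-tokenized text
    Image { pads: usize, data_uri: String }, // N pads in the AST form
}

// Placeholder ids; real values would come from the tokenizer's special tokens.
const VISION_START: u32 = 0;
const IMAGE_PAD: u32 = 0;
const VISION_END: u32 = 0;

pub fn wire_prompt(items: &[Item]) -> (Vec<u32>, Vec<WireImage>) {
    let (mut tokens, mut images) = (Vec::new(), Vec::new());
    for item in items {
        match item {
            Item::Text(t) => tokens.extend_from_slice(t),
            Item::Image { data_uri, .. } => {
                // Wire form: vision_start, one image_pad, vision_end;
                // the server re-expands the pad to N via PromptReplacement.
                tokens.extend_from_slice(&[VISION_START, IMAGE_PAD, VISION_END]);
                images.push(WireImage { data_uri: data_uri.clone() });
            }
        }
    }
    (tokens, images)
}

fn main() {
    let items = [
        Item::Text(vec![1, 2, 3]),
        Item::Image { pads: 1200, data_uri: "data:image/png;base64,...".into() },
    ];
    let (tokens, images) = wire_prompt(&items);
    println!("{} wire tokens, {} images", tokens.len(), images.len());
}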