salience: client-side pad expansion, drop AppendImage

Mirrors the vLLM-side rewrite. AppendImage is gone; images now
ride along on Generate via a parallel `images` list.

- Productionize `qwen3_image_token_count` (was test-only). Image
  leaf computes its IMAGE_PAD count eagerly at construction from
  height/width; `token_count` is no longer "0 until the server
  tells us."
- WireChunk shrinks to a single `Tokens(Vec<u32>)` variant — vision
  blocks live inline in the token stream.
- `wire_chunks` now returns `(Vec<WireChunk>, Vec<WireImage>)`.
  `WireImage` carries `pad_start` / `pad_end` (absolute positions
  in the full walk) alongside bytes + mime.
- `assemble_prompt` returns `(chunks, images, match_upto)`.
- `stream_session_mm` / `run_session_generate` take the parallel
  images list, filter to those past `match_upto`, and pass them
  in `GenerateRequest.images` as `pb::ImageAttachment` entries.
- Drop `SessionHandle::append_image`,
  `ContextState::commit_image_token_counts`,
  `StreamToken::ImageAppended`, the WireChunk::Image branch in
  `learn.rs`, and the now-empty `prompt_to_chunks` helper.
- Add 'v' toggle on the conscious-screen tree to render token-id
  vectors in place of text content (debug-aid: lets us see what
  the server actually has when output is suspicious).
- Comment out the subconscious-trigger spawn loop — Kent had this
  disabled before; it had crept back into running.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-24 20:26:47 -04:00
commit fe232cf292
12 changed files with 468 additions and 306 deletions

View file

@ -143,6 +143,13 @@ pub enum AstNode {
/// Maps memory key → divergence score for this response.
#[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
memory_scores: std::collections::BTreeMap<String, f64>,
/// Cached token stream for the subtree. When `Some`, wire-out
/// uses these bytes verbatim and skips recursion into children.
/// Populated by the response parser from the server's exact
/// stream; also computable from children as a fallback. Cleared
/// on any edit to a descendant. Not serialized — transient.
#[serde(skip, default)]
token_ids: Option<Vec<u32>>,
},
}
@ -155,6 +162,14 @@ pub struct ContextState {
journal: Vec<AstNode>,
conversation: Vec<AstNode>,
pub conversation_log: Option<crate::mind::log::ConversationLog>,
/// Length of the session's token stream on the server, as of the
/// last Done event. Updated by the grpc layer.
server_committed_len: u32,
/// Prefix length of our walk that still matches the server's
/// session.tokens byte-for-byte. When < `server_committed_len`
/// the session needs rewinding (truncating=true at this offset).
/// Reset to 0 on any mutation that could have changed sent bytes.
client_match_upto: u32,
}
impl Clone for ContextState {
@ -165,6 +180,8 @@ impl Clone for ContextState {
journal: self.journal.clone(),
conversation: self.conversation.clone(),
conversation_log: None, // forked contexts don't log
server_committed_len: self.server_committed_len,
client_match_upto: self.client_match_upto,
}
}
}
@ -201,6 +218,10 @@ pub struct ResponseParser {
think_buf: String,
in_tool_call: bool,
tool_call_buf: String,
/// Raw generated token IDs, in arrival order. Combined with the
/// prologue at `finish` to stamp the Branch's authoritative
/// token cache — the bytes the server has for this branch.
generated_tokens: Vec<u32>,
}
impl Role {
@ -369,8 +390,11 @@ impl AstNode {
mime: impl Into<String>,
orig_height: u32,
orig_width: u32,
token_count: u32,
) -> Self {
// Pad count is computed eagerly from dimensions — no more
// "unknown until server responds" shape. Server validates
// on the Generate call; mismatches fail loud.
let token_count = qwen3_image_token_count(orig_height, orig_width);
Self::Leaf(NodeLeaf::new(NodeBody::Image {
bytes,
mime: mime.into(),
@ -383,7 +407,13 @@ impl AstNode {
// -- Branch constructors --------------------------------------------------
pub fn branch(role: Role, children: Vec<AstNode>) -> Self {
Self::Branch { role, children, timestamp: Utc::now(), memory_scores: Default::default() }
Self::Branch {
role,
children,
timestamp: Utc::now(),
memory_scores: Default::default(),
token_ids: None,
}
}
pub fn system_msg(text: impl Into<String>) -> Self {
@ -392,6 +422,7 @@ impl AstNode {
children: vec![Self::content(text)],
timestamp: Utc::now(),
memory_scores: Default::default(),
token_ids: None,
}
}
@ -401,6 +432,7 @@ impl AstNode {
children: vec![Self::content(text)],
timestamp: Utc::now(),
memory_scores: Default::default(),
token_ids: None,
}
}
@ -412,11 +444,12 @@ impl AstNode {
let token_ids = leaf.body.compute_token_ids();
Self::Leaf(NodeLeaf { token_ids, ..leaf })
}
Self::Branch { role, children, timestamp, memory_scores } => Self::Branch {
Self::Branch { role, children, timestamp, memory_scores, .. } => Self::Branch {
role,
children: children.into_iter().map(|c| c.retokenize()).collect(),
timestamp,
memory_scores,
token_ids: None,
},
}
}
@ -493,7 +526,10 @@ impl AstNode {
fn token_ids_into(&self, out: &mut Vec<u32>) {
match self {
Self::Leaf(leaf) => out.extend_from_slice(&leaf.token_ids),
Self::Branch { role, children, .. } => {
Self::Branch { token_ids: Some(cached), .. } => {
out.extend_from_slice(cached);
}
Self::Branch { role, children, token_ids: None, .. } => {
out.push(tokenizer::IM_START);
out.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
for child in children {
@ -522,7 +558,8 @@ impl Ast for AstNode {
fn tokens(&self) -> usize {
match self {
Self::Leaf(leaf) => leaf.tokens(),
Self::Branch { role, children, .. } => {
Self::Branch { token_ids: Some(cached), .. } => cached.len(),
Self::Branch { role, children, token_ids: None, .. } => {
1 + role_header_tokens(*role)
+ children.iter().map(|c| c.tokens()).sum::<usize>()
+ 1 + newline_tokens()
@ -676,6 +713,7 @@ impl ResponseParser {
think_buf: String::new(),
in_tool_call: false,
tool_call_buf: String::new(),
generated_tokens: Vec::new(),
}
}
@ -706,6 +744,7 @@ impl ResponseParser {
buf.push(id, r);
}
}
parser.generated_tokens.push(id);
let text = super::tokenizer::decode(&[id]);
full_text.push_str(&text);
let mut ctx = agent.context.lock().await;
@ -740,22 +779,16 @@ impl ResponseParser {
let _ = writeln!(f, " unparsed text: {}", &full_text[..end]);
}
}
if let Some(u) = usage {
if let Some(ref u) = usage {
agent.state.lock().await.last_prompt_tokens = u.prompt_tokens;
}
let mut ctx = agent.context.lock().await;
parser.finish(&mut ctx);
if let Some(u) = usage {
ctx.note_session_synced(u.total_tokens);
}
return Ok(());
}
super::api::StreamToken::ImageAppended { placeholder_count } => {
// Commit the server-authoritative IMAGE_PAD
// count into the first zero-count image leaf
// in wire order. AppendImage always runs
// before the final Generate, so this fires
// before any Token events for this stream.
let mut ctx = agent.context.lock().await;
ctx.commit_image_token_counts(&[placeholder_count]);
}
super::api::StreamToken::Error(e) => {
return Err(anyhow::anyhow!("{}", e));
}
@ -842,7 +875,7 @@ impl ResponseParser {
}
fn push_child(&self, ctx: &mut ContextState, child: AstNode) {
ctx.push_child(Section::Conversation, self.branch_idx, child);
ctx.push_child_raw(Section::Conversation, self.branch_idx, child);
}
fn flush_content(&mut self, ctx: &mut ContextState) {
@ -860,6 +893,29 @@ impl ResponseParser {
self.content_parts.push(std::mem::take(&mut self.buf));
}
self.flush_content(ctx);
// Stamp the authoritative token cache onto the branch.
// Layout mirrors the full chat-template rendering of a
// message block:
//
// IM_START + "assistant\n" [+ "<think>\n"] (prologue — what we sent)
// + generated_tokens (what the server generated, ends in IM_END)
// + "\n" (trailing newline — template-required)
//
// Server only has through the IM_END (model stops on it,
// doesn't emit "\n"). Match-upto lands inside the cache
// right after IM_END; the chunk-walk's straddle path picks
// up the trailing "\n" as the head of the next turn's delta.
// The "\n" between turns matters: without it Qwen sees
// `<|im_end|><|im_start|>` back-to-back (no newline) and
// responds with garbage.
let prologue_text = if self.in_think { "assistant\n<think>\n" } else { "assistant\n" };
let mut cache = Vec::with_capacity(1 + self.generated_tokens.len() + 8);
cache.push(tokenizer::IM_START);
cache.extend(tokenizer::encode(prologue_text));
cache.extend(self.generated_tokens);
cache.extend(tokenizer::encode("\n"));
ctx.set_branch_cache(Section::Conversation, self.branch_idx, cache);
}
}
@ -871,9 +927,39 @@ impl ContextState {
journal: Vec::new(),
conversation: Vec::new(),
conversation_log: None,
server_committed_len: 0,
client_match_upto: 0,
}
}
// -- Server sync tracking -------------------------------------------------
/// Token count of the session as the server holds it. Starts at 0
/// in a fresh context and is overwritten by `note_session_synced`.
pub fn server_committed_len(&self) -> u32 {
    self.server_committed_len
}
/// How many leading tokens of our wire walk are still believed to be
/// identical to the server's copy. A value below
/// `server_committed_len` signals that the next Generate has to
/// rewind the session (`truncating=true`) to this offset.
pub fn client_match_upto(&self) -> u32 {
    self.client_match_upto
}
/// Record a completed round-trip: the server now holds
/// `total_tokens` tokens, and since everything was just sent our
/// walk matches all of them. Both counters are set to the same
/// value.
pub fn note_session_synced(&mut self, total_tokens: u32) {
    self.client_match_upto = total_tokens;
    self.server_committed_len = total_tokens;
}
/// Drop all alignment with the server by resetting match-upto to 0.
/// Invoked by mutations that may have changed a region the server
/// already holds. Deliberately coarse: `server_committed_len` is
/// left untouched, and no attempt is made to keep the prefix before
/// the mutated node — tracking match-upto at the mutated node's
/// offset is a possible future optimization.
fn mark_dirty(&mut self) {
self.client_match_upto = 0;
}
// -- Read access ----------------------------------------------------------
pub fn system(&self) -> &[AstNode] { &self.system }
@ -886,35 +972,6 @@ impl ContextState {
[&self.system, &self.identity, &self.journal, &self.conversation]
}
/// Walk image leaves across all sections in wire order and fill in
/// the first N leaves that have `token_count == 0` with successive
/// values from `counts`. Used after a gRPC session's stream of
/// AppendImage responses to commit the server's IMAGE_PAD counts
/// back into the AST so the next wire walk doesn't see zero-count
/// images in the already-committed prefix.
pub fn commit_image_token_counts(&mut self, counts: &[u32]) {
fn visit(node: &mut AstNode, counts: &[u32], idx: &mut usize) {
if *idx >= counts.len() { return; }
match node {
AstNode::Leaf(leaf) => {
if let NodeBody::Image { token_count, .. } = leaf.body() {
if *token_count == 0 {
leaf.set_image_token_count(counts[*idx]);
*idx += 1;
}
}
}
AstNode::Branch { children, .. } => {
for c in children { visit(c, counts, idx); }
}
}
}
let mut idx = 0usize;
for node in &mut self.system { visit(node, counts, &mut idx); }
for node in &mut self.identity { visit(node, counts, &mut idx); }
for node in &mut self.journal { visit(node, counts, &mut idx); }
for node in &mut self.conversation { visit(node, counts, &mut idx); }
}
}
impl Ast for ContextState {
@ -947,55 +1004,57 @@ impl Ast for ContextState {
}
/// An image collected from the AST for a request body. The AST stores
/// the pre-expanded token form (`<|vision_start|> + <|image_pad|>×N +
/// <|vision_end|>`), and the wire form mirrors that exactly so the
/// server's `session.tokens` length matches what vLLM's engine will
/// process. The authoritative N is obtained from the server via the
/// CountImageTokens RPC before the Image leaf is constructed.
/// Image metadata collected during `wire_chunks` — the binary +
/// mime plus the absolute token-position range of the image's
/// pre-expanded placeholder run in the full wire stream. Sent
/// alongside `append_tokens` in `GenerateRequest` so the server
/// can attach vision features to the declared positions. Positions
/// are absolute within the full wire walk starting at offset 0,
/// i.e. the same coordinate system as `session.tokens` on the
/// server once the walk has been applied.
#[derive(Clone)]
pub struct WireImage {
/// Raw image payload, cloned from the Image leaf.
pub bytes: Vec<u8>,
/// MIME type string, cloned from the Image leaf.
pub mime: String,
/// Token-buffer length just before the image leaf's tokens were
/// inlined, i.e. the index of the first token of the inlined run.
pub pad_start: u32,
/// Token-buffer length just after the image leaf's tokens were
/// inlined (exclusive end of the run).
/// NOTE(review): the leaf's token_ids are described as
/// [VISION_START, IMAGE_PAD*N, VISION_END], so this range appears to
/// cover the bookends as well as the pad run — confirm that is what
/// the server expects.
pub pad_end: u32,
}
/// One piece of the wire stream for the gRPC session path. Runs of
/// text/tool/thinking tokens are batched into `Tokens`; each Image
/// leaf becomes its own `Image` chunk because the server writes the
/// full vision block on AppendImage — the client never sends vision
/// tokens inline. Order matches the AST's depth-first wire order.
/// One piece of the wire stream for the gRPC session path. Since
/// images now live inline in the token stream (pre-expanded at AST
/// construction time), there's only one variant — a run of tokens.
/// The parallel `Vec<WireImage>` returned by `wire_chunks` gives the
/// binary + position metadata for each embedded image.
#[derive(Clone)]
pub enum WireChunk {
Tokens(Vec<u32>),
Image {
bytes: Vec<u8>,
mime: String,
/// Client's current best guess at how many tokens the server
/// will expand this image to, including bookends. `0` means
/// the count is unknown (view_image just loaded the image and
/// AppendImage hasn't run yet). Callers use this only to know
/// this chunk's contribution to the server-visible length for
/// offset bookkeeping on chunks that were already appended on
/// a prior turn.
known_expanded_len: u32,
},
}
fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>) {
match node {
AstNode::Leaf(leaf) => match leaf.body() {
NodeBody::Image { bytes, mime, .. } => {
// Send the pre-expanded token form (includes N
// <|image_pad|> tokens); engine's multi_modal
// pipeline pairs them with the binary data below.
// The Image leaf's token_ids is already
// [VISION_START, IMAGE_PAD * N, VISION_END]. Inline
// those into the token stream and record the pad-run
// range so the server can attach features to the
// declared positions.
let pad_start = tokens.len() as u32;
tokens.extend_from_slice(leaf.token_ids());
let pad_end = tokens.len() as u32;
images.push(WireImage {
bytes: bytes.clone(),
mime: mime.clone(),
pad_start,
pad_end,
});
}
_ => tokens.extend_from_slice(leaf.token_ids()),
},
AstNode::Branch { role, children, .. } => {
AstNode::Branch { token_ids: Some(cached), .. } => {
tokens.extend_from_slice(cached);
}
AstNode::Branch { role, children, token_ids: None, .. } => {
tokens.push(tokenizer::IM_START);
tokens.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
for c in children {
@ -1118,10 +1177,16 @@ impl ContextState {
}
/// Build the wire stream as interleaved `WireChunk`s for the gRPC
/// session path. Unlike `wire_prompt`, this preserves the order
/// of text runs vs image blocks so the caller can drive the
/// append flow (AppendImage for each Image, Generate append for
/// contiguous text runs).
/// session path. Returns a tuple of (chunks, images): the chunks
/// hold the full token stream (with vision blocks inlined as
/// `VISION_START + IMAGE_PAD*N + VISION_END`), and the images
/// list carries each embedded image's binary + position range so
/// the gRPC layer can attach them via `GenerateRequest.images`.
///
/// Note: with images inlined into the token stream, the chunks
/// list is structurally a single `Tokens` chunk in the common
/// case — the multi-chunk shape persists only because some
/// callers may want the option of inserting breakpoints later.
///
/// `conv_range` and `skip` mirror `wire_prompt` — select a
/// conversation slice and drop identity / conversation nodes by
@ -1130,46 +1195,43 @@ impl ContextState {
&self,
conv_range: std::ops::Range<usize>,
mut skip: F,
) -> Vec<WireChunk>
) -> (Vec<WireChunk>, Vec<WireImage>)
where F: FnMut(&AstNode) -> bool,
{
let mut out: Vec<WireChunk> = Vec::new();
let mut buf: Vec<u32> = Vec::new();
let mut images: Vec<WireImage> = Vec::new();
fn flush(buf: &mut Vec<u32>, out: &mut Vec<WireChunk>) {
if !buf.is_empty() {
out.push(WireChunk::Tokens(std::mem::take(buf)));
}
}
fn visit(node: &AstNode, buf: &mut Vec<u32>, out: &mut Vec<WireChunk>) {
fn visit(
node: &AstNode,
buf: &mut Vec<u32>,
images: &mut Vec<WireImage>,
) {
match node {
AstNode::Leaf(leaf) => match leaf.body() {
NodeBody::Image { bytes, mime, token_count, .. } => {
flush(buf, out);
// Bookends (VISION_START + VISION_END) add 2
// to the expanded length; token_count is the
// IMAGE_PAD run. 0 means count is still
// unknown (no AppendImage yet) — don't claim
// a length the server will disagree with.
let expanded = if *token_count == 0 {
0
} else {
*token_count + 2
};
out.push(WireChunk::Image {
NodeBody::Image { bytes, mime, .. } => {
// Pre-expanded vision block lives in
// leaf.token_ids: [VISION_START, IMAGE_PAD*N,
// VISION_END]. Inline + record the range.
let pad_start = buf.len() as u32;
buf.extend_from_slice(leaf.token_ids());
let pad_end = buf.len() as u32;
images.push(WireImage {
bytes: bytes.clone(),
mime: mime.clone(),
known_expanded_len: expanded,
pad_start,
pad_end,
});
}
_ => buf.extend_from_slice(leaf.token_ids()),
},
AstNode::Branch { role, children, .. } => {
AstNode::Branch { token_ids: Some(cached), .. } => {
buf.extend_from_slice(cached);
}
AstNode::Branch { role, children, token_ids: None, .. } => {
buf.push(tokenizer::IM_START);
buf.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
for c in children {
visit(c, buf, out);
visit(c, buf, images);
}
buf.push(tokenizer::IM_END);
buf.extend(tokenizer::encode("\n"));
@ -1177,18 +1239,22 @@ impl ContextState {
}
}
for node in self.system() { visit(node, &mut buf, &mut out); }
for node in self.system() { visit(node, &mut buf, &mut images); }
for node in self.identity() {
if skip(node) { continue; }
visit(node, &mut buf, &mut out);
visit(node, &mut buf, &mut images);
}
for node in self.journal() { visit(node, &mut buf, &mut out); }
for node in self.journal() { visit(node, &mut buf, &mut images); }
for node in &self.conversation()[conv_range] {
if skip(node) { continue; }
visit(node, &mut buf, &mut out);
visit(node, &mut buf, &mut images);
}
flush(&mut buf, &mut out);
out
let chunks = if buf.is_empty() {
Vec::new()
} else {
vec![WireChunk::Tokens(buf)]
};
(chunks, images)
}
}
@ -1209,17 +1275,27 @@ impl ContextState {
dbglog!("warning: log: {:#}", e);
}
}
// Conversation appends always go to the tail — past committed —
// so they don't break the match. Any other section mutates a
// region the server may already have, so drop alignment.
if section != Section::Conversation {
self.mark_dirty();
}
self.section_mut(section).push(node);
}
/// Append `node` to `section` without writing to the conversation
/// log. Appending to any section other than the conversation drops
/// server alignment first; conversation appends leave it intact.
pub fn push_no_log(&mut self, section: Section, node: AstNode) {
    let is_conversation_append = section == Section::Conversation;
    if !is_conversation_append {
        self.mark_dirty();
    }
    let nodes = self.section_mut(section);
    nodes.push(node);
}
/// Replace the body of a leaf at `index` in `section`.
/// Re-tokenizes to maintain the invariant.
pub fn set_message(&mut self, section: Section, index: usize, body: NodeBody) {
self.mark_dirty();
let nodes = self.section_mut(section);
let node = &mut nodes[index];
match node {
@ -1245,10 +1321,12 @@ impl ContextState {
}
/// Remove the node at `index` from `section` and hand it back to the
/// caller. Server alignment is dropped before the removal,
/// regardless of which section is touched.
pub fn del(&mut self, section: Section, index: usize) -> AstNode {
    self.mark_dirty();
    let nodes = self.section_mut(section);
    nodes.remove(index)
}
/// Empty `section` entirely. Server alignment is dropped first,
/// regardless of which section is cleared.
pub fn clear(&mut self, section: Section) {
    self.mark_dirty();
    let nodes = self.section_mut(section);
    nodes.clear();
}
@ -1269,6 +1347,7 @@ impl ContextState {
/// are > 50% of conversation tokens) or oldest conversation entry.
/// Phase 3: Snap to user message boundary at start.
pub fn trim_conversation(&mut self) {
self.mark_dirty();
let max_tokens = context_budget_tokens();
let fixed = self.system.iter().map(|n| n.tokens()).sum::<usize>()
+ self.identity.iter().map(|n| n.tokens()).sum::<usize>()
@ -1345,11 +1424,49 @@ impl ContextState {
}
/// Append `child` to the branch at `index` in `section`.
///
/// Any cached token stream on the branch is discarded, so wire-out
/// falls back to recomputing from the children until a cache is
/// stamped again. If a cache was actually present, session alignment
/// is dropped as well.
///
/// # Panics
/// Panics if the node at `index` is a leaf.
pub fn push_child(&mut self, section: Section, index: usize, child: AstNode) {
    // The match both performs the append and reports whether a cache
    // had to be thrown away; `take` leaves `None` behind.
    let had_cache = match &mut self.section_mut(section)[index] {
        AstNode::Branch { children, token_ids, .. } => {
            children.push(child);
            token_ids.take().is_some()
        }
        AstNode::Leaf(_) => panic!("push_child on leaf node"),
    };
    if had_cache {
        self.mark_dirty();
    }
}
/// Like `push_child` but preserves the branch's cached token stream.
/// Used by the response parser, which is simultaneously populating
/// the cache from the authoritative server stream and pushing the
/// parsed-out children — the two stay consistent by construction.
/// Module-private: callers outside `context.rs` must go through
/// `push_child` so the invariant is maintained.
fn push_child_raw(&mut self, section: Section, index: usize, child: AstNode) {
let node = &mut self.section_mut(section)[index];
match node {
AstNode::Branch { children, .. } => children.push(child),
AstNode::Leaf(_) => panic!("push_child on leaf node"),
AstNode::Leaf(_) => panic!("push_child_raw on leaf node"),
}
}
/// Install `tokens` as the verbatim token cache of the branch at
/// `index` in `section`, replacing whatever cache was there. The
/// response parser uses this to record the token stream of the turn
/// it has just finished. Kept module-private because the cache is
/// trusted as-is by wire-out and must only be written by code that
/// holds the server's ground truth.
///
/// # Panics
/// Panics if the node at `index` is a leaf.
fn set_branch_cache(&mut self, section: Section, index: usize, tokens: Vec<u32>) {
    match &mut self.section_mut(section)[index] {
        AstNode::Leaf(_) => panic!("set_branch_cache on leaf node"),
        AstNode::Branch { token_ids, .. } => {
            *token_ids = Some(tokens);
        }
    }
}
@ -1373,20 +1490,19 @@ impl ContextState {
// to at request time. Constants come from Qwen3.5-27B's preprocessor_config.
// ---------------------------------------------------------------------------
// Test-only client-side estimate of image token expansion. Production
// callers obtain the authoritative count from the server via
// CountImageTokens; these constants and helpers stay around only to
// keep the context-shape unit tests self-contained.
#[cfg(test)]
// Production client-side computation of image-token expansion. With
// the delta-session protocol, the client writes the pre-expanded
// vision block (VISION_START + N*IMAGE_PAD + VISION_END) directly
// into the token stream at Image-leaf construction time, and tells
// the server where each image's pad run lives via
// GenerateRequest.images. Server validates that this N matches
// what the vision encoder actually produces and rejects on
// mismatch — so drift here fails loudly, not silently.
const QWEN3_PATCH_SIZE: u32 = 16;
#[cfg(test)]
const QWEN3_MERGE_SIZE: u32 = 2;
#[cfg(test)]
const QWEN3_MIN_PIXELS: u64 = 65_536;
#[cfg(test)]
const QWEN3_MAX_PIXELS: u64 = 16_777_216;
#[cfg(test)]
fn smart_resize(h: u32, w: u32, factor: u32, min_pixels: u64, max_pixels: u64) -> (u32, u32) {
let max_s = h.max(w) as f64;
let min_s = h.min(w) as f64;
@ -1415,11 +1531,10 @@ fn smart_resize(h: u32, w: u32, factor: u32, min_pixels: u64, max_pixels: u64) -
}
}
/// Test-only: client-side estimate of how many `<|image_pad|>` tokens
/// vLLM will emit for an image of the given dimensions. Production
/// callers use `salience::count_image_tokens` (server-authoritative).
#[cfg(test)]
fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
/// How many `<|image_pad|>` tokens the Qwen3-VL vision encoder will
/// produce for an image of the given dimensions. Server verifies
/// this count against its own encoder run and rejects on mismatch.
pub fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
let factor = QWEN3_PATCH_SIZE * QWEN3_MERGE_SIZE;
let (rh, rw) = smart_resize(orig_h, orig_w, factor, QWEN3_MIN_PIXELS, QWEN3_MAX_PIXELS);
(rh / QWEN3_PATCH_SIZE) * (rw / QWEN3_PATCH_SIZE) / (QWEN3_MERGE_SIZE * QWEN3_MERGE_SIZE)
@ -1854,7 +1969,7 @@ mod tests {
#[test]
fn test_image_render_and_token_ids() {
let node = AstNode::image(vec![0u8, 1, 2, 3], "image/png", 512, 512, qwen3_image_token_count(512, 512));
let node = AstNode::image(vec![0u8, 1, 2, 3], "image/png", 512, 512);
let leaf = node.leaf().unwrap();
// 2 bookend tokens (VISION_START + VISION_END) + 256 image_pad tokens
assert_eq!(leaf.token_ids().len(), 258);
@ -1874,7 +1989,7 @@ mod tests {
let mut ctx = ContextState::new();
ctx.push_no_log(Section::Conversation, AstNode::branch(Role::User, vec![
AstNode::content("look:"),
AstNode::image(vec![0xDE, 0xAD], "image/png", 512, 512, qwen3_image_token_count(512, 512)),
AstNode::image(vec![0xDE, 0xAD], "image/png", 512, 512),
]));
// AST side and wire side should both carry N image_pads + bookends —
@ -1904,7 +2019,7 @@ mod tests {
#[test]
fn test_image_serde_roundtrip() {
let node = AstNode::image(vec![0xDE, 0xAD, 0xBE, 0xEF], "image/png", 64, 64, qwen3_image_token_count(64, 64));
let node = AstNode::image(vec![0xDE, 0xAD, 0xBE, 0xEF], "image/png", 64, 64);
let json = serde_json::to_string(&node).unwrap();
// bytes must be base64-encoded in the JSON form
assert!(json.contains("3q2+7w=="));