diff --git a/src/agent/api/mod.rs b/src/agent/api/mod.rs
index 5705d89..fc8a358 100644
--- a/src/agent/api/mod.rs
+++ b/src/agent/api/mod.rs
@@ -117,6 +117,12 @@ impl ApiClient {
     /// the channel on first call and reuses it thereafter across
     /// every ApiClient clone. All scoring / inference / session
     /// RPCs flow through this single multiplexed HTTP/2 connection.
+    ///
+    /// Raises tonic's 4 MiB default decode cap to 64 MiB on every client and
+    /// bounds encoding (unlimited by default) at 64 MiB too. Multimodal
+    /// Generate requests carry pre-encoded image bytes inline (Qwen3.6's
+    /// 768×768 patches at high res land around 5–8 MiB per turn), and Done
+    /// events with full per-token readout vectors can also exceed 4 MiB on long runs.
     pub async fn salience_client(&self) -> Result<
         salience::pb::salience_client::SalienceClient<tonic::transport::Channel>
     > {
@@ -127,7 +133,10 @@ impl ApiClient {
                 self.base_url, grpc_url);
             salience::connect_channel(&grpc_url).await
         }).await?;
-        Ok(salience::pb::salience_client::SalienceClient::new(ch.clone()))
+        const MAX_GRPC_MESSAGE_BYTES: usize = 64 * 1024 * 1024;
+        Ok(salience::pb::salience_client::SalienceClient::new(ch.clone())
+            .max_decoding_message_size(MAX_GRPC_MESSAGE_BYTES)
+            .max_encoding_message_size(MAX_GRPC_MESSAGE_BYTES))
     }
 
     /// Stream generation via a gRPC session. Walks the prompt chunks