Fix poc-agent for vllm/Qwen 3.5: reasoning display, tool parser

- Always display reasoning tokens regardless of reasoning_effort
  setting — Qwen 3.5 thinks natively and the reasoning parser
  separates it into its own field
- Stop sending the chat_template_kwargs value that disabled thinking
  when reasoning_effort was "none"
- Add an optional chat_template_kwargs field to ChatRequest for vLLM
  compatibility (currently left unset)
- Update provision script: qwen3_xml tool parser, qwen3 reasoning
  parser, 262K context, 95% GPU memory utilization

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Kent Overstreet 2026-03-19 00:06:26 -04:00
parent 49ccdf87e1
commit f83325b44d
3 changed files with 18 additions and 10 deletions

View file

@ -34,6 +34,7 @@ pub async fn stream(
enabled: reasoning_effort != "none",
effort: Some(reasoning_effort.to_string()),
}),
chat_template_kwargs: None,
};
let url = format!("{}/chat/completions", base_url);
@ -96,14 +97,14 @@ pub async fn stream(
if let Some(ref r) = choice.delta.reasoning_content {
reasoning_chars += r.len();
has_reasoning = true;
if reasoning_enabled && !r.is_empty() {
if !r.is_empty() {
let _ = ui_tx.send(UiMessage::Reasoning(r.clone()));
}
}
if let Some(ref r) = choice.delta.reasoning {
reasoning_chars += r.len();
has_reasoning = true;
if reasoning_enabled && !r.is_empty() {
if !r.is_empty() {
let _ = ui_tx.send(UiMessage::Reasoning(r.clone()));
}
}
@ -111,7 +112,7 @@ pub async fn stream(
let s = r.to_string();
reasoning_chars += s.len();
has_reasoning = true;
if reasoning_enabled && !s.is_empty() && s != "null" {
if !s.is_empty() && s != "null" {
let _ = ui_tx.send(UiMessage::Reasoning(s));
}
}

View file

@ -129,6 +129,9 @@ pub struct ChatRequest {
/// - reasoning.effort (documented: "none" disables entirely)
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning: Option<ReasoningConfig>,
/// vLLM chat template kwargs — optional passthrough (e.g. to toggle thinking on Qwen 3.5); omitted from the request when `None`
#[serde(skip_serializing_if = "Option::is_none")]
pub chat_template_kwargs: Option<serde_json::Value>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]