Fix poc-agent for vllm/Qwen 3.5: reasoning display, tool parser
- Always display reasoning tokens regardless of reasoning_effort setting — Qwen 3.5 thinks natively and the reasoning parser separates it into its own field
- Remove chat_template_kwargs that disabled thinking when reasoning_effort was "none"
- Add chat_template_kwargs field to ChatRequest for vllm compat
- Update provision script: qwen3_xml tool parser, qwen3 reasoning parser, 262K context, 95% GPU memory utilization

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
49ccdf87e1
commit
f83325b44d
3 changed files with 18 additions and 10 deletions
|
|
@@ -34,6 +34,7 @@ pub async fn stream(
|
|||
enabled: reasoning_effort != "none",
|
||||
effort: Some(reasoning_effort.to_string()),
|
||||
}),
|
||||
chat_template_kwargs: None,
|
||||
};
|
||||
|
||||
let url = format!("{}/chat/completions", base_url);
|
||||
|
|
@@ -96,14 +97,14 @@ pub async fn stream(
|
|||
if let Some(ref r) = choice.delta.reasoning_content {
|
||||
reasoning_chars += r.len();
|
||||
has_reasoning = true;
|
||||
if reasoning_enabled && !r.is_empty() {
|
||||
if !r.is_empty() {
|
||||
let _ = ui_tx.send(UiMessage::Reasoning(r.clone()));
|
||||
}
|
||||
}
|
||||
if let Some(ref r) = choice.delta.reasoning {
|
||||
reasoning_chars += r.len();
|
||||
has_reasoning = true;
|
||||
if reasoning_enabled && !r.is_empty() {
|
||||
if !r.is_empty() {
|
||||
let _ = ui_tx.send(UiMessage::Reasoning(r.clone()));
|
||||
}
|
||||
}
|
||||
|
|
@@ -111,7 +112,7 @@ pub async fn stream(
|
|||
let s = r.to_string();
|
||||
reasoning_chars += s.len();
|
||||
has_reasoning = true;
|
||||
if reasoning_enabled && !s.is_empty() && s != "null" {
|
||||
if !s.is_empty() && s != "null" {
|
||||
let _ = ui_tx.send(UiMessage::Reasoning(s));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -129,6 +129,9 @@ pub struct ChatRequest {
|
|||
/// - reasoning.effort (documented: "none" disables entirely)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reasoning: Option<ReasoningConfig>,
|
||||
/// vllm chat template kwargs — used to disable thinking on Qwen 3.5
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub chat_template_kwargs: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue