From 28d56e2a55fe35ab70f43d4791da5305d8326d28 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Thu, 23 Apr 2026 23:41:32 -0400
Subject: [PATCH] agent/context: make Thinking blocks prompt-visible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thinking blocks used to render as empty strings and be excluded from
is_prompt_visible, so the model never saw its own prior CoT across
turns. For Qwen 3.6 native thinking mode, CoT is meant to stay in the
conversation — the model benefits from seeing what it reasoned about
last turn.

Render Thinking as <think>\n{text}\n</think>\n so past reasoning is
visible in subsequent prompts. Add in_think param to
ResponseParser::new so the parser starts inside a <think> block when
the prompt was prefilled with "<think>\n" (native thinking mode).

Co-Authored-By: Proof of Concept
---
 src/agent/context.rs | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/src/agent/context.rs b/src/agent/context.rs
index 00c1ea5..2009cfc 100644
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@@ -218,7 +218,11 @@ impl NodeBody {
     fn render_into(&self, out: &mut String) {
         match self {
             Self::Content(text) => out.push_str(text),
-            Self::Thinking(_) => {},
+            Self::Thinking(text) => {
+                out.push_str("<think>\n");
+                out.push_str(text);
+                out.push_str("\n</think>\n");
+            }
             Self::Log(_) => {},
             Self::ToolCall { name, arguments } => {
                 out.push_str("<tool_call>\n");
@@ -258,7 +262,7 @@ impl NodeBody {
     }
 
     fn is_prompt_visible(&self) -> bool {
-        !matches!(self, Self::Thinking(_) | Self::Log(_))
+        !matches!(self, Self::Log(_))
     }
 
     /// Hand-assemble token IDs for body types where running the tokenizer
@@ -648,13 +652,17 @@ fn drain_safe(buf: &mut String, tag_len: usize) -> String {
 }
 
 impl ResponseParser {
-    pub fn new(branch_idx: usize) -> Self {
+    /// @in_think: whether the model's output begins inside a <think> block.
+    /// Set when the prompt was prefilled with "<think>\n" (native thinking
+    /// mode) so the parser captures reasoning tokens as Thinking until the
+    /// model emits </think>.
+    pub fn new(branch_idx: usize, in_think: bool) -> Self {
         Self {
             branch_idx,
             call_counter: 0,
             buf: String::new(),
             content_parts: Vec::new(),
-            in_think: false,
+            in_think,
             think_buf: String::new(),
             in_tool_call: false,
             tool_call_buf: String::new(),
@@ -1369,7 +1377,7 @@ mod tests {
     fn parse_into_ctx(chunks: &[&str]) -> (ContextState, Vec<ToolCall>) {
         let mut ctx = ContextState::new();
         ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
-        let mut p = ResponseParser::new(0);
+        let mut p = ResponseParser::new(0, false);
         let mut calls = Vec::new();
         for chunk in chunks {
             // Feed each chunk as a single token (id=0 for tests)
@@ -1433,7 +1441,7 @@ mod tests {
         let text = "<think>thought</think>response";
         let mut ctx = ContextState::new();
         ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
-        let mut p = ResponseParser::new(0);
+        let mut p = ResponseParser::new(0, false);
         for ch in text.chars() {
             p.feed_token(&ch.to_string(), &mut ctx);
         }
@@ -1449,7 +1457,7 @@ mod tests {
         let text = "text\n<tool_call>\nls\n</tool_call>\nmore";
         let mut ctx = ContextState::new();
         ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
-        let mut p = ResponseParser::new(0);
+        let mut p = ResponseParser::new(0, false);
         let mut tool_calls = 0;
         for ch in text.chars() {
             tool_calls += p.feed_token(&ch.to_string(), &mut ctx).len();
@@ -1497,8 +1505,10 @@ mod tests {
             AstNode::thinking("hmm"),
             AstNode::content("answer"),
         ]);
-        // Thinking renders as empty, content renders as-is
-        assert_eq!(node.render(), "<|im_start|>assistant\nanswer<|im_end|>\n");
+        // Thinking renders wrapped in <think>...</think> so the model sees
+        // previous turns' reasoning (Qwen 3.6 style: CoT stays in the
+        // conversation across turns).
+        assert_eq!(node.render(), "<|im_start|>assistant\n<think>\nhmm\n</think>\nanswer<|im_end|>\n");
     }
 
     #[test]
@@ -1577,10 +1587,19 @@ mod tests {
     fn test_tokenize_invisible_nodes_are_zero() {
         if !init_tokenizer() { return; }
 
-        assert_eq!(AstNode::thinking("deep thoughts").tokens(), 0);
         assert_eq!(AstNode::log("debug info").tokens(), 0);
     }
 
+    #[test]
+    fn test_tokenize_thinking_matches_rendered_tags() {
+        if !init_tokenizer() { return; }
+
+        // Thinking is now prompt-visible (wrapped in <think>...</think>);
+        // token count must match the rendered wrapping.
+        let node = AstNode::thinking("deep thoughts");
+        assert_eq!(node.tokens(), tokenizer::encode(&node.render()).len());
+    }
+
     #[test]
     fn test_tokenize_decode_roundtrip() {
         if !init_tokenizer() { return; }