diff --git a/src/agent/context.rs b/src/agent/context.rs
index 00c1ea5..2009cfc 100644
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@@ -218,7 +218,11 @@ impl NodeBody {
     fn render_into(&self, out: &mut String) {
         match self {
             Self::Content(text) => out.push_str(text),
-            Self::Thinking(_) => {},
+            Self::Thinking(text) => {
+                out.push_str("<think>\n");
+                out.push_str(text);
+                out.push_str("\n</think>\n");
+            }
             Self::Log(_) => {},
             Self::ToolCall { name, arguments } => {
                 out.push_str("<tool_call>\n");
@@ -258,7 +262,7 @@ impl NodeBody {
     }
 
     fn is_prompt_visible(&self) -> bool {
-        !matches!(self, Self::Thinking(_) | Self::Log(_))
+        !matches!(self, Self::Log(_))
     }
 
     /// Hand-assemble token IDs for body types where running the tokenizer
@@ -648,13 +652,17 @@ fn drain_safe(buf: &mut String, tag_len: usize) -> String {
 }
 
 impl ResponseParser {
-    pub fn new(branch_idx: usize) -> Self {
+    /// @in_think: whether the model's output begins inside a <think> block.
+    /// Set when the prompt was prefilled with "<think>\n" (native thinking
+    /// mode) so the parser captures reasoning tokens as Thinking until the
+    /// model emits </think>.
+    pub fn new(branch_idx: usize, in_think: bool) -> Self {
         Self {
             branch_idx,
             call_counter: 0,
             buf: String::new(),
             content_parts: Vec::new(),
-            in_think: false,
+            in_think,
             think_buf: String::new(),
             in_tool_call: false,
             tool_call_buf: String::new(),
@@ -1369,7 +1377,7 @@ mod tests {
     fn parse_into_ctx(chunks: &[&str]) -> (ContextState, Vec<ToolCall>) {
         let mut ctx = ContextState::new();
         ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
-        let mut p = ResponseParser::new(0);
+        let mut p = ResponseParser::new(0, false);
         let mut calls = Vec::new();
         for chunk in chunks {
             // Feed each chunk as a single token (id=0 for tests)
@@ -1433,7 +1441,7 @@ mod tests {
         let text = "<think>thought</think>response";
         let mut ctx = ContextState::new();
         ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
-        let mut p = ResponseParser::new(0);
+        let mut p = ResponseParser::new(0, false);
         for ch in text.chars() {
             p.feed_token(&ch.to_string(), &mut ctx);
         }
@@ -1449,7 +1457,7 @@ mod tests {
         let text = "text\n<tool_call>\nls\n</tool_call>\nmore";
         let mut ctx = ContextState::new();
         ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
-        let mut p = ResponseParser::new(0);
+        let mut p = ResponseParser::new(0, false);
         let mut tool_calls = 0;
         for ch in text.chars() {
             tool_calls += p.feed_token(&ch.to_string(), &mut ctx).len();
@@ -1497,8 +1505,10 @@ mod tests {
             AstNode::thinking("hmm"),
             AstNode::content("answer"),
         ]);
-        // Thinking renders as empty, content renders as-is
-        assert_eq!(node.render(), "<|im_start|>assistant\nanswer<|im_end|>\n");
+        // Thinking renders wrapped in <think>...</think> so the model sees
+        // previous turns' reasoning (Qwen 3.6 style: CoT stays in the
+        // conversation across turns).
+        assert_eq!(node.render(), "<|im_start|>assistant\n<think>\nhmm\n</think>\nanswer<|im_end|>\n");
     }
 
     #[test]
@@ -1577,10 +1587,19 @@ mod tests {
     fn test_tokenize_invisible_nodes_are_zero() {
         if !init_tokenizer() { return; }
 
-        assert_eq!(AstNode::thinking("deep thoughts").tokens(), 0);
         assert_eq!(AstNode::log("debug info").tokens(), 0);
     }
 
+    #[test]
+    fn test_tokenize_thinking_matches_rendered_tags() {
+        if !init_tokenizer() { return; }
+
+        // Thinking is now prompt-visible (wrapped in <think>...</think>);
+        // token count must match the rendered wrapping.
+        let node = AstNode::thinking("deep thoughts");
+        assert_eq!(node.tokens(), tokenizer::encode(&node.render()).len());
+    }
+
     #[test]
     fn test_tokenize_decode_roundtrip() {
         if !init_tokenizer() { return; }