diff --git a/src/agent/context.rs b/src/agent/context.rs
index 00c1ea5..2009cfc 100644
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@@ -218,7 +218,12 @@ impl NodeBody {
fn render_into(&self, out: &mut String) {
match self {
Self::Content(text) => out.push_str(text),
- Self::Thinking(_) => {},
+ Self::Thinking(text) => {
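+ // Render as "<think>\n{text}\n</think>\n" so prior reasoning stays in the prompt.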
+ out.push_str("<think>\n");
+ out.push_str(text);
+ out.push_str("\n</think>\n");
+ }
Self::Log(_) => {},
Self::ToolCall { name, arguments } => {
out.push_str("\n");
@@ -258,7 +263,8 @@
}
fn is_prompt_visible(&self) -> bool {
- !matches!(self, Self::Thinking(_) | Self::Log(_))
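+ // Thinking is now prompt-visible; only Log nodes are hidden from rendering.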
+ !matches!(self, Self::Log(_))
}
/// Hand-assemble token IDs for body types where running the tokenizer
@@ -648,13 +654,20 @@ fn drain_safe(buf: &mut String, tag_len: usize) -> String {
}
impl ResponseParser {
- pub fn new(branch_idx: usize) -> Self {
+ /// @in_think: whether the model's output begins inside a `<think>` block.
+ /// Set when the prompt was prefilled with "<think>\n" (native thinking
+ /// mode) so the parser captures reasoning tokens as Thinking until the
+ /// model emits `</think>`.
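+ ///
+ /// Illustrative usage (variable names assumed, not from this crate):
+ /// `ResponseParser::new(idx, prompt.ends_with("<think>\n"))`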
+ pub fn new(branch_idx: usize, in_think: bool) -> Self {
Self {
branch_idx,
call_counter: 0,
buf: String::new(),
content_parts: Vec::new(),
- in_think: false,
+ in_think,
think_buf: String::new(),
in_tool_call: false,
tool_call_buf: String::new(),
@@ -1369,7 +1382,8 @@ mod tests {
fn parse_into_ctx(chunks: &[&str]) -> (ContextState, Vec<ToolCall>) {
let mut ctx = ContextState::new();
ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
- let mut p = ResponseParser::new(0);
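+ // Tests run without a "<think>\n" prefill, so the parser starts outside a think block.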
+ let mut p = ResponseParser::new(0, false);
let mut calls = Vec::new();
for chunk in chunks {
// Feed each chunk as a single token (id=0 for tests)
@@ -1433,7 +1447,7 @@
let text = "<think>thought</think>response";
let mut ctx = ContextState::new();
ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
- let mut p = ResponseParser::new(0);
+ let mut p = ResponseParser::new(0, false);
for ch in text.chars() {
p.feed_token(&ch.to_string(), &mut ctx);
}
@@ -1449,7 +1463,7 @@
let text = "text\n<tool_call>\nls\n</tool_call>\nmore";
let mut ctx = ContextState::new();
ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
- let mut p = ResponseParser::new(0);
+ let mut p = ResponseParser::new(0, false);
let mut tool_calls = 0;
for ch in text.chars() {
tool_calls += p.feed_token(&ch.to_string(), &mut ctx).len();
@@ -1497,8 +1511,10 @@
AstNode::thinking("hmm"),
AstNode::content("answer"),
]);
- // Thinking renders as empty, content renders as-is
- assert_eq!(node.render(), "<|im_start|>assistant\nanswer<|im_end|>\n");
+ // Thinking renders wrapped in <think>...</think> so the model sees
+ // previous turns' reasoning (Qwen 3.6 style: CoT stays in the
+ // conversation across turns).
+ assert_eq!(node.render(), "<|im_start|>assistant\n<think>\nhmm\n</think>\nanswer<|im_end|>\n");
}
#[test]
@@ -1577,10 +1593,19 @@
fn test_tokenize_invisible_nodes_are_zero() {
if !init_tokenizer() { return; }
- assert_eq!(AstNode::thinking("deep thoughts").tokens(), 0);
assert_eq!(AstNode::log("debug info").tokens(), 0);
}
+ #[test]
+ fn test_tokenize_thinking_matches_rendered_tags() {
+ if !init_tokenizer() { return; }
+
+ // Thinking is now prompt-visible (wrapped in <think>...</think>);
+ // token count must match the rendered wrapping.
+ let node = AstNode::thinking("deep thoughts");
+ assert_eq!(node.tokens(), tokenizer::encode(&node.render()).len());
+ }
+
#[test]
fn test_tokenize_decode_roundtrip() {
if !init_tokenizer() { return; }