agent/context: make Thinking blocks prompt-visible

Thinking blocks used to render as empty strings and be excluded from
is_prompt_visible, so the model never saw its own prior CoT across
turns. For Qwen 3.6 native thinking mode, CoT is meant to stay in the
conversation — the model benefits from seeing what it reasoned about
last turn.

Render Thinking as <think>\n{text}\n</think>\n so past reasoning is
visible in subsequent prompts. Add in_think param to ResponseParser::new
so the parser starts inside a <think> block when the prompt was
prefilled with "<think>\n" (native thinking mode).

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
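
As an illustration of the new in_think parameter, here is a minimal sketch of what a call site could look like in native thinking mode. The call site is not part of this diff; ctx.render_prompt(), model.stream(), dispatch(), and branch_idx are assumed names, and only ResponseParser::new and feed_token come from the patched code.

// Hypothetical caller; helper names above are assumptions, not this repo's API.
// Native thinking mode: the prompt already ends with an opened <think> tag,
// so the parser must start out inside it.
let mut prompt = ctx.render_prompt();                    // assumed helper
prompt.push_str("<think>\n");                            // prefill described in the commit message

let mut parser = ResponseParser::new(branch_idx, /* in_think = */ true);
for tok in model.stream(&prompt) {                       // assumed streaming API
    // feed_token() returns the tool calls completed by this token (see tests).
    for call in parser.feed_token(&tok, &mut ctx) {
        dispatch(call);                                  // assumed tool dispatcher
    }
}

With in_think = false (as in the existing tests) the parser behaves as before and only enters a Thinking node when the model itself emits <think>.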
This commit is contained in:
parent 6fedc9b2a8
commit 28d56e2a55

1 changed file with 29 additions and 10 deletions
@@ -218,7 +218,11 @@ impl NodeBody {
     fn render_into(&self, out: &mut String) {
         match self {
             Self::Content(text) => out.push_str(text),
-            Self::Thinking(_) => {},
+            Self::Thinking(text) => {
+                out.push_str("<think>\n");
+                out.push_str(text);
+                out.push_str("\n</think>\n");
+            }
             Self::Log(_) => {},
             Self::ToolCall { name, arguments } => {
                 out.push_str("<tool_call>\n");
@@ -258,7 +262,7 @@ impl NodeBody {
     }
 
     fn is_prompt_visible(&self) -> bool {
-        !matches!(self, Self::Thinking(_) | Self::Log(_))
+        !matches!(self, Self::Log(_))
     }
 
     /// Hand-assemble token IDs for body types where running the tokenizer
@@ -648,13 +652,17 @@ fn drain_safe(buf: &mut String, tag_len: usize) -> String {
 }
 
 impl ResponseParser {
-    pub fn new(branch_idx: usize) -> Self {
+    /// @in_think: whether the model's output begins inside a <think> block.
+    /// Set when the prompt was prefilled with "<think>\n" (native thinking
+    /// mode) so the parser captures reasoning tokens as Thinking until the
+    /// model emits </think>.
+    pub fn new(branch_idx: usize, in_think: bool) -> Self {
         Self {
             branch_idx,
             call_counter: 0,
             buf: String::new(),
             content_parts: Vec::new(),
-            in_think: false,
+            in_think,
             think_buf: String::new(),
             in_tool_call: false,
             tool_call_buf: String::new(),
@@ -1369,7 +1377,7 @@ mod tests {
     fn parse_into_ctx(chunks: &[&str]) -> (ContextState, Vec<PendingToolCall>) {
         let mut ctx = ContextState::new();
         ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
-        let mut p = ResponseParser::new(0);
+        let mut p = ResponseParser::new(0, false);
         let mut calls = Vec::new();
         for chunk in chunks {
             // Feed each chunk as a single token (id=0 for tests)
@@ -1433,7 +1441,7 @@ mod tests {
         let text = "<think>thought</think>response";
         let mut ctx = ContextState::new();
         ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
-        let mut p = ResponseParser::new(0);
+        let mut p = ResponseParser::new(0, false);
         for ch in text.chars() {
             p.feed_token(&ch.to_string(), &mut ctx);
         }
@@ -1449,7 +1457,7 @@ mod tests {
         let text = "text<tool_call>\n<function=bash>\n<parameter=command>ls</parameter>\n</function>\n</tool_call>more";
         let mut ctx = ContextState::new();
         ctx.push_no_log(Section::Conversation, AstNode::branch(Role::Assistant, vec![]));
-        let mut p = ResponseParser::new(0);
+        let mut p = ResponseParser::new(0, false);
         let mut tool_calls = 0;
         for ch in text.chars() {
             tool_calls += p.feed_token(&ch.to_string(), &mut ctx).len();
@@ -1497,8 +1505,10 @@ mod tests {
             AstNode::thinking("hmm"),
             AstNode::content("answer"),
         ]);
-        // Thinking renders as empty, content renders as-is
-        assert_eq!(node.render(), "<|im_start|>assistant\nanswer<|im_end|>\n");
+        // Thinking renders wrapped in <think>...</think> so the model sees
+        // previous turns' reasoning (Qwen 3.6 style: CoT stays in the
+        // conversation across turns).
+        assert_eq!(node.render(), "<|im_start|>assistant\n<think>\nhmm\n</think>\nanswer<|im_end|>\n");
     }
 
     #[test]
@@ -1577,10 +1587,19 @@ mod tests {
     fn test_tokenize_invisible_nodes_are_zero() {
         if !init_tokenizer() { return; }
 
-        assert_eq!(AstNode::thinking("deep thoughts").tokens(), 0);
         assert_eq!(AstNode::log("debug info").tokens(), 0);
     }
 
+    #[test]
+    fn test_tokenize_thinking_matches_rendered_tags() {
+        if !init_tokenizer() { return; }
+
+        // Thinking is now prompt-visible (wrapped in <think>...</think>);
+        // token count must match the rendered wrapping.
+        let node = AstNode::thinking("deep thoughts");
+        assert_eq!(node.tokens(), tokenizer::encode(&node.render()).len());
+    }
+
     #[test]
     fn test_tokenize_decode_roundtrip() {
         if !init_tokenizer() { return; }