diff --git a/src/agent/context.rs b/src/agent/context.rs index d61136f..a42beeb 100644 --- a/src/agent/context.rs +++ b/src/agent/context.rs @@ -900,7 +900,43 @@ impl ResponseParser { } pub fn finish(mut self, ctx: &mut ContextState) { - if !self.buf.is_empty() { + // Salvage any in-flight tag accumulators if the stream ended + // before the close tag arrived (max_tokens, premature EOS, + // server-side cancel). Without this, an unterminated + // `<think>...` block drops all of self.think_buf and only the + // trailing rolling window in self.buf survives — observed as + // "responses cut off, only the last ~8 characters come + // through" because drain_safe keeps `close_tag.len()` bytes + // (8 for `</think>`) at the tail of buf. + if self.in_think { + if !self.buf.is_empty() { + self.think_buf.push_str(&std::mem::take(&mut self.buf)); + } + let text = std::mem::take(&mut self.think_buf).trim().to_string(); + if !text.is_empty() { + self.push_child(ctx, AstNode::thinking(text)); + } + self.in_think = false; + } else if self.in_tool_call { + if !self.buf.is_empty() { + self.tool_call_buf.push_str(&std::mem::take(&mut self.buf)); + } + let body = std::mem::take(&mut self.tool_call_buf); + match parse_tool_call_body(&body) { + Some((name, args)) => { + self.flush_content(ctx); + self.push_child(ctx, AstNode::tool_call(&name, &args)); + } + None => { + // Body's likely incomplete (no closing tool-call tag ever + // arrived). Wrap as content with the open tag so the + // model can see its own truncated attempt next turn + // rather than losing it silently. NOTE(review): the format string below prepends only a newline and no open tag; confirm the tag literal was not lost. + self.content_parts.push(format!("\n{}", body)); + } + } + self.in_tool_call = false; + } else if !self.buf.is_empty() { self.content_parts.push(std::mem::take(&mut self.buf)); } self.flush_content(ctx);