diff --git a/src/agent/context.rs b/src/agent/context.rs
index d61136f..a42beeb 100644
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@@ -900,7 +900,43 @@ impl ResponseParser {
}
pub fn finish(mut self, ctx: &mut ContextState) {
- if !self.buf.is_empty() {
+ // Salvage any in-flight tag accumulators if the stream ended
+ // before the close tag arrived (max_tokens, premature EOS,
+ // server-side cancel). Without this, an unterminated
+ // `<think>...` drops all of self.think_buf and only the
+ // trailing rolling window in self.buf survives — observed as
+ // "responses cut off, only the last ~8 characters come
+ // through" because drain_safe keeps `close_tag.len()` bytes
+ // (8 for `</think>`) at the tail of buf.
+ if self.in_think {
+ if !self.buf.is_empty() {
+ self.think_buf.push_str(&std::mem::take(&mut self.buf));
+ }
+ let text = std::mem::take(&mut self.think_buf).trim().to_string();
+ if !text.is_empty() {
+ self.push_child(ctx, AstNode::thinking(text));
+ }
+ self.in_think = false;
+ } else if self.in_tool_call {
+ if !self.buf.is_empty() {
+ self.tool_call_buf.push_str(&std::mem::take(&mut self.buf));
+ }
+ let body = std::mem::take(&mut self.tool_call_buf);
+ match parse_tool_call_body(&body) {
+ Some((name, args)) => {
+ self.flush_content(ctx);
+ self.push_child(ctx, AstNode::tool_call(&name, &args));
+ }
+ None => {
+ // Body's likely incomplete (no `</tool_call>` ever
+ // arrived). Wrap as content with the open tag so the
+ // model can see its own truncated attempt next turn
+ // rather than losing it silently.
+ self.content_parts.push(format!("\n{}", body));
+ }
+ }
+ self.in_tool_call = false;
+ } else if !self.buf.is_empty() {
self.content_parts.push(std::mem::take(&mut self.buf));
}
self.flush_content(ctx);