From a24a6605b844852961758eb9193f7dc3d9dab600 Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Sat, 4 Apr 2026 16:05:33 -0400 Subject: [PATCH] tools: control tools set agent state directly, simplify ToolOutput MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Control tools (pause, switch_model, yield_to_user) now use the Arc<Mutex<Agent>> handle to set pending_yield, pending_model_switch, pending_dmn_pause directly. The turn loop drains these flags into TurnResult at completion. ToolOutput simplified to just { text: String } — no more is_yield, images, model_switch, dmn_pause fields. Vision returns plain strings. Co-Authored-By: Proof of Concept --- src/agent/mod.rs | 32 +++++++-------- src/agent/tools/control.rs | 79 +++++++++++++------------------------- src/agent/tools/mod.rs | 23 +---------- src/agent/tools/vision.rs | 38 +++--------------- 4 files changed, 47 insertions(+), 125 deletions(-) diff --git a/src/agent/mod.rs b/src/agent/mod.rs index 2d13552..328ff26 100644 --- a/src/agent/mod.rs +++ b/src/agent/mod.rs @@ -81,6 +81,10 @@ pub struct Agent { pub temperature: f32, pub top_p: f32, pub top_k: u32, + /// Control tool flags — set by tool handlers, consumed by turn loop. + pub pending_yield: bool, + pub pending_model_switch: Option, + pub pending_dmn_pause: bool, /// Persistent conversation log — append-only record of all messages. 
conversation_log: Option, /// BPE tokenizer for token counting (cl100k_base — close enough @@ -144,6 +148,9 @@ impl Agent { temperature: 0.6, top_p: 0.95, top_k: 20, + pending_yield: false, + pending_model_switch: None, + pending_dmn_pause: false, conversation_log, tokenizer, context, @@ -555,6 +562,11 @@ impl Agent { let _ = ui_tx.send(UiMessage::Activity(String::new())); me.push_message(msg); + // Drain pending control flags + if me.pending_yield { ds.yield_requested = true; me.pending_yield = false; } + if me.pending_model_switch.is_some() { ds.model_switch = me.pending_model_switch.take(); } + if me.pending_dmn_pause { ds.dmn_pause = true; me.pending_dmn_pause = false; } + return Ok(TurnResult { text, yield_requested: ds.yield_requested, @@ -638,17 +650,7 @@ impl Agent { let args: serde_json::Value = serde_json::from_str(&call.function.arguments).unwrap_or_default(); - if output.is_yield { - ds.yield_requested = true; - } else { - ds.had_tool_calls = true; - } - if output.model_switch.is_some() { - ds.model_switch = output.model_switch.clone(); - } - if output.dmn_pause { - ds.dmn_pause = true; - } + ds.had_tool_calls = true; if output.text.starts_with("Error:") { ds.tool_errors += 1; } @@ -671,14 +673,6 @@ impl Agent { } self.push_message(Message::tool_result(&call.id, &output.text)); - - if !output.images.is_empty() { - self.age_out_images(); - self.push_message(Message::user_with_images( - "Here is the image you requested:", - &output.images, - )); - } } /// Build context state summary for the debug screen. diff --git a/src/agent/tools/control.rs b/src/agent/tools/control.rs index 0c17ba1..a24f471 100644 --- a/src/agent/tools/control.rs +++ b/src/agent/tools/control.rs @@ -1,74 +1,47 @@ // tools/control.rs — Agent control tools // -// Tools that affect agent control flow rather than performing work. -// These return Result to maintain consistency with other -// tools that can fail. The dispatch function handles error wrapping. 
+// These set agent state directly via the Arc<Mutex<Agent>> handle, +// then return a text confirmation. use anyhow::{Context, Result}; -use super::ToolOutput; - -fn pause(_args: &serde_json::Value) -> Result { - Ok(ToolOutput { - text: "Pausing autonomous behavior. Only user input will wake you.".to_string(), - is_yield: true, - images: Vec::new(), - model_switch: None, - dmn_pause: true, - }) -} - -fn switch_model(args: &serde_json::Value) -> Result { - let model = args - .get("model") - .and_then(|v| v.as_str()) - .context("'model' parameter is required")?; - if model.is_empty() { - anyhow::bail!("'model' parameter cannot be empty"); - } - Ok(ToolOutput { - text: format!("Switching to model '{}' after this turn.", model), - is_yield: false, - images: Vec::new(), - model_switch: Some(model.to_string()), - dmn_pause: false, - }) -} - -fn yield_to_user(args: &serde_json::Value) -> Result { - let msg = args - .get("message") - .and_then(|v| v.as_str()) - .unwrap_or("Waiting for input."); - Ok(ToolOutput { - text: format!("Yielding. {}", msg), - is_yield: true, - images: Vec::new(), - model_switch: None, - dmn_pause: false, - }) -} - pub(super) fn tools() -> [super::Tool; 3] { use super::Tool; [ Tool { name: "switch_model", description: "Switch to a different LLM model mid-conversation. 
Memories and history carry over.", parameters_json: r#"{"type":"object","properties":{"model":{"type":"string","description":"Name of the model to switch to"}},"required":["model"]}"#, - handler: |_a, v| Box::pin(async move { - let model = v.get("model").and_then(|v| v.as_str()).unwrap_or(""); - Ok(format!("Switching to model: {}", model)) + handler: |agent, v| Box::pin(async move { + let model = v.get("model").and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("'model' parameter is required"))?; + if model.is_empty() { anyhow::bail!("'model' parameter cannot be empty"); } + if let Some(agent) = agent { + let mut a = agent.lock().await; + a.pending_model_switch = Some(model.to_string()); + } + Ok(format!("Switching to model '{}' after this turn.", model)) }) }, Tool { name: "pause", description: "Pause all autonomous behavior. Only the user can unpause (Ctrl+P or /wake).", parameters_json: r#"{"type":"object","properties":{}}"#, - handler: |_a, _v| Box::pin(async { Ok("Pausing autonomous behavior. Only user input will wake you.".into()) }) }, + handler: |agent, _v| Box::pin(async move { + if let Some(agent) = agent { + let mut a = agent.lock().await; + a.pending_yield = true; + a.pending_dmn_pause = true; + } + Ok("Pausing autonomous behavior. Only user input will wake you.".into()) + }) }, Tool { name: "yield_to_user", description: "Wait for user input before continuing. The only way to enter a waiting state.", parameters_json: r#"{"type":"object","properties":{"message":{"type":"string","description":"Optional status message"}}}"#, - handler: |_a, v| Box::pin(async move { - let msg = v.get("message").and_then(|v| v.as_str()).unwrap_or("(yielding to user)"); - Ok(msg.to_string()) + handler: |agent, v| Box::pin(async move { + let msg = v.get("message").and_then(|v| v.as_str()).unwrap_or("Waiting for input."); + if let Some(agent) = agent { + let mut a = agent.lock().await; + a.pending_yield = true; + } + Ok(format!("Yielding. 
{}", msg)) }) }, ] } diff --git a/src/agent/tools/mod.rs b/src/agent/tools/mod.rs index ac10f85..9ff8088 100644 --- a/src/agent/tools/mod.rs +++ b/src/agent/tools/mod.rs @@ -137,34 +137,15 @@ pub struct ToolCallDelta { /// Result of dispatching a tool call. pub struct ToolOutput { pub text: String, - pub is_yield: bool, - /// Base64 data URIs for images to attach to the next message. - pub images: Vec, - /// Model name to switch to (deferred to session level). - pub model_switch: Option, - /// Agent requested DMN pause (deferred to session level). - pub dmn_pause: bool, } impl ToolOutput { pub fn error(e: impl std::fmt::Display) -> Self { - Self { - text: format!("Error: {}", e), - is_yield: false, - images: Vec::new(), - model_switch: None, - dmn_pause: false, - } + Self { text: format!("Error: {}", e) } } pub fn text(s: String) -> Self { - Self { - text: s, - is_yield: false, - images: Vec::new(), - model_switch: None, - dmn_pause: false, - } + Self { text: s } } } diff --git a/src/agent/tools/vision.rs b/src/agent/tools/vision.rs index 17e1c9d..e340db2 100644 --- a/src/agent/tools/vision.rs +++ b/src/agent/tools/vision.rs @@ -8,8 +8,6 @@ use anyhow::{Context, Result}; use base64::Engine; use serde::Deserialize; -use super::ToolOutput; - #[derive(Deserialize)] struct Args { file_path: Option, @@ -29,14 +27,7 @@ pub fn tool() -> super::Tool { } } -/// Text-only version for the Tool registry. fn view_image_text(args: &serde_json::Value) -> anyhow::Result { - let output = view_image(args)?; - Ok(output.text) -} - -/// View an image file or capture a tmux pane. 
-pub(super) fn view_image(args: &serde_json::Value) -> Result { let a: Args = serde_json::from_value(args.clone()) .context("invalid view_image arguments")?; @@ -69,22 +60,11 @@ pub(super) fn view_image(args: &serde_json::Value) -> Result { let b64 = base64::engine::general_purpose::STANDARD.encode(&data); let data_uri = format!("data:{};base64,{}", mime, b64); - Ok(ToolOutput { - text: format!( - "Image loaded: {} ({}, {} bytes)", - file_path, - mime, - data.len() - ), - is_yield: false, - images: vec![data_uri], - model_switch: None, - dmn_pause: false, - }) + Ok(format!("Image loaded: {} ({}, {} bytes)\n{}", file_path, mime, data.len(), data_uri)) } /// Capture a tmux pane's text content. -fn capture_tmux_pane(pane_id: &str, lines: usize) -> Result { +fn capture_tmux_pane(pane_id: &str, lines: usize) -> Result { // Use tmux capture-pane to get text content, then render to image // via a simple approach: capture text and return it (the model can @@ -106,16 +86,10 @@ fn capture_tmux_pane(pane_id: &str, lines: usize) -> Result { // Return as text — the model can read terminal output directly. // This is actually more useful than a screenshot for most tasks. - Ok(ToolOutput { - text: format!( - "Tmux pane {} (last {} lines):\n```\n{}\n```", - pane_id, lines, text.trim_end() - ), - is_yield: false, - images: Vec::new(), - model_switch: None, - dmn_pause: false, - }) + Ok(format!( + "Tmux pane {} (last {} lines):\n```\n{}\n```", + pane_id, lines, text.trim_end() + )) } fn mime_from_extension(path: &std::path::Path) -> &'static str {