tools: control tools set agent state directly, simplify ToolOutput

Control tools (pause, switch_model, yield_to_user) now use the Arc<Mutex<Agent>> handle to set pending_yield, pending_model_switch, and pending_dmn_pause directly. The turn loop drains these flags into TurnResult at completion. ToolOutput is simplified to just { text: String } — the is_yield, images, model_switch, and dmn_pause fields are removed. Vision returns plain strings (the image data URI is appended to the result text).

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-04 16:05:33 -04:00 · 2026-04-04 16:05:33 -04:00 · a24a6605b8
commit a24a6605b8
parent 53ad8cc9df
4 changed files with 47 additions and 125 deletions
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@ -81,6 +81,10 @@ pub struct Agent {
    pub temperature: f32,
    pub top_p: f32,
    pub top_k: u32,
+    /// Control tool flags — set by tool handlers, consumed by turn loop.
+    pub pending_yield: bool,
+    pub pending_model_switch: Option<String>,
+    pub pending_dmn_pause: bool,
    /// Persistent conversation log — append-only record of all messages.
    conversation_log: Option<ConversationLog>,
    /// BPE tokenizer for token counting (cl100k_base — close enough
@ -144,6 +148,9 @@ impl Agent {
            temperature: 0.6,
            top_p: 0.95,
            top_k: 20,
+            pending_yield: false,
+            pending_model_switch: None,
+            pending_dmn_pause: false,
            conversation_log,
            tokenizer,
            context,
@ -555,6 +562,11 @@ impl Agent {
                let _ = ui_tx.send(UiMessage::Activity(String::new()));
                me.push_message(msg);

+                // Drain pending control flags
+                if me.pending_yield { ds.yield_requested = true; me.pending_yield = false; }
+                if me.pending_model_switch.is_some() { ds.model_switch = me.pending_model_switch.take(); }
+                if me.pending_dmn_pause { ds.dmn_pause = true; me.pending_dmn_pause = false; }
+
                return Ok(TurnResult {
                    text,
                    yield_requested: ds.yield_requested,
@ -638,17 +650,7 @@ impl Agent {
        let args: serde_json::Value =
            serde_json::from_str(&call.function.arguments).unwrap_or_default();

-        if output.is_yield {
-            ds.yield_requested = true;
-        } else {
-            ds.had_tool_calls = true;
-        }
-        if output.model_switch.is_some() {
-            ds.model_switch = output.model_switch.clone();
-        }
-        if output.dmn_pause {
-            ds.dmn_pause = true;
-        }
+        ds.had_tool_calls = true;
        if output.text.starts_with("Error:") {
            ds.tool_errors += 1;
        }
@ -671,14 +673,6 @@ impl Agent {
        }

        self.push_message(Message::tool_result(&call.id, &output.text));
-
-        if !output.images.is_empty() {
-            self.age_out_images();
-            self.push_message(Message::user_with_images(
-                "Here is the image you requested:",
-                &output.images,
-            ));
-        }
    }

    /// Build context state summary for the debug screen.
--- a/src/agent/tools/control.rs
+++ b/src/agent/tools/control.rs
@ -1,74 +1,47 @@
 // tools/control.rs — Agent control tools
 //
-// Tools that affect agent control flow rather than performing work.
-// These return Result<ToolOutput> to maintain consistency with other
-// tools that can fail. The dispatch function handles error wrapping.
+// These set agent state directly via the Arc<Mutex<Agent>> handle,
+// then return a text confirmation.

 use anyhow::{Context, Result};

-use super::ToolOutput;
-
-fn pause(_args: &serde_json::Value) -> Result<ToolOutput> {
-    Ok(ToolOutput {
-        text: "Pausing autonomous behavior. Only user input will wake you.".to_string(),
-        is_yield: true,
-        images: Vec::new(),
-        model_switch: None,
-        dmn_pause: true,
-    })
-}
-
-fn switch_model(args: &serde_json::Value) -> Result<ToolOutput> {
-    let model = args
-        .get("model")
-        .and_then(|v| v.as_str())
-        .context("'model' parameter is required")?;
-    if model.is_empty() {
-        anyhow::bail!("'model' parameter cannot be empty");
-    }
-    Ok(ToolOutput {
-        text: format!("Switching to model '{}' after this turn.", model),
-        is_yield: false,
-        images: Vec::new(),
-        model_switch: Some(model.to_string()),
-        dmn_pause: false,
-    })
-}
-
-fn yield_to_user(args: &serde_json::Value) -> Result<ToolOutput> {
-    let msg = args
-        .get("message")
-        .and_then(|v| v.as_str())
-        .unwrap_or("Waiting for input.");
-    Ok(ToolOutput {
-        text: format!("Yielding. {}", msg),
-        is_yield: true,
-        images: Vec::new(),
-        model_switch: None,
-        dmn_pause: false,
-    })
-}
-
 pub(super) fn tools() -> [super::Tool; 3] {
    use super::Tool;
    [
        Tool { name: "switch_model",
               description: "Switch to a different LLM model mid-conversation. Memories and history carry over.",
               parameters_json: r#"{"type":"object","properties":{"model":{"type":"string","description":"Name of the model to switch to"}},"required":["model"]}"#,
-               handler: |_a, v| Box::pin(async move {
-                   let model = v.get("model").and_then(|v| v.as_str()).unwrap_or("");
-                   Ok(format!("Switching to model: {}", model))
+               handler: |agent, v| Box::pin(async move {
+                   let model = v.get("model").and_then(|v| v.as_str())
+                       .ok_or_else(|| anyhow::anyhow!("'model' parameter is required"))?;
+                   if model.is_empty() { anyhow::bail!("'model' parameter cannot be empty"); }
+                   if let Some(agent) = agent {
+                       let mut a = agent.lock().await;
+                       a.pending_model_switch = Some(model.to_string());
+                   }
+                   Ok(format!("Switching to model '{}' after this turn.", model))
               }) },
        Tool { name: "pause",
               description: "Pause all autonomous behavior. Only the user can unpause (Ctrl+P or /wake).",
               parameters_json: r#"{"type":"object","properties":{}}"#,
-               handler: |_a, _v| Box::pin(async { Ok("Pausing autonomous behavior. Only user input will wake you.".into()) }) },
+               handler: |agent, _v| Box::pin(async move {
+                   if let Some(agent) = agent {
+                       let mut a = agent.lock().await;
+                       a.pending_yield = true;
+                       a.pending_dmn_pause = true;
+                   }
+                   Ok("Pausing autonomous behavior. Only user input will wake you.".into())
+               }) },
        Tool { name: "yield_to_user",
               description: "Wait for user input before continuing. The only way to enter a waiting state.",
               parameters_json: r#"{"type":"object","properties":{"message":{"type":"string","description":"Optional status message"}}}"#,
-               handler: |_a, v| Box::pin(async move {
-                   let msg = v.get("message").and_then(|v| v.as_str()).unwrap_or("(yielding to user)");
-                   Ok(msg.to_string())
+               handler: |agent, v| Box::pin(async move {
+                   let msg = v.get("message").and_then(|v| v.as_str()).unwrap_or("Waiting for input.");
+                   if let Some(agent) = agent {
+                       let mut a = agent.lock().await;
+                       a.pending_yield = true;
+                   }
+                   Ok(format!("Yielding. {}", msg))
               }) },
    ]
 }
--- a/src/agent/tools/mod.rs
+++ b/src/agent/tools/mod.rs
@ -137,34 +137,15 @@ pub struct ToolCallDelta {
 /// Result of dispatching a tool call.
 pub struct ToolOutput {
    pub text: String,
-    pub is_yield: bool,
-    /// Base64 data URIs for images to attach to the next message.
-    pub images: Vec<String>,
-    /// Model name to switch to (deferred to session level).
-    pub model_switch: Option<String>,
-    /// Agent requested DMN pause (deferred to session level).
-    pub dmn_pause: bool,
 }

 impl ToolOutput {
    pub fn error(e: impl std::fmt::Display) -> Self {
-        Self {
-            text: format!("Error: {}", e),
-            is_yield: false,
-            images: Vec::new(),
-            model_switch: None,
-            dmn_pause: false,
-        }
+        Self { text: format!("Error: {}", e) }
    }

    pub fn text(s: String) -> Self {
-        Self {
-            text: s,
-            is_yield: false,
-            images: Vec::new(),
-            model_switch: None,
-            dmn_pause: false,
-        }
+        Self { text: s }
    }
 }

--- a/src/agent/tools/vision.rs
+++ b/src/agent/tools/vision.rs
@ -8,8 +8,6 @@ use anyhow::{Context, Result};
 use base64::Engine;
 use serde::Deserialize;

-use super::ToolOutput;
-
 #[derive(Deserialize)]
 struct Args {
    file_path: Option<String>,
@ -29,14 +27,7 @@ pub fn tool() -> super::Tool {
    }
 }

-/// Text-only version for the Tool registry.
 fn view_image_text(args: &serde_json::Value) -> anyhow::Result<String> {
-    let output = view_image(args)?;
-    Ok(output.text)
-}
-
-/// View an image file or capture a tmux pane.
-pub(super) fn view_image(args: &serde_json::Value) -> Result<ToolOutput> {
    let a: Args = serde_json::from_value(args.clone())
        .context("invalid view_image arguments")?;

@ -69,22 +60,11 @@ pub(super) fn view_image(args: &serde_json::Value) -> Result<ToolOutput> {
    let b64 = base64::engine::general_purpose::STANDARD.encode(&data);
    let data_uri = format!("data:{};base64,{}", mime, b64);

-    Ok(ToolOutput {
-        text: format!(
-            "Image loaded: {} ({}, {} bytes)",
-            file_path,
-            mime,
-            data.len()
-        ),
-        is_yield: false,
-        images: vec![data_uri],
-        model_switch: None,
-        dmn_pause: false,
-    })
+    Ok(format!("Image loaded: {} ({}, {} bytes)\n{}", file_path, mime, data.len(), data_uri))
 }

 /// Capture a tmux pane's text content.
-fn capture_tmux_pane(pane_id: &str, lines: usize) -> Result<ToolOutput> {
+fn capture_tmux_pane(pane_id: &str, lines: usize) -> Result<String> {

    // Use tmux capture-pane to get text content, then render to image
    // via a simple approach: capture text and return it (the model can
@ -106,16 +86,10 @@ fn capture_tmux_pane(pane_id: &str, lines: usize) -> Result<ToolOutput> {

    // Return as text — the model can read terminal output directly.
    // This is actually more useful than a screenshot for most tasks.
-    Ok(ToolOutput {
-        text: format!(
-            "Tmux pane {} (last {} lines):\n```\n{}\n```",
-            pane_id, lines, text.trim_end()
-        ),
-        is_yield: false,
-        images: Vec::new(),
-        model_switch: None,
-        dmn_pause: false,
-    })
+    Ok(format!(
+        "Tmux pane {} (last {} lines):\n```\n{}\n```",
+        pane_id, lines, text.trim_end()
+    ))
 }

 fn mime_from_extension(path: &std::path::Path) -> &'static str {