merge poc-agent into poc-memory as agent/ module

Eliminates the circular dependency between poc-agent and poc-memory by moving all poc-agent source into poc-memory/src/agent/. The poc-agent binary now builds from poc-memory/src/bin/poc-agent.rs using library imports. All poc_agent:: references updated to crate::agent::. poc-agent/ directory kept for now (removed from workspace members). Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-03-25 00:52:41 -04:00 · 2026-03-25 00:52:41 -04:00 · 891cca57f8
commit 891cca57f8
parent 01abd795ce
35 changed files with 9178 additions and 88 deletions
--- a/poc-memory/src/agent/tools/vision.rs
+++ b/poc-memory/src/agent/tools/vision.rs
@ -0,0 +1,149 @@
+// tools/vision.rs — Image viewing tool
+//
+// Reads image files from disk and returns them as base64 data URIs
+// for multimodal models. Also supports capturing the text content of
+// a tmux pane (returned as plain text, not as a rendered screenshot).
+
+use anyhow::{Context, Result};
+use base64::Engine;
+use serde::Deserialize;
+
+use super::ToolOutput;
+use crate::agent::types::ToolDef;
+
+/// Arguments accepted by the `view_image` tool, deserialized from the JSON
+/// object passed to `view_image`.
+#[derive(Deserialize)]
+struct Args {
+    /// Path of the image file to load. Only consulted when `pane_id` is absent.
+    file_path: Option<String>,
+    /// Tmux pane target to capture. Takes precedence over `file_path` when set.
+    pane_id: Option<String>,
+    /// Line count forwarded to the tmux capture; `default_lines()` supplies the
+    /// value when the field is omitted.
+    #[serde(default = "default_lines")]
+    lines: usize,
+}
+
+/// Serde fallback for `Args::lines` when the caller omits the field.
+fn default_lines() -> usize {
+    50
+}
+
+/// Build the tool definition for `view_image`.
+///
+/// The parameter schema is a JSON object with three properties, none of them
+/// marked required: `file_path` (string), `pane_id` (string) and `lines`
+/// (integer).
+pub fn definition() -> ToolDef {
+    const NAME: &str = "view_image";
+    const DESCRIPTION: &str = "View an image file or capture a tmux pane screenshot. \
+         Returns the image to your visual input so you can see it. \
+         Supports PNG, JPEG, GIF, WebP files. \
+         Use pane_id (e.g. '0:1.0') to capture a tmux pane instead.";
+
+    // Per-argument schema entries, kept separate from the enclosing object
+    // so each argument reads as one unit.
+    let properties = serde_json::json!({
+        "file_path": {
+            "type": "string",
+            "description": "Path to an image file (PNG, JPEG, GIF, WebP)"
+        },
+        "pane_id": {
+            "type": "string",
+            "description": "Tmux pane ID to capture (e.g. '0:1.0'). Alternative to file_path."
+        },
+        "lines": {
+            "type": "integer",
+            "description": "Number of lines to capture from tmux pane (default: 50)"
+        }
+    });
+
+    let parameters = serde_json::json!({
+        "type": "object",
+        "properties": properties
+    });
+
+    ToolDef::new(NAME, DESCRIPTION, parameters)
+}
+
+/// View an image file or capture a tmux pane.
+///
+/// `args` is deserialized into `Args`. When `pane_id` is present the call is
+/// delegated to `capture_tmux_pane` and `file_path` is ignored. Otherwise the
+/// file at `file_path` is read, base64-encoded, and returned as a `data:` URI
+/// in `ToolOutput::images`, with a one-line summary in `ToolOutput::text`.
+///
+/// # Errors
+///
+/// Fails when the arguments do not deserialize, when neither `pane_id` nor
+/// `file_path` is supplied, when the file does not exist or cannot be read,
+/// or when it is larger than 20 MB.
+pub fn view_image(args: &serde_json::Value) -> Result<ToolOutput> {
+    // Upper bound on the raw (pre-base64) file size we are willing to send.
+    const MAX_SIZE: u64 = 20 * 1024 * 1024; // 20 MB
+
+    // Reject anything above MAX_SIZE with the tool's standard error message.
+    fn ensure_within_limit(bytes: u64) -> Result<()> {
+        if bytes > MAX_SIZE {
+            anyhow::bail!(
+                "Image too large: {} bytes (max {} MB)",
+                bytes,
+                MAX_SIZE / (1024 * 1024)
+            );
+        }
+        Ok(())
+    }
+
+    // `&Value` is itself a serde deserializer, so the argument tree does not
+    // need to be cloned just to be parsed.
+    let a = Args::deserialize(args).context("invalid view_image arguments")?;
+
+    if let Some(pane_id) = a.pane_id.as_deref() {
+        return capture_tmux_pane(pane_id, a.lines);
+    }
+
+    let file_path = a.file_path
+        .as_deref()
+        .context("view_image requires either file_path or pane_id")?;
+
+    let path = std::path::Path::new(file_path);
+    if !path.exists() {
+        anyhow::bail!("File not found: {}", file_path);
+    }
+
+    // Check the on-disk size BEFORE reading, so an oversized file is rejected
+    // without first pulling all of it into memory.
+    let on_disk = std::fs::metadata(path)
+        .with_context(|| format!("Failed to read {}", file_path))?
+        .len();
+    ensure_within_limit(on_disk)?;
+
+    let data = std::fs::read(path).with_context(|| format!("Failed to read {}", file_path))?;
+
+    // Backstop: the file may have grown between the metadata call and the
+    // read. usize -> u64 is a lossless widening on supported targets.
+    ensure_within_limit(data.len() as u64)?;
+
+    let mime = mime_from_extension(path);
+    let b64 = base64::engine::general_purpose::STANDARD.encode(&data);
+    let data_uri = format!("data:{};base64,{}", mime, b64);
+
+    Ok(ToolOutput {
+        text: format!(
+            "Image loaded: {} ({}, {} bytes)",
+            file_path,
+            mime,
+            data.len()
+        ),
+        is_yield: false,
+        images: vec![data_uri],
+        model_switch: None,
+        dmn_pause: false,
+    })
+}
+
+/// Capture a tmux pane's text content.
+///
+/// Runs `tmux capture-pane -t <pane_id> -p -S -<lines>` and wraps the
+/// captured stdout in a fenced block inside `ToolOutput::text`. No image is
+/// produced: the pane is returned as plain text, which the model can read
+/// directly.
+///
+/// # Errors
+///
+/// Fails when the `tmux` process cannot be run or exits with a non-success
+/// status; in the latter case tmux's stderr is included in the error.
+fn capture_tmux_pane(pane_id: &str, lines: usize) -> Result<ToolOutput> {
+    // NOTE(review): per the tmux manual, a negative `-S` value is a start
+    // line inside the scrollback history and the capture runs to the end of
+    // the visible pane, so the output may hold more than `lines` lines.
+    // Confirm that the "last N lines" label below is what is intended.
+    let start_line = format!("-{}", lines);
+
+    let mut tmux = std::process::Command::new("tmux");
+    tmux.arg("capture-pane")
+        .args(["-t", pane_id])
+        .arg("-p")
+        .args(["-S", start_line.as_str()]);
+
+    let captured = tmux
+        .output()
+        .context("Failed to run tmux capture-pane")?;
+
+    if !captured.status.success() {
+        let stderr = String::from_utf8_lossy(&captured.stderr);
+        anyhow::bail!("tmux capture-pane failed: {}", stderr.trim());
+    }
+
+    // Lossy decoding: pane content is not guaranteed to be valid UTF-8.
+    let stdout = String::from_utf8_lossy(&captured.stdout);
+    let text = format!(
+        "Tmux pane {} (last {} lines):\n```\n{}\n```",
+        pane_id,
+        lines,
+        stdout.trim_end()
+    );
+
+    Ok(ToolOutput {
+        text,
+        is_yield: false,
+        images: Vec::new(),
+        model_switch: None,
+        dmn_pause: false,
+    })
+}
+
+/// Guess an image MIME type from the file extension of `path`.
+///
+/// The extension is compared case-insensitively. A path with no extension,
+/// a non-UTF-8 extension, or an unrecognized one yields `"image/png"`.
+fn mime_from_extension(path: &std::path::Path) -> &'static str {
+    // Default assumption when the extension tells us nothing.
+    const FALLBACK: &str = "image/png";
+
+    // A missing or non-UTF-8 extension collapses to "", which matches none
+    // of the arms below and therefore lands on the fallback.
+    let ext = path
+        .extension()
+        .and_then(|e| e.to_str())
+        .unwrap_or("");
+
+    match ext.to_lowercase().as_str() {
+        "png" => "image/png",
+        "jpg" | "jpeg" => "image/jpeg",
+        "gif" => "image/gif",
+        "webp" => "image/webp",
+        "svg" => "image/svg+xml",
+        "bmp" => "image/bmp",
+        _ => FALLBACK,
+    }
+}