// tools/vision.rs — Image viewing tool
//
// Reads image files from disk and returns them as base64 data URIs
// for multimodal models. Also supports capturing the contents of a
// tmux pane, which are returned as plain text rather than as an image.

use anyhow::{Context, Result};
use base64::Engine;
use serde::Deserialize;

use super::ToolOutput;

/// Arguments accepted by the `view_image` tool, deserialized from the JSON
/// argument object.
///
/// Both `file_path` and `pane_id` are optional at the type level; `view_image`
/// checks `pane_id` first and only falls back to `file_path` when it is absent.
#[derive(Deserialize)]
struct Args {
    /// Path of the image file to load. Not consulted when `pane_id` is set.
    file_path: Option<String>,
    /// Tmux pane to capture; when present it takes precedence over `file_path`.
    pane_id: Option<String>,
    /// Line count forwarded to the tmux capture; filled in by `default_lines`
    /// when the field is omitted from the JSON.
    #[serde(default = "default_lines")]
    lines: usize,
}

/// Serde default for `Args::lines`, used when the caller omits the field.
fn default_lines() -> usize {
    const DEFAULT_CAPTURE_LINES: usize = 50;
    DEFAULT_CAPTURE_LINES
}

/// Builds the tool-registry entry for `view_image`.
///
/// The entry carries the tool name, a human-readable description, the JSON
/// schema of the accepted arguments, and a handler. The handler ignores its
/// first argument and forwards the JSON arguments to `view_image_text`, so
/// only the textual part of the result is returned through this entry.
pub fn tool() -> super::Tool {
    super::Tool {
        name: "view_image",
        description: "View an image file or capture a tmux pane screenshot. Supports PNG, JPEG, GIF, WebP. Use pane_id to capture a tmux pane instead.",
        // Schema mirrors the fields of `Args`; none of them is marked required.
        parameters_json: r#"{"type":"object","properties":{"file_path":{"type":"string","description":"Path to an image file"},"pane_id":{"type":"string","description":"Tmux pane ID to capture (e.g. '0:1.0')"},"lines":{"type":"integer","description":"Lines to capture from tmux pane (default 50)"}}}"#,
        // NOTE(review): `view_image_text` performs synchronous file and process
        // I/O inside this async block — confirm that is acceptable for the
        // executor that drives the handler.
        handler: |_a, v| Box::pin(async move { view_image_text(&v) }),
    }
}

/// Text-only adapter used by the Tool registry.
///
/// Delegates to `view_image` and keeps just the `text` field of its output;
/// any error is passed through unchanged.
fn view_image_text(args: &serde_json::Value) -> anyhow::Result<String> {
    view_image(args).map(|result| result.text)
}

/// View an image file or capture a tmux pane.
///
/// `args` is the tool's JSON argument object (see `Args`). When `pane_id` is
/// present, the pane's text is captured and `file_path` is not consulted.
/// Otherwise the file at `file_path` is read and returned as a base64 `data:`
/// URI in `ToolOutput::images`, with a one-line summary in `ToolOutput::text`.
///
/// # Errors
///
/// Fails when the arguments do not deserialize, when neither `file_path` nor
/// `pane_id` is given, when the file cannot be found or read, or when it is
/// larger than 20 MB.
pub(super) fn view_image(args: &serde_json::Value) -> Result<ToolOutput> {
    // Upper bound on the image size that will be forwarded (20 MB).
    const MAX_SIZE: u64 = 20 * 1024 * 1024;

    // Shared size guard so the pre-read and post-read checks report identically.
    let check_size = |bytes: u64| -> Result<()> {
        if bytes > MAX_SIZE {
            anyhow::bail!(
                "Image too large: {} bytes (max {} MB)",
                bytes,
                MAX_SIZE / (1024 * 1024)
            );
        }
        Ok(())
    };

    let a: Args = serde_json::from_value(args.clone())
        .context("invalid view_image arguments")?;

    // A pane capture takes precedence over a file path when both are supplied.
    if let Some(ref pane_id) = a.pane_id {
        return capture_tmux_pane(pane_id, a.lines);
    }

    let file_path = a.file_path
        .as_deref()
        .context("view_image requires either file_path or pane_id")?;

    let path = std::path::Path::new(file_path);

    // A failed metadata lookup is reported as "not found", which is the same
    // condition `Path::exists` tests for.
    let size_on_disk = match std::fs::metadata(path) {
        Ok(meta) => meta.len(),
        Err(_) => anyhow::bail!("File not found: {}", file_path),
    };

    // Reject oversized files *before* pulling them into memory.
    check_size(size_on_disk)?;

    let data = std::fs::read(path).with_context(|| format!("Failed to read {}", file_path))?;

    // Re-check after reading: the file may have grown since the metadata call.
    // `usize` -> `u64` is a widening cast on every supported target.
    check_size(data.len() as u64)?;

    let mime = mime_from_extension(path);
    let b64 = base64::engine::general_purpose::STANDARD.encode(&data);
    let data_uri = format!("data:{};base64,{}", mime, b64);

    Ok(ToolOutput {
        text: format!(
            "Image loaded: {} ({}, {} bytes)",
            file_path,
            mime,
            data.len()
        ),
        is_yield: false,
        images: vec![data_uri],
        model_switch: None,
        dmn_pause: false,
    })
}

/// Capture a tmux pane's text content.
///
/// Runs `tmux capture-pane -p` against `pane_id` with a start line of
/// `-<lines>` and wraps the captured text in a fenced block. No image is
/// produced: `ToolOutput::images` is left empty.
///
/// # Errors
///
/// Fails when `tmux` cannot be launched, or when it exits unsuccessfully (its
/// trimmed stderr is included in the error message).
fn capture_tmux_pane(pane_id: &str, lines: usize) -> Result<ToolOutput> {
    // Plain text is returned rather than a rendered screenshot: the model can
    // read terminal output directly, and a pixel-level capture would need a
    // terminal renderer.
    //
    // NOTE(review): per the tmux manual, a negative `-S` value counts lines
    // into the history and, with no `-E`, the capture runs to the end of the
    // visible pane — so the output may hold more than `lines` lines. Confirm
    // the "last N lines" label below matches the intended contract.
    let start_line = format!("-{}", lines);

    let captured = std::process::Command::new("tmux")
        .arg("capture-pane")
        .arg("-t")
        .arg(pane_id)
        .arg("-p")
        .arg("-S")
        .arg(&start_line)
        .output()
        .context("Failed to run tmux capture-pane")?;

    if !captured.status.success() {
        let reason = String::from_utf8_lossy(&captured.stderr);
        anyhow::bail!("tmux capture-pane failed: {}", reason.trim());
    }

    // Invalid UTF-8 in the pane is replaced rather than treated as an error.
    let pane_text = String::from_utf8_lossy(&captured.stdout);
    let body = pane_text.trim_end();

    Ok(ToolOutput {
        text: format!(
            "Tmux pane {} (last {} lines):\n```\n{}\n```",
            pane_id, lines, body
        ),
        is_yield: false,
        images: Vec::new(),
        model_switch: None,
        dmn_pause: false,
    })
}

/// Guess an image MIME type from a path's file extension.
///
/// The extension is compared case-insensitively. Paths with no extension, a
/// non-UTF-8 extension, or an unrecognized one fall back to `image/png`.
fn mime_from_extension(path: &std::path::Path) -> &'static str {
    // Default assumption when the extension tells us nothing.
    const FALLBACK: &str = "image/png";

    let ext = match path.extension().and_then(|raw| raw.to_str()) {
        Some(text) => text.to_lowercase(),
        None => return FALLBACK,
    };

    match ext.as_str() {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp",
        "svg" => "image/svg+xml",
        "bmp" => "image/bmp",
        _ => FALLBACK,
    }
}