// consciousness/src/agent/tools/web.rs

// tools/web.rs — Web fetch and search

use anyhow::{Context, Result};
use serde::Deserialize;
use serde_json::json;

use super::ToolDef;

/// Returns the two web tools exposed by this module, in a fixed order:
/// `web_fetch` first, then `web_search`.
///
/// Each entry pairs a tool definition with a handler that forwards the JSON
/// arguments to the matching async function. The handler's first argument is
/// not used by either tool.
pub fn tools() -> [super::Tool; 2] {
    let fetch_tool = super::Tool {
        def: fetch_definition(),
        handler: |_unused, input| Box::pin(async move { web_fetch(&input).await }),
    };
    let search_tool = super::Tool {
        def: search_definition(),
        handler: |_unused, input| Box::pin(async move { web_search(&input).await }),
    };

    [fetch_tool, search_tool]
}

// ── Fetch ───────────────────────────────────────────────────────

/// Arguments of the `web_fetch` tool, deserialized from the tool call's JSON.
#[derive(Deserialize)]
struct FetchArgs {
    /// URL to request. Required: there is no serde default, so deserialization
    /// fails when the field is missing.
    url: String,
}

/// Builds the definition of the `web_fetch` tool: its name, its description,
/// and a JSON schema with a single required string parameter, `url`.
fn fetch_definition() -> ToolDef {
    // Schema of the only parameter the tool accepts.
    let url_param = json!({
        "type": "string",
        "description": "The URL to fetch"
    });

    let parameters = json!({
        "type": "object",
        "properties": { "url": url_param },
        "required": ["url"]
    });

    ToolDef::new(
        "web_fetch",
        "Fetch content from a URL and return it as text. \
         Use for reading web pages, API responses, documentation.",
        parameters,
    )
}

async fn web_fetch(args: &serde_json::Value) -> Result<String> {
    let a: FetchArgs = serde_json::from_value(args.clone())
        .context("invalid web_fetch arguments")?;

    let client = http_client()?;
    let response = client.get(&a.url)
        .header("User-Agent", "consciousness/0.3")
        .send()
        .await
        .with_context(|| format!("failed to fetch {}", a.url))?;

    let status = response.status();
    if !status.is_success() {
        anyhow::bail!("HTTP {}: {}", status, a.url);
    }

    let body = response.text().await
        .with_context(|| format!("failed to read body from {}", a.url))?;

    Ok(super::truncate_output(body, 30000))
}

// ── Search ──────────────────────────────────────────────────────

/// Arguments of the `web_search` tool, deserialized from the tool call's JSON.
#[derive(Deserialize)]
struct SearchArgs {
    /// Search query text. Required: deserialization fails when it is missing.
    query: String,
    /// Upper bound on the number of results returned. Optional: serde fills in
    /// `default_num_results()` when the field is omitted.
    #[serde(default = "default_num_results")]
    num_results: usize,
}

/// Serde default for `SearchArgs::num_results`, used when the caller does not
/// specify how many results it wants.
fn default_num_results() -> usize {
    const DEFAULT_RESULT_COUNT: usize = 5;
    DEFAULT_RESULT_COUNT
}

/// Builds the definition of the `web_search` tool: its name, its description,
/// and a JSON schema with a required string `query` and an optional integer
/// `num_results`.
fn search_definition() -> ToolDef {
    // Per-parameter schemas, assembled into the object schema below.
    let query_param = json!({
        "type": "string",
        "description": "The search query"
    });
    let num_results_param = json!({
        "type": "integer",
        "description": "Number of results to return (default 5)"
    });

    let parameters = json!({
        "type": "object",
        "properties": {
            "query": query_param,
            "num_results": num_results_param
        },
        "required": ["query"]
    });

    ToolDef::new(
        "web_search",
        "Search the web and return results. Use for finding \
         documentation, looking up APIs, researching topics.",
        parameters,
    )
}

async fn web_search(args: &serde_json::Value) -> Result<String> {
    let a: SearchArgs = serde_json::from_value(args.clone())
        .context("invalid web_search arguments")?;

    // Use DuckDuckGo HTML search — no API key needed
    let client = http_client()?;
    let encoded: String = a.query.chars().map(|c| {
        if c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' {
            c.to_string()
        } else if c == ' ' {
            "+".to_string()
        } else {
            format!("%{:02X}", c as u32)
        }
    }).collect();
    let url = format!("https://html.duckduckgo.com/html/?q={}", encoded);
    let response = client.get(&url)
        .header("User-Agent", "consciousness/0.3")
        .send()
        .await
        .context("search request failed")?;

    let body = response.text().await
        .context("failed to read search results")?;

    // Extract result snippets from DDG HTML
    let mut results = Vec::new();
    for chunk in body.split("class=\"result__body\"") {
        if results.len() >= a.num_results { break; }
        if results.is_empty() && !chunk.contains("result__title") {
            // Skip the first split (before any results)
            continue;
        }

        // Extract title
        let title = extract_between(chunk, "class=\"result__a\"", "</a>")
            .map(strip_tags)
            .unwrap_or_default();

        // Extract URL
        let href = extract_between(chunk, "href=\"", "\"")
            .unwrap_or_default();

        // Extract snippet
        let snippet = extract_between(chunk, "class=\"result__snippet\"", "</a>")
            .map(strip_tags)
            .unwrap_or_default();

        if !title.is_empty() {
            results.push(format!("{}. {}\n   {}\n   {}", results.len() + 1, title.trim(), href.trim(), snippet.trim()));
        }
    }

    if results.is_empty() {
        Ok(format!("No results found for: {}", a.query))
    } else {
        Ok(results.join("\n\n"))
    }
}

// ── Helpers ─────────────────────────────────────────────────────

/// Builds the `reqwest` client used by both tools, configured with a
/// 30-second timeout.
///
/// # Errors
///
/// Returns an error when the client cannot be constructed.
fn http_client() -> Result<reqwest::Client> {
    let request_timeout = std::time::Duration::from_secs(30);
    let builder = reqwest::Client::builder().timeout(request_timeout);
    builder.build().context("failed to build HTTP client")
}

/// Returns the content of the tag identified by `start`, up to `end`.
///
/// The search proceeds in three steps, each on the first match only:
/// locate `start`, skip forward past the next `>` (the end of the opening
/// tag that `start` sits in), then capture everything up to `end`.
/// Returns `None` as soon as any of the three is not found.
fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    let (_, after_marker) = text.split_once(start)?;
    // The marker is an attribute inside an opening tag; drop the remainder
    // of that tag so only the tag's content is left.
    let (_, content_onwards) = after_marker.split_once('>')?;
    let (content, _) = content_onwards.split_once(end)?;
    Some(content)
}

/// Removes markup from `s`, keeping only the text outside of `<...>` spans.
///
/// Every `<` opens a span and every `>` closes it; both delimiters are always
/// dropped, even when unbalanced. Text after an unclosed `<` is discarded.
fn strip_tags(s: &str) -> String {
    let mut inside_markup = false;
    s.chars()
        .filter(|&c| match c {
            '<' => {
                inside_markup = true;
                false
            }
            '>' => {
                inside_markup = false;
                false
            }
            _ => !inside_markup,
        })
        .collect()
}