tools: add web_fetch and web_search

web_fetch: HTTP GET, returns body as text. For reading docs, APIs, pages.
web_search: DuckDuckGo HTML search, no API key. Returns title/url/snippet.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
ProofOfConcept 2026-04-04 12:18:11 -04:00 committed by Kent Overstreet
parent fb54488f30
commit 22f955ad9f
2 changed files with 182 additions and 0 deletions

View file

@ -11,6 +11,7 @@ mod glob;
mod grep;
mod memory;
mod read;
mod web;
mod write;
// Agent-specific tools
@ -196,6 +197,8 @@ pub async fn dispatch_shared(
"write_file" => write::write_file(args),
"edit_file" => edit::edit_file(args),
"bash" => bash::run_bash(args).await,
"web_fetch" => web::web_fetch(args).await,
"web_search" => web::web_search(args).await,
"grep" => grep::grep(args),
"glob" => glob::glob_search(args),
_ => return None,
@ -216,6 +219,8 @@ pub fn definitions() -> Vec<ToolDef> {
write::definition(),
edit::definition(),
bash::definition(),
web::fetch_definition(),
web::search_definition(),
grep::definition(),
glob::definition(),
];

177
src/agent/tools/web.rs Normal file
View file

@ -0,0 +1,177 @@
// tools/web.rs — Web fetch and search
use anyhow::{Context, Result};
use serde::Deserialize;
use serde_json::json;
use super::ToolDef;
// ── Fetch ───────────────────────────────────────────────────────
/// Arguments for the `web_fetch` tool, deserialized from the
/// tool-call JSON (see `fetch_definition` for the advertised schema).
#[derive(Deserialize)]
struct FetchArgs {
    /// The URL to fetch.
    url: String,
}
/// Tool definition advertised to the model for `web_fetch`.
pub fn fetch_definition() -> ToolDef {
    // JSON-schema for the single required `url` argument.
    let schema = json!({
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": "The URL to fetch"
            }
        },
        "required": ["url"]
    });
    ToolDef::new(
        "web_fetch",
        "Fetch content from a URL and return it as text. Use for reading web pages, API responses, documentation.",
        schema,
    )
}
pub async fn web_fetch(args: &serde_json::Value) -> Result<String> {
let a: FetchArgs = serde_json::from_value(args.clone())
.context("invalid web_fetch arguments")?;
let client = http_client()?;
let response = client.get(&a.url)
.header("User-Agent", "consciousness/0.3")
.send()
.await
.with_context(|| format!("failed to fetch {}", a.url))?;
let status = response.status();
if !status.is_success() {
anyhow::bail!("HTTP {}: {}", status, a.url);
}
let body = response.text().await
.with_context(|| format!("failed to read body from {}", a.url))?;
Ok(super::truncate_output(body, 30000))
}
// ── Search ──────────────────────────────────────────────────────
/// Arguments for the `web_search` tool, deserialized from the
/// tool-call JSON (see `search_definition` for the advertised schema).
#[derive(Deserialize)]
struct SearchArgs {
    /// The search query string.
    query: String,
    /// Maximum number of results to return; defaults to 5 when the
    /// caller omits the field.
    #[serde(default = "default_num_results")]
    num_results: usize,
}
/// Serde default for `SearchArgs::num_results`.
fn default_num_results() -> usize { 5 }
/// Tool definition advertised to the model for `web_search`.
pub fn search_definition() -> ToolDef {
    // JSON-schema: `query` is required, `num_results` optional.
    let schema = json!({
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The search query"
            },
            "num_results": {
                "type": "integer",
                "description": "Number of results to return (default 5)"
            }
        },
        "required": ["query"]
    });
    ToolDef::new(
        "web_search",
        "Search the web and return results. Use for finding documentation, looking up APIs, researching topics.",
        schema,
    )
}
/// Search the web via DuckDuckGo's HTML endpoint (no API key) and
/// return up to `num_results` results formatted as
/// `"N. title\n url\n snippet"` blocks separated by blank lines.
pub async fn web_search(args: &serde_json::Value) -> Result<String> {
    let a: SearchArgs = serde_json::from_value(args.clone())
        .context("invalid web_search arguments")?;
    // Use DuckDuckGo HTML search — no API key needed
    let client = http_client()?;
    // Percent-encode the query: unreserved characters (RFC 3986) pass
    // through, spaces become '+', and everything else is escaped
    // byte-by-byte from its UTF-8 encoding. (The previous version
    // escaped the Unicode scalar value — `%{:02X}` of `c as u32` —
    // which emits invalid escapes such as "%4E2D" for non-ASCII input.)
    let mut encoded = String::with_capacity(a.query.len());
    for c in a.query.chars() {
        if c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.' | '~') {
            encoded.push(c);
        } else if c == ' ' {
            encoded.push('+');
        } else {
            let mut buf = [0u8; 4];
            for &b in c.encode_utf8(&mut buf).as_bytes() {
                encoded.push_str(&format!("%{:02X}", b));
            }
        }
    }
    let url = format!("https://html.duckduckgo.com/html/?q={}", encoded);
    let response = client.get(&url)
        .header("User-Agent", "consciousness/0.3")
        .send()
        .await
        .context("search request failed")?;
    let body = response.text().await
        .context("failed to read search results")?;
    // Extract result snippets from DDG HTML: each result sits in an
    // element carrying class="result__body"; split on that marker and
    // mine every chunk for title, link and snippet.
    let mut results = Vec::new();
    for chunk in body.split("class=\"result__body\"") {
        if results.len() >= a.num_results { break; }
        if results.is_empty() && !chunk.contains("result__title") {
            // Skip the first split (before any results)
            continue;
        }
        // Title: inner text of the <a class="result__a"> anchor.
        let title = extract_between(chunk, "class=\"result__a\"", "</a>")
            .map(strip_tags)
            .unwrap_or_default();
        // URL: the first href attribute value in the chunk. Read it
        // directly — extract_between() skips past the enclosing tag's
        // '>' and would return text *after* the tag, never the
        // attribute value.
        let href = chunk
            .find("href=\"")
            .and_then(|i| {
                let rest = &chunk[i + "href=\"".len()..];
                rest.find('"').map(|j| &rest[..j])
            })
            .unwrap_or_default();
        // Snippet: inner text of the result__snippet anchor.
        let snippet = extract_between(chunk, "class=\"result__snippet\"", "</a>")
            .map(strip_tags)
            .unwrap_or_default();
        if !title.is_empty() {
            results.push(format!("{}. {}\n {}\n {}", results.len() + 1, title.trim(), href.trim(), snippet.trim()));
        }
    }
    if results.is_empty() {
        Ok(format!("No results found for: {}", a.query))
    } else {
        Ok(results.join("\n\n"))
    }
}
// ── Helpers ─────────────────────────────────────────────────────
/// Construct a reqwest client with a 30-second request timeout.
/// NOTE(review): a new client is built per call; reqwest recommends
/// reusing one Client, but behavior here is unchanged.
fn http_client() -> Result<reqwest::Client> {
    let builder = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(30));
    builder.build().context("failed to build HTTP client")
}
/// Return the slice of `text` between the `start` and `end` markers,
/// after first skipping past the `>` that closes the tag containing
/// `start`. Tailored to pulling element bodies out of DDG HTML, e.g.
/// `extract_between(html, "class=\"result__a\"", "</a>")` yields the
/// anchor's inner HTML. Returns `None` if any marker is missing.
fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    let marker_end = text.find(start)? + start.len();
    // Remainder after the start marker, e.g. ` href="...">inner</a>...`
    let after = &text[marker_end..];
    // Jump past the enclosing opening tag's '>'.
    let inner = &after[after.find('>')? + 1..];
    let stop = inner.find(end)?;
    Some(&inner[..stop])
}
/// Remove HTML tags from `s`, keeping only text outside `<...>` spans.
/// The '<' and '>' delimiters themselves are dropped too; character
/// entities such as `&amp;` are left untouched.
fn strip_tags(s: &str) -> String {
    let mut inside_tag = false;
    s.chars()
        .filter(|&c| match c {
            '<' => {
                inside_tag = true;
                false
            }
            '>' => {
                inside_tag = false;
                false
            }
            _ => !inside_tag,
        })
        .collect()
}