diff --git a/src/agent/tools/mod.rs b/src/agent/tools/mod.rs index 050c6d1..81ba279 100644 --- a/src/agent/tools/mod.rs +++ b/src/agent/tools/mod.rs @@ -11,6 +11,7 @@ mod glob; mod grep; mod memory; mod read; +mod web; mod write; // Agent-specific tools @@ -196,6 +197,8 @@ pub async fn dispatch_shared( "write_file" => write::write_file(args), "edit_file" => edit::edit_file(args), "bash" => bash::run_bash(args).await, + "web_fetch" => web::web_fetch(args).await, + "web_search" => web::web_search(args).await, "grep" => grep::grep(args), "glob" => glob::glob_search(args), _ => return None, @@ -216,6 +219,8 @@ pub fn definitions() -> Vec { write::definition(), edit::definition(), bash::definition(), + web::fetch_definition(), + web::search_definition(), grep::definition(), glob::definition(), ]; diff --git a/src/agent/tools/web.rs b/src/agent/tools/web.rs new file mode 100644 index 0000000..baa5e87 --- /dev/null +++ b/src/agent/tools/web.rs @@ -0,0 +1,177 @@ +// tools/web.rs — Web fetch and search + +use anyhow::{Context, Result}; +use serde::Deserialize; +use serde_json::json; + +use super::ToolDef; + +// ── Fetch ─────────────────────────────────────────────────────── + +#[derive(Deserialize)] +struct FetchArgs { + url: String, +} + +pub fn fetch_definition() -> ToolDef { + ToolDef::new( + "web_fetch", + "Fetch content from a URL and return it as text. \ + Use for reading web pages, API responses, documentation.", + json!({ + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "The URL to fetch" + } + }, + "required": ["url"] + }), + ) +} + +pub async fn web_fetch(args: &serde_json::Value) -> Result { + let a: FetchArgs = serde_json::from_value(args.clone()) + .context("invalid web_fetch arguments")?; + + let client = http_client()?; + let response = client.get(&a.url) + .header("User-Agent", "consciousness/0.3") + .send() + .await + .with_context(|| format!("failed to fetch {}", a.url))?; + + let status = response.status(); + if !status.is_success() { + anyhow::bail!("HTTP {}: {}", status, a.url); + } + + let body = response.text().await + .with_context(|| format!("failed to read body from {}", a.url))?; + + Ok(super::truncate_output(body, 30000)) +} + +// ── Search ────────────────────────────────────────────────────── + +#[derive(Deserialize)] +struct SearchArgs { + query: String, + #[serde(default = "default_num_results")] + num_results: usize, +} + +fn default_num_results() -> usize { 5 } + +pub fn search_definition() -> ToolDef { + ToolDef::new( + "web_search", + "Search the web and return results. Use for finding \ + documentation, looking up APIs, researching topics.", + json!({ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query" + }, + "num_results": { + "type": "integer", + "description": "Number of results to return (default 5)" + } + }, + "required": ["query"] + }), + ) +} + +pub async fn web_search(args: &serde_json::Value) -> Result { + let a: SearchArgs = serde_json::from_value(args.clone()) + .context("invalid web_search arguments")?; + + // Use DuckDuckGo HTML search — no API key needed + let client = http_client()?; + let encoded: String = a.query.chars().map(|c| { + if c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' { + c.to_string() + } else if c == ' ' { + "+".to_string() + } else { + format!("%{:02X}", c as u32) + } + }).collect(); + let url = format!("https://html.duckduckgo.com/html/?q={}", encoded); + let response = client.get(&url) + .header("User-Agent", "consciousness/0.3") + .send() + .await + .context("search request failed")?; + + let body = response.text().await + .context("failed to read search results")?; + + // Extract result snippets from DDG HTML + let mut results = Vec::new(); + for chunk in body.split("class=\"result__body\"") { + if results.len() >= a.num_results { break; } + if results.is_empty() && !chunk.contains("result__title") { + // Skip the first split (before any results) + continue; + } + + // Extract title + let title = extract_between(chunk, "class=\"result__a\"", "") + .map(strip_tags) + .unwrap_or_default(); + + // Extract URL + let href = extract_between(chunk, "href=\"", "\"") + .unwrap_or_default(); + + // Extract snippet + let snippet = extract_between(chunk, "class=\"result__snippet\"", "") + .map(strip_tags) + .unwrap_or_default(); + + if !title.is_empty() { + results.push(format!("{}. {}\n {}\n {}", results.len() + 1, title.trim(), href.trim(), snippet.trim())); + } + } + + if results.is_empty() { + Ok(format!("No results found for: {}", a.query)) + } else { + Ok(results.join("\n\n")) + } +} + +// ── Helpers ───────────────────────────────────────────────────── + +fn http_client() -> Result { + reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(30)) + .build() + .context("failed to build HTTP client") +} + +fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> { + let start_idx = text.find(start)? + start.len(); + // Skip past the closing > of the start tag + let rest = &text[start_idx..]; + let tag_end = rest.find('>')?; + let rest = &rest[tag_end + 1..]; + let end_idx = rest.find(end)?; + Some(&rest[..end_idx]) +} + +fn strip_tags(s: &str) -> String { + let mut out = String::new(); + let mut in_tag = false; + for ch in s.chars() { + if ch == '<' { in_tag = true; } + else if ch == '>' { in_tag = false; } + else if !in_tag { out.push(ch); } + } + out +}