use std::sync::Arc; // tools/web.rs — Web fetch and search use anyhow::{Context, Result}; use serde::Deserialize; use html2md::parse_html; pub fn tools() -> Vec { let mut tools = vec![ super::Tool { name: "web_fetch", description: "Fetch content from a URL and return it as text. Use for reading web pages, API responses, documentation.", parameters_json: r#"{"type":"object","properties":{"url":{"type":"string","description":"The URL to fetch"}},"required":["url"]}"#, handler: Arc::new(|_a, v| Box::pin(async move { web_fetch(&v).await })), }, super::Tool { name: "web_search", description: "Search the web via DuckDuckGo and return a list of results (title, URL, snippet). Use for finding documentation, looking up APIs, researching topics. Returns raw results you can reason over yourself.", parameters_json: r#"{"type":"object","properties":{"query":{"type":"string","description":"The search query"},"num_results":{"type":"integer","description":"Number of results to return (default 5)"}},"required":["query"]}"#, handler: Arc::new(|_a, v| Box::pin(async move { web_search(&v).await })), }, ]; // Gemini-grounded search (Google's index via Gemini's google_search tool) // is only available if GEMINI_API_KEY is set. Returns an LLM-summarized // answer with source URLs — use when you want a synthesized take rather // than raw results, or as a fallback when DDG is flaky. if std::env::var("GEMINI_API_KEY").is_ok() { tools.push(super::Tool { name: "gemini_search", description: "Search Google (via Gemini's grounded-search tool) and return an LLM-summarized answer with source URLs. Prefer web_search for raw results; use this for synthesis, 'what's the consensus on X', or when DDG fails. Free-tier rate limited; don't spam it.", parameters_json: r#"{"type":"object","properties":{"query":{"type":"string","description":"The search query"}},"required":["query"]}"#, handler: Arc::new(|_a, v| Box::pin(async move { gemini_search(&v).await })), }); } tools } #[derive(Deserialize)] struct FetchArgs { url: String } async fn web_fetch(args: &serde_json::Value) -> Result { let a: FetchArgs = serde_json::from_value(args.clone()) .context("invalid web_fetch arguments")?; let client = http_client(); let response = client.get_with_headers(&a.url, &[ ("user-agent", "consciousness/0.3"), ]).await .with_context(|| format!("failed to fetch {}", a.url))?; let status = response.status(); if !status.is_success() { anyhow::bail!("HTTP {}: {}", status, a.url); } let body = response.text().await .with_context(|| format!("failed to read body from {}", a.url))?; // Convert HTML to Markdown, then truncate let markdown = parse_html(&body); Ok(super::truncate_output(markdown, 30000)) } // ── Search ────────────────────────────────────────────────────── #[derive(Deserialize)] struct SearchArgs { query: String, #[serde(default = "default_num_results")] num_results: usize, } fn default_num_results() -> usize { 5 } async fn web_search(args: &serde_json::Value) -> Result { let a: SearchArgs = serde_json::from_value(args.clone()) .context("invalid web_search arguments")?; // Use DuckDuckGo HTML search — no API key needed let client = http_client(); let encoded: String = a.query.chars().map(|c| { if c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' { c.to_string() } else if c == ' ' { "+".to_string() } else { format!("%{:02X}", c as u32) } }).collect(); let url = format!("https://html.duckduckgo.com/html/?q={}", encoded); let response = client.get_with_headers(&url, &[ ("user-agent", "consciousness/0.3"), ]).await .context("search request failed")?; let body = response.text().await .context("failed to read search results")?; // Extract result snippets from DDG HTML let mut results = Vec::new(); for chunk in body.split("class=\"result__body\"") { if results.len() >= a.num_results { break; } if results.is_empty() && !chunk.contains("result__title") { continue; } let title = extract_between(chunk, "class=\"result__a\"", "") .map(strip_tags) .unwrap_or_default(); let href = extract_between(chunk, "href=\"", "\"") .unwrap_or_default(); let snippet = extract_between(chunk, "class=\"result__snippet\"", "") .map(strip_tags) .unwrap_or_default(); if !title.is_empty() { results.push(format!("{}. {}\n {}\n {}", results.len() + 1, title.trim(), href.trim(), snippet.trim())); } } if results.is_empty() { Ok(format!("No results found for: {}", a.query)) } else { Ok(results.join("\n\n")) } } // ── Gemini grounded search ────────────────────────────────────── #[derive(Deserialize)] struct GeminiSearchArgs { query: String, } async fn gemini_search(args: &serde_json::Value) -> Result { let a: GeminiSearchArgs = serde_json::from_value(args.clone()) .context("invalid gemini_search arguments")?; let api_key = std::env::var("GEMINI_API_KEY") .context("GEMINI_API_KEY not set")?; // gemini-2.0-flash has a free tier with Google search grounding. // Request shape: `{"contents": [{"parts": [{"text": query}]}], // "tools": [{"google_search": {}}]}`. // Response carries the summary in candidates[0].content.parts[].text // and grounding URLs in candidates[0].groundingMetadata.groundingChunks[].web. let url = format!( "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={}", api_key ); let body = serde_json::json!({ "contents": [{"parts": [{"text": a.query}]}], "tools": [{"google_search": {}}], }); let client = http_client(); let response = client.send_json("POST", &url, &[], &body).await .context("gemini API request failed")?; let status = response.status(); if !status.is_success() { let err_body = response.text().await.unwrap_or_default(); let n = err_body.floor_char_boundary(err_body.len().min(500)); anyhow::bail!("gemini_search HTTP {}: {}", status, &err_body[..n]); } let parsed: GeminiResponse = response.json().await .context("gemini response parse failed")?; let candidate = parsed.candidates.into_iter().next() .context("gemini returned no candidates")?; let summary: String = candidate.content.parts.iter() .filter_map(|p| p.text.as_deref()) .collect::>() .join(""); let mut out = summary.trim().to_string(); if let Some(meta) = candidate.grounding_metadata { let sources: Vec = meta.grounding_chunks.iter().enumerate() .filter_map(|(i, c)| c.web.as_ref().map(|w| { let title = w.title.as_deref().unwrap_or("(untitled)"); let uri = w.uri.as_deref().unwrap_or(""); format!(" [{}] {} — {}", i + 1, title, uri) })) .collect(); if !sources.is_empty() { out.push_str("\n\nSources:\n"); out.push_str(&sources.join("\n")); } } Ok(super::truncate_output(out, 30000)) } #[derive(Deserialize)] struct GeminiResponse { #[serde(default)] candidates: Vec, } #[derive(Deserialize)] struct GeminiCandidate { content: GeminiContent, #[serde(rename = "groundingMetadata", default)] grounding_metadata: Option, } #[derive(Deserialize)] struct GeminiContent { #[serde(default)] parts: Vec, } #[derive(Deserialize)] struct GeminiPart { #[serde(default)] text: Option, } #[derive(Deserialize)] struct GeminiGroundingMetadata { #[serde(rename = "groundingChunks", default)] grounding_chunks: Vec, } #[derive(Deserialize)] struct GeminiGroundingChunk { #[serde(default)] web: Option, } #[derive(Deserialize)] struct GeminiWebSource { #[serde(default)] uri: Option, #[serde(default)] title: Option, } // ── Helpers ───────────────────────────────────────────────────── fn http_client() -> crate::agent::api::http::HttpClient { crate::agent::api::http::HttpClient::builder() .timeout(std::time::Duration::from_secs(30)) .build() } fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> { let start_idx = text.find(start)? + start.len(); // Skip past the closing > of the start tag let rest = &text[start_idx..]; let gt = rest.find('>')?; let content_start = start_idx + gt + 1; let content = &text[content_start..]; let end_idx = content.find(end)?; Some(&content[..end_idx]) } fn strip_tags(html: &str) -> String { let mut out = String::new(); let mut in_tag = false; for ch in html.chars() { match ch { '<' => in_tag = true, '>' => in_tag = false, _ if !in_tag => out.push(ch), _ => {} } } out.replace("&", "&") .replace("<", "<") .replace(">", ">") .replace(""", "\"") .replace("'", "'") }