// tools/web.rs — Web fetch and search

use std::sync::Arc;

use anyhow::{Context, Result};
use serde::Deserialize;
use html2md::parse_html;

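/// Definitions for the two web tools, `web_fetch` and `web_search`, wired to
/// their async handlers.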
pub fn tools() -> [super::Tool; 2] {
    [
        super::Tool {
            name: "web_fetch",
            description: "Fetch content from a URL and return it as text. Use for reading web pages, API responses, documentation.",
            parameters_json: r#"{"type":"object","properties":{"url":{"type":"string","description":"The URL to fetch"}},"required":["url"]}"#,
            handler: Arc::new(|_a, v| Box::pin(async move { web_fetch(&v).await })),
        },
        super::Tool {
            name: "web_search",
            description: "Search the web and return results. Use for finding documentation, looking up APIs, researching topics.",
            parameters_json: r#"{"type":"object","properties":{"query":{"type":"string","description":"The search query"},"num_results":{"type":"integer","description":"Number of results to return (default 5)"}},"required":["query"]}"#,
            handler: Arc::new(|_a, v| Box::pin(async move { web_search(&v).await })),
        },
    ]
}

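/// Arguments for the `web_fetch` tool.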
#[derive(Deserialize)]
struct FetchArgs { url: String }

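/// Fetches the given URL, converts the HTML body to Markdown, and truncates
/// the result to a bounded length.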
async fn web_fetch(args: &serde_json::Value) -> Result<String> {
    let a: FetchArgs = serde_json::from_value(args.clone())
        .context("invalid web_fetch arguments")?;

    let client = http_client();
    let response = client.get_with_headers(&a.url, &[
        ("user-agent", "consciousness/0.3"),
    ]).await
        .with_context(|| format!("failed to fetch {}", a.url))?;

    let status = response.status();
    if !status.is_success() {
        anyhow::bail!("HTTP {}: {}", status, a.url);
    }

    let body = response.text().await
        .with_context(|| format!("failed to read body from {}", a.url))?;

    // Convert HTML to Markdown, then truncate
    let markdown = parse_html(&body);
    Ok(super::truncate_output(markdown, 30000))
}

// ── Search ──────────────────────────────────────────────────────

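/// Arguments for the `web_search` tool.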
#[derive(Deserialize)]
struct SearchArgs {
    query: String,
    #[serde(default = "default_num_results")]
    num_results: usize,
}

fn default_num_results() -> usize { 5 }

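/// Searches DuckDuckGo's HTML endpoint and returns up to `num_results`
/// results as numbered title / URL / snippet entries.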
async fn web_search(args: &serde_json::Value) -> Result<String> {
    let a: SearchArgs = serde_json::from_value(args.clone())
        .context("invalid web_search arguments")?;

    // Use DuckDuckGo HTML search — no API key needed
    let client = http_client();
    // Percent-encode the query, working over UTF-8 bytes so multi-byte
    // characters are escaped correctly.
    let encoded: String = a.query.bytes().map(|b| {
        if b.is_ascii_alphanumeric() || b == b'-' || b == b'_' || b == b'.' {
            (b as char).to_string()
        } else if b == b' ' {
            "+".to_string()
        } else {
            format!("%{:02X}", b)
        }
    }).collect();
    let url = format!("https://html.duckduckgo.com/html/?q={}", encoded);
    let response = client.get_with_headers(&url, &[
        ("user-agent", "consciousness/0.3"),
    ]).await
        .context("search request failed")?;

    let body = response.text().await
        .context("failed to read search results")?;

    // Extract result snippets from DDG HTML
    let mut results = Vec::new();
    for chunk in body.split("class=\"result__body\"") {
        if results.len() >= a.num_results { break; }
        if results.is_empty() && !chunk.contains("result__title") {
            continue;
        }

        let title = extract_between(chunk, "class=\"result__a\"", "</a>")
            .map(strip_tags)
            .unwrap_or_default();

        // `extract_between` skips past the end of the opening tag, so it
        // cannot read an attribute value; pull the href out directly instead.
        let href = chunk
            .split("href=\"")
            .nth(1)
            .and_then(|rest| rest.split('"').next())
            .unwrap_or_default();

        let snippet = extract_between(chunk, "class=\"result__snippet\"", "</a>")
            .map(strip_tags)
            .unwrap_or_default();

        if !title.is_empty() {
            results.push(format!("{}. {}\n {}\n {}", results.len() + 1, title.trim(), href.trim(), snippet.trim()));
        }
    }

    if results.is_empty() {
        Ok(format!("No results found for: {}", a.query))
    } else {
        Ok(results.join("\n\n"))
    }
}

// ── Helpers ─────────────────────────────────────────────────────

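/// Builds the HTTP client used by both tools, with a 30-second timeout.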
fn http_client() -> crate::agent::api::http::HttpClient {
    crate::agent::api::http::HttpClient::builder()
        .timeout(std::time::Duration::from_secs(30))
        .build()
}

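/// Returns the text between `start` and `end`, skipping past the first `>`
/// that follows `start` so the marker can be a fragment of an opening tag.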
fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    let start_idx = text.find(start)? + start.len();
    // Skip past the closing > of the start tag
    let rest = &text[start_idx..];
    let gt = rest.find('>')?;
    let content_start = start_idx + gt + 1;
    let content = &text[content_start..];
    let end_idx = content.find(end)?;
    Some(&content[..end_idx])
}

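/// Strips HTML tags and decodes a few common entities.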
fn strip_tags(html: &str) -> String {
    let mut out = String::new();
    let mut in_tag = false;
    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            '>' => in_tag = false,
            _ if !in_tag => out.push(ch),
            _ => {}
        }
    }
    out.replace("&amp;", "&")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#x27;", "'")
}

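// Illustrative test sketch (not exhaustive): exercises the pure helpers above
// with hypothetical inputs shaped like DuckDuckGo result markup.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strip_tags_removes_markup_and_decodes_entities() {
        assert_eq!(strip_tags("<b>Rust &amp; Cargo</b>"), "Rust & Cargo");
    }

    #[test]
    fn extract_between_returns_inner_text_of_the_matched_tag() {
        let html = r#"<a class="result__a" href="https://example.com">Example site</a>"#;
        assert_eq!(
            extract_between(html, r#"class="result__a""#, "</a>"),
            Some("Example site")
        );
    }
}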