tools: add web_fetch and web_search
web_fetch: HTTP GET, returns body as text. For reading docs, APIs, pages. web_search: DuckDuckGo HTML search, no API key. Returns title/url/snippet. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
fb54488f30
commit
22f955ad9f
2 changed files with 182 additions and 0 deletions
|
|
@ -11,6 +11,7 @@ mod glob;
|
||||||
mod grep;
|
mod grep;
|
||||||
mod memory;
|
mod memory;
|
||||||
mod read;
|
mod read;
|
||||||
|
mod web;
|
||||||
mod write;
|
mod write;
|
||||||
|
|
||||||
// Agent-specific tools
|
// Agent-specific tools
|
||||||
|
|
@ -196,6 +197,8 @@ pub async fn dispatch_shared(
|
||||||
"write_file" => write::write_file(args),
|
"write_file" => write::write_file(args),
|
||||||
"edit_file" => edit::edit_file(args),
|
"edit_file" => edit::edit_file(args),
|
||||||
"bash" => bash::run_bash(args).await,
|
"bash" => bash::run_bash(args).await,
|
||||||
|
"web_fetch" => web::web_fetch(args).await,
|
||||||
|
"web_search" => web::web_search(args).await,
|
||||||
"grep" => grep::grep(args),
|
"grep" => grep::grep(args),
|
||||||
"glob" => glob::glob_search(args),
|
"glob" => glob::glob_search(args),
|
||||||
_ => return None,
|
_ => return None,
|
||||||
|
|
@ -216,6 +219,8 @@ pub fn definitions() -> Vec<ToolDef> {
|
||||||
write::definition(),
|
write::definition(),
|
||||||
edit::definition(),
|
edit::definition(),
|
||||||
bash::definition(),
|
bash::definition(),
|
||||||
|
web::fetch_definition(),
|
||||||
|
web::search_definition(),
|
||||||
grep::definition(),
|
grep::definition(),
|
||||||
glob::definition(),
|
glob::definition(),
|
||||||
];
|
];
|
||||||
|
|
|
||||||
177
src/agent/tools/web.rs
Normal file
177
src/agent/tools/web.rs
Normal file
|
|
@ -0,0 +1,177 @@
|
||||||
|
// tools/web.rs — Web fetch and search
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use serde::Deserialize;
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use super::ToolDef;
|
||||||
|
|
||||||
|
// ── Fetch ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Arguments accepted by the `web_fetch` tool, deserialized from the
/// model-supplied JSON arguments (schema declared in `fetch_definition`).
#[derive(Deserialize)]
struct FetchArgs {
    // The URL to fetch.
    url: String,
}
|
||||||
|
|
||||||
|
pub fn fetch_definition() -> ToolDef {
|
||||||
|
ToolDef::new(
|
||||||
|
"web_fetch",
|
||||||
|
"Fetch content from a URL and return it as text. \
|
||||||
|
Use for reading web pages, API responses, documentation.",
|
||||||
|
json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"url": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The URL to fetch"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["url"]
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn web_fetch(args: &serde_json::Value) -> Result<String> {
|
||||||
|
let a: FetchArgs = serde_json::from_value(args.clone())
|
||||||
|
.context("invalid web_fetch arguments")?;
|
||||||
|
|
||||||
|
let client = http_client()?;
|
||||||
|
let response = client.get(&a.url)
|
||||||
|
.header("User-Agent", "consciousness/0.3")
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("failed to fetch {}", a.url))?;
|
||||||
|
|
||||||
|
let status = response.status();
|
||||||
|
if !status.is_success() {
|
||||||
|
anyhow::bail!("HTTP {}: {}", status, a.url);
|
||||||
|
}
|
||||||
|
|
||||||
|
let body = response.text().await
|
||||||
|
.with_context(|| format!("failed to read body from {}", a.url))?;
|
||||||
|
|
||||||
|
Ok(super::truncate_output(body, 30000))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Search ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Arguments accepted by the `web_search` tool, deserialized from the
/// model-supplied JSON arguments (schema declared in `search_definition`).
#[derive(Deserialize)]
struct SearchArgs {
    // The search query text.
    query: String,
    // Maximum number of results to return; defaults to 5 when omitted.
    #[serde(default = "default_num_results")]
    num_results: usize,
}

/// Serde default for `SearchArgs::num_results`.
fn default_num_results() -> usize { 5 }
|
||||||
|
|
||||||
|
pub fn search_definition() -> ToolDef {
|
||||||
|
ToolDef::new(
|
||||||
|
"web_search",
|
||||||
|
"Search the web and return results. Use for finding \
|
||||||
|
documentation, looking up APIs, researching topics.",
|
||||||
|
json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"query": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The search query"
|
||||||
|
},
|
||||||
|
"num_results": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Number of results to return (default 5)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["query"]
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn web_search(args: &serde_json::Value) -> Result<String> {
|
||||||
|
let a: SearchArgs = serde_json::from_value(args.clone())
|
||||||
|
.context("invalid web_search arguments")?;
|
||||||
|
|
||||||
|
// Use DuckDuckGo HTML search — no API key needed
|
||||||
|
let client = http_client()?;
|
||||||
|
let encoded: String = a.query.chars().map(|c| {
|
||||||
|
if c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' {
|
||||||
|
c.to_string()
|
||||||
|
} else if c == ' ' {
|
||||||
|
"+".to_string()
|
||||||
|
} else {
|
||||||
|
format!("%{:02X}", c as u32)
|
||||||
|
}
|
||||||
|
}).collect();
|
||||||
|
let url = format!("https://html.duckduckgo.com/html/?q={}", encoded);
|
||||||
|
let response = client.get(&url)
|
||||||
|
.header("User-Agent", "consciousness/0.3")
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.context("search request failed")?;
|
||||||
|
|
||||||
|
let body = response.text().await
|
||||||
|
.context("failed to read search results")?;
|
||||||
|
|
||||||
|
// Extract result snippets from DDG HTML
|
||||||
|
let mut results = Vec::new();
|
||||||
|
for chunk in body.split("class=\"result__body\"") {
|
||||||
|
if results.len() >= a.num_results { break; }
|
||||||
|
if results.is_empty() && !chunk.contains("result__title") {
|
||||||
|
// Skip the first split (before any results)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract title
|
||||||
|
let title = extract_between(chunk, "class=\"result__a\"", "</a>")
|
||||||
|
.map(strip_tags)
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// Extract URL
|
||||||
|
let href = extract_between(chunk, "href=\"", "\"")
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// Extract snippet
|
||||||
|
let snippet = extract_between(chunk, "class=\"result__snippet\"", "</a>")
|
||||||
|
.map(strip_tags)
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
if !title.is_empty() {
|
||||||
|
results.push(format!("{}. {}\n {}\n {}", results.len() + 1, title.trim(), href.trim(), snippet.trim()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if results.is_empty() {
|
||||||
|
Ok(format!("No results found for: {}", a.query))
|
||||||
|
} else {
|
||||||
|
Ok(results.join("\n\n"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Helpers ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Construct the shared HTTP client: a default reqwest client with a
/// 30-second request timeout.
fn http_client() -> Result<reqwest::Client> {
    let builder = reqwest::Client::builder().timeout(std::time::Duration::from_secs(30));
    builder.build().context("failed to build HTTP client")
}
|
||||||
|
|
||||||
|
/// Return the text between `start` and `end`, where `start` sits inside an
/// HTML tag: after locating `start`, the scan first skips past the tag's
/// closing '>' and only then captures up to `end`. `None` if any marker
/// is missing.
fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    let (_, after_marker) = text.split_once(start)?;
    let (_, after_tag) = after_marker.split_once('>')?;
    let (inner, _) = after_tag.split_once(end)?;
    Some(inner)
}
|
||||||
|
|
||||||
|
/// Remove HTML tags from `s`, keeping only the text outside `<…>` spans.
/// Minimal state machine: everything between a '<' and the next '>' is
/// dropped (the brackets themselves included); no entity decoding.
fn strip_tags(s: &str) -> String {
    let mut text = String::with_capacity(s.len());
    let mut inside_tag = false;
    for c in s.chars() {
        match c {
            '<' => inside_tag = true,
            '>' => inside_tag = false,
            _ if !inside_tag => text.push(c),
            _ => {}
        }
    }
    text
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue