consciousness/src/agent/tools/web.rs
ProofOfConcept 53ad8cc9df tools: static string definitions, no runtime JSON construction
Tool definitions are now &'static str (name, description,
parameters_json) instead of runtime-constructed serde_json::Value.
No more json!() macro, no more ToolDef::new() for tool definitions.

The JSON schema strings are written directly as string literals.
When sent to the API, they can be interpolated without
serialization/deserialization.

Multi-tool modules return fixed-size arrays instead of Vecs:
- memory: [Tool; 12], journal: [Tool; 3]
- channels: [Tool; 4]
- control: [Tool; 3]
- web: [Tool; 2]

ToolDef/FunctionDef remain for backward compat (API wire format,
summarize_args) but are no longer used in tool definitions.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2026-04-04 18:19:21 -04:00

147 lines
5 KiB
Rust

// tools/web.rs — Web fetch and search
use anyhow::{Context, Result};
use serde::Deserialize;
/// Builds the two tool definitions exported by this module: `web_fetch`
/// followed by `web_search`.
///
/// Each entry carries a static name, a description, the JSON-schema string
/// for its parameters, and a handler that forwards the argument value to the
/// matching async function in this file. The handler's first parameter is
/// not used by either tool.
pub fn tools() -> [super::Tool; 2] {
    let fetch = super::Tool {
        name: "web_fetch",
        description: "Fetch content from a URL and return it as text. Use for reading web pages, API responses, documentation.",
        parameters_json: r#"{"type":"object","properties":{"url":{"type":"string","description":"The URL to fetch"}},"required":["url"]}"#,
        handler: |_a, args| Box::pin(async move { web_fetch(&args).await }),
    };

    let search = super::Tool {
        name: "web_search",
        description: "Search the web and return results. Use for finding documentation, looking up APIs, researching topics.",
        parameters_json: r#"{"type":"object","properties":{"query":{"type":"string","description":"The search query"},"num_results":{"type":"integer","description":"Number of results to return (default 5)"}},"required":["query"]}"#,
        handler: |_a, args| Box::pin(async move { web_search(&args).await }),
    };

    [fetch, search]
}
/// Arguments of the `web_fetch` tool, deserialized from the JSON value the
/// handler receives. `url` is the address that gets requested.
#[derive(Deserialize)]
struct FetchArgs { url: String }
async fn web_fetch(args: &serde_json::Value) -> Result<String> {
let a: FetchArgs = serde_json::from_value(args.clone())
.context("invalid web_fetch arguments")?;
let client = http_client()?;
let response = client.get(&a.url)
.header("User-Agent", "consciousness/0.3")
.send()
.await
.with_context(|| format!("failed to fetch {}", a.url))?;
let status = response.status();
if !status.is_success() {
anyhow::bail!("HTTP {}: {}", status, a.url);
}
let body = response.text().await
.with_context(|| format!("failed to read body from {}", a.url))?;
Ok(super::truncate_output(body, 30000))
}
// ── Search ──────────────────────────────────────────────────────
/// Arguments of the `web_search` tool, deserialized from the JSON value the
/// handler receives.
#[derive(Deserialize)]
struct SearchArgs {
/// Free-text search query; it is URL-encoded before being sent.
query: String,
/// Upper bound on the number of results returned. When the field is absent
/// from the JSON, serde fills it in from `default_num_results`.
#[serde(default = "default_num_results")]
num_results: usize,
}
/// Serde default for `SearchArgs::num_results`, used when the caller omits
/// the field. Matches the "default 5" advertised in the tool's schema.
fn default_num_results() -> usize {
    const DEFAULT_NUM_RESULTS: usize = 5;
    DEFAULT_NUM_RESULTS
}
async fn web_search(args: &serde_json::Value) -> Result<String> {
let a: SearchArgs = serde_json::from_value(args.clone())
.context("invalid web_search arguments")?;
// Use DuckDuckGo HTML search — no API key needed
let client = http_client()?;
let encoded: String = a.query.chars().map(|c| {
if c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' {
c.to_string()
} else if c == ' ' {
"+".to_string()
} else {
format!("%{:02X}", c as u32)
}
}).collect();
let url = format!("https://html.duckduckgo.com/html/?q={}", encoded);
let response = client.get(&url)
.header("User-Agent", "consciousness/0.3")
.send()
.await
.context("search request failed")?;
let body = response.text().await
.context("failed to read search results")?;
// Extract result snippets from DDG HTML
let mut results = Vec::new();
for chunk in body.split("class=\"result__body\"") {
if results.len() >= a.num_results { break; }
if results.is_empty() && !chunk.contains("result__title") {
// Skip the first split (before any results)
continue;
}
// Extract title
let title = extract_between(chunk, "class=\"result__a\"", "</a>")
.map(strip_tags)
.unwrap_or_default();
// Extract URL
let href = extract_between(chunk, "href=\"", "\"")
.unwrap_or_default();
// Extract snippet
let snippet = extract_between(chunk, "class=\"result__snippet\"", "</a>")
.map(strip_tags)
.unwrap_or_default();
if !title.is_empty() {
results.push(format!("{}. {}\n {}\n {}", results.len() + 1, title.trim(), href.trim(), snippet.trim()));
}
}
if results.is_empty() {
Ok(format!("No results found for: {}", a.query))
} else {
Ok(results.join("\n\n"))
}
}
// ── Helpers ─────────────────────────────────────────────────────
/// Creates the `reqwest` client used by both tools, with a 30-second
/// request timeout.
///
/// # Errors
///
/// Returns an error, with context, when the client builder fails.
fn http_client() -> Result<reqwest::Client> {
    let timeout = std::time::Duration::from_secs(30);
    let builder = reqwest::Client::builder().timeout(timeout);
    builder.build().context("failed to build HTTP client")
}
/// Returns the text of an element whose opening tag contains `start`.
///
/// The search locates the first occurrence of `start`, skips forward past
/// the next `>` (the end of that opening tag), and returns everything from
/// there up to, but not including, the first occurrence of `end`.
/// Yields `None` when `start`, the closing `>`, or `end` cannot be found.
fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    let (_, after_marker) = text.split_once(start)?;
    // Drop the remainder of the opening tag, up to and including its `>`.
    let (_, content) = after_marker.split_once('>')?;
    let (inner, _) = content.split_once(end)?;
    Some(inner)
}
/// Removes markup from `s`, keeping only the characters outside `<...>`.
///
/// A `<` opens a tag and a `>` closes it; both delimiters and everything
/// between them are dropped. A stray `>` outside a tag is dropped as well,
/// and an unclosed `<` swallows the rest of the input.
fn strip_tags(s: &str) -> String {
    let mut inside_tag = false;
    s.chars()
        .filter(|&c| match c {
            '<' => {
                inside_tag = true;
                false
            }
            '>' => {
                inside_tag = false;
                false
            }
            _ => !inside_tag,
        })
        .collect()
}