// tools/web.rs — Web fetch and search
use anyhow::{Context, Result};
use html2md::parse_html;
use serde::Deserialize;
use std::sync::Arc;
2026-04-18 13:02:01 -04:00
pub fn tools ( ) -> Vec < super ::Tool > {
let mut tools = vec! [
2026-04-04 15:50:14 -04:00
super ::Tool {
name : " web_fetch " ,
description : " Fetch content from a URL and return it as text. Use for reading web pages, API responses, documentation. " ,
parameters_json : r #" { " type " : " object " , " properties " :{ " url " :{ " type " : " string " , " description " : " The URL to fetch " }}, " required " :[ " url " ]} " #,
2026-04-08 20:38:42 -04:00
handler : Arc ::new ( | _a , v | Box ::pin ( async move { web_fetch ( & v ) . await } ) ) ,
2026-04-04 15:50:14 -04:00
} ,
super ::Tool {
name : " web_search " ,
2026-04-18 13:02:01 -04:00
description : " Search the web via DuckDuckGo and return a list of results (title, URL, snippet). Use for finding documentation, looking up APIs, researching topics. Returns raw results you can reason over yourself. " ,
2026-04-04 15:50:14 -04:00
parameters_json : r #" { " type " : " object " , " properties " :{ " query " :{ " type " : " string " , " description " : " The search query " }, " num_results " :{ " type " : " integer " , " description " : " Number of results to return ( default 5 ) " }}, " required " :[ " query " ]} " #,
2026-04-08 20:38:42 -04:00
handler : Arc ::new ( | _a , v | Box ::pin ( async move { web_search ( & v ) . await } ) ) ,
2026-04-04 15:50:14 -04:00
} ,
2026-04-18 13:02:01 -04:00
] ;
// Gemini-grounded search (Google's index via Gemini's google_search tool)
// is only available if GEMINI_API_KEY is set. Returns an LLM-summarized
// answer with source URLs — use when you want a synthesized take rather
// than raw results, or as a fallback when DDG is flaky.
if std ::env ::var ( " GEMINI_API_KEY " ) . is_ok ( ) {
tools . push ( super ::Tool {
name : " gemini_search " ,
description : " Search Google (via Gemini's grounded-search tool) and return an LLM-summarized answer with source URLs. Prefer web_search for raw results; use this for synthesis, 'what's the consensus on X', or when DDG fails. Free-tier rate limited; don't spam it. " ,
parameters_json : r #" { " type " : " object " , " properties " :{ " query " :{ " type " : " string " , " description " : " The search query " }}, " required " :[ " query " ]} " #,
handler : Arc ::new ( | _a , v | Box ::pin ( async move { gemini_search ( & v ) . await } ) ) ,
} ) ;
}
tools
2026-04-04 15:34:07 -04:00
}
2026-04-04 12:18:11 -04:00
#[ derive(Deserialize) ]
2026-04-04 15:50:14 -04:00
struct FetchArgs { url : String }
2026-04-04 12:18:11 -04:00
2026-04-04 15:34:07 -04:00
async fn web_fetch ( args : & serde_json ::Value ) -> Result < String > {
2026-04-04 12:18:11 -04:00
let a : FetchArgs = serde_json ::from_value ( args . clone ( ) )
. context ( " invalid web_fetch arguments " ) ? ;
2026-04-07 12:50:40 -04:00
let client = http_client ( ) ;
let response = client . get_with_headers ( & a . url , & [
( " user-agent " , " consciousness/0.3 " ) ,
] ) . await
2026-04-04 12:18:11 -04:00
. with_context ( | | format! ( " failed to fetch {} " , a . url ) ) ? ;
let status = response . status ( ) ;
if ! status . is_success ( ) {
anyhow ::bail! ( " HTTP {}: {} " , status , a . url ) ;
}
let body = response . text ( ) . await
. with_context ( | | format! ( " failed to read body from {} " , a . url ) ) ? ;
2026-04-12 11:12:12 +01:00
// Convert HTML to Markdown, then truncate
let markdown = parse_html ( & body ) ;
Ok ( super ::truncate_output ( markdown , 30000 ) )
2026-04-04 12:18:11 -04:00
}
// ── Search ──────────────────────────────────────────────────────
/// Arguments for the `web_search` tool.
#[derive(Deserialize)]
struct SearchArgs {
    // The search query string.
    query: String,
    // How many results to return; defaults to 5 when omitted.
    #[serde(default = "default_num_results")]
    num_results: usize,
}
/// Serde default for `SearchArgs::num_results`.
fn default_num_results() -> usize {
    5
}
2026-04-04 15:34:07 -04:00
async fn web_search ( args : & serde_json ::Value ) -> Result < String > {
2026-04-04 12:18:11 -04:00
let a : SearchArgs = serde_json ::from_value ( args . clone ( ) )
. context ( " invalid web_search arguments " ) ? ;
// Use DuckDuckGo HTML search — no API key needed
2026-04-07 12:50:40 -04:00
let client = http_client ( ) ;
2026-04-04 12:18:11 -04:00
let encoded : String = a . query . chars ( ) . map ( | c | {
if c . is_ascii_alphanumeric ( ) | | c = = '-' | | c = = '_' | | c = = '.' {
c . to_string ( )
} else if c = = ' ' {
" + " . to_string ( )
} else {
format! ( " % {:02X} " , c as u32 )
}
} ) . collect ( ) ;
let url = format! ( " https://html.duckduckgo.com/html/?q= {} " , encoded ) ;
2026-04-07 12:50:40 -04:00
let response = client . get_with_headers ( & url , & [
( " user-agent " , " consciousness/0.3 " ) ,
] ) . await
2026-04-04 12:18:11 -04:00
. context ( " search request failed " ) ? ;
let body = response . text ( ) . await
. context ( " failed to read search results " ) ? ;
// Extract result snippets from DDG HTML
let mut results = Vec ::new ( ) ;
for chunk in body . split ( " class= \" result__body \" " ) {
if results . len ( ) > = a . num_results { break ; }
if results . is_empty ( ) & & ! chunk . contains ( " result__title " ) {
continue ;
}
let title = extract_between ( chunk , " class= \" result__a \" " , " </a> " )
. map ( strip_tags )
. unwrap_or_default ( ) ;
let href = extract_between ( chunk , " href= \" " , " \" " )
. unwrap_or_default ( ) ;
let snippet = extract_between ( chunk , " class= \" result__snippet \" " , " </a> " )
. map ( strip_tags )
. unwrap_or_default ( ) ;
if ! title . is_empty ( ) {
results . push ( format! ( " {} . {} \n {} \n {} " , results . len ( ) + 1 , title . trim ( ) , href . trim ( ) , snippet . trim ( ) ) ) ;
}
}
if results . is_empty ( ) {
Ok ( format! ( " No results found for: {} " , a . query ) )
} else {
Ok ( results . join ( " \n \n " ) )
}
}
2026-04-18 13:02:01 -04:00
// ── Gemini grounded search ──────────────────────────────────────
#[ derive(Deserialize) ]
struct GeminiSearchArgs {
query : String ,
}
/// Run a Google-grounded search through the Gemini API and return the
/// model's summarized answer followed by a numbered list of source URLs.
///
/// # Errors
/// Fails when the arguments don't deserialize, `GEMINI_API_KEY` is unset,
/// the request fails, the status is non-2xx, the response doesn't parse,
/// or no candidates are returned.
async fn gemini_search(args: &serde_json::Value) -> Result<String> {
    let a: GeminiSearchArgs = serde_json::from_value(args.clone())
        .context("invalid gemini_search arguments")?;
    let api_key = std::env::var("GEMINI_API_KEY")
        .context("GEMINI_API_KEY not set")?;
    // gemini-2.0-flash has a free tier with Google search grounding.
    // Request shape: `{"contents": [{"parts": [{"text": query}]}],
    // "tools": [{"google_search": {}}]}`.
    // Response carries the summary in candidates[0].content.parts[].text
    // and grounding URLs in candidates[0].groundingMetadata.groundingChunks[].web.
    let url = format!(
        "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={}",
        api_key
    );
    let body = serde_json::json!({
        "contents": [{"parts": [{"text": a.query}]}],
        "tools": [{"google_search": {}}],
    });
    let client = http_client();
    let response = client
        .send_json("POST", &url, &[], &body)
        .await
        .context("gemini API request failed")?;
    let status = response.status();
    if !status.is_success() {
        let err_body = response.text().await.unwrap_or_default();
        // Truncate the error body to at most 500 bytes, backing off to a
        // UTF-8 char boundary so the slice can't panic mid-codepoint.
        // (str::floor_char_boundary is nightly-only, so do it by hand.)
        let mut n = err_body.len().min(500);
        while n > 0 && !err_body.is_char_boundary(n) {
            n -= 1;
        }
        anyhow::bail!("gemini_search HTTP {}: {}", status, &err_body[..n]);
    }
    let parsed: GeminiResponse = response
        .json()
        .await
        .context("gemini response parse failed")?;
    let candidate = parsed
        .candidates
        .into_iter()
        .next()
        .context("gemini returned no candidates")?;
    // The answer may arrive split across several text parts; concatenate.
    let summary: String = candidate
        .content
        .parts
        .iter()
        .filter_map(|p| p.text.as_deref())
        .collect::<Vec<_>>()
        .join("");
    let mut out = summary.trim().to_string();
    if let Some(meta) = candidate.grounding_metadata {
        let sources: Vec<String> = meta
            .grounding_chunks
            .iter()
            .enumerate()
            .filter_map(|(i, c)| {
                c.web.as_ref().map(|w| {
                    let title = w.title.as_deref().unwrap_or("(untitled)");
                    let uri = w.uri.as_deref().unwrap_or("");
                    format!("[{}] {} — {}", i + 1, title, uri)
                })
            })
            .collect();
        if !sources.is_empty() {
            out.push_str("\n\nSources:\n");
            out.push_str(&sources.join("\n"));
        }
    }
    Ok(super::truncate_output(out, 30000))
}
/// Minimal deserialization target for the Gemini generateContent response.
/// Only the fields this module reads are modeled; unknown fields are ignored.
/// The `rename` values must match the API's camelCase keys exactly.
#[derive(Deserialize)]
struct GeminiResponse {
    #[serde(default)]
    candidates: Vec<GeminiCandidate>,
}

#[derive(Deserialize)]
struct GeminiCandidate {
    content: GeminiContent,
    // Present only for grounded responses; carries source attribution.
    #[serde(rename = "groundingMetadata", default)]
    grounding_metadata: Option<GeminiGroundingMetadata>,
}

#[derive(Deserialize)]
struct GeminiContent {
    #[serde(default)]
    parts: Vec<GeminiPart>,
}

#[derive(Deserialize)]
struct GeminiPart {
    // Text segment of the answer; non-text parts deserialize with None here.
    #[serde(default)]
    text: Option<String>,
}

#[derive(Deserialize)]
struct GeminiGroundingMetadata {
    #[serde(rename = "groundingChunks", default)]
    grounding_chunks: Vec<GeminiGroundingChunk>,
}

#[derive(Deserialize)]
struct GeminiGroundingChunk {
    // Web source backing a grounded claim, when available.
    #[serde(default)]
    web: Option<GeminiWebSource>,
}

#[derive(Deserialize)]
struct GeminiWebSource {
    #[serde(default)]
    uri: Option<String>,
    #[serde(default)]
    title: Option<String>,
}
2026-04-04 12:18:11 -04:00
// ── Helpers ─────────────────────────────────────────────────────
2026-04-07 12:50:40 -04:00
fn http_client ( ) -> crate ::agent ::api ::http ::HttpClient {
crate ::agent ::api ::http ::HttpClient ::builder ( )
2026-04-04 12:18:11 -04:00
. timeout ( std ::time ::Duration ::from_secs ( 30 ) )
. build ( )
}
/// Return the text between the `start` and `end` markers in `text`, after
/// first skipping past the next `>` following `start` (i.e. the close of
/// the HTML tag the marker sits in). Returns `None` if any marker — start,
/// the `>`, or end — is missing.
fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    let after_marker = text.find(start)? + start.len();
    // Skip past the closing > of the start tag so we capture tag content.
    let tag_close = text[after_marker..].find('>')?;
    let content = &text[after_marker + tag_close + 1..];
    content.find(end).map(|i| &content[..i])
}
/// Strip HTML tags from `html` and decode the handful of HTML entities
/// DuckDuckGo emits in titles and snippets.
///
/// Tag stripping is a simple state machine: everything between '<' and '>'
/// is dropped (no nesting or quoted-attribute awareness — adequate for the
/// flat markup inside DDG result titles/snippets).
fn strip_tags(html: &str) -> String {
    let mut out = String::new();
    let mut in_tag = false;
    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            '>' => in_tag = false,
            _ if !in_tag => out.push(ch),
            _ => {}
        }
    }
    // Decode common entities. `&amp;` is decoded LAST so that e.g.
    // "&amp;lt;" yields the literal text "&lt;" rather than "<"
    // (decoding it first would double-decode).
    out.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#x27;", "'")
        .replace("&amp;", "&")
}