From e17c46edc14a1e051f859ff4af8481df66db62e6 Mon Sep 17 00:00:00 2001
From: spqrz
Date: Sun, 12 Apr 2026 11:12:12 +0100
Subject: [PATCH] use html2md on web_fetch (fixes #3)

---
 Cargo.toml             | 1 +
 src/agent/tools/web.rs | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 2c5246f..9186f64 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,6 +20,7 @@ edition.workspace = true
 
 [dependencies]
 anyhow = "1"
+html2md = "0.2"
 crossterm = { version = "0.29", features = ["event-stream", "bracketed-paste", "osc52"] }
 clap = { version = "4", features = ["derive"] }
 figment = { version = "0.10", features = ["env"] }
diff --git a/src/agent/tools/web.rs b/src/agent/tools/web.rs
index 7ad7fc9..15d011e 100644
--- a/src/agent/tools/web.rs
+++ b/src/agent/tools/web.rs
@@ -3,6 +3,7 @@ use std::sync::Arc;
 
 use anyhow::{Context, Result};
 use serde::Deserialize;
+use html2md::parse_html;
 
 pub fn tools() -> [super::Tool; 2] {
     [
@@ -42,7 +43,9 @@ async fn web_fetch(args: &serde_json::Value) -> Result {
     let body = response.text().await
         .with_context(|| format!("failed to read body from {}", a.url))?;
 
-    Ok(super::truncate_output(body, 30000))
+    // Convert HTML to Markdown, then truncate
+    let markdown = parse_html(&body);
+    Ok(super::truncate_output(markdown, 30000))
 }
 
 // ── Search ──────────────────────────────────────────────────────