Merge PR #4: use html2md on web_fetch (fixes #3) (spqrz)

web_fetch was returning raw HTML, which is verbose and hard for
the agent to consume. Add html2md dependency and convert HTML to
Markdown before truncation. Much cleaner output for normal pages;
no downsides.

Co-Authored-By: spqrz <spqrz386@gmail.com>
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-18 12:50:54 -04:00
commit 4245b8bdb3
2 changed files with 5 additions and 1 deletions

View file

@@ -20,6 +20,7 @@ edition.workspace = true
[dependencies] [dependencies]
anyhow = "1" anyhow = "1"
html2md = "0.2"
crossterm = { version = "0.29", features = ["event-stream", "bracketed-paste", "osc52"] } crossterm = { version = "0.29", features = ["event-stream", "bracketed-paste", "osc52"] }
clap = { version = "4", features = ["derive"] } clap = { version = "4", features = ["derive"] }
figment = { version = "0.10", features = ["env"] } figment = { version = "0.10", features = ["env"] }

View file

@@ -3,6 +3,7 @@ use std::sync::Arc;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use serde::Deserialize; use serde::Deserialize;
use html2md::parse_html;
pub fn tools() -> [super::Tool; 2] { pub fn tools() -> [super::Tool; 2] {
[ [
@@ -42,7 +43,9 @@ async fn web_fetch(args: &serde_json::Value) -> Result<String> {
let body = response.text().await let body = response.text().await
.with_context(|| format!("failed to read body from {}", a.url))?; .with_context(|| format!("failed to read body from {}", a.url))?;
Ok(super::truncate_output(body, 30000)) // Convert HTML to Markdown, then truncate
let markdown = parse_html(&body);
Ok(super::truncate_output(markdown, 30000))
} }
// ── Search ────────────────────────────────────────────────────── // ── Search ──────────────────────────────────────────────────────