From c303653dd0ac726fb6a83fd4249305b7d8f714fe Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 1 May 2026 17:58:43 -0400 Subject: [PATCH] telegram: bridge photos via [image: <path>] markers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an incoming update has a photo array, pick the largest size, resolve the file_id via getFile, and download to ~/.consciousness/channels/telegram.logs/media/<file_id>.<ext>. The message line surfaced to the channel is [image: /abs/path/to/file.jpg] so a multimodal Read on the path works end-to-end. On download failure we still surface the caption with an [image: download failed: ...] marker so context isn't lost. Other media types (voice/video/sticker/etc.) log a one-line "skipping" notice — easy hook to extend later. The media/ dir was already being created at startup; this fills in the rest. Co-Authored-By: Proof of Concept --- channels/telegram/src/main.rs | 118 +++++++++++++++++++++++++++++++--- 1 file changed, 108 insertions(+), 10 deletions(-) diff --git a/channels/telegram/src/main.rs b/channels/telegram/src/main.rs index d3753f7..837a044 100644 --- a/channels/telegram/src/main.rs +++ b/channels/telegram/src/main.rs @@ -181,6 +181,8 @@ struct TelegramMessage { chat_id: i64, sender: String, text: String, + /// Absolute path to a downloaded media file (photo, etc.), if any. + media_path: Option<String>, } /// Fetch and parse pending updates from Telegram via long polling. @@ -206,19 +208,107 @@ async fn get_updates( let sender = msg["from"]["first_name"].as_str().unwrap_or("unknown").to_string(); let chat_id = msg["chat"]["id"].as_i64().unwrap_or(0); - if let Some(text) = msg["text"].as_str() { - messages.push(TelegramMessage { - update_id, - chat_id, - sender, - text: text.to_string(), - }); - } + // Photo: array of PhotoSize, largest is last. Download largest, + // surface message with [image: <path>] marker so the multimodal + // model can Read the image. 
+ let (text, media_path) = if let Some(sizes) = msg["photo"].as_array() { + let caption = msg["caption"].as_str().unwrap_or("").to_string(); + let largest = sizes.last(); + let file_id = largest + .and_then(|s| s["file_id"].as_str()) + .unwrap_or(""); + if file_id.is_empty() { + error!("telegram photo: missing file_id in update {update_id}"); + (caption, None) + } else { + match download_telegram_file(client, token, file_id).await { + Ok(path) => (caption, Some(path)), + Err(e) => { + error!("telegram photo download failed (file_id={file_id}): {e}"); + // Surface what we have: caption plus a marker that + // a photo was sent but couldn't be fetched. + let marker = format!("[image: download failed: {e}]"); + let combined = if caption.is_empty() { + marker + } else { + format!("{marker}\n{caption}") + }; + (combined, None) + } + } + } + } else if let Some(text) = msg["text"].as_str() { + (text.to_string(), None) + } else { + // Other media types (voice, video, sticker, etc.) — skip for now, + // but log so we can extend later. + let kind = ["voice", "video", "sticker", "document", "audio", "animation"] + .iter() + .find(|k| !msg[**k].is_null()) + .copied() + .unwrap_or("unknown"); + info!("telegram: skipping non-text/photo message (kind={kind}, update_id={update_id})"); + continue; + }; + + messages.push(TelegramMessage { + update_id, + chat_id, + sender, + text, + media_path, + }); } } Ok(messages) } +/// Resolve a Telegram file_id to a downloadable URL path via getFile. 
+async fn get_file_path(
+    client: &HttpClient,
+    token: &str,
+    file_id: &str,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let url = format!(
+        "https://api.telegram.org/bot{}/getFile?file_id={}",
+        token, file_id,
+    );
+    let response = client.get(&url).await?;
+    let body = response.text().await?;
+    let resp: serde_json::Value = serde_json::from_str(&body)
+        .map_err(|e| format!("getFile JSON parse error: {e}"))?;
+    if !resp["ok"].as_bool().unwrap_or(false) {
+        return Err(format!("getFile failed: {}", resp["description"].as_str().unwrap_or("?")).into());
+    }
+    let file_path = resp["result"]["file_path"].as_str()
+        .ok_or("getFile: missing result.file_path")?;
+    Ok(file_path.to_string())
+}
+
+/// Download a Telegram file by file_id into `log_dir()/media`, saved as `<file_id>.<ext>` (`dat` when the remote path has no extension).
+/// Returns the local path on success (absolute if `log_dir()` is); getFile, HTTP-status and filesystem errors are returned to the caller.
+async fn download_telegram_file(
+    client: &HttpClient,
+    token: &str,
+    file_id: &str,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let file_path = get_file_path(client, token, file_id).await?;
+    let url = format!("https://api.telegram.org/file/bot{}/{}", token, file_path);
+    let response = client.get(&url).await?;
+    let status = response.status();
+    if !status.is_success() {
+        return Err(format!("file download failed: {status}").into());
+    }
+    let bytes = response.bytes().await?;
+    // Take the extension from the final path component only, so a dot-less name never ends up as the "extension".
+    let ext = std::path::Path::new(&file_path).extension().and_then(|e| e.to_str()).filter(|e| !e.is_empty()).unwrap_or("dat");
+    let media_dir = log_dir().join("media");
+    std::fs::create_dir_all(&media_dir)?;
+    let dest = media_dir.join(format!("{file_id}.{ext}"));
+    std::fs::write(&dest, &bytes)?;
+    Ok(dest.to_string_lossy().to_string())
+}
+
 /// Send a text message to a Telegram chat. async fn send_message( client: &HttpClient, @@ -369,11 +459,19 @@ async fn poll_once( let sender_lower = msg.sender.to_lowercase(); let channel = format!("telegram.{}", sender_lower); - channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &msg.text); + // If the message has media, prepend an [image: <path>] marker + // so the multimodal model can Read the file directly. 
+ let body = match &msg.media_path { + Some(path) if msg.text.is_empty() => format!("[image: {path}]"), + Some(path) => format!("[image: {path}]\n{}", msg.text), + None => msg.text.clone(), + }; + + channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &body); let mut s = state.borrow_mut(); s.config.chat_ids.insert(sender_lower, msg.chat_id); - let line = format!("[{}] {}", msg.sender, msg.text); + let line = format!("[{}] {}", msg.sender, body); s.push_message(line, 2, &channel); }