// Shared JSONL transcript parsing // // Three agents (enrich, fact_mine, knowledge) all parse Claude Code JSONL // transcripts. This module provides the shared core: parse each line, extract // message type, text content from string-or-array blocks, timestamp, and // user type. Callers filter and transform as needed. use std::fs; use std::path::Path; /// A single message extracted from a JSONL transcript. pub struct TranscriptMessage { /// 1-based line number in the JSONL file. pub line: usize, /// Raw role: "user" or "assistant". pub role: String, /// Extracted text content (trimmed, blocks joined with newlines). pub text: String, /// ISO timestamp from the message, or empty string. pub timestamp: String, /// For user messages: "external", "internal", etc. None for assistant. pub user_type: Option, } /// Parse a JSONL transcript into structured messages. /// /// Extracts all user and assistant messages. Content blocks of type "text" /// are joined; tool_use, tool_result, thinking blocks are skipped. /// System-reminder blocks are filtered out. pub fn parse_transcript(path: &Path) -> Result, String> { let content = fs::read_to_string(path) .map_err(|e| format!("read {}: {}", path.display(), e))?; let mut messages = Vec::new(); for (i, line) in content.lines().enumerate() { let Ok(obj) = serde_json::from_str::(line) else { continue }; let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or(""); if msg_type != "user" && msg_type != "assistant" { continue; } let timestamp = obj.get("timestamp") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); let user_type = obj.get("userType") .and_then(|v| v.as_str()) .map(|s| s.to_string()); let Some(text) = extract_text_content(&obj) else { continue }; let text = text.trim().to_string(); if text.is_empty() { continue; } messages.push(TranscriptMessage { line: i + 1, role: msg_type.to_string(), text, timestamp, user_type, }); } Ok(messages) } /// Extract text content from a JSONL message object. /// /// Handles both string content and array-of-blocks content (filtering to /// type="text" blocks only). Strips `` tags. fn extract_text_content(obj: &serde_json::Value) -> Option { let msg = obj.get("message").unwrap_or(obj); let content = msg.get("content")?; let text = match content { serde_json::Value::String(s) => s.clone(), serde_json::Value::Array(arr) => { let texts: Vec<&str> = arr.iter() .filter_map(|block| { let block_type = block.get("type").and_then(|v| v.as_str())?; if block_type != "text" { return None; } let t = block.get("text").and_then(|v| v.as_str())?; // Skip system-reminder blocks entirely if t.contains("") { return None; } Some(t) }) .collect(); if texts.is_empty() { return None; } texts.join("\n") } _ => return None, }; Some(text) }