enrich: explicitly filter for text blocks in transcript extraction

Only extract content blocks with "type": "text". Previously, extraction
relied on tool_use/tool_result blocks lacking a "text" field, which worked
but was fragile. The code now explicitly checks the block type.
This commit is contained in:
ProofOfConcept 2026-03-06 21:54:19 -05:00
parent 1c122ffd10
commit 80bdaab8ee

View file

@ -97,8 +97,10 @@ fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String,
Some(serde_json::Value::Array(arr)) => {
arr.iter()
.filter_map(|c| {
if let Some(t) = c.get("text").and_then(|v| v.as_str()) {
Some(t.to_string())
// Only extract text blocks; skip tool_use, tool_result, thinking, etc.
let is_text = c.get("type").and_then(|v| v.as_str()) == Some("text");
if is_text {
c.get("text").and_then(|v| v.as_str()).map(|s| s.to_string())
} else {
c.as_str().map(|s| s.to_string())
}