enrich: explicitly filter for text blocks in transcript extraction
Only extract content blocks whose "type" is "text". Previously, extraction relied on tool_use/tool_result blocks lacking a "text" field, which worked but was fragile. The code now explicitly checks the block type.
This commit is contained in:
parent
1c122ffd10
commit
80bdaab8ee
1 changed file with 4 additions and 2 deletions
|
|
@@ -97,8 +97,10 @@ fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String,
|
|||
Some(serde_json::Value::Array(arr)) => {
|
||||
arr.iter()
|
||||
.filter_map(|c| {
|
||||
if let Some(t) = c.get("text").and_then(|v| v.as_str()) {
|
||||
Some(t.to_string())
|
||||
// Only extract text blocks; skip tool_use, tool_result, thinking, etc.
|
||||
let is_text = c.get("type").and_then(|v| v.as_str()) == Some("text");
|
||||
if is_text {
|
||||
c.get("text").and_then(|v| v.as_str()).map(|s| s.to_string())
|
||||
} else {
|
||||
c.as_str().map(|s| s.to_string())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue