enrich: explicitly filter for text blocks in transcript extraction
Only extract content blocks with "type": "text". Previously, extraction relied on tool_use/tool_result blocks lacking a "text" field, which worked but was fragile. The code now explicitly checks the block type.
This commit is contained in:
parent
1c122ffd10
commit
80bdaab8ee
1 changed file with 4 additions and 2 deletions
|
|
@@ -97,8 +97,10 @@ fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String,
|
||||||
Some(serde_json::Value::Array(arr)) => {
|
Some(serde_json::Value::Array(arr)) => {
|
||||||
arr.iter()
|
arr.iter()
|
||||||
.filter_map(|c| {
|
.filter_map(|c| {
|
||||||
if let Some(t) = c.get("text").and_then(|v| v.as_str()) {
|
// Only extract text blocks; skip tool_use, tool_result, thinking, etc.
|
||||||
Some(t.to_string())
|
let is_text = c.get("type").and_then(|v| v.as_str()) == Some("text");
|
||||||
|
if is_text {
|
||||||
|
c.get("text").and_then(|v| v.as_str()).map(|s| s.to_string())
|
||||||
} else {
|
} else {
|
||||||
c.as_str().map(|s| s.to_string())
|
c.as_str().map(|s| s.to_string())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue