Split conversation transcript parsing
This commit is contained in:
parent
741a421073
commit
54df271308
9 changed files with 614 additions and 348 deletions
113
src/conversation/claude.rs
Normal file
113
src/conversation/claude.rs
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
use serde_json::Value;
|
||||
|
||||
use super::{ConversationSource, TranscriptMessage, TranscriptRole};
|
||||
|
||||
/// Field-less marker type whose [`ConversationSource`] implementation delegates
/// to the free parsing helpers defined in this module.
pub struct ClaudeSource;
|
||||
|
||||
impl ConversationSource for ClaudeSource {
|
||||
fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
|
||||
parse_message(obj, offset)
|
||||
}
|
||||
|
||||
fn is_compaction(&self, obj: &Value) -> bool {
|
||||
is_compaction(obj)
|
||||
}
|
||||
|
||||
fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
|
||||
contains_bytes(obj_bytes, b"This session is being continued")
|
||||
}
|
||||
}
|
||||
|
||||
fn text_content(value: &Value) -> Option<String> {
|
||||
let text = match value {
|
||||
Value::String(s) => s.clone(),
|
||||
Value::Array(arr) => {
|
||||
arr.iter()
|
||||
.filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
|
||||
.filter_map(|b| b.get("text").and_then(|v| v.as_str()))
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
(!text.is_empty()).then_some(text)
|
||||
}
|
||||
|
||||
pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
|
||||
let role = match obj.get("type").and_then(|v| v.as_str()) {
|
||||
Some("user") => TranscriptRole::User,
|
||||
Some("assistant") => TranscriptRole::Assistant,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let msg = obj.get("message").unwrap_or(obj);
|
||||
let text = msg.get("content").and_then(text_content)?;
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(str::to_string);
|
||||
|
||||
Some(TranscriptMessage { role, text, timestamp, offset })
|
||||
}
|
||||
|
||||
pub(crate) fn is_compaction(obj: &Value) -> bool {
|
||||
obj.get("type").and_then(|v| v.as_str()) == Some("user")
|
||||
&& obj.get("message")
|
||||
.and_then(|m| m.get("content"))
|
||||
.and_then(|c| c.as_str())
|
||||
.is_some_and(|content| content.starts_with("This session is being continued"))
|
||||
}
|
||||
|
||||
/// Returns `true` when `needle` occurs as a contiguous run of bytes somewhere
/// in `haystack`.
///
/// An empty `needle` is considered to be contained in every haystack, which
/// mirrors `str::contains("")`. A `needle` longer than `haystack` never
/// matches.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics on a window size of zero, so the empty needle
    // has to be answered before reaching it.
    if needle.is_empty() {
        return true;
    }
    haystack
        .windows(needle.len())
        .any(|window| window == needle)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// String content is taken verbatim; array content keeps only the `text`
    /// blocks and joins them with a space.
    #[test]
    fn parses_string_and_array_content() {
        let user = json!({
            "type": "user",
            "timestamp": "2026-06-15T15:00:00.000Z",
            "message": { "content": "hello" }
        });
        let expected_user = TranscriptMessage {
            role: TranscriptRole::User,
            text: "hello".to_string(),
            timestamp: Some("2026-06-15T15:00:00.000Z".to_string()),
            offset: 7,
        };
        let parsed_user = parse_message(&user, 7).expect("user entry should parse");
        assert_eq!(parsed_user, expected_user);

        let assistant = json!({
            "type": "assistant",
            "timestamp": "2026-06-15T15:00:01.000Z",
            "message": {
                "content": [
                    { "type": "text", "text": "hi" },
                    { "type": "tool_use", "name": "ignored" },
                    { "type": "text", "text": "there" }
                ]
            }
        });
        let parsed_assistant =
            parse_message(&assistant, 9).expect("assistant entry should parse");
        assert_eq!(parsed_assistant.text, "hi there");
    }

    /// A user entry whose string content starts with the marker phrase is
    /// recognised as a compaction entry.
    #[test]
    fn detects_compaction_marker() {
        let entry = json!({
            "type": "user",
            "timestamp": "2026-06-15T15:00:01.000Z",
            "message": {
                "content": "This session is being continued from a previous conversation."
            }
        });

        assert!(is_compaction(&entry));
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue