2026-03-17 00:54:12 -04:00
|
|
|
// Conversation extraction from JSONL transcripts
|
2026-03-03 17:18:18 -05:00
|
|
|
//
|
2026-03-17 00:54:12 -04:00
|
|
|
// extract_conversation — parse JSONL transcript to messages
|
|
|
|
|
// split_on_compaction — split messages at compaction boundaries
|
2026-03-12 18:03:26 -04:00
|
|
|
|
2026-03-17 00:54:12 -04:00
|
|
|
/// Extract conversation messages from a JSONL transcript file.
|
|
|
|
|
/// Returns (line_number, role, text, timestamp) tuples.
|
2026-03-07 12:01:38 -05:00
|
|
|
pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
|
2026-03-08 21:42:53 -04:00
|
|
|
let path = std::path::Path::new(jsonl_path);
|
|
|
|
|
let messages = super::transcript::parse_transcript(path)?;
|
|
|
|
|
Ok(messages.into_iter()
|
|
|
|
|
.map(|m| (m.line, m.role, m.text, m.timestamp))
|
|
|
|
|
.collect())
|
2026-03-03 17:18:18 -05:00
|
|
|
}
|
|
|
|
|
|
2026-03-07 12:01:38 -05:00
|
|
|
/// Text prefix that identifies a compaction boundary message.
///
/// `split_on_compaction` starts a new segment at every message whose role is `"user"` and
/// whose text starts with this string.
pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context";
|
|
|
|
|
|
|
|
|
|
/// Split extracted messages into segments at compaction boundaries.
|
|
|
|
|
/// Each segment represents one continuous conversation before context was compacted.
|
|
|
|
|
pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Vec<Vec<(usize, String, String, String)>> {
|
|
|
|
|
let mut segments: Vec<Vec<(usize, String, String, String)>> = Vec::new();
|
|
|
|
|
let mut current = Vec::new();
|
|
|
|
|
|
|
|
|
|
for msg in messages {
|
|
|
|
|
if msg.1 == "user" && msg.2.starts_with(COMPACTION_MARKER) {
|
|
|
|
|
if !current.is_empty() {
|
|
|
|
|
segments.push(current);
|
|
|
|
|
current = Vec::new();
|
|
|
|
|
}
|
|
|
|
|
current.push(msg);
|
|
|
|
|
} else {
|
|
|
|
|
current.push(msg);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if !current.is_empty() {
|
|
|
|
|
segments.push(current);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
segments
|
|
|
|
|
}
|