forked from kent/consciousness
Split conversation transcript parsing
This commit is contained in:
parent
f6a6e3066c
commit
78b4bbd5bb
9 changed files with 614 additions and 348 deletions
105
src/conversation/codex.rs
Normal file
105
src/conversation/codex.rs
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
use serde_json::Value;
|
||||
|
||||
use super::{ConversationSource, TranscriptMessage, TranscriptRole};
|
||||
|
||||
/// Stateless [`ConversationSource`] implementation for this module's transcript format.
///
/// All behavior lives in the module-level `parse_message` / `is_compaction`
/// functions; this type only adapts them to the trait.
/// NOTE(review): presumably targets Codex session logs, going by the file name — confirm.
pub struct CodexSource;
|
||||
|
||||
impl ConversationSource for CodexSource {
|
||||
fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
|
||||
parse_message(obj, offset)
|
||||
}
|
||||
|
||||
fn is_compaction(&self, obj: &Value) -> bool {
|
||||
is_compaction(obj)
|
||||
}
|
||||
|
||||
fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
|
||||
contains_bytes(obj_bytes, b"context_compacted")
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
|
||||
if obj.get("type").and_then(|v| v.as_str()) != Some("event_msg") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let payload = obj.get("payload")?;
|
||||
let (role, text) = match payload.get("type").and_then(|v| v.as_str()) {
|
||||
Some("user_message") => (
|
||||
TranscriptRole::User,
|
||||
payload.get("message").and_then(|v| v.as_str())?.to_string(),
|
||||
),
|
||||
Some("agent_message") => (
|
||||
TranscriptRole::Assistant,
|
||||
payload.get("message").and_then(|v| v.as_str())?.to_string(),
|
||||
),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(str::to_string);
|
||||
|
||||
Some(TranscriptMessage { role, text, timestamp, offset })
|
||||
}
|
||||
|
||||
pub(crate) fn is_compaction(obj: &Value) -> bool {
|
||||
obj.get("type").and_then(|v| v.as_str()) == Some("event_msg")
|
||||
&& obj.get("payload")
|
||||
.and_then(|p| p.get("type"))
|
||||
.and_then(|v| v.as_str()) == Some("context_compacted")
|
||||
}
|
||||
|
||||
/// Reports whether `needle` occurs as a contiguous run of bytes inside `haystack`.
///
/// An empty `needle` is considered present in every haystack, including an
/// empty one (the same convention as `str::contains("")`). A `needle` longer
/// than `haystack` is never found.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics when asked for zero-sized windows, so the
    // empty needle has to be answered before the scan.
    if needle.is_empty() {
        return true;
    }
    haystack
        .windows(needle.len())
        .any(|window| window == needle)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a record with the three top-level fields the parser inspects.
    fn record(timestamp: &str, kind: &str, payload: serde_json::Value) -> serde_json::Value {
        json!({
            "timestamp": timestamp,
            "type": kind,
            "payload": payload
        })
    }

    #[test]
    fn parses_event_messages_and_skips_noise() {
        let user_event = record(
            "2026-06-15T15:00:00.000Z",
            "event_msg",
            json!({ "type": "user_message", "message": "start here" }),
        );
        let assistant_event = record(
            "2026-06-15T15:00:01.000Z",
            "event_msg",
            json!({ "type": "agent_message", "message": "working" }),
        );
        // An event whose payload type is not a chat message.
        let task_event = record(
            "2026-06-15T15:00:02.000Z",
            "event_msg",
            json!({ "type": "task_started" }),
        );
        // A record whose top-level type is not `event_msg`.
        let response_item = record(
            "2026-06-15T15:00:03.000Z",
            "response_item",
            json!({ "type": "message", "role": "user" }),
        );

        let parsed_user = parse_message(&user_event, 1).unwrap();
        assert_eq!(parsed_user.role, TranscriptRole::User);

        let parsed_assistant = parse_message(&assistant_event, 2).unwrap();
        assert_eq!(parsed_assistant.text, "working");

        assert!(parse_message(&task_event, 3).is_none());
        assert!(parse_message(&response_item, 4).is_none());
    }

    #[test]
    fn detects_compaction_event() {
        let compaction_event = record(
            "2026-06-15T15:00:01.000Z",
            "event_msg",
            json!({ "type": "context_compacted" }),
        );

        assert!(is_compaction(&compaction_event));
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue