extract shared transcript parser and similarity matching helpers
- New agents/transcript.rs: shared JSONL parsing for enrich, fact_mine, and knowledge (previously three separate implementations, ~150 duplicated lines)
- New best_match() and section_children() helpers in neuro/rewrite.rs (previously a duplicated find-best-by-similarity loop plus section collection)
- Net -153 lines
This commit is contained in:
parent
7c491e92eb
commit
92f3ba5acf
6 changed files with 166 additions and 225 deletions
|
|
@ -72,53 +72,11 @@ pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> boo
|
|||
/// Extract user/assistant messages with line numbers from a JSONL transcript.
|
||||
/// (line_number, role, text, timestamp)
|
||||
pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
|
||||
// NOTE(review): this is a rendered diff hunk (-72,53 +72,11) without +/- markers.
// The inline parser from here through the first `Ok(messages)` appears to be the
// removed side; the short delegating body after it appears to be the added side.
// Read failures are reported as a String error that includes the path.
let content = fs::read_to_string(jsonl_path)
|
||||
.map_err(|e| format!("read {}: {}", jsonl_path, e))?;
|
||||
|
||||
let mut messages = Vec::new();
|
||||
for (i, line) in content.lines().enumerate() {
|
||||
// Lines that are not valid JSON are skipped silently rather than failing the parse.
let obj: serde_json::Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
// Only records whose "type" is "user" or "assistant" are kept.
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if msg_type != "user" && msg_type != "assistant" { continue; }
|
||||
|
||||
// A missing or non-string "timestamp" becomes the empty string.
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
// Use the nested "message" object when present; otherwise read "content"
// from the top-level record itself.
let msg = obj.get("message").unwrap_or(&obj);
|
||||
let content = msg.get("content");
|
||||
|
||||
// "content" may be a plain string or an array of blocks; any other shape
// (including absent) skips the record.
let text = match content {
|
||||
Some(serde_json::Value::String(s)) => s.clone(),
|
||||
Some(serde_json::Value::Array(arr)) => {
|
||||
arr.iter()
|
||||
.filter_map(|c| {
|
||||
// Only extract text blocks; skip tool_use, tool_result, thinking, etc.
// Array elements that are bare JSON strings are also kept (else branch below).
|
||||
let is_text = c.get("type").and_then(|v| v.as_str()) == Some("text");
|
||||
if is_text {
|
||||
c.get("text").and_then(|v| v.as_str()).map(|s| s.to_string())
|
||||
} else {
|
||||
c.as_str().map(|s| s.to_string())
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Messages that are empty after trimming whitespace are dropped.
let text = text.trim().to_string();
|
||||
if text.is_empty() { continue; }
|
||||
|
||||
// `i` is the 0-based index from enumerate(); stored line numbers are 1-based.
messages.push((i + 1, msg_type.to_string(), text, timestamp));
|
||||
}
|
||||
|
||||
Ok(messages)
|
||||
// Delegating body: parse via the shared `super::transcript::parse_transcript`
// and flatten each parsed message into the (line, role, text, timestamp) tuple
// this function returns.
let path = std::path::Path::new(jsonl_path);
|
||||
let messages = super::transcript::parse_transcript(path)?;
|
||||
Ok(messages.into_iter()
|
||||
.map(|m| (m.line, m.role, m.text, m.timestamp))
|
||||
.collect())
|
||||
}
|
||||
|
||||
// NOTE(review): presumably the text that identifies a message produced when a session
// was resumed after context compaction — usage is not visible here; confirm against callers.
pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context";
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue