extract shared transcript parser and similarity matching helpers
- New agents/transcript.rs: shared JSONL parsing for enrich, fact_mine, and knowledge (previously 3 separate implementations, ~150 lines duplicated).
- New best_match() and section_children() helpers in neuro/rewrite.rs (previously a duplicated find-best-by-similarity loop plus section collection).
- Net -153 lines.
This commit is contained in:
parent
7c491e92eb
commit
92f3ba5acf
6 changed files with 166 additions and 225 deletions
|
|
@ -333,84 +333,41 @@ fn get_graph_topology(store: &Store, graph: &Graph) -> String {
|
|||
}
|
||||
|
||||
/// Strip <system-reminder> blocks from text
|
||||
fn strip_system_tags(text: &str) -> String {
|
||||
let re = Regex::new(r"(?s)<system-reminder>.*?</system-reminder>").unwrap();
|
||||
re.replace_all(text, "").trim().to_string()
|
||||
}
|
||||
|
||||
/// Extract human-readable dialogue from a conversation JSONL
|
||||
fn extract_conversation_text(path: &Path, max_chars: usize) -> String {
|
||||
let Ok(content) = fs::read_to_string(path) else { return String::new() };
|
||||
let cfg = crate::config::get();
|
||||
let messages = super::transcript::parse_transcript(path).unwrap_or_default();
|
||||
let mut fragments = Vec::new();
|
||||
let mut total = 0;
|
||||
|
||||
for line in content.lines() {
|
||||
let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else { continue };
|
||||
for msg in &messages {
|
||||
let min_len = if msg.role == "user" { 5 } else { 10 };
|
||||
if msg.text.len() <= min_len { continue; }
|
||||
|
||||
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
|
||||
if msg_type == "user" && obj.get("userType").and_then(|v| v.as_str()) == Some("external") {
|
||||
if let Some(text) = extract_text_content(&obj) {
|
||||
let text = strip_system_tags(&text);
|
||||
if text.starts_with("[Request interrupted") { continue; }
|
||||
if text.len() > 5 {
|
||||
fragments.push(format!("**{}:** {}", crate::config::get().user_name, text));
|
||||
total += text.len();
|
||||
}
|
||||
}
|
||||
} else if msg_type == "assistant" {
|
||||
if let Some(text) = extract_text_content(&obj) {
|
||||
let text = strip_system_tags(&text);
|
||||
if text.len() > 10 {
|
||||
fragments.push(format!("**{}:** {}", crate::config::get().assistant_name, text));
|
||||
total += text.len();
|
||||
}
|
||||
}
|
||||
// Only include external user messages
|
||||
if msg.role == "user" {
|
||||
if msg.user_type.as_deref() != Some("external") { continue; }
|
||||
if msg.text.starts_with("[Request interrupted") { continue; }
|
||||
}
|
||||
|
||||
let role = if msg.role == "user" { &cfg.user_name } else { &cfg.assistant_name };
|
||||
fragments.push(format!("**{}:** {}", role, msg.text));
|
||||
total += msg.text.len();
|
||||
if total > max_chars { break; }
|
||||
}
|
||||
fragments.join("\n\n")
|
||||
}
|
||||
|
||||
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
|
||||
let msg = obj.get("message")?;
|
||||
let content = msg.get("content")?;
|
||||
if let Some(s) = content.as_str() {
|
||||
return Some(s.to_string());
|
||||
}
|
||||
if let Some(arr) = content.as_array() {
|
||||
let texts: Vec<&str> = arr.iter()
|
||||
.filter_map(|b| {
|
||||
if b.get("type")?.as_str()? == "text" {
|
||||
b.get("text")?.as_str()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
if !texts.is_empty() {
|
||||
return Some(texts.join("\n"));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Count short user messages (dialogue turns) in a JSONL
|
||||
fn count_dialogue_turns(path: &Path) -> usize {
|
||||
let Ok(content) = fs::read_to_string(path) else { return 0 };
|
||||
content.lines()
|
||||
.filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
|
||||
.filter(|obj| {
|
||||
obj.get("type").and_then(|v| v.as_str()) == Some("user")
|
||||
&& obj.get("userType").and_then(|v| v.as_str()) == Some("external")
|
||||
})
|
||||
.filter(|obj| {
|
||||
let text = extract_text_content(obj).unwrap_or_default();
|
||||
text.len() > 5 && text.len() < 500
|
||||
&& !text.starts_with("[Request interrupted")
|
||||
&& !text.starts_with("Implement the following")
|
||||
})
|
||||
let messages = super::transcript::parse_transcript(path).unwrap_or_default();
|
||||
messages.iter()
|
||||
.filter(|m| m.role == "user"
|
||||
&& m.user_type.as_deref() == Some("external")
|
||||
&& m.text.len() > 5
|
||||
&& m.text.len() < 500
|
||||
&& !m.text.starts_with("[Request interrupted")
|
||||
&& !m.text.starts_with("Implement the following"))
|
||||
.count()
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue