remove more dead transcript mining code
Delete subconscious/transcript.rs (94 lines), is_segment_mined, mark_segment_mined — all orphaned by the extraction pipeline removal. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
72d967edbf
commit
dae0cc8191
3 changed files with 0 additions and 108 deletions
|
|
@ -506,19 +506,6 @@ impl Store {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if a transcript segment has been processed by a given agent.
|
|
||||||
pub fn is_segment_mined(&self, transcript_id: &str, segment_index: u32, agent: &str) -> bool {
|
|
||||||
self.transcript_progress
|
|
||||||
.get(&(transcript_id.to_string(), segment_index))
|
|
||||||
.is_some_and(|agents| agents.contains(agent))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Mark a transcript segment as successfully processed.
|
|
||||||
pub fn mark_segment_mined(&mut self, transcript_id: &str, segment_index: u32, agent: &str) -> Result<(), String> {
|
|
||||||
let seg = new_transcript_segment(transcript_id, segment_index, agent);
|
|
||||||
self.append_transcript_progress(&[seg])
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Migrate old stub-node transcript markers into the new progress log.
|
/// Migrate old stub-node transcript markers into the new progress log.
|
||||||
/// Reads _observed-transcripts-f-*, _mined-transcripts#f-*, and _facts-* keys,
|
/// Reads _observed-transcripts-f-*, _mined-transcripts#f-*, and _facts-* keys,
|
||||||
/// extracts transcript_id and segment_index, writes to transcript-progress.capnp,
|
/// extracts transcript_id and segment_index, writes to transcript-progress.capnp,
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,6 @@
|
||||||
|
|
||||||
pub mod subconscious;
|
pub mod subconscious;
|
||||||
pub mod hook;
|
pub mod hook;
|
||||||
pub mod transcript;
|
|
||||||
pub mod api;
|
pub mod api;
|
||||||
pub mod llm;
|
pub mod llm;
|
||||||
pub mod prompts;
|
pub mod prompts;
|
||||||
|
|
|
||||||
|
|
@ -1,94 +0,0 @@
|
||||||
// Shared JSONL transcript parsing
|
|
||||||
//
|
|
||||||
// Three agents (enrich, fact_mine, knowledge) all parse Claude Code JSONL
|
|
||||||
// transcripts. This module provides the shared core: parse each line, extract
|
|
||||||
// message type, text content from string-or-array blocks, timestamp, and
|
|
||||||
// user type. Callers filter and transform as needed.
|
|
||||||
|
|
||||||
use std::fs;
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
/// A single message extracted from a JSONL transcript.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log,
/// duplicate and compare messages (e.g. in tests) without hand-written impls.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscriptMessage {
    /// 1-based line number of the record in the JSONL file.
    pub line: usize,
    /// Raw role taken from the record's `type` field: "user" or "assistant".
    pub role: String,
    /// Extracted text content (trimmed, blocks joined with newlines).
    pub text: String,
    /// The record's `timestamp` string, or an empty string when it is
    /// missing or not a string.
    pub timestamp: String,
    /// The record's `userType` string when present (e.g. "external",
    /// "internal"); `None` when the field is absent or not a string.
    pub user_type: Option<String>,
}
|
|
||||||
|
|
||||||
/// Parse a JSONL transcript into structured messages.
|
|
||||||
///
|
|
||||||
/// Extracts all user and assistant messages. Content blocks of type "text"
|
|
||||||
/// are joined; tool_use, tool_result, thinking blocks are skipped.
|
|
||||||
/// System-reminder blocks are filtered out.
|
|
||||||
pub fn parse_transcript(path: &Path) -> Result<Vec<TranscriptMessage>, String> {
|
|
||||||
let content = fs::read_to_string(path)
|
|
||||||
.map_err(|e| format!("read {}: {}", path.display(), e))?;
|
|
||||||
|
|
||||||
let mut messages = Vec::new();
|
|
||||||
for (i, line) in content.lines().enumerate() {
|
|
||||||
let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else { continue };
|
|
||||||
|
|
||||||
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
|
||||||
if msg_type != "user" && msg_type != "assistant" { continue; }
|
|
||||||
|
|
||||||
let timestamp = obj.get("timestamp")
|
|
||||||
.and_then(|v| v.as_str())
|
|
||||||
.unwrap_or("")
|
|
||||||
.to_string();
|
|
||||||
|
|
||||||
let user_type = obj.get("userType")
|
|
||||||
.and_then(|v| v.as_str())
|
|
||||||
.map(|s| s.to_string());
|
|
||||||
|
|
||||||
let Some(text) = extract_text_content(&obj) else { continue };
|
|
||||||
let text = text.trim().to_string();
|
|
||||||
if text.is_empty() { continue; }
|
|
||||||
|
|
||||||
messages.push(TranscriptMessage {
|
|
||||||
line: i + 1,
|
|
||||||
role: msg_type.to_string(),
|
|
||||||
text,
|
|
||||||
timestamp,
|
|
||||||
user_type,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(messages)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract text content from a JSONL message object.
|
|
||||||
///
|
|
||||||
/// Handles both string content and array-of-blocks content (filtering to
|
|
||||||
/// type="text" blocks only). Strips `<system-reminder>` tags.
|
|
||||||
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
|
|
||||||
let msg = obj.get("message").unwrap_or(obj);
|
|
||||||
let content = msg.get("content")?;
|
|
||||||
|
|
||||||
let text = match content {
|
|
||||||
serde_json::Value::String(s) => s.clone(),
|
|
||||||
serde_json::Value::Array(arr) => {
|
|
||||||
let texts: Vec<&str> = arr.iter()
|
|
||||||
.filter_map(|block| {
|
|
||||||
let block_type = block.get("type").and_then(|v| v.as_str())?;
|
|
||||||
if block_type != "text" { return None; }
|
|
||||||
let t = block.get("text").and_then(|v| v.as_str())?;
|
|
||||||
// Skip system-reminder blocks entirely
|
|
||||||
if t.contains("<system-reminder>") { return None; }
|
|
||||||
Some(t)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
if texts.is_empty() { return None; }
|
|
||||||
texts.join("\n")
|
|
||||||
}
|
|
||||||
_ => return None,
|
|
||||||
};
|
|
||||||
|
|
||||||
Some(text)
|
|
||||||
}
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue