remove more dead transcript mining code

Delete subconscious/transcript.rs (94 lines), is_segment_mined, mark_segment_mined — all orphaned by the extraction pipeline removal.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-02 21:45:56 -04:00 · 2026-04-02 21:45:56 -04:00 · dae0cc8191
commit dae0cc8191
parent 72d967edbf
3 changed files with 0 additions and 108 deletions
--- a/src/hippocampus/store/persist.rs
+++ b/src/hippocampus/store/persist.rs
@ -506,19 +506,6 @@ impl Store {
        Ok(())
    }
    /// Report whether `agent` is recorded against the given transcript segment.
    ///
    /// Looks up the `(transcript_id, segment_index)` key in
    /// `transcript_progress` and checks whether the stored agent collection
    /// contains `agent`. Returns `false` when the key has no entry at all.
    pub fn is_segment_mined(&self, transcript_id: &str, segment_index: u32, agent: &str) -> bool {
        // The map is keyed by an owned (String, u32) tuple, so build one for the lookup.
        let key = (transcript_id.to_string(), segment_index);
        match self.transcript_progress.get(&key) {
            Some(agents) => agents.contains(agent),
            None => false,
        }
    }
    /// Record that `agent` has successfully processed a transcript segment.
    ///
    /// Builds one segment record via `new_transcript_segment` and hands it to
    /// `append_transcript_progress` as a one-element slice. The result of that
    /// call (including any error string) is returned unchanged.
    pub fn mark_segment_mined(&mut self, transcript_id: &str, segment_index: u32, agent: &str) -> Result<(), String> {
        let batch = [new_transcript_segment(transcript_id, segment_index, agent)];
        self.append_transcript_progress(&batch)
    }
    /// Migrate old stub-node transcript markers into the new progress log.
    /// Reads _observed-transcripts-f-*, _mined-transcripts#f-*, and _facts-* keys,
    /// extracts transcript_id and segment_index, writes to transcript-progress.capnp,
--- a/src/subconscious/mod.rs
+++ b/src/subconscious/mod.rs
@ -18,7 +18,6 @@
 pub mod subconscious;
 pub mod hook;
 pub mod transcript;
 pub mod api;
 pub mod llm;
 pub mod prompts;
--- a/src/subconscious/transcript.rs
+++ b/src/subconscious/transcript.rs
@ -1,94 +0,0 @@
 // Shared JSONL transcript parsing
 //
 // Three agents (enrich, fact_mine, knowledge) all parse Claude Code JSONL
 // transcripts. This module provides the shared core: parse each line, extract
 // message type, text content from string-or-array blocks, timestamp, and
 // user type. Callers filter and transform as needed.
 use std::fs;
 use std::path::Path;
 /// A single message extracted from a JSONL transcript.
 ///
 /// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log,
 /// duplicate and compare messages; every field is a plain owned value.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct TranscriptMessage {
    /// 1-based line number of the record in the JSONL file.
    pub line: usize,
    /// Raw role, copied from the record's `type` field: "user" or "assistant".
    pub role: String,
    /// Extracted text content (trimmed, blocks joined with newlines).
    pub text: String,
    /// The record's `timestamp` string, or an empty string when absent.
    /// Presumably ISO-formatted; the parser does not validate it.
    pub timestamp: String,
    /// The record's `userType` string when present ("external", "internal",
    /// etc.), otherwise `None`. Expected to be `None` for assistant messages,
    /// though the parser does not enforce that.
    pub user_type: Option<String>,
 }
 /// Read a JSONL transcript file and return its user and assistant messages.
 ///
 /// Each line is parsed as JSON independently. A line is skipped when it is
 /// not valid JSON, when its `type` is neither "user" nor "assistant", when
 /// `extract_text_content` yields nothing for it, or when the extracted text
 /// is empty after trimming. Surviving records keep their 1-based line
 /// number, their `timestamp` (empty string if missing) and their `userType`
 /// (if present).
 ///
 /// # Errors
 ///
 /// Returns `"read <path>: <io error>"` when the file cannot be read.
 pub fn parse_transcript(path: &Path) -> Result<Vec<TranscriptMessage>, String> {
    let raw = match fs::read_to_string(path) {
        Ok(raw) => raw,
        Err(e) => return Err(format!("read {}: {}", path.display(), e)),
    };
    let parsed: Vec<TranscriptMessage> = raw
        .lines()
        .enumerate()
        .filter_map(|(idx, raw_line)| {
            // Unparseable lines are ignored rather than treated as errors.
            let record = serde_json::from_str::<serde_json::Value>(raw_line).ok()?;
            // Only user/assistant records carry conversation text.
            let role = record
                .get("type")
                .and_then(|v| v.as_str())
                .filter(|kind| matches!(*kind, "user" | "assistant"))?;
            let body = extract_text_content(&record)?.trim().to_string();
            if body.is_empty() {
                return None;
            }
            let stamp = record
                .get("timestamp")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            let sender_kind = record
                .get("userType")
                .and_then(|v| v.as_str())
                .map(str::to_owned);
            Some(TranscriptMessage {
                line: idx + 1,
                role: role.to_string(),
                text: body,
                timestamp: stamp,
                user_type: sender_kind,
            })
        })
        .collect();
    Ok(parsed)
 }
 /// Pull the textual content out of one JSONL message object.
 ///
 /// The `content` field is read from `obj["message"]` when that key exists,
 /// otherwise from `obj` itself.
 ///
 /// * String content is returned as-is.
 /// * Array content keeps only blocks whose `type` is "text" and that have a
 ///   string `text` field; any such block containing `<system-reminder>` is
 ///   dropped whole. The remaining texts are joined with newlines.
 ///
 /// Returns `None` when `content` is missing, is neither a string nor an
 /// array, or when no array block survives the filtering.
 fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
    let container = match obj.get("message") {
        Some(inner) => inner,
        None => obj,
    };
    match container.get("content")? {
        serde_json::Value::String(whole) => Some(whole.clone()),
        serde_json::Value::Array(blocks) => {
            let mut kept: Vec<&str> = Vec::new();
            for block in blocks {
                // Non-text blocks (tool_use, tool_result, thinking, ...) are ignored.
                if block.get("type").and_then(|v| v.as_str()) != Some("text") {
                    continue;
                }
                let Some(body) = block.get("text").and_then(|v| v.as_str()) else {
                    continue;
                };
                // Skip system-reminder blocks entirely
                if body.contains("<system-reminder>") {
                    continue;
                }
                kept.push(body);
            }
            if kept.is_empty() {
                None
            } else {
                Some(kept.join("\n"))
            }
        }
        _ => None,
    }
 }