From 72d967edbf45afb18491d26bc153d7a50d1c8226 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Apr 2026 21:42:03 -0400 Subject: [PATCH] remove dead transcript mining pipeline Delete enrich.rs (conversation extraction), select_conversation_fragments, mark_observation_done, format_segment, and the {{conversations}} placeholder. Transcript processing is handled by observe/journal agents now. Co-Authored-By: Proof of Concept --- src/lib.rs | 2 +- src/subconscious/defs.rs | 12 --- src/subconscious/enrich.rs | 40 ---------- src/subconscious/knowledge.rs | 145 +--------------------------------- src/subconscious/mod.rs | 1 - 5 files changed, 2 insertions(+), 198 deletions(-) delete mode 100644 src/subconscious/enrich.rs diff --git a/src/lib.rs b/src/lib.rs index 67ea608..8cedec5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,7 +67,7 @@ pub use hippocampus::query::parser as query_parser; pub use subconscious as agents; pub use subconscious::{ llm, audit, consolidate, knowledge, - enrich, digest, daemon, + digest, daemon, }; // Backward compat: memory_search moved from hippocampus to subconscious::hook pub use subconscious::hook as memory_search; diff --git a/src/subconscious/defs.rs b/src/subconscious/defs.rs index 6bd34a3..49b4854 100644 --- a/src/subconscious/defs.rs +++ b/src/subconscious/defs.rs @@ -333,18 +333,6 @@ fn resolve( Some(Resolved { text, keys: result_keys }) } - "conversations" => { - let fragments = super::knowledge::select_conversation_fragments(count); - let fragment_ids: Vec = fragments.iter() - .map(|(id, _)| id.clone()) - .collect(); - let text = fragments.iter() - .map(|(id, text)| format!("### Session {}\n\n{}", id, text)) - .collect::>() - .join("\n\n---\n\n"); - Some(Resolved { text, keys: fragment_ids }) - } - "siblings" | "neighborhood" => { let mut out = String::new(); let mut all_keys: Vec = Vec::new(); diff --git a/src/subconscious/enrich.rs b/src/subconscious/enrich.rs deleted file mode 100644 index b44a71a..0000000 --- a/src/subconscious/enrich.rs +++ /dev/null @@ -1,40 +0,0 @@ -// Conversation extraction from JSONL transcripts -// -// extract_conversation — parse JSONL transcript to messages -// split_on_compaction — split messages at compaction boundaries - -/// Extract conversation messages from a JSONL transcript file. -/// Returns (line_number, role, text, timestamp) tuples. -pub fn extract_conversation(jsonl_path: &str) -> Result, String> { - let path = std::path::Path::new(jsonl_path); - let messages = super::transcript::parse_transcript(path)?; - Ok(messages.into_iter() - .map(|m| (m.line, m.role, m.text, m.timestamp)) - .collect()) -} - -pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context"; - -/// Split extracted messages into segments at compaction boundaries. -/// Each segment represents one continuous conversation before context was compacted. -pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Vec> { - let mut segments: Vec> = Vec::new(); - let mut current = Vec::new(); - - for msg in messages { - if msg.1 == "user" && msg.2.starts_with(COMPACTION_MARKER) { - if !current.is_empty() { - segments.push(current); - current = Vec::new(); - } - current.push(msg); - } else { - current.push(msg); - } - } - if !current.is_empty() { - segments.push(current); - } - - segments -} diff --git a/src/subconscious/knowledge.rs b/src/subconscious/knowledge.rs index e82fa4d..8b093ba 100644 --- a/src/subconscious/knowledge.rs +++ b/src/subconscious/knowledge.rs @@ -104,11 +104,6 @@ pub fn run_and_apply_excluded( ) -> Result<(), String> { let result = run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, exclude)?; - // Mark conversation segments as mined after successful processing - if agent_name == "observation" { - mark_observation_done(&result.node_keys); - } - Ok(()) } @@ -397,142 +392,4 @@ fn run_one_agent_inner( } // --------------------------------------------------------------------------- -// Conversation fragment selection -// --------------------------------------------------------------------------- - -/// Select conversation fragments (per-segment) for the observation extractor. -/// Uses the transcript-progress.capnp log for dedup — no stub nodes. -/// Does NOT pre-mark segments; caller must call mark_observation_done() after success. -pub fn select_conversation_fragments(n: usize) -> Vec<(String, String)> { - let projects = crate::config::get().projects_dir.clone(); - if !projects.exists() { return Vec::new(); } - - let store = match crate::store::Store::load() { - Ok(s) => s, - Err(_) => return Vec::new(), - }; - - let mut jsonl_files: Vec = Vec::new(); - if let Ok(dirs) = fs::read_dir(&projects) { - for dir in dirs.filter_map(|e| e.ok()) { - if !dir.path().is_dir() { continue; } - if let Ok(files) = fs::read_dir(dir.path()) { - for f in files.filter_map(|e| e.ok()) { - let p = f.path(); - if p.extension().map(|x| x == "jsonl").unwrap_or(false) - && let Ok(meta) = p.metadata() - && meta.len() > 50_000 { - jsonl_files.push(p); - } - } - } - } - } - - // Collect unmined segments across all transcripts - let mut candidates: Vec<(String, String)> = Vec::new(); - for path in &jsonl_files { - let path_str = path.to_string_lossy(); - let messages = match super::enrich::extract_conversation(&path_str) { - Ok(m) => m, - Err(_) => continue, - }; - let session_id = path.file_stem() - .map(|s| s.to_string_lossy().to_string()) - .unwrap_or_else(|| "unknown".into()); - - let segments = super::enrich::split_on_compaction(messages); - for (seg_idx, segment) in segments.into_iter().enumerate() { - if store.is_segment_mined(&session_id, seg_idx as u32, "observation") { - continue; - } - // Skip segments with too few assistant messages (rate limits, errors) - let assistant_msgs = segment.iter() - .filter(|(_, role, _, _)| role == "assistant") - .count(); - if assistant_msgs < 2 { - continue; - } - // Skip segments that are just rate limit errors - let has_rate_limit = segment.iter().any(|(_, _, text, _)| - text.contains("hit your limit") || text.contains("rate limit")); - if has_rate_limit && assistant_msgs < 3 { - continue; - } - let text = format_segment(&segment); - if text.len() < 500 { - continue; - } - const CHUNK_SIZE: usize = 50_000; - const OVERLAP: usize = 10_000; - if text.len() <= CHUNK_SIZE { - let id = format!("{}.{}", session_id, seg_idx); - candidates.push((id, text)); - } else { - // Split on line boundaries with overlap - let lines: Vec<&str> = text.lines().collect(); - let mut start_line = 0; - let mut chunk_idx = 0; - while start_line < lines.len() { - let mut end_line = start_line; - let mut size = 0; - while end_line < lines.len() && size < CHUNK_SIZE { - size += lines[end_line].len() + 1; - end_line += 1; - } - let chunk: String = lines[start_line..end_line].join("\n"); - let id = format!("{}.{}.{}", session_id, seg_idx, chunk_idx); - candidates.push((id, chunk)); - if end_line >= lines.len() { break; } - // Back up by overlap amount for next chunk - let mut overlap_size = 0; - let mut overlap_start = end_line; - while overlap_start > start_line && overlap_size < OVERLAP { - overlap_start -= 1; - overlap_size += lines[overlap_start].len() + 1; - } - start_line = overlap_start; - chunk_idx += 1; - } - } - } - - if candidates.len() >= n { break; } - } - - candidates.truncate(n); - candidates -} - -/// Mark observation segments as successfully mined (call AFTER the agent succeeds). -pub fn mark_observation_done(fragment_ids: &[String]) { - let mut store = match crate::store::Store::load() { - Ok(s) => s, - Err(_) => return, - }; - for id in fragment_ids { - if let Some((session_id, seg_str)) = id.rsplit_once('.') - && let Ok(seg) = seg_str.parse::() { - let _ = store.mark_segment_mined(session_id, seg, "observation"); - } - } -} - -/// Format a segment's messages into readable text for the observation agent. -fn format_segment(messages: &[(usize, String, String, String)]) -> String { - let cfg = crate::config::get(); - let mut fragments = Vec::new(); - - for (_, role, text, ts) in messages { - let min_len = if role == "user" { 5 } else { 10 }; - if text.len() <= min_len { continue; } - - let name = if role == "user" { &cfg.user_name } else { &cfg.assistant_name }; - if ts.is_empty() { - fragments.push(format!("**{}:** {}", name, text)); - } else { - fragments.push(format!("**{}** {}: {}", name, &ts[..ts.len().min(19)], text)); - } - } - fragments.join("\n\n") -} +// Conversation fragment selection removed — observe/journal agents handle transcripts. diff --git a/src/subconscious/mod.rs b/src/subconscious/mod.rs index c585266..a63cbbe 100644 --- a/src/subconscious/mod.rs +++ b/src/subconscious/mod.rs @@ -26,6 +26,5 @@ pub mod defs; pub mod audit; pub mod consolidate; pub mod knowledge; -pub mod enrich; pub mod digest; pub mod daemon;