experience-mine: split oversized sessions at compaction boundaries

Claude Code doesn't create new session files on context compaction —
a single UUID can accumulate 170+ conversations, producing 400MB+
JSONL files that generate 1.3M token prompts.

Split at compaction markers ("This session is being continued..."):
- extract_conversation made pub, split_on_compaction splits messages
- experience_mine takes optional segment index
- daemon watcher parses files, spawns per-segment jobs (.0, .1, .2)
- seg_cache memoizes segment counts across ticks
- per-segment dedup keys; whole-file key when all segments complete
- 150K token guard skips any remaining oversized segments
- char-boundary-safe truncation in enrich.rs and fact_mine.rs

Backwards compatible: unsegmented calls still write content-hash
dedup keys, old whole-file mined keys still recognized.
This commit is contained in:
ProofOfConcept 2026-03-07 12:01:38 -05:00
parent 22a9fdabdb
commit 45335de220
4 changed files with 155 additions and 39 deletions

View file

@@ -85,7 +85,7 @@ pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> boo
/// Extract user/assistant messages with line numbers from a JSONL transcript.
/// (line_number, role, text, timestamp)
fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
let content = fs::read_to_string(jsonl_path)
.map_err(|e| format!("read {}: {}", jsonl_path, e))?;
@@ -135,6 +135,33 @@ fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String,
Ok(messages)
}
pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context";
/// Split extracted messages into segments at compaction boundaries.
/// Each segment represents one continuous conversation before context was compacted.
///
/// A boundary is a user message whose text begins with [`COMPACTION_MARKER`];
/// that message starts (and belongs to) the new segment. Empty input yields no segments.
pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Vec<Vec<(usize, String, String, String)>> {
    let mut segments: Vec<Vec<(usize, String, String, String)>> = Vec::new();
    let mut current: Vec<(usize, String, String, String)> = Vec::new();
    for msg in messages {
        // Tuple layout is (line_number, role, text, timestamp).
        let at_boundary = msg.1 == "user" && msg.2.starts_with(COMPACTION_MARKER);
        if at_boundary && !current.is_empty() {
            // Close out the previous segment; take() leaves `current` empty
            // so the continuation message below starts the new segment.
            segments.push(std::mem::take(&mut current));
        }
        // The continuation message itself is part of the new segment.
        current.push(msg);
    }
    // Flush the trailing segment, if any messages remain.
    if !current.is_empty() {
        segments.push(current);
    }
    segments
}
/// Format conversation messages for the prompt (truncating long messages).
fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
messages.iter()
@@ -259,9 +286,11 @@ pub fn journal_enrich(
}
/// Mine a conversation transcript for experiential moments not yet journaled.
/// If `segment` is Some, only process that compaction segment of the file.
pub fn experience_mine(
store: &mut Store,
jsonl_path: &str,
segment: Option<usize>,
) -> Result<usize, String> {
println!("Experience mining: {}", jsonl_path);
@@ -287,7 +316,18 @@ pub fn experience_mine(
return Ok(0);
}
let messages = extract_conversation(jsonl_path)?;
let all_messages = extract_conversation(jsonl_path)?;
// If segment is specified, extract just that segment; otherwise process all messages
let messages = match segment {
Some(idx) => {
let segments = split_on_compaction(all_messages);
segments.into_iter().nth(idx)
.ok_or_else(|| format!("segment {} out of range", idx))?
}
None => all_messages,
};
let conversation = format_conversation(&messages);
println!(" {} messages, {} chars", messages.len(), conversation.len());
@@ -327,7 +367,13 @@ pub fn experience_mine(
("{{KEYS}}", &keys_text),
("{{CONVERSATION}}", &conversation),
])?;
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
let est_tokens = prompt.len() / 4;
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), est_tokens);
if est_tokens > 150_000 {
println!(" Skipping: prompt too large ({} tokens > 150k limit)", est_tokens);
return Ok(0);
}
println!(" Calling Sonnet...");
let response = call_sonnet("experience-mine", &prompt)?;
@@ -389,24 +435,34 @@ pub fn experience_mine(
let _ = store.upsert_node(node);
count += 1;
let preview = if content.len() > 80 { &content[..77] } else { content };
let preview = if content.len() > 80 {
let end = content.floor_char_boundary(77);
&content[..end]
} else {
content
};
println!(" + [{}] {}...", ts, preview);
}
// Record this transcript as mined (even if count == 0, to prevent re-runs)
// Two keys: content hash (exact dedup) and filename (fast daemon reconcile)
// Record this transcript/segment as mined (even if count == 0, to prevent re-runs)
let fname_key = match segment {
Some(idx) => format!("{}.{}", transcript_filename_key(jsonl_path), idx),
None => transcript_filename_key(jsonl_path),
};
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
let mut dedup_node = new_node(&dedup_key, &dedup_content);
dedup_node.category = store::Category::Task;
dedup_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(dedup_node);
let fname_key = transcript_filename_key(jsonl_path);
let mut fname_node = new_node(&fname_key, &dedup_content);
fname_node.category = store::Category::Task;
fname_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(fname_node);
// For unsegmented calls, also write the content-hash key for backwards compat
if segment.is_none() {
let mut dedup_node = new_node(&dedup_key, &dedup_content);
dedup_node.category = store::Category::Task;
dedup_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(dedup_node);
}
if count > 0 {
println!(" Saved {} new journal entries.", count);
}