observation: chunk large transcripts, remove format_segment limit

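Large conversation segments are now split on line boundaries into chunks of roughly 50KB (50,000 bytes) with roughly 10KB (10,000 bytes) of overlap, instead of being truncated to 8000 chars. The old truncation was broken anyway: format_segment checked the limit only after appending a message, so it stopped after exceeding the limit rather than before. Each chunk gets its own candidate ID (session.segment.chunk; segments that fit in a single chunk keep the plain session.segment ID) for independent mining and dedup. format_segment is simplified: it no longer takes a size limit, and it now adds timestamps to its output so the observation agent can cross-reference with journal entries.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>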
2026-03-16 20:52:20 -04:00 · 2026-03-16 20:52:20 -04:00 · 15d4bfa01f
commit 15d4bfa01f
parent 03310dafa4
1 changed files with 40 additions and 8 deletions
--- a/poc-memory/src/agents/knowledge.rs
+++ b/poc-memory/src/agents/knowledge.rs
@ -773,10 +773,41 @@ pub fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
            if has_rate_limit && assistant_msgs < 3 {
                continue;
            }
-            let text = format_segment(&segment, 8000);
+            let text = format_segment(&segment);
-            if text.len() > 500 {
+            if text.len() < 500 {
                continue;
            }
            const CHUNK_SIZE: usize = 50_000;
            const OVERLAP: usize = 10_000;
            if text.len() <= CHUNK_SIZE {
                let id = format!("{}.{}", session_id, seg_idx);
                candidates.push((id, text));
            } else {
                // Split on line boundaries with overlap
                let lines: Vec<&str> = text.lines().collect();
                let mut start_line = 0;
                let mut chunk_idx = 0;
                while start_line < lines.len() {
                    let mut end_line = start_line;
                    let mut size = 0;
                    while end_line < lines.len() && size < CHUNK_SIZE {
                        size += lines[end_line].len() + 1;
                        end_line += 1;
                    }
                    let chunk: String = lines[start_line..end_line].join("\n");
                    let id = format!("{}.{}.{}", session_id, seg_idx, chunk_idx);
                    candidates.push((id, chunk));
                    if end_line >= lines.len() { break; }
                    // Back up by overlap amount for next chunk
                    let mut overlap_size = 0;
                    let mut overlap_start = end_line;
                    while overlap_start > start_line && overlap_size < OVERLAP {
                        overlap_start -= 1;
                        overlap_size += lines[overlap_start].len() + 1;
                    }
                    start_line = overlap_start;
                    chunk_idx += 1;
                }
            }
        }
@ -803,19 +834,20 @@ pub fn mark_observation_done(fragment_ids: &[String]) {
 }
 /// Format a segment's messages into readable text for the observation agent.
-fn format_segment(messages: &[(usize, String, String, String)], max_chars: usize) -> String {
+fn format_segment(messages: &[(usize, String, String, String)]) -> String {
    let cfg = crate::config::get();
    let mut fragments = Vec::new();
    let mut total = 0;
-    for (_, role, text, _) in messages {
+    for (_, role, text, ts) in messages {
        let min_len = if role == "user" { 5 } else { 10 };
        if text.len() <= min_len { continue; }
        let name = if role == "user" { &cfg.user_name } else { &cfg.assistant_name };
-        fragments.push(format!("**{}:** {}", name, text));
+        if ts.is_empty() {
-        total += text.len();
+            fragments.push(format!("**{}:** {}", name, text));
-        if total > max_chars { break; }
+        } else {
            fragments.push(format!("**{}** {}: {}", name, &ts[..ts.len().min(19)], text));
        }
    }
    fragments.join("\n\n")
 }