experience-mine: per-segment dedup keys, retry backoff

The whole-file dedup key (_mined-transcripts#f-{UUID}) prevented mining
new compaction segments when session files grew. Replace with per-segment
keys (_mined-transcripts#f-{UUID}.{N}) so each segment is tracked
independently.

Changes:
- daemon session-watcher: segment-aware dedup, migrate 272 existing
  whole-file keys to per-segment on restart
- seg_cache with size-based invalidation (re-parse when file grows)
- exponential retry backoff (5min → 30min cap) for failed sessions
- experience_mine(): segmented calls write only the per-segment key
  (unsegmented calls write only the content-hash key); backfill the
  per-segment key on content-hash early return
- fact-mining gated on all per-segment keys existing

Also adds documentation:
- docs/claude-code-transcript-format.md: JSONL transcript format
- docs/plan-experience-mine-dedup-fix.md: design document
This commit is contained in:
ProofOfConcept 2026-03-09 02:27:51 -04:00
parent 1326a683a5
commit 8eb6308760
4 changed files with 367 additions and 95 deletions

View file

@ -61,13 +61,6 @@ pub fn mined_transcript_keys() -> HashSet<String> {
keys
}
/// Report whether the transcript at `path` is already recorded as mined.
///
/// Only the filename-derived key is looked up in the pre-loaded `mined` set, so the
/// transcript file itself is never read. Sessions mined before the filename key
/// existed are not found here; they pass through and short-circuit in
/// `experience_mine` via its content hash check — a one-time cost on the first
/// restart after the filename key was introduced.
pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> bool {
    let filename_key = transcript_filename_key(path);
    mined.contains(&filename_key)
}
/// Extract user/assistant messages with line numbers from a JSONL transcript.
/// (line_number, role, text, timestamp)
@ -243,13 +236,15 @@ pub fn experience_mine(
let dedup_key = format!("_mined-transcripts#h-{:016x}", hash);
if store.nodes.contains_key(&dedup_key) {
// Backfill filename key if missing (transcripts mined before this key existed)
let fname_key = transcript_filename_key(jsonl_path);
if !store.nodes.contains_key(&fname_key) {
let mut node = new_node(&fname_key, &format!("Backfilled from {}", dedup_key));
node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(node);
store.save()?;
// Backfill per-segment key if called with a specific segment
if let Some(idx) = segment {
let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
if !store.nodes.contains_key(&seg_key) {
let mut node = new_node(&seg_key, &format!("Backfilled from {}", dedup_key));
node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(node);
store.save()?;
}
}
println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]);
return Ok(0);
@ -370,20 +365,23 @@ pub fn experience_mine(
}
// Record this transcript/segment as mined (even if count == 0, to prevent re-runs)
let fname_key = match segment {
Some(idx) => format!("{}.{}", transcript_filename_key(jsonl_path), idx),
None => transcript_filename_key(jsonl_path),
};
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
let mut fname_node = new_node(&fname_key, &dedup_content);
fname_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(fname_node);
// For unsegmented calls, also write the content-hash key for backwards compat
if segment.is_none() {
let mut dedup_node = new_node(&dedup_key, &dedup_content);
dedup_node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(dedup_node);
match segment {
Some(idx) => {
// Per-segment key: the daemon writes the whole-file key when all segments are done
let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
let mut node = new_node(&seg_key, &dedup_content);
node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(node);
}
None => {
// Unsegmented: only write content-hash key (not the filename key, since the
// file may grow with new compaction segments later — the daemon handles
// writing the whole-file filename key after verifying all segments are done)
let mut node = new_node(&dedup_key, &dedup_content);
node.provenance = store::Provenance::AgentExperienceMine;
let _ = store.upsert_node(node);
}
}
if count > 0 {