Score memories in first 60% of conversation by tokens

Use cumulative token position instead of entry index for the scoring
cutoff. This reflects actual context usage — a few large entries
near the end won't skew the boundary.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2026-04-07 21:36:39 -04:00
parent fd58386951
commit 613704720b

View file

@@ -337,16 +337,25 @@ where
let http = http_client(); let http = http_client();
let mut scored = 0; let mut scored = 0;
let total_entries = context.conversation.entries().len(); let total_tokens = context.conversation.tokens();
let first_quarter = total_entries / 4; let token_cutoff = total_tokens * 60 / 100;
// Precompute cumulative token position for each entry
let entries = context.conversation.entries();
let mut cumulative: Vec<usize> = Vec::with_capacity(entries.len());
let mut running = 0;
for e in entries {
running += e.tokens;
cumulative.push(running);
}
for (pos, key, _) in &candidates { for (pos, key, _) in &candidates {
let (end, full_window) = nth_response_end(context.conversation.entries(), *pos, response_window); // Only score memories in the first 60% of the conversation by tokens —
// Skip memories without a full window, unless they're in the // recent memories don't have enough responses to evaluate yet.
// first quarter of the conversation (always score those). if cumulative.get(*pos).copied().unwrap_or(total_tokens) > token_cutoff {
if !full_window && *pos >= first_quarter {
continue; continue;
} }
let (end, _) = nth_response_end(context.conversation.entries(), *pos, response_window);
let range = *pos..end; let range = *pos..end;
if !context.conversation.entries()[range.clone()].iter().any(|ce| ce.entry.message().role == Role::Assistant) { if !context.conversation.entries()[range.clone()].iter().any(|ce| ce.entry.message().role == Role::Assistant) {
continue; continue;