trim_entries: take ContextBudget instead of recomputing

compact() already computes context_budget() — pass the result to trim_entries
so that trim_entries can read every budget component without recomputing them.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-07 19:42:06 -04:00
parent df62b7ceaa
commit 776ac527f1
2 changed files with 6 additions and 10 deletions

View file

@ -45,10 +45,11 @@ fn context_budget_tokens() -> usize {
/// 2. Trim: drop oldest entries until the conversation fits, snapping
/// to user message boundaries.
pub fn trim_entries(
context: &ContextState,
entries: &[ConversationEntry],
tokenizer: &CoreBPE,
budget: &ContextBudget,
) -> Vec<ConversationEntry> {
let fixed_tokens = budget.system + budget.identity + budget.journal;
// --- Phase 1: dedup memory entries by key (keep last) ---
let mut seen_keys: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
let mut drop_indices: std::collections::HashSet<usize> = std::collections::HashSet::new();
@ -67,19 +68,13 @@ pub fn trim_entries(
.collect();
// --- Phase 2: trim to fit context budget ---
// Everything in the context window is a message. Count them all,
// trim entries until the total fits.
let max_tokens = context_budget_tokens();
let count_msg = |m: &Message| msg_token_count(tokenizer, m);
let fixed_cost = count_msg(&Message::system(&context.system_prompt))
+ count_msg(&Message::user(context.render_context_message()))
+ count_msg(&Message::user(render_journal(&context.journal)));
let msg_costs: Vec<usize> = deduped.iter()
.map(|e| if e.is_log() { 0 } else { count_msg(e.api_message()) }).collect();
let entry_total: usize = msg_costs.iter().sum();
let total: usize = fixed_cost + entry_total;
let total: usize = fixed_tokens + entry_total;
let mem_tokens: usize = deduped.iter().zip(&msg_costs)
.filter(|(e, _)| e.is_memory())
@ -87,7 +82,7 @@ pub fn trim_entries(
let conv_tokens: usize = entry_total - mem_tokens;
dbglog!("[trim] max_tokens={} fixed={} mem={} conv={} total={} entries={}",
max_tokens, fixed_cost, mem_tokens, conv_tokens, total, deduped.len());
max_tokens, fixed_tokens, mem_tokens, conv_tokens, total, deduped.len());
// Phase 2a: evict all DMN entries first — they're ephemeral
let mut drop = vec![false; deduped.len()];

View file

@ -987,11 +987,12 @@ impl Agent {
self.load_startup_journal();
// Dedup memory, trim to budget
let budget = self.context_budget();
let entries = self.context.entries.clone();
self.context.entries = crate::agent::context::trim_entries(
&self.context,
&entries,
&self.tokenizer,
&budget,
);
let after = self.context.entries.len();