trim_entries: take ContextBudget instead of recomputing
compact() already computes context_budget() — pass the result to trim_entries so it has access to all budget components without recomputing them.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
df62b7ceaa
commit
776ac527f1
2 changed files with 6 additions and 10 deletions
|
|
@@ -45,10 +45,11 @@ fn context_budget_tokens() -> usize {
|
|||
/// 2. Trim: drop oldest entries until the conversation fits, snapping
|
||||
/// to user message boundaries.
|
||||
pub fn trim_entries(
|
||||
context: &ContextState,
|
||||
entries: &[ConversationEntry],
|
||||
tokenizer: &CoreBPE,
|
||||
budget: &ContextBudget,
|
||||
) -> Vec<ConversationEntry> {
|
||||
let fixed_tokens = budget.system + budget.identity + budget.journal;
|
||||
// --- Phase 1: dedup memory entries by key (keep last) ---
|
||||
let mut seen_keys: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
|
||||
let mut drop_indices: std::collections::HashSet<usize> = std::collections::HashSet::new();
|
||||
|
|
@@ -67,19 +68,13 @@ pub fn trim_entries(
|
|||
.collect();
|
||||
|
||||
// --- Phase 2: trim to fit context budget ---
|
||||
// Everything in the context window is a message. Count them all,
|
||||
// trim entries until the total fits.
|
||||
let max_tokens = context_budget_tokens();
|
||||
let count_msg = |m: &Message| msg_token_count(tokenizer, m);
|
||||
|
||||
let fixed_cost = count_msg(&Message::system(&context.system_prompt))
|
||||
+ count_msg(&Message::user(context.render_context_message()))
|
||||
+ count_msg(&Message::user(render_journal(&context.journal)));
|
||||
|
||||
let msg_costs: Vec<usize> = deduped.iter()
|
||||
.map(|e| if e.is_log() { 0 } else { count_msg(e.api_message()) }).collect();
|
||||
let entry_total: usize = msg_costs.iter().sum();
|
||||
let total: usize = fixed_cost + entry_total;
|
||||
let total: usize = fixed_tokens + entry_total;
|
||||
|
||||
let mem_tokens: usize = deduped.iter().zip(&msg_costs)
|
||||
.filter(|(e, _)| e.is_memory())
|
||||
|
|
@@ -87,7 +82,7 @@ pub fn trim_entries(
|
|||
let conv_tokens: usize = entry_total - mem_tokens;
|
||||
|
||||
dbglog!("[trim] max_tokens={} fixed={} mem={} conv={} total={} entries={}",
|
||||
max_tokens, fixed_cost, mem_tokens, conv_tokens, total, deduped.len());
|
||||
max_tokens, fixed_tokens, mem_tokens, conv_tokens, total, deduped.len());
|
||||
|
||||
// Phase 2a: evict all DMN entries first — they're ephemeral
|
||||
let mut drop = vec![false; deduped.len()];
|
||||
|
|
|
|||
|
|
@@ -987,11 +987,12 @@ impl Agent {
|
|||
self.load_startup_journal();
|
||||
|
||||
// Dedup memory, trim to budget
|
||||
let budget = self.context_budget();
|
||||
let entries = self.context.entries.clone();
|
||||
self.context.entries = crate::agent::context::trim_entries(
|
||||
&self.context,
|
||||
&entries,
|
||||
&self.tokenizer,
|
||||
&budget,
|
||||
);
|
||||
|
||||
let after = self.context.entries.len();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue