increase context budget: 80% window, 15% journal, no double reserve
The context was trimmed too aggressively — 80% free after compaction. The budget was 60% of the window minus a reserve of 25% of that budget (15% of the window), leaving only 45% of the window usable. Now: 80% of the window is the total budget (the remaining 20% is the built-in output reserve), with no extra reserve subtraction. The journal budget goes from 5% to 15% of the window to carry more context across compactions. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
78abf90461
commit
d921e76f82
2 changed files with 7 additions and 8 deletions
|
|
@ -825,11 +825,11 @@ impl Agent {
|
|||
};
|
||||
dbg_log!("[journal] cutoff_idx={}", cutoff_idx);
|
||||
|
||||
// Walk backwards from cutoff, accumulating entries within 5% of context
|
||||
// Walk backwards from cutoff, accumulating entries within 15% of context
|
||||
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
||||
let context_window = crate::thought::context::context_window();
|
||||
let journal_budget = context_window * 5 / 100;
|
||||
dbg_log!("[journal] budget={} tokens ({}*5%)", journal_budget, context_window);
|
||||
let journal_budget = context_window * 15 / 100;
|
||||
dbg_log!("[journal] budget={} tokens ({}*15%)", journal_budget, context_window);
|
||||
|
||||
let mut entries = Vec::new();
|
||||
let mut total_tokens = 0;
|
||||
|
|
|
|||
|
|
@ -20,9 +20,10 @@ pub fn context_window() -> usize {
|
|||
crate::config::get().api_context_window
|
||||
}
|
||||
|
||||
/// Context budget in tokens: 60% of the model's context window.
|
||||
/// Context budget in tokens: 80% of the model's context window.
|
||||
/// The remaining 20% is reserved for model output.
|
||||
fn context_budget_tokens() -> usize {
|
||||
context_window() * 60 / 100
|
||||
context_window() * 80 / 100
|
||||
}
|
||||
|
||||
/// Dedup and trim conversation entries to fit within the context budget.
|
||||
|
|
@ -61,11 +62,9 @@ pub fn trim_entries(
|
|||
let identity_cost = count(&context.system_prompt)
|
||||
+ context.personality.iter().map(|(_, c)| count(c)).sum::<usize>();
|
||||
let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum();
|
||||
let reserve = max_tokens / 4;
|
||||
let available = max_tokens
|
||||
.saturating_sub(identity_cost)
|
||||
.saturating_sub(journal_cost)
|
||||
.saturating_sub(reserve);
|
||||
.saturating_sub(journal_cost);
|
||||
|
||||
let msg_costs: Vec<usize> = deduped.iter()
|
||||
.map(|e| msg_token_count(tokenizer, e.message())).collect();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue