increase context budget: 80% window, 15% journal, no double reserve
Context was being trimmed too aggressively, leaving 80% free after compaction. The budget was 60% of the window, minus a reserve of 25% of that budget, so only 45% of the window was usable. Now the total budget is 80% of the window (the remaining 20% is the built-in output reserve), with no extra reserve subtraction. The journal budget goes from 5% to 15% of the window to carry more context across compactions. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
78abf90461
commit
d921e76f82
2 changed files with 7 additions and 8 deletions
|
|
@ -825,11 +825,11 @@ impl Agent {
|
||||||
};
|
};
|
||||||
dbg_log!("[journal] cutoff_idx={}", cutoff_idx);
|
dbg_log!("[journal] cutoff_idx={}", cutoff_idx);
|
||||||
|
|
||||||
// Walk backwards from cutoff, accumulating entries within 5% of context
|
// Walk backwards from cutoff, accumulating entries within 15% of context
|
||||||
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
||||||
let context_window = crate::thought::context::context_window();
|
let context_window = crate::thought::context::context_window();
|
||||||
let journal_budget = context_window * 5 / 100;
|
let journal_budget = context_window * 15 / 100;
|
||||||
dbg_log!("[journal] budget={} tokens ({}*5%)", journal_budget, context_window);
|
dbg_log!("[journal] budget={} tokens ({}*15%)", journal_budget, context_window);
|
||||||
|
|
||||||
let mut entries = Vec::new();
|
let mut entries = Vec::new();
|
||||||
let mut total_tokens = 0;
|
let mut total_tokens = 0;
|
||||||
|
|
|
||||||
|
|
@ -20,9 +20,10 @@ pub fn context_window() -> usize {
|
||||||
crate::config::get().api_context_window
|
crate::config::get().api_context_window
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Context budget in tokens: 60% of the model's context window.
|
/// Context budget in tokens: 80% of the model's context window.
|
||||||
|
/// The remaining 20% is reserved for model output.
|
||||||
fn context_budget_tokens() -> usize {
|
fn context_budget_tokens() -> usize {
|
||||||
context_window() * 60 / 100
|
context_window() * 80 / 100
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Dedup and trim conversation entries to fit within the context budget.
|
/// Dedup and trim conversation entries to fit within the context budget.
|
||||||
|
|
@ -61,11 +62,9 @@ pub fn trim_entries(
|
||||||
let identity_cost = count(&context.system_prompt)
|
let identity_cost = count(&context.system_prompt)
|
||||||
+ context.personality.iter().map(|(_, c)| count(c)).sum::<usize>();
|
+ context.personality.iter().map(|(_, c)| count(c)).sum::<usize>();
|
||||||
let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum();
|
let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum();
|
||||||
let reserve = max_tokens / 4;
|
|
||||||
let available = max_tokens
|
let available = max_tokens
|
||||||
.saturating_sub(identity_cost)
|
.saturating_sub(identity_cost)
|
||||||
.saturating_sub(journal_cost)
|
.saturating_sub(journal_cost);
|
||||||
.saturating_sub(reserve);
|
|
||||||
|
|
||||||
let msg_costs: Vec<usize> = deduped.iter()
|
let msg_costs: Vec<usize> = deduped.iter()
|
||||||
.map(|e| msg_token_count(tokenizer, e.message())).collect();
|
.map(|e| msg_token_count(tokenizer, e.message())).collect();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue