Cache context budget instead of recomputing every frame

budget() called tiktoken on every UI tick, which was the main CPU hog
during rapid key input. Move the cached ContextBudget onto ContextState
and recompute only when entries actually change (push_entry, compact,
restore_from_log).

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-06 18:36:33 -04:00
parent 49cd6d6ab6
commit f4664ca06f
3 changed files with 17 additions and 12 deletions

View file

@@ -230,13 +230,15 @@ pub struct ContextState {
/// Conversation entries — messages and memory, interleaved in order.
/// Does NOT include system prompt, personality, or journal.
pub entries: Vec<ConversationEntry>,
/// Cached token budget — recomputed when entries change, not every frame.
pub budget: ContextBudget,
}
impl ContextState {
/// Compute the context budget from typed sources.
pub fn budget(&self, count_str: &dyn Fn(&str) -> usize,
/// Compute the context budget from typed sources and cache the result.
pub fn recompute_budget(&mut self, count_str: &dyn Fn(&str) -> usize,
count_msg: &dyn Fn(&Message) -> usize,
window_tokens: usize) -> ContextBudget {
window_tokens: usize) -> &ContextBudget {
let id = count_str(&self.system_prompt)
+ self.personality.iter().map(|(_, c)| count_str(c)).sum::<usize>();
let jnl: usize = self.journal.iter().map(|e| count_str(&e.content)).sum();
@@ -246,13 +248,14 @@ impl ContextState {
let tokens = count_msg(entry.api_message());
if entry.is_memory() { mem += tokens } else { conv += tokens }
}
ContextBudget {
self.budget = ContextBudget {
identity_tokens: id,
memory_tokens: mem,
journal_tokens: jnl,
conversation_tokens: conv,
window_tokens,
}
};
&self.budget
}
pub fn render_context_message(&self) -> String {