Cache context budget instead of recomputing every frame

budget() called tiktoken on every UI tick, which was the main CPU hog
during rapid key input. Move the cached ContextBudget onto ContextState
and recompute only when entries actually change (push_entry, compact,
restore_from_log).

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-06 18:36:33 -04:00
parent 49cd6d6ab6
commit f4664ca06f
3 changed files with 17 additions and 12 deletions

View file

@@ -215,6 +215,7 @@ impl Agent {
 journal: Vec::new(),
 working_stack: Vec::new(),
 entries: Vec::new(),
+budget: ContextBudget::default(),
 };
 let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
 let agent_cycles = crate::subconscious::subconscious::AgentCycleState::new(&session_id);
@@ -297,6 +298,7 @@ impl Agent {
 }
 }
 self.context.entries.push(entry);
+self.recompute_budget();
 self.changed.notify_one();
 }
@@ -318,11 +320,11 @@ impl Agent {
 self.changed.notify_one();
 }
-pub fn budget(&self) -> ContextBudget {
+pub fn recompute_budget(&mut self) -> &ContextBudget {
 let count_str = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
 let count_msg = |m: &Message| crate::agent::context::msg_token_count(&self.tokenizer, m);
 let window = crate::agent::context::context_window();
-self.context.budget(&count_str, &count_msg, window)
+self.context.recompute_budget(&count_str, &count_msg, window)
 }
 /// Send a user message and run the agent loop until the model
@@ -1040,8 +1042,8 @@ impl Agent {
 dbglog!("[compact] entries: {} → {} (mem: {} → {}, conv: {} → {})",
 before, after, before_mem, after_mem, before_conv, after_conv);
-let budget = self.budget();
-dbglog!("[compact] budget: {}", budget.status_string());
+self.recompute_budget();
+dbglog!("[compact] budget: {}", self.context.budget.status_string());
 self.load_startup_journal();
 self.generation += 1;
@@ -1073,8 +1075,8 @@ impl Agent {
 self.context.entries = all;
 self.compact();
 // Estimate prompt tokens from budget so status bar isn't 0 on startup
-let b = self.budget();
-self.last_prompt_tokens = b.used() as u32;
+self.recompute_budget();
+self.last_prompt_tokens = self.context.budget.used() as u32;
 true
 }