Cache context budget instead of recomputing every frame

budget() called into tiktoken on every UI tick, making it the main CPU
hog during rapid key input. Move the cached ContextBudget onto
ContextState and recompute it only when entries actually change
(push_entry, compact, restore_from_log).

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-06 18:36:33 -04:00
parent 49cd6d6ab6
commit f4664ca06f
3 changed files with 17 additions and 12 deletions

View file

@ -230,13 +230,15 @@ pub struct ContextState {
/// Conversation entries — messages and memory, interleaved in order. /// Conversation entries — messages and memory, interleaved in order.
/// Does NOT include system prompt, personality, or journal. /// Does NOT include system prompt, personality, or journal.
pub entries: Vec<ConversationEntry>, pub entries: Vec<ConversationEntry>,
/// Cached token budget — recomputed when entries change, not every frame.
pub budget: ContextBudget,
} }
impl ContextState { impl ContextState {
/// Compute the context budget from typed sources. /// Compute the context budget from typed sources and cache the result.
pub fn budget(&self, count_str: &dyn Fn(&str) -> usize, pub fn recompute_budget(&mut self, count_str: &dyn Fn(&str) -> usize,
count_msg: &dyn Fn(&Message) -> usize, count_msg: &dyn Fn(&Message) -> usize,
window_tokens: usize) -> ContextBudget { window_tokens: usize) -> &ContextBudget {
let id = count_str(&self.system_prompt) let id = count_str(&self.system_prompt)
+ self.personality.iter().map(|(_, c)| count_str(c)).sum::<usize>(); + self.personality.iter().map(|(_, c)| count_str(c)).sum::<usize>();
let jnl: usize = self.journal.iter().map(|e| count_str(&e.content)).sum(); let jnl: usize = self.journal.iter().map(|e| count_str(&e.content)).sum();
@ -246,13 +248,14 @@ impl ContextState {
let tokens = count_msg(entry.api_message()); let tokens = count_msg(entry.api_message());
if entry.is_memory() { mem += tokens } else { conv += tokens } if entry.is_memory() { mem += tokens } else { conv += tokens }
} }
ContextBudget { self.budget = ContextBudget {
identity_tokens: id, identity_tokens: id,
memory_tokens: mem, memory_tokens: mem,
journal_tokens: jnl, journal_tokens: jnl,
conversation_tokens: conv, conversation_tokens: conv,
window_tokens, window_tokens,
} };
&self.budget
} }
pub fn render_context_message(&self) -> String { pub fn render_context_message(&self) -> String {

View file

@ -215,6 +215,7 @@ impl Agent {
journal: Vec::new(), journal: Vec::new(),
working_stack: Vec::new(), working_stack: Vec::new(),
entries: Vec::new(), entries: Vec::new(),
budget: ContextBudget::default(),
}; };
let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S")); let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
let agent_cycles = crate::subconscious::subconscious::AgentCycleState::new(&session_id); let agent_cycles = crate::subconscious::subconscious::AgentCycleState::new(&session_id);
@ -297,6 +298,7 @@ impl Agent {
} }
} }
self.context.entries.push(entry); self.context.entries.push(entry);
self.recompute_budget();
self.changed.notify_one(); self.changed.notify_one();
} }
@ -318,11 +320,11 @@ impl Agent {
self.changed.notify_one(); self.changed.notify_one();
} }
pub fn budget(&self) -> ContextBudget { pub fn recompute_budget(&mut self) -> &ContextBudget {
let count_str = |s: &str| self.tokenizer.encode_with_special_tokens(s).len(); let count_str = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
let count_msg = |m: &Message| crate::agent::context::msg_token_count(&self.tokenizer, m); let count_msg = |m: &Message| crate::agent::context::msg_token_count(&self.tokenizer, m);
let window = crate::agent::context::context_window(); let window = crate::agent::context::context_window();
self.context.budget(&count_str, &count_msg, window) self.context.recompute_budget(&count_str, &count_msg, window)
} }
/// Send a user message and run the agent loop until the model /// Send a user message and run the agent loop until the model
@ -1040,8 +1042,8 @@ impl Agent {
dbglog!("[compact] entries: {} → {} (mem: {} → {}, conv: {} → {})", dbglog!("[compact] entries: {} → {} (mem: {} → {}, conv: {} → {})",
before, after, before_mem, after_mem, before_conv, after_conv); before, after, before_mem, after_mem, before_conv, after_conv);
let budget = self.budget(); self.recompute_budget();
dbglog!("[compact] budget: {}", budget.status_string()); dbglog!("[compact] budget: {}", self.context.budget.status_string());
self.load_startup_journal(); self.load_startup_journal();
self.generation += 1; self.generation += 1;
@ -1073,8 +1075,8 @@ impl Agent {
self.context.entries = all; self.context.entries = all;
self.compact(); self.compact();
// Estimate prompt tokens from budget so status bar isn't 0 on startup // Estimate prompt tokens from budget so status bar isn't 0 on startup
let b = self.budget(); self.recompute_budget();
self.last_prompt_tokens = b.used() as u32; self.last_prompt_tokens = self.context.budget.used() as u32;
true true
} }

View file

@ -828,7 +828,7 @@ impl ScreenView for InteractScreen {
agent.expire_activities(); agent.expire_activities();
app.status.prompt_tokens = agent.last_prompt_tokens(); app.status.prompt_tokens = agent.last_prompt_tokens();
app.status.model = agent.model().to_string(); app.status.model = agent.model().to_string();
app.status.context_budget = agent.budget().status_string(); app.status.context_budget = agent.context.budget.status_string();
app.activity = agent.activities.last() app.activity = agent.activities.last()
.map(|a| a.label.clone()) .map(|a| a.label.clone())
.unwrap_or_default(); .unwrap_or_default();