From 7da3efc5dfc33d9139b88dc7e6ce9fcc59c16906 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Thu, 9 Apr 2026 13:06:19 -0400
Subject: [PATCH] Fast startup: only retokenize tail of conversation log
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

restore_from_log reads the full log but walks backwards from the tail,
retokenizing each node as it goes. Stops when conversation budget is
full. Only the nodes that fit get pushed into context.

Added AstNode::retokenize() — recomputes token_ids on all leaves after
deserialization (serde skip means they're empty).

Co-Authored-By: Proof of Concept
---
 src/agent/context.rs | 17 +++++++++++++++++
 src/agent/mod.rs     | 28 +++++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/src/agent/context.rs b/src/agent/context.rs
index ccc0830..3d5e969 100644
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@@ -296,6 +296,23 @@ impl AstNode {
 
     // -- Builder --------------------------------------------------------------
 
+    pub fn retokenize(self) -> Self {
+        match self {
+            Self::Leaf(leaf) => {
+                let token_ids = if leaf.body.is_prompt_visible() {
+                    tokenizer::encode(&leaf.body.render())
+                } else {
+                    vec![]
+                };
+                Self::Leaf(NodeLeaf { token_ids, ..leaf })
+            }
+            Self::Branch { role, children } => Self::Branch {
+                role,
+                children: children.into_iter().map(|c| c.retokenize()).collect(),
+            },
+        }
+    }
+
     pub fn with_timestamp(mut self, ts: DateTime) -> Self {
         match &mut self {
             Self::Leaf(leaf) => leaf.timestamp = Some(ts),
diff --git a/src/agent/mod.rs b/src/agent/mod.rs
index 8b6f43d..204747a 100644
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@@ -568,7 +568,7 @@ impl Agent {
     }
 
     pub async fn restore_from_log(&self) -> bool {
-        let nodes = {
+        let all_nodes = {
             let ctx = self.context.lock().await;
             match &ctx.conversation_log {
                 Some(log) => match log.read_nodes(64 * 1024 * 1024) {
@@ -579,17 +579,35 @@ impl Agent {
             }
         };
 
+        // Walk backwards from the tail, retokenize, stop at budget
+        let budget = context::context_budget_tokens();
+        let fixed = {
+            let ctx = self.context.lock().await;
+            ctx.system().iter().chain(ctx.identity().iter())
+                .map(|n| n.tokens()).sum::<usize>()
+        };
+        let conv_budget = budget.saturating_sub(fixed);
+
+        let mut kept = Vec::new();
+        let mut total = 0;
+        for node in all_nodes.into_iter().rev() {
+            let node = node.retokenize();
+            let tok = node.tokens();
+            if total + tok > conv_budget && !kept.is_empty() { break; }
+            total += tok;
+            kept.push(node);
+        }
+        kept.reverse();
+
         {
             let mut ctx = self.context.lock().await;
             ctx.clear(Section::Conversation);
-            // Push without logging — these are already in the log
-            for node in nodes {
+            for node in kept {
                 ctx.push_no_log(Section::Conversation, node);
             }
         }
         self.compact().await;
-        let mut st = self.state.lock().await;
-        st.last_prompt_tokens = self.context.lock().await.tokens() as u32;
+        self.state.lock().await.last_prompt_tokens = self.context.lock().await.tokens() as u32;
         true
     }
 