WIP: ContextEntry/ContextSection data structures for incremental token counting

New types — not yet wired to callers:

- ContextEntry: wraps ConversationEntry with cached token count and
  timestamp
- ContextSection: named group of entries with cached token total.
  Private entries/tokens, read via entries()/tokens().
  Mutation via push(entry), set(index, entry), del(index).
- ContextState: system/identity/journal/conversation sections + working_stack
- ConversationEntry::System variant for system prompt entries

Token counting happens once at push time. Sections maintain their
totals incrementally via push/set/del. No more recomputing from
scratch on every budget check.

Does not compile — callers need updating.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Commit 62996e27d7 (parent 776ac527f1)
Author: Kent Overstreet — 2026-04-07 20:15:31 -04:00
10 changed files with 450 additions and 403 deletions

View file

@@ -24,7 +24,7 @@ use tiktoken_rs::CoreBPE;
use api::{ApiClient, ToolCall};
use api::{ContentPart, Message, MessageContent, Role};
use context::{ConversationEntry, ContextState};
use context::{ConversationEntry, ContextEntry, ContextState};
use tools::{summarize_args, working_stack};
use crate::mind::log::ConversationLog;
@@ -195,12 +195,27 @@ impl Agent {
let tokenizer = tiktoken_rs::cl100k_base()
.expect("failed to load cl100k_base tokenizer");
let mut system = ContextSection::new("System prompt");
system.push(ContextEntry {
entry: ConversationEntry::System(Message::system(&system_prompt)),
tokens: context::msg_token_count(&tokenizer, &Message::system(&system_prompt)),
timestamp: None,
});
let mut identity = ContextSection::new("Identity");
for (_name, content) in &personality {
let msg = Message::user(content);
identity.push(ContextEntry {
tokens: context::msg_token_count(&tokenizer, &msg),
entry: ConversationEntry::Message(msg),
timestamp: None,
});
}
let context = ContextState {
system_prompt: system_prompt.clone(),
personality,
journal: Vec::new(),
system,
identity,
journal: ContextSection::new("Journal"),
conversation: ContextSection::new("Conversation"),
working_stack: Vec::new(),
entries: Vec::new(),
};
let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
let mut agent = Self {
@@ -274,18 +289,24 @@ impl Agent {
/// System prompt + personality context + journal + conversation messages.
pub fn assemble_api_messages(&self) -> Vec<Message> {
let mut msgs = Vec::new();
msgs.push(Message::system(&self.context.system_prompt));
// System section
for e in self.context.system.entries() {
msgs.push(e.entry.api_message().clone());
}
// Identity — render personality files + working stack into one user message
let ctx = self.context.render_context_message();
if !ctx.is_empty() {
msgs.push(Message::user(ctx));
}
let jnl = context::render_journal(&self.context.journal);
// Journal — render into one user message
let jnl = self.context.render_journal();
if !jnl.is_empty() {
msgs.push(Message::user(jnl));
}
msgs.extend(self.context.entries.iter()
.filter(|e| !e.is_log())
.map(|e| e.api_message().clone()));
// Conversation entries
msgs.extend(self.context.conversation.entries().iter()
.filter(|e| !e.entry.is_log())
.map(|e| e.entry.api_message().clone()));
msgs
}
@@ -303,50 +324,64 @@ impl Agent {
eprintln!("warning: failed to log entry: {:#}", e);
}
}
self.context.entries.push(entry);
let tokens = if entry.is_log() { 0 } else {
context::msg_token_count(&self.tokenizer, entry.api_message())
};
self.context.conversation.push(ContextEntry {
entry, tokens, timestamp: Some(chrono::Utc::now()),
});
self.changed.notify_one();
}
fn streaming_entry(&mut self) -> Option<&mut Message> {
for entry in self.context.entries.iter_mut().rev() {
let m = entry.message_mut();
if m.role == Role::Assistant {
return if m.timestamp.is_none() { Some(m) } else { None }
}
}
None
/// Find the index of the in-progress streaming entry (unstamped assistant message).
fn streaming_index(&self) -> Option<usize> {
    // Walk from the newest entry backwards: the streaming entry, if one
    // exists, is the most recent assistant message with no timestamp.
    let entries = self.context.conversation.entries();
    for (idx, ce) in entries.iter().enumerate().rev() {
        let msg = ce.entry.message();
        if msg.role == Role::Assistant && msg.timestamp.is_none() {
            return Some(idx);
        }
    }
    None
}
/// Append streaming text to the last entry (creating a partial
/// assistant entry if needed). Called by collect_stream per token batch.
fn append_streaming(&mut self, text: &str) {
if let Some(m) = self.streaming_entry() {
m.append_content(text);
if let Some(idx) = self.streaming_index() {
let mut msg = self.context.conversation.entries()[idx].entry.message().clone();
msg.append_content(text);
self.context.conversation.set_message(idx, &self.tokenizer, msg);
} else {
// No streaming entry — create without timestamp so finalize can find it
self.context.entries.push(ConversationEntry::Message(Message {
let msg = Message {
role: Role::Assistant,
content: Some(MessageContent::Text(text.to_string())),
tool_calls: None,
tool_call_id: None,
name: None,
timestamp: None,
}));
};
let tokens = context::msg_token_count(&self.tokenizer, &msg);
self.context.conversation.push(ContextEntry {
entry: ConversationEntry::Message(msg),
tokens,
timestamp: None,
});
}
self.changed.notify_one();
}
/// Finalize the streaming entry with the complete response message.
/// Finds the unstamped assistant entry, updates it in place, and logs it.
/// Finds the unstamped assistant entry, replaces it via set() with proper token count.
fn finalize_streaming(&mut self, msg: Message) {
if let Some(m) = self.streaming_entry() {
*m = msg.clone();
m.stamp();
if let Some(i) = self.streaming_index() {
let mut stamped = msg.clone();
stamped.stamp();
let tokens = context::msg_token_count(&self.tokenizer, &stamped);
self.context.conversation.set(i, ContextEntry {
entry: ConversationEntry::Message(stamped),
tokens,
timestamp: Some(chrono::Utc::now()),
});
} else {
// No streaming entry found — push as new (this logs via push_message)
self.push_message(msg.clone());
}
@@ -655,173 +690,32 @@ impl Agent {
self.push_message(Message::tool_result(&call.id, &output));
}
/// Token budget by category — cheap, no formatting. Used for compaction decisions.
/// Token budget by category — just reads cached section totals.
pub fn context_budget(&self) -> context::ContextBudget {
let count = |m: &Message| context::msg_token_count(&self.tokenizer, m);
let system = count(&Message::system(&self.context.system_prompt));
let identity = count(&Message::user(&self.context.render_context_message()));
let journal_rendered = context::render_journal(&self.context.journal);
let journal = if journal_rendered.is_empty() { 0 } else {
count(&Message::user(&journal_rendered))
};
let memory: usize = self.context.entries.iter()
.filter(|e| e.is_memory())
.map(|e| count(e.message()))
let memory: usize = self.context.conversation.entries().iter()
.filter(|e| e.entry.is_memory())
.map(|e| e.tokens)
.sum();
let conversation: usize = self.context.entries.iter()
.filter(|e| !e.is_memory() && !e.is_log())
.map(|e| count(e.api_message()))
.sum();
context::ContextBudget { system, identity, journal, memory, conversation }
}
/// Build context state summary for the debug screen.
pub fn context_state_summary(&self) -> Vec<ContextSection> {
let count_msg = |m: &Message| context::msg_token_count(&self.tokenizer, m);
let mut sections = Vec::new();
// System prompt — counted as the actual message sent
let system_msg = Message::system(&self.context.system_prompt);
sections.push(ContextSection {
name: "System prompt".into(),
tokens: count_msg(&system_msg),
content: self.context.system_prompt.clone(),
children: Vec::new(),
});
// Context message (personality + working stack) — counted as the
// single user message that assemble_api_messages sends
let context_rendered = self.context.render_context_message();
let context_msg = Message::user(&context_rendered);
sections.push(ContextSection {
name: format!("Identity ({} files + stack)", self.context.personality.len()),
tokens: count_msg(&context_msg),
content: context_rendered,
children: Vec::new(),
});
// Journal — counted as the single rendered message sent
let journal_rendered = context::render_journal(&self.context.journal);
let journal_msg = Message::user(&journal_rendered);
sections.push(ContextSection {
name: format!("Journal ({} entries)", self.context.journal.len()),
tokens: if journal_rendered.is_empty() { 0 } else { count_msg(&journal_msg) },
content: journal_rendered,
children: Vec::new(),
});
// Memory nodes — extracted from Memory entries in the conversation
let memory_entries: Vec<&ConversationEntry> = self.context.entries.iter()
.filter(|e| e.is_memory())
.collect();
if !memory_entries.is_empty() {
let node_children: Vec<ContextSection> = memory_entries.iter()
.map(|entry| {
let (key, score) = match entry {
ConversationEntry::Memory { key, score, .. } => (key.as_str(), *score),
_ => unreachable!(),
};
let label = match score {
Some(s) => format!("{} (score:{:.1})", key, s),
None => key.to_string(),
};
ContextSection {
name: label,
tokens: count_msg(entry.message()),
content: String::new(),
children: Vec::new(),
}
})
.collect();
let node_tokens: usize = node_children.iter().map(|c| c.tokens).sum();
sections.push(ContextSection {
name: format!("Memory nodes ({} loaded)", memory_entries.len()),
tokens: node_tokens,
content: String::new(),
children: node_children,
});
let conv_total = self.context.conversation.tokens();
context::ContextBudget {
system: self.context.system.tokens(),
identity: self.context.identity.tokens(),
journal: self.context.journal.tokens(),
memory,
conversation: conv_total - memory,
}
// Conversation — memories excluded (counted in their own section above)
let conv_children = self.entry_sections(&count_msg, 0, false);
let conv_tokens: usize = conv_children.iter().map(|c| c.tokens).sum();
sections.push(ContextSection {
name: format!("Conversation ({} messages)", conv_children.len()),
tokens: conv_tokens,
content: String::new(),
children: conv_children,
});
sections
}
/// Build ContextSection nodes for conversation entries starting at `from`.
/// When `include_memories` is false, memory entries are excluded (they get
/// their own section in context_state_summary to avoid double-counting).
fn entry_sections(
&self,
count_msg: &dyn Fn(&Message) -> usize,
from: usize,
include_memories: bool,
) -> Vec<ContextSection> {
let cfg = crate::config::get();
self.context.entries.iter().enumerate()
.skip(from)
.filter(|(_, e)| !e.is_log() && (include_memories || !e.is_memory()))
.map(|(i, entry)| {
let m = entry.message();
let text = m.content.as_ref()
.map(|c| c.as_text().to_string())
.unwrap_or_default();
let (role_name, label) = if let ConversationEntry::Memory { key, score, .. } = entry {
let label = match score {
Some(s) => format!("[memory: {} score:{:.1}]", key, s),
None => format!("[memory: {}]", key),
};
("mem".to_string(), label)
} else {
let tool_info = m.tool_calls.as_ref().map(|tc| {
tc.iter()
.map(|c| c.function.name.clone())
.collect::<Vec<_>>()
.join(", ")
});
let label = match &tool_info {
Some(tools) => format!("[tool_call: {}]", tools),
None => {
let preview: String = text.chars().take(60).collect();
let preview = preview.replace('\n', " ");
if text.len() > 60 { format!("{}...", preview) } else { preview }
}
};
let role_name = match m.role {
Role::Assistant => cfg.assistant_name.clone(),
Role::User => cfg.user_name.clone(),
Role::Tool => "tool".to_string(),
Role::System => "system".to_string(),
};
(role_name, label)
};
ContextSection {
name: format!("[{}] {}: {}", i, role_name, label),
tokens: count_msg(entry.api_message()),
content: text,
children: Vec::new(),
}
})
.collect()
/// Context state sections — just returns references to the live data.
///
/// Delegates to `ContextState::sections()`; the array length is fixed at 4.
/// NOTE(review): presumably the order is system, identity, journal,
/// conversation (the fields named in the commit message) — confirm in
/// the `context` module before relying on it.
pub fn context_sections(&self) -> [&ContextSection; 4] {
    self.context.sections()
}
/// Context sections for entries from `from` onward — used by the
/// Conversation entries from `from` onward — used by the
/// subconscious debug screen to show forked agent conversations.
pub fn conversation_sections_from(&self, from: usize) -> Vec<ContextSection> {
let count_msg = |m: &Message| context::msg_token_count(&self.tokenizer, m);
self.entry_sections(&count_msg, from, true)
pub fn conversation_entries_from(&self, from: usize) -> &[ContextEntry] {
    // An out-of-range `from` yields an empty slice instead of panicking;
    // `get` on a range returns None when the start exceeds the length.
    self.context.conversation.entries().get(from..).unwrap_or(&[])
}
/// Load recent journal entries at startup for orientation.
@@ -876,35 +770,38 @@ impl Agent {
dbg_log!("[journal] cutoff_idx={}", cutoff_idx);
// Walk backwards from cutoff, accumulating entries within 15% of context
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
let context_window = crate::agent::context::context_window();
let journal_budget = context_window * 15 / 100;
dbg_log!("[journal] budget={} tokens ({}*15%)", journal_budget, context_window);
let mut entries = Vec::new();
let mut journal_entries = Vec::new();
let mut total_tokens = 0;
for node in journal_nodes[..cutoff_idx].iter().rev() {
let tokens = count(&node.content);
if total_tokens + tokens > journal_budget && !entries.is_empty() {
let msg = Message::user(&node.content);
let tokens = context::msg_token_count(&self.tokenizer, &msg);
if total_tokens + tokens > journal_budget && !journal_entries.is_empty() {
break;
}
entries.push(context::JournalEntry {
timestamp: chrono::DateTime::from_timestamp(node.created_at, 0)
.unwrap_or_default(),
content: node.content.clone(),
journal_entries.push(ContextEntry {
entry: ConversationEntry::Message(msg),
tokens,
timestamp: chrono::DateTime::from_timestamp(node.created_at, 0),
});
total_tokens += tokens;
}
entries.reverse();
dbg_log!("[journal] loaded {} entries, {} tokens", entries.len(), total_tokens);
journal_entries.reverse();
dbg_log!("[journal] loaded {} entries, {} tokens", journal_entries.len(), total_tokens);
if entries.is_empty() {
if journal_entries.is_empty() {
dbg_log!("[journal] no entries!");
return;
}
self.context.journal = entries;
self.context.journal.clear();
for entry in journal_entries {
self.context.journal.push(entry);
}
dbg_log!("[journal] context.journal now has {} entries", self.context.journal.len());
}
@@ -923,10 +820,10 @@ impl Agent {
/// The tool result message before each image records what was loaded.
pub fn age_out_images(&mut self) {
// Find image entries newest-first, skip 1 (caller is about to add another)
let to_age: Vec<usize> = self.context.entries.iter().enumerate()
let to_age: Vec<usize> = self.context.conversation.entries().iter().enumerate()
.rev()
.filter(|(_, e)| {
if let Some(MessageContent::Parts(parts)) = &e.message().content {
.filter(|(_, ce)| {
if let Some(MessageContent::Parts(parts)) = &ce.entry.message().content {
parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. }))
} else { false }
})
@@ -935,7 +832,9 @@ impl Agent {
.collect();
for i in to_age {
let msg = self.context.entries[i].message_mut();
// Build replacement entry with image data stripped
let old = &self.context.conversation.entries()[i];
let msg = old.entry.message();
if let Some(MessageContent::Parts(parts)) = &msg.content {
let mut replacement = String::new();
for part in parts {
@@ -950,7 +849,14 @@ impl Agent {
}
}
}
msg.content = Some(MessageContent::Text(replacement));
let mut new_msg = msg.clone();
new_msg.content = Some(MessageContent::Text(replacement));
let tokens = context::msg_token_count(&self.tokenizer, &new_msg);
self.context.conversation.set(i, ContextEntry {
entry: ConversationEntry::Message(new_msg),
tokens,
timestamp: old.timestamp,
});
}
}
self.generation += 1;
@@ -968,16 +874,30 @@ impl Agent {
// Reload identity from config
match crate::config::reload_for_model(&self.app_config, &self.prompt_file) {
Ok((system_prompt, personality)) => {
self.context.system_prompt = system_prompt;
self.context.personality = personality;
self.context.system.clear();
self.context.system.push(ContextEntry {
entry: ConversationEntry::System(Message::system(&system_prompt)),
tokens: context::msg_token_count(&self.tokenizer, &Message::system(&system_prompt)),
timestamp: None,
});
self.context.identity.clear();
for (_name, content) in &personality {
let msg = Message::user(content);
self.context.identity.push(ContextEntry {
tokens: context::msg_token_count(&self.tokenizer, &msg),
entry: ConversationEntry::Message(msg),
timestamp: None,
});
}
}
Err(e) => {
eprintln!("warning: failed to reload identity: {:#}", e);
}
}
let before = self.context.entries.len();
let before_mem = self.context.entries.iter().filter(|e| e.is_memory()).count();
let before = self.context.conversation.len();
let before_mem = self.context.conversation.entries().iter()
.filter(|e| e.entry.is_memory()).count();
let before_conv = before - before_mem;
// Age out images before trimming — they're huge in the request payload
@@ -988,15 +908,11 @@ impl Agent {
// Dedup memory, trim to budget
let budget = self.context_budget();
let entries = self.context.entries.clone();
self.context.entries = crate::agent::context::trim_entries(
&entries,
&self.tokenizer,
&budget,
);
self.context.conversation.trim(&budget, &self.tokenizer);
let after = self.context.entries.len();
let after_mem = self.context.entries.iter().filter(|e| e.is_memory()).count();
let after = self.context.conversation.len();
let after_mem = self.context.conversation.entries().iter()
.filter(|e| e.entry.is_memory()).count();
let after_conv = after - after_mem;
dbglog!("[compact] entries: {} → {} (mem: {} → {}, conv: {} → {})",
@@ -1022,14 +938,26 @@ impl Agent {
};
// Load extra — compact() will dedup, trim, reload identity + journal
let all: Vec<_> = entries.into_iter()
let all: Vec<ContextEntry> = entries.into_iter()
.filter(|e| !e.is_log() && e.message().role != Role::System)
.map(|e| {
let tokens = if e.is_log() { 0 } else {
context::msg_token_count(&self.tokenizer, e.api_message())
};
let timestamp = if e.is_log() { None } else {
e.message().timestamp.as_ref().and_then(|ts| {
chrono::DateTime::parse_from_rfc3339(ts).ok()
.map(|dt| dt.with_timezone(&chrono::Utc))
})
};
ContextEntry { entry: e, tokens, timestamp }
})
.collect();
let mem_count = all.iter().filter(|e| e.is_memory()).count();
let mem_count = all.iter().filter(|e| e.entry.is_memory()).count();
let conv_count = all.len() - mem_count;
dbglog!("[restore] loaded {} entries from log (mem: {}, conv: {})",
all.len(), mem_count, conv_count);
self.context.entries = all;
self.context.conversation.set_entries(all);
self.compact();
// Estimate prompt tokens so status bar isn't 0 on startup
self.last_prompt_tokens = self.context_budget().total() as u32;
@@ -1046,9 +974,9 @@ impl Agent {
&self.client.model
}
/// Get the conversation entries for persistence.
pub fn entries(&self) -> &[ConversationEntry] {
&self.context.entries
/// Get the conversation entries.
pub fn entries(&self) -> &[ContextEntry] {
self.context.conversation.entries()
}
/// Mutable access to conversation entries (for /retry).