WIP: ContextEntry/ContextSection data structures for incremental token counting

New types — not yet wired to callers:

- ContextEntry: wraps ConversationEntry with cached token count and
  timestamp
- ContextSection: named group of entries with cached token total.
  Private entries/tokens, read via entries()/tokens().
  Mutation via push(entry), set(index, entry), del(index).
- ContextState: system/identity/journal/conversation sections + working_stack
- ConversationEntry::System variant for system prompt entries

Token counting happens once at push time. Sections maintain their
totals incrementally via push/set/del. No more recomputing from
scratch on every budget check.

Does not compile — callers need updating.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-07 20:15:31 -04:00
parent 776ac527f1
commit 62996e27d7
10 changed files with 450 additions and 403 deletions

View file

@ -10,20 +10,130 @@ use serde::{Deserialize, Serialize};
use tiktoken_rs::CoreBPE;
use crate::agent::tools::working_stack;
/// A section of the context window, possibly with children.
// --- Context state types ---
/// One entry in the context: a system prompt, a regular message, memory
/// content, a DMN prompt, or a log line.
/// Memory entries preserve the original message for KV cache round-tripping.
///
/// Every variant except `Log` carries a `Message`. The accessors
/// `api_message()`, `message()` and `message_mut()` panic on `Log` entries,
/// so filter those out (see `is_log()`) before calling them.
#[derive(Debug, Clone, PartialEq)]
pub enum ConversationEntry {
/// System prompt or system-level instruction.
System(Message),
/// Regular conversation message.
Message(Message),
/// Memory content. `key` identifies the memory: trimming dedups Memory
/// entries by key and keeps the last one. `score` is optional; a missing
/// score is treated as 0.0 when trimming compares memories for eviction.
Memory { key: String, message: Message, score: Option<f64> },
/// DMN heartbeat/autonomous prompt — evicted aggressively during compaction.
Dmn(Message),
/// Debug/status log line — written to conversation log for tracing,
/// skipped on read-back.
Log(String),
}
/// Entry in the context window — wraps a ConversationEntry with cached metadata.
///
/// `tokens` is supplied by whoever builds the entry and is not recomputed
/// here, so it must match `entry` for the owning section's total to be right.
#[derive(Debug, Clone)]
pub struct ContextEntry {
/// The wrapped conversation entry.
pub entry: ConversationEntry,
/// Cached token count (0 for Log entries).
pub tokens: usize,
/// When this entry was added to the context. `None` when no time is
/// recorded (system prompt and identity entries are built with `None`).
pub timestamp: Option<DateTime<Utc>>,
}
/// A named section of the context window with cached token total.
#[derive(Debug, Clone)]
pub struct ContextSection {
pub name: String,
pub tokens: usize,
pub content: String,
pub children: Vec<ContextSection>,
/// Cached sum of entry tokens.
tokens: usize,
entries: Vec<ContextEntry>,
}
/// A single journal entry with its timestamp and content.
#[derive(Debug, Clone)]
pub struct JournalEntry {
pub timestamp: DateTime<Utc>,
pub content: String,
impl ContextSection {
    /// Create an empty section labelled `name` with a zero token total.
    pub fn new(name: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            tokens: 0,
            entries: Vec::new(),
        }
    }

    /// The entries currently held, in order.
    pub fn entries(&self) -> &[ContextEntry] {
        &self.entries
    }

    /// Cached sum of the token counts of all entries.
    pub fn tokens(&self) -> usize {
        self.tokens
    }

    /// Number of entries in the section.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// True when the section holds no entries.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Append `entry` and add its cached token count to the section total.
    pub fn push(&mut self, entry: ContextEntry) {
        let added = entry.tokens;
        self.tokens += added;
        self.entries.push(entry);
    }

    /// Overwrite the entry at `index` with `entry`, swapping the old entry's
    /// token count for the new one in the section total.
    ///
    /// Panics if `index` is out of bounds.
    pub fn set(&mut self, index: usize, entry: ContextEntry) {
        let outgoing = self.entries[index].tokens;
        self.tokens = self.tokens - outgoing + entry.tokens;
        self.entries[index] = entry;
    }

    /// Remove and return the entry at `index`, subtracting its token count
    /// from the section total.
    ///
    /// Panics if `index` is out of bounds.
    pub fn del(&mut self, index: usize) -> ContextEntry {
        let taken = self.entries.remove(index);
        self.tokens -= taken.tokens;
        taken
    }

    /// Swap in a new message for the entry at `index` and recount its tokens
    /// with `tokenizer`, updating both the entry and the section total.
    ///
    /// Panics if `index` is out of bounds or the entry is a Log entry.
    pub fn set_message(&mut self, index: usize, tokenizer: &CoreBPE, msg: Message) {
        let slot = &mut self.entries[index];
        let before = slot.tokens;
        *slot.entry.message_mut() = msg;
        let after = msg_token_count(tokenizer, slot.entry.api_message());
        slot.tokens = after;
        self.tokens = self.tokens - before + after;
    }

    /// Update the score of the Memory entry at `index`. Entries of any other
    /// kind are left alone. Token counts are unaffected.
    ///
    /// Panics if `index` is out of bounds.
    pub fn set_score(&mut self, index: usize, score: Option<f64>) {
        match &mut self.entries[index].entry {
            ConversationEntry::Memory { score: current, .. } => *current = score,
            _ => {}
        }
    }

    /// Replace every entry at once; the total is rebuilt from the cached
    /// per-entry counts of the new entries.
    pub fn set_entries(&mut self, entries: Vec<ContextEntry>) {
        let total: usize = entries.iter().map(|item| item.tokens).sum();
        self.tokens = total;
        self.entries = entries;
    }

    /// Run `trim_entries` over the current entries (dedup + trim to budget)
    /// and keep its result, then rebuild the total from the survivors.
    pub fn trim(&mut self, budget: &ContextBudget, tokenizer: &CoreBPE) {
        self.entries = trim_entries(&self.entries, tokenizer, budget);
        self.tokens = self.entries.iter().map(|item| item.tokens).sum();
    }

    /// Drop all entries and reset the total to zero.
    pub fn clear(&mut self) {
        self.tokens = 0;
        self.entries.clear();
    }
}
/// The full context window state: four token-counted sections plus the
/// working stack.
#[derive(Clone)]
pub struct ContextState {
/// System prompt entries.
pub system: ContextSection,
/// Identity entries (personality content).
pub identity: ContextSection,
/// Journal entries; rendered into a single text block by `render_journal()`.
pub journal: ContextSection,
/// Conversation entries — messages, memory and log lines, in order.
pub conversation: ContextSection,
/// Working stack — separate from identity because it's managed
/// by its own tool, not loaded from personality files.
pub working_stack: Vec<String>,
}
impl ContextState {
    /// Sum of the cached token totals of the four sections.
    /// The working stack is not included.
    pub fn total_tokens(&self) -> usize {
        self.sections().iter().map(|section| section.tokens()).sum()
    }

    /// The four sections as a fixed-size array, ordered
    /// system, identity, journal, conversation.
    pub fn sections(&self) -> [&ContextSection; 4] {
        let Self { system, identity, journal, conversation, .. } = self;
        [system, identity, journal, conversation]
    }
}
/// Context window size in tokens (from config).
@ -44,41 +154,39 @@ fn context_budget_tokens() -> usize {
/// corresponding assistant tool_call message).
/// 2. Trim: drop oldest entries until the conversation fits, snapping
/// to user message boundaries.
pub fn trim_entries(
entries: &[ConversationEntry],
tokenizer: &CoreBPE,
fn trim_entries(
entries: &[ContextEntry],
_tokenizer: &CoreBPE,
budget: &ContextBudget,
) -> Vec<ConversationEntry> {
) -> Vec<ContextEntry> {
let fixed_tokens = budget.system + budget.identity + budget.journal;
// --- Phase 1: dedup memory entries by key (keep last) ---
let mut seen_keys: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
let mut drop_indices: std::collections::HashSet<usize> = std::collections::HashSet::new();
for (i, entry) in entries.iter().enumerate() {
if let ConversationEntry::Memory { key, .. } = entry {
for (i, ce) in entries.iter().enumerate() {
if let ConversationEntry::Memory { key, .. } = &ce.entry {
if let Some(prev) = seen_keys.insert(key.as_str(), i) {
drop_indices.insert(prev);
}
}
}
let deduped: Vec<ConversationEntry> = entries.iter().enumerate()
let deduped: Vec<ContextEntry> = entries.iter().enumerate()
.filter(|(i, _)| !drop_indices.contains(i))
.map(|(_, e)| e.clone())
.collect();
// --- Phase 2: trim to fit context budget ---
let max_tokens = context_budget_tokens();
let count_msg = |m: &Message| msg_token_count(tokenizer, m);
let msg_costs: Vec<usize> = deduped.iter()
.map(|e| if e.is_log() { 0 } else { count_msg(e.api_message()) }).collect();
let msg_costs: Vec<usize> = deduped.iter().map(|e| e.tokens).collect();
let entry_total: usize = msg_costs.iter().sum();
let total: usize = fixed_tokens + entry_total;
let mem_tokens: usize = deduped.iter().zip(&msg_costs)
.filter(|(e, _)| e.is_memory())
.map(|(_, &c)| c).sum();
let mem_tokens: usize = deduped.iter()
.filter(|ce| ce.entry.is_memory())
.map(|ce| ce.tokens).sum();
let conv_tokens: usize = entry_total - mem_tokens;
dbglog!("[trim] max_tokens={} fixed={} mem={} conv={} total={} entries={}",
@ -90,7 +198,7 @@ pub fn trim_entries(
let mut cur_mem = mem_tokens;
for i in 0..deduped.len() {
if deduped[i].is_dmn() {
if deduped[i].entry.is_dmn() {
drop[i] = true;
trimmed -= msg_costs[i];
}
@ -99,14 +207,14 @@ pub fn trim_entries(
// Phase 2b: if memories > 50% of context, evict lowest-scored first
if cur_mem > conv_tokens && trimmed > max_tokens {
let mut mem_indices: Vec<usize> = (0..deduped.len())
.filter(|&i| !drop[i] && deduped[i].is_memory())
.filter(|&i| !drop[i] && deduped[i].entry.is_memory())
.collect();
mem_indices.sort_by(|&a, &b| {
let sa = match &deduped[a] {
let sa = match &deduped[a].entry {
ConversationEntry::Memory { score, .. } => score.unwrap_or(0.0),
_ => 0.0,
};
let sb = match &deduped[b] {
let sb = match &deduped[b].entry {
ConversationEntry::Memory { score, .. } => score.unwrap_or(0.0),
_ => 0.0,
};
@ -130,16 +238,16 @@ pub fn trim_entries(
}
// Walk forward to include complete conversation boundaries
let mut result: Vec<ConversationEntry> = Vec::new();
let mut result: Vec<ContextEntry> = Vec::new();
let mut skipping = true;
for (i, entry) in deduped.into_iter().enumerate() {
for (i, ce) in deduped.into_iter().enumerate() {
if skipping {
if drop[i] { continue; }
// Snap to user message boundary
if entry.message().role != Role::User { continue; }
if ce.entry.message().role != Role::User { continue; }
skipping = false;
}
result.push(entry);
result.push(ce);
}
dbglog!("[trim] result={} trimmed_total={}", result.len(), trimmed);
@ -186,28 +294,13 @@ pub fn is_stream_error(err: &anyhow::Error) -> bool {
err.to_string().contains("model stream error")
}
// --- Context state types ---
/// Conversation entry — either a regular message or memory content.
/// Memory entries preserve the original message for KV cache round-tripping.
#[derive(Debug, Clone, PartialEq)]
pub enum ConversationEntry {
Message(Message),
Memory { key: String, message: Message, score: Option<f64> },
/// DMN heartbeat/autonomous prompt — evicted aggressively during compaction.
Dmn(Message),
/// Debug/status log line — written to conversation log for tracing,
/// skipped on read-back.
Log(String),
}
// Custom serde: serialize Memory with a "memory_key" field added to the message,
// plain messages serialize as-is. This keeps the conversation log readable.
impl Serialize for ConversationEntry {
fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
use serde::ser::SerializeMap;
match self {
Self::Message(m) | Self::Dmn(m) => m.serialize(s),
Self::System(m) | Self::Message(m) | Self::Dmn(m) => m.serialize(s),
Self::Memory { key, message, score } => {
let json = serde_json::to_value(message).map_err(serde::ser::Error::custom)?;
let mut map = s.serialize_map(None)?;
@ -259,7 +352,7 @@ impl ConversationEntry {
/// Panics on Log entries (which should be filtered before API calls).
pub fn api_message(&self) -> &Message {
match self {
Self::Message(m) | Self::Dmn(m) => m,
Self::System(m) | Self::Message(m) | Self::Dmn(m) => m,
Self::Memory { message, .. } => message,
Self::Log(_) => panic!("Log entries have no API message"),
}
@ -281,7 +374,7 @@ impl ConversationEntry {
/// Panics on Log entries.
pub fn message(&self) -> &Message {
match self {
Self::Message(m) | Self::Dmn(m) => m,
Self::System(m) | Self::Message(m) | Self::Dmn(m) => m,
Self::Memory { message, .. } => message,
Self::Log(_) => panic!("Log entries have no message"),
}
@ -291,38 +384,36 @@ impl ConversationEntry {
/// Panics on Log entries.
pub fn message_mut(&mut self) -> &mut Message {
match self {
Self::Message(m) | Self::Dmn(m) => m,
Self::System(m) | Self::Message(m) | Self::Dmn(m) => m,
Self::Memory { message, .. } => message,
Self::Log(_) => panic!("Log entries have no message"),
}
}
}
#[derive(Clone)]
pub struct ContextState {
pub system_prompt: String,
pub personality: Vec<(String, String)>,
pub journal: Vec<JournalEntry>,
pub working_stack: Vec<String>,
/// Conversation entries — messages and memory, interleaved in order.
/// Does NOT include system prompt, personality, or journal.
pub entries: Vec<ConversationEntry>,
}
pub fn render_journal(entries: &[JournalEntry]) -> String {
if entries.is_empty() { return String::new(); }
let mut text = String::from("[Earlier — from your journal]\n\n");
for entry in entries {
use std::fmt::Write;
writeln!(text, "## {}\n{}\n", entry.timestamp.format("%Y-%m-%dT%H:%M"), entry.content).ok();
}
text
}
impl ContextState {
/// Render the journal section as one text block.
///
/// Returns an empty string when the journal has no entries. Otherwise the
/// output starts with a fixed header, followed by each entry in order:
/// an entry with a timestamp gets a `## <timestamp>` heading (formatted to
/// the minute) above its text; an entry without one is emitted as bare text.
pub fn render_journal(&self) -> String {
    use std::fmt::Write;

    if self.journal.is_empty() {
        return String::new();
    }

    let mut out = String::from("[Earlier — from your journal]\n\n");
    for item in self.journal.entries() {
        match &item.timestamp {
            Some(when) => {
                // Writing into a String cannot fail; the result is discarded.
                writeln!(out, "## {}\n{}\n",
                    when.format("%Y-%m-%dT%H:%M"),
                    item.entry.message().content_text()).ok();
            }
            None => {
                out.push_str(&item.entry.message().content_text());
                out.push_str("\n\n");
            }
        }
    }
    out
}
/// Render identity files + working stack into a single user message.
pub fn render_context_message(&self) -> String {
let mut parts: Vec<String> = self.personality.iter()
.map(|(name, content)| format!("## {}\n\n{}", name, content))
let mut parts: Vec<String> = self.identity.entries().iter()
.map(|e| e.entry.message().content_text().to_string())
.collect();
let instructions = std::fs::read_to_string(working_stack::instructions_path()).unwrap_or_default();
let mut stack_section = instructions;

View file

@ -24,7 +24,7 @@ use tiktoken_rs::CoreBPE;
use api::{ApiClient, ToolCall};
use api::{ContentPart, Message, MessageContent, Role};
use context::{ConversationEntry, ContextState};
use context::{ConversationEntry, ContextEntry, ContextState};
use tools::{summarize_args, working_stack};
use crate::mind::log::ConversationLog;
@ -195,12 +195,27 @@ impl Agent {
let tokenizer = tiktoken_rs::cl100k_base()
.expect("failed to load cl100k_base tokenizer");
let mut system = ContextSection::new("System prompt");
system.push(ContextEntry {
entry: ConversationEntry::System(Message::system(&system_prompt)),
tokens: context::msg_token_count(&tokenizer, &Message::system(&system_prompt)),
timestamp: None,
});
let mut identity = ContextSection::new("Identity");
for (_name, content) in &personality {
let msg = Message::user(content);
identity.push(ContextEntry {
tokens: context::msg_token_count(&tokenizer, &msg),
entry: ConversationEntry::Message(msg),
timestamp: None,
});
}
let context = ContextState {
system_prompt: system_prompt.clone(),
personality,
journal: Vec::new(),
system,
identity,
journal: ContextSection::new("Journal"),
conversation: ContextSection::new("Conversation"),
working_stack: Vec::new(),
entries: Vec::new(),
};
let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
let mut agent = Self {
@ -274,18 +289,24 @@ impl Agent {
/// System prompt + personality context + journal + conversation messages.
pub fn assemble_api_messages(&self) -> Vec<Message> {
let mut msgs = Vec::new();
msgs.push(Message::system(&self.context.system_prompt));
// System section
for e in self.context.system.entries() {
msgs.push(e.entry.api_message().clone());
}
// Identity — render personality files + working stack into one user message
let ctx = self.context.render_context_message();
if !ctx.is_empty() {
msgs.push(Message::user(ctx));
}
let jnl = context::render_journal(&self.context.journal);
// Journal — render into one user message
let jnl = self.context.render_journal();
if !jnl.is_empty() {
msgs.push(Message::user(jnl));
}
msgs.extend(self.context.entries.iter()
.filter(|e| !e.is_log())
.map(|e| e.api_message().clone()));
// Conversation entries
msgs.extend(self.context.conversation.entries().iter()
.filter(|e| !e.entry.is_log())
.map(|e| e.entry.api_message().clone()));
msgs
}
@ -303,50 +324,64 @@ impl Agent {
eprintln!("warning: failed to log entry: {:#}", e);
}
}
self.context.entries.push(entry);
let tokens = if entry.is_log() { 0 } else {
context::msg_token_count(&self.tokenizer, entry.api_message())
};
self.context.conversation.push(ContextEntry {
entry, tokens, timestamp: Some(chrono::Utc::now()),
});
self.changed.notify_one();
}
fn streaming_entry(&mut self) -> Option<&mut Message> {
for entry in self.context.entries.iter_mut().rev() {
let m = entry.message_mut();
if m.role == Role::Assistant {
return if m.timestamp.is_none() { Some(m) } else { None }
}
}
None
/// Find the index of the in-progress streaming entry (unstamped assistant message).
///
/// Scans the conversation from newest to oldest and returns the index of the
/// first assistant message whose `timestamp` is `None`, or `None` when there
/// is no such message.
///
/// Log entries are skipped: they carry no message, and `message()` panics
/// on them, so a log line after the streaming entry must not be inspected.
///
/// NOTE(review): the scan continues past stamped assistant messages, so an
/// older unstamped assistant entry would still be returned — confirm that
/// is intended.
fn streaming_index(&self) -> Option<usize> {
    self.context.conversation.entries().iter().rposition(|ce| {
        if ce.entry.is_log() {
            return false;
        }
        let m = ce.entry.message();
        m.role == Role::Assistant && m.timestamp.is_none()
    })
}
/// Append streaming text to the last entry (creating a partial
/// assistant entry if needed). Called by collect_stream per token batch.
fn append_streaming(&mut self, text: &str) {
if let Some(m) = self.streaming_entry() {
m.append_content(text);
if let Some(idx) = self.streaming_index() {
let mut msg = self.context.conversation.entries()[idx].entry.message().clone();
msg.append_content(text);
self.context.conversation.set_message(idx, &self.tokenizer, msg);
} else {
// No streaming entry — create without timestamp so finalize can find it
self.context.entries.push(ConversationEntry::Message(Message {
let msg = Message {
role: Role::Assistant,
content: Some(MessageContent::Text(text.to_string())),
tool_calls: None,
tool_call_id: None,
name: None,
timestamp: None,
}));
};
let tokens = context::msg_token_count(&self.tokenizer, &msg);
self.context.conversation.push(ContextEntry {
entry: ConversationEntry::Message(msg),
tokens,
timestamp: None,
});
}
self.changed.notify_one();
}
/// Finalize the streaming entry with the complete response message.
/// Finds the unstamped assistant entry, updates it in place, and logs it.
/// Finds the unstamped assistant entry, replaces it via set() with proper token count.
fn finalize_streaming(&mut self, msg: Message) {
if let Some(m) = self.streaming_entry() {
*m = msg.clone();
m.stamp();
if let Some(i) = self.streaming_index() {
let mut stamped = msg.clone();
stamped.stamp();
let tokens = context::msg_token_count(&self.tokenizer, &stamped);
self.context.conversation.set(i, ContextEntry {
entry: ConversationEntry::Message(stamped),
tokens,
timestamp: Some(chrono::Utc::now()),
});
} else {
// No streaming entry found — push as new (this logs via push_message)
self.push_message(msg.clone());
}
@ -655,173 +690,32 @@ impl Agent {
self.push_message(Message::tool_result(&call.id, &output));
}
/// Token budget by category — cheap, no formatting. Used for compaction decisions.
/// Token budget by category — just reads cached section totals.
pub fn context_budget(&self) -> context::ContextBudget {
let count = |m: &Message| context::msg_token_count(&self.tokenizer, m);
let system = count(&Message::system(&self.context.system_prompt));
let identity = count(&Message::user(&self.context.render_context_message()));
let journal_rendered = context::render_journal(&self.context.journal);
let journal = if journal_rendered.is_empty() { 0 } else {
count(&Message::user(&journal_rendered))
};
let memory: usize = self.context.entries.iter()
.filter(|e| e.is_memory())
.map(|e| count(e.message()))
let memory: usize = self.context.conversation.entries().iter()
.filter(|e| e.entry.is_memory())
.map(|e| e.tokens)
.sum();
let conversation: usize = self.context.entries.iter()
.filter(|e| !e.is_memory() && !e.is_log())
.map(|e| count(e.api_message()))
.sum();
context::ContextBudget { system, identity, journal, memory, conversation }
}
/// Build context state summary for the debug screen.
pub fn context_state_summary(&self) -> Vec<ContextSection> {
let count_msg = |m: &Message| context::msg_token_count(&self.tokenizer, m);
let mut sections = Vec::new();
// System prompt — counted as the actual message sent
let system_msg = Message::system(&self.context.system_prompt);
sections.push(ContextSection {
name: "System prompt".into(),
tokens: count_msg(&system_msg),
content: self.context.system_prompt.clone(),
children: Vec::new(),
});
// Context message (personality + working stack) — counted as the
// single user message that assemble_api_messages sends
let context_rendered = self.context.render_context_message();
let context_msg = Message::user(&context_rendered);
sections.push(ContextSection {
name: format!("Identity ({} files + stack)", self.context.personality.len()),
tokens: count_msg(&context_msg),
content: context_rendered,
children: Vec::new(),
});
// Journal — counted as the single rendered message sent
let journal_rendered = context::render_journal(&self.context.journal);
let journal_msg = Message::user(&journal_rendered);
sections.push(ContextSection {
name: format!("Journal ({} entries)", self.context.journal.len()),
tokens: if journal_rendered.is_empty() { 0 } else { count_msg(&journal_msg) },
content: journal_rendered,
children: Vec::new(),
});
// Memory nodes — extracted from Memory entries in the conversation
let memory_entries: Vec<&ConversationEntry> = self.context.entries.iter()
.filter(|e| e.is_memory())
.collect();
if !memory_entries.is_empty() {
let node_children: Vec<ContextSection> = memory_entries.iter()
.map(|entry| {
let (key, score) = match entry {
ConversationEntry::Memory { key, score, .. } => (key.as_str(), *score),
_ => unreachable!(),
};
let label = match score {
Some(s) => format!("{} (score:{:.1})", key, s),
None => key.to_string(),
};
ContextSection {
name: label,
tokens: count_msg(entry.message()),
content: String::new(),
children: Vec::new(),
}
})
.collect();
let node_tokens: usize = node_children.iter().map(|c| c.tokens).sum();
sections.push(ContextSection {
name: format!("Memory nodes ({} loaded)", memory_entries.len()),
tokens: node_tokens,
content: String::new(),
children: node_children,
});
let conv_total = self.context.conversation.tokens();
context::ContextBudget {
system: self.context.system.tokens(),
identity: self.context.identity.tokens(),
journal: self.context.journal.tokens(),
memory,
conversation: conv_total - memory,
}
// Conversation — memories excluded (counted in their own section above)
let conv_children = self.entry_sections(&count_msg, 0, false);
let conv_tokens: usize = conv_children.iter().map(|c| c.tokens).sum();
sections.push(ContextSection {
name: format!("Conversation ({} messages)", conv_children.len()),
tokens: conv_tokens,
content: String::new(),
children: conv_children,
});
sections
}
/// Build ContextSection nodes for conversation entries starting at `from`.
/// When `include_memories` is false, memory entries are excluded (they get
/// their own section in context_state_summary to avoid double-counting).
fn entry_sections(
&self,
count_msg: &dyn Fn(&Message) -> usize,
from: usize,
include_memories: bool,
) -> Vec<ContextSection> {
let cfg = crate::config::get();
self.context.entries.iter().enumerate()
.skip(from)
.filter(|(_, e)| !e.is_log() && (include_memories || !e.is_memory()))
.map(|(i, entry)| {
let m = entry.message();
let text = m.content.as_ref()
.map(|c| c.as_text().to_string())
.unwrap_or_default();
let (role_name, label) = if let ConversationEntry::Memory { key, score, .. } = entry {
let label = match score {
Some(s) => format!("[memory: {} score:{:.1}]", key, s),
None => format!("[memory: {}]", key),
};
("mem".to_string(), label)
} else {
let tool_info = m.tool_calls.as_ref().map(|tc| {
tc.iter()
.map(|c| c.function.name.clone())
.collect::<Vec<_>>()
.join(", ")
});
let label = match &tool_info {
Some(tools) => format!("[tool_call: {}]", tools),
None => {
let preview: String = text.chars().take(60).collect();
let preview = preview.replace('\n', " ");
if text.len() > 60 { format!("{}...", preview) } else { preview }
}
};
let role_name = match m.role {
Role::Assistant => cfg.assistant_name.clone(),
Role::User => cfg.user_name.clone(),
Role::Tool => "tool".to_string(),
Role::System => "system".to_string(),
};
(role_name, label)
};
ContextSection {
name: format!("[{}] {}: {}", i, role_name, label),
tokens: count_msg(entry.api_message()),
content: text,
children: Vec::new(),
}
})
.collect()
/// Context state sections — just returns references to the live data.
///
/// Delegates to `ContextState::sections()`, which yields the sections in the
/// order system, identity, journal, conversation. Nothing is copied.
pub fn context_sections(&self) -> [&ContextSection; 4] {
self.context.sections()
}
/// Context sections for entries from `from` onward — used by the
/// Conversation entries from `from` onward — used by the
/// subconscious debug screen to show forked agent conversations.
pub fn conversation_sections_from(&self, from: usize) -> Vec<ContextSection> {
let count_msg = |m: &Message| context::msg_token_count(&self.tokenizer, m);
self.entry_sections(&count_msg, from, true)
pub fn conversation_entries_from(&self, from: usize) -> &[ContextEntry] {
    // Tail of the conversation starting at `from`. A `from` at or beyond the
    // end yields an empty slice instead of panicking.
    self.context.conversation.entries().get(from..).unwrap_or(&[])
}
/// Load recent journal entries at startup for orientation.
@ -876,35 +770,38 @@ impl Agent {
dbg_log!("[journal] cutoff_idx={}", cutoff_idx);
// Walk backwards from cutoff, accumulating entries within 15% of context
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
let context_window = crate::agent::context::context_window();
let journal_budget = context_window * 15 / 100;
dbg_log!("[journal] budget={} tokens ({}*15%)", journal_budget, context_window);
let mut entries = Vec::new();
let mut journal_entries = Vec::new();
let mut total_tokens = 0;
for node in journal_nodes[..cutoff_idx].iter().rev() {
let tokens = count(&node.content);
if total_tokens + tokens > journal_budget && !entries.is_empty() {
let msg = Message::user(&node.content);
let tokens = context::msg_token_count(&self.tokenizer, &msg);
if total_tokens + tokens > journal_budget && !journal_entries.is_empty() {
break;
}
entries.push(context::JournalEntry {
timestamp: chrono::DateTime::from_timestamp(node.created_at, 0)
.unwrap_or_default(),
content: node.content.clone(),
journal_entries.push(ContextEntry {
entry: ConversationEntry::Message(msg),
tokens,
timestamp: chrono::DateTime::from_timestamp(node.created_at, 0),
});
total_tokens += tokens;
}
entries.reverse();
dbg_log!("[journal] loaded {} entries, {} tokens", entries.len(), total_tokens);
journal_entries.reverse();
dbg_log!("[journal] loaded {} entries, {} tokens", journal_entries.len(), total_tokens);
if entries.is_empty() {
if journal_entries.is_empty() {
dbg_log!("[journal] no entries!");
return;
}
self.context.journal = entries;
self.context.journal.clear();
for entry in journal_entries {
self.context.journal.push(entry);
}
dbg_log!("[journal] context.journal now has {} entries", self.context.journal.len());
}
@ -923,10 +820,10 @@ impl Agent {
/// The tool result message before each image records what was loaded.
pub fn age_out_images(&mut self) {
// Find image entries newest-first, skip 1 (caller is about to add another)
let to_age: Vec<usize> = self.context.entries.iter().enumerate()
let to_age: Vec<usize> = self.context.conversation.entries().iter().enumerate()
.rev()
.filter(|(_, e)| {
if let Some(MessageContent::Parts(parts)) = &e.message().content {
.filter(|(_, ce)| {
if let Some(MessageContent::Parts(parts)) = &ce.entry.message().content {
parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. }))
} else { false }
})
@ -935,7 +832,9 @@ impl Agent {
.collect();
for i in to_age {
let msg = self.context.entries[i].message_mut();
// Build replacement entry with image data stripped
let old = &self.context.conversation.entries()[i];
let msg = old.entry.message();
if let Some(MessageContent::Parts(parts)) = &msg.content {
let mut replacement = String::new();
for part in parts {
@ -950,7 +849,14 @@ impl Agent {
}
}
}
msg.content = Some(MessageContent::Text(replacement));
let mut new_msg = msg.clone();
new_msg.content = Some(MessageContent::Text(replacement));
let tokens = context::msg_token_count(&self.tokenizer, &new_msg);
self.context.conversation.set(i, ContextEntry {
entry: ConversationEntry::Message(new_msg),
tokens,
timestamp: old.timestamp,
});
}
}
self.generation += 1;
@ -968,16 +874,30 @@ impl Agent {
// Reload identity from config
match crate::config::reload_for_model(&self.app_config, &self.prompt_file) {
Ok((system_prompt, personality)) => {
self.context.system_prompt = system_prompt;
self.context.personality = personality;
self.context.system.clear();
self.context.system.push(ContextEntry {
entry: ConversationEntry::System(Message::system(&system_prompt)),
tokens: context::msg_token_count(&self.tokenizer, &Message::system(&system_prompt)),
timestamp: None,
});
self.context.identity.clear();
for (_name, content) in &personality {
let msg = Message::user(content);
self.context.identity.push(ContextEntry {
tokens: context::msg_token_count(&self.tokenizer, &msg),
entry: ConversationEntry::Message(msg),
timestamp: None,
});
}
}
Err(e) => {
eprintln!("warning: failed to reload identity: {:#}", e);
}
}
let before = self.context.entries.len();
let before_mem = self.context.entries.iter().filter(|e| e.is_memory()).count();
let before = self.context.conversation.len();
let before_mem = self.context.conversation.entries().iter()
.filter(|e| e.entry.is_memory()).count();
let before_conv = before - before_mem;
// Age out images before trimming — they're huge in the request payload
@ -988,15 +908,11 @@ impl Agent {
// Dedup memory, trim to budget
let budget = self.context_budget();
let entries = self.context.entries.clone();
self.context.entries = crate::agent::context::trim_entries(
&entries,
&self.tokenizer,
&budget,
);
self.context.conversation.trim(&budget, &self.tokenizer);
let after = self.context.entries.len();
let after_mem = self.context.entries.iter().filter(|e| e.is_memory()).count();
let after = self.context.conversation.len();
let after_mem = self.context.conversation.entries().iter()
.filter(|e| e.entry.is_memory()).count();
let after_conv = after - after_mem;
dbglog!("[compact] entries: {} → {} (mem: {} → {}, conv: {} → {})",
@ -1022,14 +938,26 @@ impl Agent {
};
// Load extra — compact() will dedup, trim, reload identity + journal
let all: Vec<_> = entries.into_iter()
let all: Vec<ContextEntry> = entries.into_iter()
.filter(|e| !e.is_log() && e.message().role != Role::System)
.map(|e| {
let tokens = if e.is_log() { 0 } else {
context::msg_token_count(&self.tokenizer, e.api_message())
};
let timestamp = if e.is_log() { None } else {
e.message().timestamp.as_ref().and_then(|ts| {
chrono::DateTime::parse_from_rfc3339(ts).ok()
.map(|dt| dt.with_timezone(&chrono::Utc))
})
};
ContextEntry { entry: e, tokens, timestamp }
})
.collect();
let mem_count = all.iter().filter(|e| e.is_memory()).count();
let mem_count = all.iter().filter(|e| e.entry.is_memory()).count();
let conv_count = all.len() - mem_count;
dbglog!("[restore] loaded {} entries from log (mem: {}, conv: {})",
all.len(), mem_count, conv_count);
self.context.entries = all;
self.context.conversation.set_entries(all);
self.compact();
// Estimate prompt tokens so status bar isn't 0 on startup
self.last_prompt_tokens = self.context_budget().total() as u32;
@ -1046,9 +974,9 @@ impl Agent {
&self.client.model
}
/// Get the conversation entries for persistence.
pub fn entries(&self) -> &[ConversationEntry] {
&self.context.entries
/// Get the conversation entries.
///
/// Borrows the conversation section's entries in order, each carrying its
/// cached token count and timestamp. Log entries may be present; nothing is
/// filtered or cloned here.
pub fn entries(&self) -> &[ContextEntry] {
self.context.conversation.entries()
}
/// Mutable access to conversation entries (for /retry).

View file

@ -93,8 +93,7 @@ impl Backend {
match self {
Backend::Standalone { messages, .. } => messages.push(msg),
Backend::Forked(agent) => {
agent.lock().await.context.entries.push(
super::context::ConversationEntry::Message(msg));
agent.lock().await.push_message(msg);
}
}
}