Replace build_context_window with trim_conversation

build_context_window loaded the journal from a stale flat file and
assembled the full context window itself. The journal now comes
from the memory graph and the context is assembled on the fly, so
all that's left is trimming the conversation to fit the token budget.

trim_conversation subtracts the identity, journal, and reserve token
costs from the context budget, drops the oldest conversation messages
until the remainder fits, then advances to the next user message so
the window never opens mid-exchange.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
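
For illustration only, here is a minimal, self-contained sketch of the
trimming scheme described above. The Msg type, trim_to_budget, the budget
numbers, and the whitespace word-count "tokenizer" are stand-ins invented
for this example; the patch itself works on ContextState, Message, and a
CoreBPE tokenizer, as the diff below shows.

// Sketch of budget-based conversation trimming (illustrative stand-ins,
// not the types or tokenizer the patch actually uses).

#[derive(Clone)]
struct Msg {
    is_user: bool,
    text: String,
}

// Crude stand-in for a real tokenizer: count whitespace-separated words.
fn count_tokens(s: &str) -> usize {
    s.split_whitespace().count()
}

fn trim_to_budget(
    conversation: &[Msg],
    identity_cost: usize,
    journal_cost: usize,
    max_tokens: usize,
) -> Vec<Msg> {
    // Budget left for conversation after identity, journal, and a reserve.
    let reserve = max_tokens / 4;
    let available = max_tokens
        .saturating_sub(identity_cost)
        .saturating_sub(journal_cost)
        .saturating_sub(reserve);

    let costs: Vec<usize> = conversation.iter().map(|m| count_tokens(&m.text)).collect();
    let mut remaining: usize = costs.iter().sum();

    // Drop oldest messages until what's left fits the available budget.
    let mut skip = 0;
    while remaining > available && skip < conversation.len() {
        remaining -= costs[skip];
        skip += 1;
    }
    // Don't open the window mid-exchange: advance to the next user message.
    while skip < conversation.len() && !conversation[skip].is_user {
        skip += 1;
    }
    conversation[skip..].to_vec()
}

fn main() {
    let convo = vec![
        Msg { is_user: true,  text: "a long old question with many extra words".into() },
        Msg { is_user: false, text: "an old answer".into() },
        Msg { is_user: true,  text: "new question".into() },
        Msg { is_user: false, text: "new answer".into() },
    ];
    // 16-token budget, 2 tokens of identity, 2 of journal -> 8 available.
    let kept = trim_to_budget(&convo, 2, 2, 16);
    println!("kept {} of {} messages", kept.len(), convo.len());
}

Advancing past non-user messages after the trim keeps the window from
opening on an assistant or tool reply whose originating prompt was dropped.
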
Kent Overstreet 2026-04-02 03:35:28 -04:00
parent 87add36cdd
commit e9e47eb798
2 changed files with 30 additions and 41 deletions


@@ -47,55 +47,44 @@ pub struct ContextPlan {
-/// it's what's happening now. Journal fills the rest, newest first.
-///
-/// Returns (messages, journal_text) — caller stores journal_text in ContextState.
-pub fn build_context_window(
+/// Trim conversation to fit within the context budget.
+/// Returns the trimmed conversation messages (oldest dropped first).
+pub fn trim_conversation(
     context: &ContextState,
     conversation: &[Message],
     model: &str,
     tokenizer: &CoreBPE,
-) -> (Vec<Message>, String) {
-    let journal_path = journal::default_journal_path();
-    let all_entries = journal::parse_journal(&journal_path);
-    dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
+) -> Vec<Message> {
     let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
-    let system_prompt = context.system_prompt.clone();
-    let context_message = context.render_context_message();
-    // Cap memory to 50% of the context budget so conversation always
-    // gets space. Truncate at the last complete section boundary.
     let max_tokens = context_budget_tokens(model);
-    let memory_cap = max_tokens / 2;
-    let memory_tokens = count(&context_message);
-    let context_message = if memory_tokens > memory_cap {
-        dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap);
-        truncate_at_section(&context_message, memory_cap, &count)
-    } else {
-        context_message
-    };
-    let recent_start = find_journal_cutoff(conversation, all_entries.last());
-    dbglog!("[ctx] journal cutoff: {} of {} conversation messages are 'recent'",
-        conversation.len() - recent_start, conversation.len());
-    let recent = &conversation[recent_start..];
+    let identity_cost = count(&context.system_prompt)
+        + context.personality.iter().map(|(_, c)| count(c)).sum::<usize>();
+    let journal_cost: usize = context.journal.iter().map(|e| count(&e.content)).sum();
+    let reserve = max_tokens / 4;
+    let available = max_tokens
+        .saturating_sub(identity_cost)
+        .saturating_sub(journal_cost)
+        .saturating_sub(reserve);
-    let plan = plan_context(
-        &system_prompt,
-        &context_message,
-        recent,
-        &all_entries,
-        model,
-        &count,
-    );
+    // Trim oldest messages until we fit
+    let msg_costs: Vec<usize> = conversation.iter()
+        .map(|m| msg_token_count(tokenizer, m)).collect();
+    let total: usize = msg_costs.iter().sum();
-    let journal_text = render_journal_text(&all_entries, &plan);
-    dbglog!("[ctx] plan: header_start={} full_start={} entry_count={} conv_trim={} journal_text={} chars",
-        plan.header_start, plan.full_start, plan.entry_count, plan.conv_trim, journal_text.len());
+    let mut skip = 0;
+    let mut trimmed = total;
+    while trimmed > available && skip < conversation.len() {
+        trimmed -= msg_costs[skip];
+        skip += 1;
+    }
-    let messages = assemble_context(
-        system_prompt, context_message, &journal_text,
-        recent, &plan,
-    );
-    (messages, journal_text)
+    // Walk forward to user message boundary
+    while skip < conversation.len() && conversation[skip].role != Role::User {
+        skip += 1;
+    }
+    conversation[skip..].to_vec()
 }
 pub fn plan_context(


@@ -967,7 +967,7 @@ impl Agent {
     fn do_compact(&mut self) {
         let conversation: Vec<Message> = self.context.entries.iter()
             .map(|e| e.api_message().clone()).collect();
-        let (messages, _) = crate::agent::context::build_context_window(
+        let messages = crate::agent::context::trim_conversation(
             &self.context,
             &conversation,
             &self.client.model,
@@ -1030,7 +1030,7 @@ impl Agent {
             .collect();
         dbglog!("[restore] {} messages after filtering system", conversation.len());
-        let (messages, _) = crate::agent::context::build_context_window(
+        let messages = crate::agent::context::trim_conversation(
             &self.context,
             &conversation,
             &self.client.model,