refactor: extract context window building into context.rs
Move context window construction (build_context_window, plan_context, render_journal_text, assemble_context), token counting, error classification, and related helpers from agent.rs into context.rs. All extracted functions are pure — they take inputs and return values with no mutable state access. State mutation stays in agent.rs (compact, restore_from_log, load_startup_journal). agent.rs: 1504 → 987 lines (-517) context.rs: 365 lines (new) Net: -152 lines (duplicate comments removed) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d04d41e993
commit
db48d57917
3 changed files with 206 additions and 488 deletions
|
|
@ -14,7 +14,6 @@
|
||||||
// in, response out, tool calls dispatched.
|
// in, response out, tool calls dispatched.
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use chrono::{DateTime, Utc};
|
|
||||||
use tiktoken_rs::CoreBPE;
|
use tiktoken_rs::CoreBPE;
|
||||||
|
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
|
|
@ -54,10 +53,6 @@ struct DispatchState {
|
||||||
dmn_pause: bool,
|
dmn_pause: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mutable context state — the structured regions of the context window.
|
|
||||||
///
|
|
||||||
/// Each field is a different dimension of awareness. The struct renders
|
|
||||||
|
|
||||||
pub struct Agent {
|
pub struct Agent {
|
||||||
client: ApiClient,
|
client: ApiClient,
|
||||||
messages: Vec<Message>,
|
messages: Vec<Message>,
|
||||||
|
|
@ -196,7 +191,7 @@ impl Agent {
|
||||||
let mut in_conversation = false;
|
let mut in_conversation = false;
|
||||||
|
|
||||||
for msg in &self.messages {
|
for msg in &self.messages {
|
||||||
let tokens = msg_token_count(&self.tokenizer, msg);
|
let tokens = crate::context::msg_token_count(&self.tokenizer, msg);
|
||||||
|
|
||||||
if in_conversation {
|
if in_conversation {
|
||||||
conv_tokens += tokens;
|
conv_tokens += tokens;
|
||||||
|
|
@ -231,7 +226,7 @@ impl Agent {
|
||||||
memory_tokens: mem_tokens,
|
memory_tokens: mem_tokens,
|
||||||
journal_tokens: jnl_tokens,
|
journal_tokens: jnl_tokens,
|
||||||
conversation_tokens: conv_tokens,
|
conversation_tokens: conv_tokens,
|
||||||
window_tokens: model_context_window(&self.client.model),
|
window_tokens: crate::context::model_context_window(&self.client.model),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -279,7 +274,7 @@ impl Agent {
|
||||||
// Context overflow → compact and retry (max 2 attempts)
|
// Context overflow → compact and retry (max 2 attempts)
|
||||||
// Stream error → retry with backoff (max 2 attempts)
|
// Stream error → retry with backoff (max 2 attempts)
|
||||||
let (msg, usage) = match api_result {
|
let (msg, usage) = match api_result {
|
||||||
Err(e) if is_context_overflow(&e) && overflow_retries < 2 => {
|
Err(e) if crate::context::is_context_overflow(&e) && overflow_retries < 2 => {
|
||||||
overflow_retries += 1;
|
overflow_retries += 1;
|
||||||
let _ = ui_tx.send(UiMessage::Info(format!(
|
let _ = ui_tx.send(UiMessage::Info(format!(
|
||||||
"[context overflow — compacting and retrying ({}/2)]",
|
"[context overflow — compacting and retrying ({}/2)]",
|
||||||
|
|
@ -288,7 +283,7 @@ impl Agent {
|
||||||
self.emergency_compact();
|
self.emergency_compact();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Err(e) if is_stream_error(&e) && empty_retries < 2 => {
|
Err(e) if crate::context::is_stream_error(&e) && empty_retries < 2 => {
|
||||||
empty_retries += 1;
|
empty_retries += 1;
|
||||||
let _ = ui_tx.send(UiMessage::Info(format!(
|
let _ = ui_tx.send(UiMessage::Info(format!(
|
||||||
"[stream error: {} — retrying ({}/2)]",
|
"[stream error: {} — retrying ({}/2)]",
|
||||||
|
|
@ -651,7 +646,7 @@ impl Agent {
|
||||||
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
||||||
let context_message = self.context.render_context_message();
|
let context_message = self.context.render_context_message();
|
||||||
|
|
||||||
let plan = plan_context(
|
let plan = crate::context::plan_context(
|
||||||
&self.context.system_prompt,
|
&self.context.system_prompt,
|
||||||
&context_message,
|
&context_message,
|
||||||
&[], // no conversation yet
|
&[], // no conversation yet
|
||||||
|
|
@ -660,7 +655,7 @@ impl Agent {
|
||||||
&count,
|
&count,
|
||||||
);
|
);
|
||||||
|
|
||||||
self.context.journal = render_journal_text(&entries, &plan);
|
self.context.journal = crate::context::render_journal_text(&entries, &plan);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Re-render the context message in self.messages from live ContextState.
|
/// Re-render the context message in self.messages from live ContextState.
|
||||||
|
|
@ -820,7 +815,7 @@ impl Agent {
|
||||||
.unwrap_or(self.messages.len());
|
.unwrap_or(self.messages.len());
|
||||||
|
|
||||||
let conversation: Vec<Message> = self.messages[conv_start..].to_vec();
|
let conversation: Vec<Message> = self.messages[conv_start..].to_vec();
|
||||||
let (messages, journal) = build_context_window(
|
let (messages, journal) = crate::context::build_context_window(
|
||||||
&self.context,
|
&self.context,
|
||||||
&conversation,
|
&conversation,
|
||||||
&self.client.model,
|
&self.client.model,
|
||||||
|
|
@ -880,7 +875,7 @@ impl Agent {
|
||||||
.collect();
|
.collect();
|
||||||
dbglog!("[restore] {} messages after filtering system", conversation.len());
|
dbglog!("[restore] {} messages after filtering system", conversation.len());
|
||||||
|
|
||||||
let (messages, journal) = build_context_window(
|
let (messages, journal) = crate::context::build_context_window(
|
||||||
&self.context,
|
&self.context,
|
||||||
&conversation,
|
&conversation,
|
||||||
&self.client.model,
|
&self.client.model,
|
||||||
|
|
@ -923,420 +918,9 @@ impl Agent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Look up a model's context window size in tokens.
|
// Context window building, token counting, and error classification
|
||||||
pub fn model_context_window(model: &str) -> usize {
|
// live in context.rs
|
||||||
let m = model.to_lowercase();
|
|
||||||
if m.contains("opus") || m.contains("sonnet") {
|
|
||||||
200_000
|
|
||||||
} else if m.contains("qwen") {
|
|
||||||
131_072
|
|
||||||
} else {
|
|
||||||
128_000
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Context budget in tokens: 60% of the model's context window.
|
|
||||||
/// Leaves headroom for conversation to grow before compaction triggers.
|
|
||||||
///
|
|
||||||
/// Future direction: make this dynamic based on what the agent is
|
|
||||||
/// doing — deep coding work might allocate more to conversation,
|
|
||||||
/// consolidation might allocate more to journal/memory, idle might
|
|
||||||
/// shrink everything to save cost.
|
|
||||||
fn context_budget_tokens(model: &str) -> usize {
|
|
||||||
model_context_window(model) * 60 / 100
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Allocation plan for the context window. Separates the budget math
|
|
||||||
/// (which entries and messages to include) from the message assembly
|
|
||||||
/// (building the actual Vec<Message>). This makes the core algorithm
|
|
||||||
/// testable and inspectable — log the plan on compaction to see exactly
|
|
||||||
/// what allocation decisions were made.
|
|
||||||
struct ContextPlan {
|
|
||||||
/// Index into all_entries: header-only entries start here
|
|
||||||
header_start: usize,
|
|
||||||
/// Index into all_entries: full entries start here (headers end here)
|
|
||||||
full_start: usize,
|
|
||||||
/// Total journal entries (header-only + full go up to this)
|
|
||||||
entry_count: usize,
|
|
||||||
/// Index into recent conversation: skip messages before this
|
|
||||||
conv_trim: usize,
|
|
||||||
/// Total recent conversation messages
|
|
||||||
_conv_count: usize,
|
|
||||||
/// Tokens used by full journal entries
|
|
||||||
_full_tokens: usize,
|
|
||||||
/// Tokens used by header-only journal entries
|
|
||||||
_header_tokens: usize,
|
|
||||||
/// Tokens used by conversation (after trimming)
|
|
||||||
_conv_tokens: usize,
|
|
||||||
/// Total budget available (after identity, memory, reserve)
|
|
||||||
_available: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build a context window from conversation messages + journal entries.
|
|
||||||
/// This is the core algorithm shared by compact() and restore_from_log().
|
|
||||||
///
|
|
||||||
/// Allocation strategy: identity and memory are fixed costs. The
|
|
||||||
/// remaining budget (minus 25% reserve for model output) is split
|
|
||||||
/// between journal and conversation. Conversation gets priority —
|
|
||||||
/// it's what's happening now. Journal fills the rest, newest first.
|
|
||||||
///
|
|
||||||
/// When the budget is tight, journal entries are dropped first
|
|
||||||
/// (oldest entries go first). If conversation alone exceeds the
|
|
||||||
/// budget, oldest messages are trimmed to fit.
|
|
||||||
/// Returns (messages, journal_text) — caller stores journal_text in ContextState.
|
|
||||||
fn build_context_window(
|
|
||||||
context: &ContextState,
|
|
||||||
conversation: &[Message],
|
|
||||||
model: &str,
|
|
||||||
tokenizer: &CoreBPE,
|
|
||||||
) -> (Vec<Message>, String) {
|
|
||||||
let journal_path = journal::default_journal_path();
|
|
||||||
let all_entries = journal::parse_journal(&journal_path);
|
|
||||||
dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
|
|
||||||
let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
|
|
||||||
|
|
||||||
let system_prompt = context.system_prompt.clone();
|
|
||||||
let context_message = context.render_context_message();
|
|
||||||
|
|
||||||
// Cap memory to 50% of the context budget so conversation always
|
|
||||||
// gets space. Truncate at the last complete section boundary.
|
|
||||||
let max_tokens = context_budget_tokens(model);
|
|
||||||
let memory_cap = max_tokens / 2;
|
|
||||||
let memory_tokens = count(&context_message);
|
|
||||||
let context_message = if memory_tokens > memory_cap {
|
|
||||||
dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap);
|
|
||||||
truncate_at_section(&context_message, memory_cap, &count)
|
|
||||||
} else {
|
|
||||||
context_message
|
|
||||||
};
|
|
||||||
|
|
||||||
let recent_start = find_journal_cutoff(conversation, all_entries.last());
|
|
||||||
dbglog!("[ctx] journal cutoff: {} of {} conversation messages are 'recent'",
|
|
||||||
conversation.len() - recent_start, conversation.len());
|
|
||||||
let recent = &conversation[recent_start..];
|
|
||||||
|
|
||||||
let plan = plan_context(
|
|
||||||
&system_prompt,
|
|
||||||
&context_message,
|
|
||||||
recent,
|
|
||||||
&all_entries,
|
|
||||||
model,
|
|
||||||
&count,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Render journal text from the plan
|
|
||||||
let journal_text = render_journal_text(&all_entries, &plan);
|
|
||||||
dbglog!("[ctx] plan: header_start={} full_start={} entry_count={} conv_trim={} journal_text={} chars",
|
|
||||||
plan.header_start, plan.full_start, plan.entry_count, plan.conv_trim, journal_text.len());
|
|
||||||
|
|
||||||
let messages = assemble_context(
|
|
||||||
system_prompt, context_message, &journal_text,
|
|
||||||
recent, &plan,
|
|
||||||
);
|
|
||||||
(messages, journal_text)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Compute the allocation plan: how much budget goes to journal vs
|
|
||||||
/// conversation, which entries and messages to include.
|
|
||||||
fn plan_context(
|
|
||||||
system_prompt: &str,
|
|
||||||
context_message: &str,
|
|
||||||
recent: &[Message],
|
|
||||||
entries: &[journal::JournalEntry],
|
|
||||||
model: &str,
|
|
||||||
count: &dyn Fn(&str) -> usize,
|
|
||||||
) -> ContextPlan {
|
|
||||||
let max_tokens = context_budget_tokens(model);
|
|
||||||
|
|
||||||
// Fixed costs — always included
|
|
||||||
let identity_cost = count(system_prompt);
|
|
||||||
let memory_cost = count(context_message);
|
|
||||||
let reserve = max_tokens / 4;
|
|
||||||
let available = max_tokens
|
|
||||||
.saturating_sub(identity_cost)
|
|
||||||
.saturating_sub(memory_cost)
|
|
||||||
.saturating_sub(reserve);
|
|
||||||
|
|
||||||
// Measure conversation
|
|
||||||
let conv_costs: Vec<usize> = recent.iter().map(|m| msg_token_count_fn(m, count)).collect();
|
|
||||||
let total_conv: usize = conv_costs.iter().sum();
|
|
||||||
|
|
||||||
// Journal always gets at least 15% of available budget so it doesn't
|
|
||||||
// get squeezed out by large conversations.
|
|
||||||
let journal_min = available * 15 / 100;
|
|
||||||
let journal_budget = available.saturating_sub(total_conv).max(journal_min);
|
|
||||||
|
|
||||||
// Fill journal entries newest-first within budget.
|
|
||||||
// Tiered: recent entries get full content, older entries get just
|
|
||||||
// a header line (timestamp + first line) for timeline awareness.
|
|
||||||
let full_budget = journal_budget * 70 / 100;
|
|
||||||
let header_budget = journal_budget.saturating_sub(full_budget);
|
|
||||||
|
|
||||||
// Phase 1: Full entries (newest first)
|
|
||||||
let mut full_used = 0;
|
|
||||||
let mut n_full = 0;
|
|
||||||
for entry in entries.iter().rev() {
|
|
||||||
let cost = count(&entry.content) + 10;
|
|
||||||
if full_used + cost > full_budget {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
full_used += cost;
|
|
||||||
n_full += 1;
|
|
||||||
}
|
|
||||||
let full_start = entries.len().saturating_sub(n_full);
|
|
||||||
|
|
||||||
// Phase 2: Header-only entries (continuing backward from where full stopped)
|
|
||||||
let mut header_used = 0;
|
|
||||||
let mut n_headers = 0;
|
|
||||||
for entry in entries[..full_start].iter().rev() {
|
|
||||||
let first_line = entry
|
|
||||||
.content
|
|
||||||
.lines()
|
|
||||||
.find(|l| !l.trim().is_empty())
|
|
||||||
.unwrap_or("(empty)");
|
|
||||||
let cost = count(first_line) + 10;
|
|
||||||
if header_used + cost > header_budget {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
header_used += cost;
|
|
||||||
n_headers += 1;
|
|
||||||
}
|
|
||||||
let header_start = full_start.saturating_sub(n_headers);
|
|
||||||
|
|
||||||
// If conversation exceeds available budget, trim oldest messages
|
|
||||||
let journal_used = full_used + header_used;
|
|
||||||
let mut conv_trim = 0;
|
|
||||||
let mut trimmed_conv = total_conv;
|
|
||||||
while trimmed_conv + journal_used > available && conv_trim < recent.len() {
|
|
||||||
trimmed_conv -= conv_costs[conv_trim];
|
|
||||||
conv_trim += 1;
|
|
||||||
}
|
|
||||||
// Walk forward to user message boundary
|
|
||||||
while conv_trim < recent.len() && recent[conv_trim].role != Role::User {
|
|
||||||
conv_trim += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
dbglog!("[plan] model={} max_tokens={} available={} (identity={} memory={} reserve={})",
|
|
||||||
model, max_tokens, available, identity_cost, memory_cost, reserve);
|
|
||||||
dbglog!("[plan] conv: {} msgs, {} tokens total, trimming {} msgs → {} tokens",
|
|
||||||
recent.len(), total_conv, conv_trim, trimmed_conv);
|
|
||||||
dbglog!("[plan] journal: {} full entries ({}t) + {} headers ({}t)",
|
|
||||||
n_full, full_used, n_headers, header_used);
|
|
||||||
|
|
||||||
ContextPlan {
|
|
||||||
header_start,
|
|
||||||
full_start,
|
|
||||||
entry_count: entries.len(),
|
|
||||||
conv_trim,
|
|
||||||
_conv_count: recent.len(),
|
|
||||||
_full_tokens: full_used,
|
|
||||||
_header_tokens: header_used,
|
|
||||||
_conv_tokens: trimmed_conv,
|
|
||||||
_available: available,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Render journal entries into text from a context plan.
|
|
||||||
fn render_journal_text(
|
|
||||||
entries: &[journal::JournalEntry],
|
|
||||||
plan: &ContextPlan,
|
|
||||||
) -> String {
|
|
||||||
let has_journal = plan.header_start < plan.entry_count;
|
|
||||||
if !has_journal {
|
|
||||||
return String::new();
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut text = String::from("[Earlier in this conversation — from your journal]\n\n");
|
|
||||||
|
|
||||||
// Header-only entries (older) — just timestamp + first line
|
|
||||||
for entry in &entries[plan.header_start..plan.full_start] {
|
|
||||||
let first_line = entry
|
|
||||||
.content
|
|
||||||
.lines()
|
|
||||||
.find(|l| !l.trim().is_empty())
|
|
||||||
.unwrap_or("(empty)");
|
|
||||||
text.push_str(&format!(
|
|
||||||
"## {} — {}\n",
|
|
||||||
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
|
||||||
first_line,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Separator between headers and full entries
|
|
||||||
let n_headers = plan.full_start - plan.header_start;
|
|
||||||
let n_full = plan.entry_count - plan.full_start;
|
|
||||||
if n_headers > 0 && n_full > 0 {
|
|
||||||
text.push_str("\n---\n\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Full entries (recent)
|
|
||||||
for entry in &entries[plan.full_start..] {
|
|
||||||
text.push_str(&format!(
|
|
||||||
"## {}\n\n{}\n\n",
|
|
||||||
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
|
||||||
entry.content
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
text
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Assemble the context window from a plan. No allocation decisions
|
|
||||||
/// happen here — just follow the plan to build messages.
|
|
||||||
fn assemble_context(
|
|
||||||
system_prompt: String,
|
|
||||||
context_message: String,
|
|
||||||
journal_text: &str,
|
|
||||||
recent: &[Message],
|
|
||||||
plan: &ContextPlan,
|
|
||||||
) -> Vec<Message> {
|
|
||||||
let mut messages = vec![Message::system(system_prompt)];
|
|
||||||
if !context_message.is_empty() {
|
|
||||||
messages.push(Message::user(context_message));
|
|
||||||
}
|
|
||||||
|
|
||||||
let final_recent = &recent[plan.conv_trim..];
|
|
||||||
|
|
||||||
if !journal_text.is_empty() {
|
|
||||||
messages.push(Message::user(journal_text.to_string()));
|
|
||||||
} else if !final_recent.is_empty() {
|
|
||||||
messages.push(Message::user(
|
|
||||||
"Your context was just rebuilt. Memory files have been \
|
|
||||||
reloaded. Your recent conversation continues below. \
|
|
||||||
Earlier context is in your journal and memory files."
|
|
||||||
.to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
messages.extend(final_recent.iter().cloned());
|
|
||||||
messages
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Find the conversation index where messages are no longer covered
|
|
||||||
/// Truncate a context message to fit within a token budget. Cuts at
|
|
||||||
/// section boundaries (lines starting with `---` or `## `) to avoid
|
|
||||||
/// splitting mid-section. Drops sections from the end first since
|
|
||||||
/// earlier sections (identity, instructions) matter more.
|
|
||||||
fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> usize) -> String {
|
|
||||||
// Find section boundaries (--- separators between assembled parts)
|
|
||||||
let mut boundaries = vec![0usize];
|
|
||||||
for (i, line) in text.lines().enumerate() {
|
|
||||||
if line.trim() == "---" || line.starts_with("## ") {
|
|
||||||
// Find byte offset of this line
|
|
||||||
let offset = text.lines().take(i).map(|l| l.len() + 1).sum::<usize>();
|
|
||||||
boundaries.push(offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
boundaries.push(text.len());
|
|
||||||
|
|
||||||
// Binary search: find the largest prefix of sections that fits
|
|
||||||
let mut best = 0;
|
|
||||||
for &end in &boundaries[1..] {
|
|
||||||
let slice = &text[..end];
|
|
||||||
if count(slice) <= max_tokens {
|
|
||||||
best = end;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if best == 0 {
|
|
||||||
// Even the first section doesn't fit — hard truncate
|
|
||||||
best = text.len().min(max_tokens * 3); // ~3 chars/token rough estimate
|
|
||||||
}
|
|
||||||
|
|
||||||
let truncated = &text[..best];
|
|
||||||
dbglog!("[ctx] truncated memory from {} to {} chars ({} tokens)",
|
|
||||||
text.len(), truncated.len(), count(truncated));
|
|
||||||
truncated.to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// by journal entries. Messages before this index are summarized by
|
|
||||||
/// the journal; messages from this index onward stay as raw conversation.
|
|
||||||
/// Walks back to a user message boundary to avoid splitting tool
|
|
||||||
/// call/result sequences.
|
|
||||||
fn find_journal_cutoff(
|
|
||||||
conversation: &[Message],
|
|
||||||
newest_entry: Option<&journal::JournalEntry>,
|
|
||||||
) -> usize {
|
|
||||||
let cutoff = match newest_entry {
|
|
||||||
Some(entry) => entry.timestamp,
|
|
||||||
None => return 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut split = conversation.len();
|
|
||||||
for (i, msg) in conversation.iter().enumerate() {
|
|
||||||
if let Some(ts) = parse_msg_timestamp(msg) {
|
|
||||||
if ts > cutoff {
|
|
||||||
split = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Walk back to user message boundary
|
|
||||||
while split > 0 && split < conversation.len() && conversation[split].role != Role::User {
|
|
||||||
split -= 1;
|
|
||||||
}
|
|
||||||
split
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Count the token footprint of a message using a token counting function.
|
|
||||||
fn msg_token_count_fn(msg: &Message, count: &dyn Fn(&str) -> usize) -> usize {
|
|
||||||
let content = msg.content.as_ref().map_or(0, |c| match c {
|
|
||||||
MessageContent::Text(s) => count(s),
|
|
||||||
MessageContent::Parts(parts) => parts
|
|
||||||
.iter()
|
|
||||||
.map(|p| match p {
|
|
||||||
ContentPart::Text { text } => count(text),
|
|
||||||
ContentPart::ImageUrl { .. } => 85,
|
|
||||||
})
|
|
||||||
.sum(),
|
|
||||||
});
|
|
||||||
let tools = msg.tool_calls.as_ref().map_or(0, |calls| {
|
|
||||||
calls
|
|
||||||
.iter()
|
|
||||||
.map(|c| count(&c.function.arguments) + count(&c.function.name))
|
|
||||||
.sum()
|
|
||||||
});
|
|
||||||
content + tools
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Count the token footprint of a message using BPE tokenization.
|
|
||||||
fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize {
|
|
||||||
msg_token_count_fn(msg, &|s| tokenizer.encode_with_special_tokens(s).len())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Detect context window overflow errors from the API.
|
|
||||||
/// Different providers phrase this differently; we check for common patterns.
|
|
||||||
/// OpenRouter wraps upstream errors, so we check both the wrapper and the raw message.
|
|
||||||
fn is_context_overflow(err: &anyhow::Error) -> bool {
|
|
||||||
let msg = err.to_string().to_lowercase();
|
|
||||||
msg.contains("context length")
|
|
||||||
|| msg.contains("token limit")
|
|
||||||
|| msg.contains("too many tokens")
|
|
||||||
|| msg.contains("maximum context")
|
|
||||||
|| msg.contains("prompt is too long")
|
|
||||||
|| msg.contains("request too large")
|
|
||||||
|| msg.contains("input validation error")
|
|
||||||
|| msg.contains("content length limit")
|
|
||||||
|| (msg.contains("400") && msg.contains("tokens"))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Detect model/provider errors delivered inside the SSE stream.
|
|
||||||
/// OpenRouter returns HTTP 200 but finish_reason="error" with
|
|
||||||
/// partial content (e.g. "system") — we surface this as an error
|
|
||||||
/// so the turn loop can retry.
|
|
||||||
fn is_stream_error(err: &anyhow::Error) -> bool {
|
|
||||||
err.to_string().contains("model stream error")
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse a message's timestamp field into a DateTime.
|
|
||||||
fn parse_msg_timestamp(msg: &Message) -> Option<DateTime<Utc>> {
|
|
||||||
msg.timestamp
|
|
||||||
.as_ref()
|
|
||||||
.and_then(|ts| DateTime::parse_from_rfc3339(ts).ok())
|
|
||||||
.map(|dt| dt.with_timezone(&Utc))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a short summary of tool args for the tools pane header.
|
/// Create a short summary of tool args for the tools pane header.
|
||||||
fn summarize_args(tool_name: &str, args: &serde_json::Value) -> String {
|
fn summarize_args(tool_name: &str, args: &serde_json::Value) -> String {
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,52 @@
|
||||||
// context.rs — Context window building and management
|
// context.rs — Context window building and management
|
||||||
//
|
//
|
||||||
// Pure functions for building the agent's context window from journal
|
// Pure functions for building the agent's context window from journal
|
||||||
// entries and conversation messages. No mutable state.
|
// entries and conversation messages. No mutable state — all functions
|
||||||
|
// take inputs and return new values. State mutation happens in agent.rs.
|
||||||
|
|
||||||
use crate::journal;
|
use crate::journal;
|
||||||
use crate::types::{ContextPlan, ContextState, Message};
|
use crate::types::*;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use tiktoken_rs::CoreBPE;
|
use tiktoken_rs::CoreBPE;
|
||||||
|
|
||||||
|
/// Look up a model's context window size in tokens.
|
||||||
|
pub fn model_context_window(model: &str) -> usize {
|
||||||
|
let m = model.to_lowercase();
|
||||||
|
if m.contains("opus") || m.contains("sonnet") {
|
||||||
|
200_000
|
||||||
|
} else if m.contains("qwen") {
|
||||||
|
131_072
|
||||||
|
} else {
|
||||||
|
128_000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Context budget in tokens: 60% of the model's context window.
|
||||||
|
fn context_budget_tokens(model: &str) -> usize {
|
||||||
|
model_context_window(model) * 60 / 100
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Allocation plan for the context window.
|
||||||
|
pub struct ContextPlan {
|
||||||
|
header_start: usize,
|
||||||
|
full_start: usize,
|
||||||
|
entry_count: usize,
|
||||||
|
conv_trim: usize,
|
||||||
|
_conv_count: usize,
|
||||||
|
_full_tokens: usize,
|
||||||
|
_header_tokens: usize,
|
||||||
|
_conv_tokens: usize,
|
||||||
|
_available: usize,
|
||||||
|
}
|
||||||
|
|
||||||
/// Build a context window from conversation messages + journal entries.
|
/// Build a context window from conversation messages + journal entries.
|
||||||
|
///
|
||||||
|
/// Allocation strategy: identity and memory are fixed costs. The
|
||||||
|
/// remaining budget (minus 25% reserve for model output) is split
|
||||||
|
/// between journal and conversation. Conversation gets priority —
|
||||||
|
/// it's what's happening now. Journal fills the rest, newest first.
|
||||||
|
///
|
||||||
|
/// Returns (messages, journal_text) — caller stores journal_text in ContextState.
|
||||||
pub fn build_context_window(
|
pub fn build_context_window(
|
||||||
context: &ContextState,
|
context: &ContextState,
|
||||||
conversation: &[Message],
|
conversation: &[Message],
|
||||||
|
|
@ -17,44 +55,50 @@ pub fn build_context_window(
|
||||||
) -> (Vec<Message>, String) {
|
) -> (Vec<Message>, String) {
|
||||||
let journal_path = journal::default_journal_path();
|
let journal_path = journal::default_journal_path();
|
||||||
let all_entries = journal::parse_journal(&journal_path);
|
let all_entries = journal::parse_journal(&journal_path);
|
||||||
crate::dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
|
dbglog!("[ctx] {} journal entries from {}", all_entries.len(), journal_path.display());
|
||||||
let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
|
let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
|
||||||
|
|
||||||
let system_prompt = context.system_prompt.clone();
|
let system_prompt = context.system_prompt.clone();
|
||||||
let context_message = context.render_context_message();
|
let context_message = context.render_context_message();
|
||||||
|
|
||||||
|
// Cap memory to 50% of the context budget so conversation always
|
||||||
|
// gets space. Truncate at the last complete section boundary.
|
||||||
let max_tokens = context_budget_tokens(model);
|
let max_tokens = context_budget_tokens(model);
|
||||||
let memory_cap = max_tokens / 2;
|
let memory_cap = max_tokens / 2;
|
||||||
let memory_tokens = count(&context_message);
|
let memory_tokens = count(&context_message);
|
||||||
let context_message = if memory_tokens > memory_cap {
|
let context_message = if memory_tokens > memory_cap {
|
||||||
crate::dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap);
|
dbglog!("[ctx] memory too large: {} tokens > {} cap, truncating", memory_tokens, memory_cap);
|
||||||
truncate_at_section(&context_message, memory_cap, &count)
|
truncate_at_section(&context_message, memory_cap, &count)
|
||||||
} else {
|
} else {
|
||||||
context_message
|
context_message
|
||||||
};
|
};
|
||||||
|
|
||||||
let recent_start = find_journal_cutoff(conversation, all_entries.last());
|
let recent_start = find_journal_cutoff(conversation, all_entries.last());
|
||||||
|
dbglog!("[ctx] journal cutoff: {} of {} conversation messages are 'recent'",
|
||||||
|
conversation.len() - recent_start, conversation.len());
|
||||||
let recent = &conversation[recent_start..];
|
let recent = &conversation[recent_start..];
|
||||||
|
|
||||||
let plan = plan_context(&system_prompt, &context_message, recent, &all_entries, model, &count);
|
let plan = plan_context(
|
||||||
let journal_text = render_journal_text(&all_entries, &plan);
|
&system_prompt,
|
||||||
|
&context_message,
|
||||||
|
recent,
|
||||||
|
&all_entries,
|
||||||
|
model,
|
||||||
|
&count,
|
||||||
|
);
|
||||||
|
|
||||||
let messages = assemble_context(system_prompt, context_message, &journal_text, recent, &plan);
|
let journal_text = render_journal_text(&all_entries, &plan);
|
||||||
|
dbglog!("[ctx] plan: header_start={} full_start={} entry_count={} conv_trim={} journal_text={} chars",
|
||||||
|
plan.header_start, plan.full_start, plan.entry_count, plan.conv_trim, journal_text.len());
|
||||||
|
|
||||||
|
let messages = assemble_context(
|
||||||
|
system_prompt, context_message, &journal_text,
|
||||||
|
recent, &plan,
|
||||||
|
);
|
||||||
(messages, journal_text)
|
(messages, journal_text)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn model_context_window(model: &str) -> usize {
|
pub fn plan_context(
|
||||||
let m = model.to_lowercase();
|
|
||||||
if m.contains("opus") || m.contains("sonnet") { 200_000 }
|
|
||||||
else if m.contains("qwen") { 131_072 }
|
|
||||||
else { 128_000 }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn context_budget_tokens(model: &str) -> usize {
|
|
||||||
model_context_window(model) * 60 / 100
|
|
||||||
}
|
|
||||||
|
|
||||||
fn plan_context(
|
|
||||||
system_prompt: &str,
|
system_prompt: &str,
|
||||||
context_message: &str,
|
context_message: &str,
|
||||||
recent: &[Message],
|
recent: &[Message],
|
||||||
|
|
@ -63,40 +107,56 @@ fn plan_context(
|
||||||
count: &dyn Fn(&str) -> usize,
|
count: &dyn Fn(&str) -> usize,
|
||||||
) -> ContextPlan {
|
) -> ContextPlan {
|
||||||
let max_tokens = context_budget_tokens(model);
|
let max_tokens = context_budget_tokens(model);
|
||||||
|
|
||||||
let identity_cost = count(system_prompt);
|
let identity_cost = count(system_prompt);
|
||||||
let memory_cost = count(context_message);
|
let memory_cost = count(context_message);
|
||||||
let reserve = max_tokens / 4;
|
let reserve = max_tokens / 4;
|
||||||
let available = max_tokens.saturating_sub(identity_cost).saturating_sub(memory_cost).saturating_sub(reserve);
|
let available = max_tokens
|
||||||
|
.saturating_sub(identity_cost)
|
||||||
|
.saturating_sub(memory_cost)
|
||||||
|
.saturating_sub(reserve);
|
||||||
|
|
||||||
let conv_costs: Vec<usize> = recent.iter().map(|m| msg_token_count_fn(m, count)).collect();
|
let conv_costs: Vec<usize> = recent.iter().map(|m| msg_token_count_fn(m, count)).collect();
|
||||||
let total_conv: usize = conv_costs.iter().sum();
|
let total_conv: usize = conv_costs.iter().sum();
|
||||||
|
|
||||||
let journal_min = available * 15 / 100;
|
let journal_min = available * 15 / 100;
|
||||||
let journal_budget = available.saturating_sub(total_conv).max(journal_min);
|
let journal_budget = available.saturating_sub(total_conv).max(journal_min);
|
||||||
|
|
||||||
let full_budget = journal_budget * 70 / 100;
|
let full_budget = journal_budget * 70 / 100;
|
||||||
let header_budget = journal_budget.saturating_sub(full_budget);
|
let header_budget = journal_budget.saturating_sub(full_budget);
|
||||||
|
|
||||||
|
// Phase 1: Full entries (newest first)
|
||||||
let mut full_used = 0;
|
let mut full_used = 0;
|
||||||
let mut n_full = 0;
|
let mut n_full = 0;
|
||||||
for entry in entries.iter().rev() {
|
for entry in entries.iter().rev() {
|
||||||
let cost = count(&entry.content) + 10;
|
let cost = count(&entry.content) + 10;
|
||||||
if full_used + cost > full_budget { break; }
|
if full_used + cost > full_budget {
|
||||||
|
break;
|
||||||
|
}
|
||||||
full_used += cost;
|
full_used += cost;
|
||||||
n_full += 1;
|
n_full += 1;
|
||||||
}
|
}
|
||||||
let full_start = entries.len().saturating_sub(n_full);
|
let full_start = entries.len().saturating_sub(n_full);
|
||||||
|
|
||||||
|
// Phase 2: Header-only entries (continuing backward)
|
||||||
let mut header_used = 0;
|
let mut header_used = 0;
|
||||||
let mut n_headers = 0;
|
let mut n_headers = 0;
|
||||||
for entry in entries[..full_start].iter().rev() {
|
for entry in entries[..full_start].iter().rev() {
|
||||||
let first_line = entry.content.lines().find(|l| !l.trim().is_empty()).unwrap_or("(empty)");
|
let first_line = entry
|
||||||
|
.content
|
||||||
|
.lines()
|
||||||
|
.find(|l| !l.trim().is_empty())
|
||||||
|
.unwrap_or("(empty)");
|
||||||
let cost = count(first_line) + 10;
|
let cost = count(first_line) + 10;
|
||||||
if header_used + cost > header_budget { break; }
|
if header_used + cost > header_budget {
|
||||||
|
break;
|
||||||
|
}
|
||||||
header_used += cost;
|
header_used += cost;
|
||||||
n_headers += 1;
|
n_headers += 1;
|
||||||
}
|
}
|
||||||
let header_start = full_start.saturating_sub(n_headers);
|
let header_start = full_start.saturating_sub(n_headers);
|
||||||
|
|
||||||
|
// Trim oldest conversation if it exceeds budget
|
||||||
let journal_used = full_used + header_used;
|
let journal_used = full_used + header_used;
|
||||||
let mut conv_trim = 0;
|
let mut conv_trim = 0;
|
||||||
let mut trimmed_conv = total_conv;
|
let mut trimmed_conv = total_conv;
|
||||||
|
|
@ -104,34 +164,69 @@ fn plan_context(
|
||||||
trimmed_conv -= conv_costs[conv_trim];
|
trimmed_conv -= conv_costs[conv_trim];
|
||||||
conv_trim += 1;
|
conv_trim += 1;
|
||||||
}
|
}
|
||||||
while conv_trim < recent.len() && recent[conv_trim].role != crate::types::Role::User {
|
// Walk forward to user message boundary
|
||||||
|
while conv_trim < recent.len() && recent[conv_trim].role != Role::User {
|
||||||
conv_trim += 1;
|
conv_trim += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dbglog!("[plan] model={} max_tokens={} available={} (identity={} memory={} reserve={})",
|
||||||
|
model, max_tokens, available, identity_cost, memory_cost, reserve);
|
||||||
|
dbglog!("[plan] conv: {} msgs, {} tokens total, trimming {} msgs → {} tokens",
|
||||||
|
recent.len(), total_conv, conv_trim, trimmed_conv);
|
||||||
|
dbglog!("[plan] journal: {} full entries ({}t) + {} headers ({}t)",
|
||||||
|
n_full, full_used, n_headers, header_used);
|
||||||
|
|
||||||
ContextPlan {
|
ContextPlan {
|
||||||
header_start, full_start, entry_count: entries.len(), conv_trim,
|
header_start,
|
||||||
_conv_count: recent.len(), _full_tokens: full_used, _header_tokens: header_used,
|
full_start,
|
||||||
_conv_tokens: trimmed_conv, _available: available,
|
entry_count: entries.len(),
|
||||||
|
conv_trim,
|
||||||
|
_conv_count: recent.len(),
|
||||||
|
_full_tokens: full_used,
|
||||||
|
_header_tokens: header_used,
|
||||||
|
_conv_tokens: trimmed_conv,
|
||||||
|
_available: available,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_journal_text(entries: &[journal::JournalEntry], plan: &ContextPlan) -> String {
|
pub fn render_journal_text(
|
||||||
if plan.header_start >= plan.entry_count { return String::new(); }
|
entries: &[journal::JournalEntry],
|
||||||
|
plan: &ContextPlan,
|
||||||
|
) -> String {
|
||||||
|
let has_journal = plan.header_start < plan.entry_count;
|
||||||
|
if !has_journal {
|
||||||
|
return String::new();
|
||||||
|
}
|
||||||
|
|
||||||
let mut text = String::from("[Earlier in this conversation — from your journal]\n\n");
|
let mut text = String::from("[Earlier in this conversation — from your journal]\n\n");
|
||||||
|
|
||||||
for entry in &entries[plan.header_start..plan.full_start] {
|
for entry in &entries[plan.header_start..plan.full_start] {
|
||||||
let first_line = entry.content.lines().find(|l| !l.trim().is_empty()).unwrap_or("(empty)");
|
let first_line = entry
|
||||||
text.push_str(&format!("## {} — {}\n", entry.timestamp.format("%Y-%m-%dT%H:%M"), first_line));
|
.content
|
||||||
|
.lines()
|
||||||
|
.find(|l| !l.trim().is_empty())
|
||||||
|
.unwrap_or("(empty)");
|
||||||
|
text.push_str(&format!(
|
||||||
|
"## {} — {}\n",
|
||||||
|
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
||||||
|
first_line,
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let n_headers = plan.full_start - plan.header_start;
|
let n_headers = plan.full_start - plan.header_start;
|
||||||
let n_full = plan.entry_count - plan.full_start;
|
let n_full = plan.entry_count - plan.full_start;
|
||||||
if n_headers > 0 && n_full > 0 { text.push_str("\n---\n\n"); }
|
if n_headers > 0 && n_full > 0 {
|
||||||
|
text.push_str("\n---\n\n");
|
||||||
|
}
|
||||||
|
|
||||||
for entry in &entries[plan.full_start..] {
|
for entry in &entries[plan.full_start..] {
|
||||||
text.push_str(&format!("## {}\n\n{}\n\n", entry.timestamp.format("%Y-%m-%dT%H:%M"), entry.content));
|
text.push_str(&format!(
|
||||||
|
"## {}\n\n{}\n\n",
|
||||||
|
entry.timestamp.format("%Y-%m-%dT%H:%M"),
|
||||||
|
entry.content
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
text
|
text
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -143,7 +238,9 @@ fn assemble_context(
|
||||||
plan: &ContextPlan,
|
plan: &ContextPlan,
|
||||||
) -> Vec<Message> {
|
) -> Vec<Message> {
|
||||||
let mut messages = vec![Message::system(system_prompt)];
|
let mut messages = vec![Message::system(system_prompt)];
|
||||||
if !context_message.is_empty() { messages.push(Message::user(context_message)); }
|
if !context_message.is_empty() {
|
||||||
|
messages.push(Message::user(context_message));
|
||||||
|
}
|
||||||
|
|
||||||
let final_recent = &recent[plan.conv_trim..];
|
let final_recent = &recent[plan.conv_trim..];
|
||||||
|
|
||||||
|
|
@ -157,6 +254,7 @@ fn assemble_context(
|
||||||
.to_string(),
|
.to_string(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
messages.extend(final_recent.iter().cloned());
|
messages.extend(final_recent.iter().cloned());
|
||||||
messages
|
messages
|
||||||
}
|
}
|
||||||
|
|
@ -174,26 +272,42 @@ fn truncate_at_section(text: &str, max_tokens: usize, count: &dyn Fn(&str) -> us
|
||||||
let mut best = 0;
|
let mut best = 0;
|
||||||
for &end in &boundaries[1..] {
|
for &end in &boundaries[1..] {
|
||||||
let slice = &text[..end];
|
let slice = &text[..end];
|
||||||
if count(slice) <= max_tokens { best = end; }
|
if count(slice) <= max_tokens {
|
||||||
else { break; }
|
best = end;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if best == 0 {
|
||||||
|
best = text.len().min(max_tokens * 3);
|
||||||
}
|
}
|
||||||
if best == 0 { best = text.len().min(max_tokens * 3); }
|
|
||||||
|
|
||||||
let truncated = &text[..best];
|
let truncated = &text[..best];
|
||||||
crate::dbglog!("[ctx] truncated memory from {} to {} chars ({} tokens)", text.len(), truncated.len(), count(truncated));
|
dbglog!("[ctx] truncated memory from {} to {} chars ({} tokens)",
|
||||||
|
text.len(), truncated.len(), count(truncated));
|
||||||
truncated.to_string()
|
truncated.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_journal_cutoff(conversation: &[Message], newest_entry: Option<&journal::JournalEntry>) -> usize {
|
fn find_journal_cutoff(
|
||||||
let cutoff = match newest_entry { Some(entry) => entry.timestamp, None => return 0 };
|
conversation: &[Message],
|
||||||
|
newest_entry: Option<&journal::JournalEntry>,
|
||||||
|
) -> usize {
|
||||||
|
let cutoff = match newest_entry {
|
||||||
|
Some(entry) => entry.timestamp,
|
||||||
|
None => return 0,
|
||||||
|
};
|
||||||
|
|
||||||
let mut split = conversation.len();
|
let mut split = conversation.len();
|
||||||
for (i, msg) in conversation.iter().enumerate() {
|
for (i, msg) in conversation.iter().enumerate() {
|
||||||
if let Some(ts) = parse_msg_timestamp(msg) {
|
if let Some(ts) = parse_msg_timestamp(msg) {
|
||||||
if ts > cutoff { split = i; break; }
|
if ts > cutoff {
|
||||||
|
split = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while split > 0 && split < conversation.len() && conversation[split].role != crate::types::Role::User {
|
while split > 0 && split < conversation.len() && conversation[split].role != Role::User {
|
||||||
split -= 1;
|
split -= 1;
|
||||||
}
|
}
|
||||||
split
|
split
|
||||||
|
|
@ -201,32 +315,51 @@ fn find_journal_cutoff(conversation: &[Message], newest_entry: Option<&journal::
|
||||||
|
|
||||||
fn msg_token_count_fn(msg: &Message, count: &dyn Fn(&str) -> usize) -> usize {
|
fn msg_token_count_fn(msg: &Message, count: &dyn Fn(&str) -> usize) -> usize {
|
||||||
let content = msg.content.as_ref().map_or(0, |c| match c {
|
let content = msg.content.as_ref().map_or(0, |c| match c {
|
||||||
crate::types::MessageContent::Text(s) => count(s),
|
MessageContent::Text(s) => count(s),
|
||||||
crate::types::MessageContent::Parts(parts) => parts.iter().map(|p| match p {
|
MessageContent::Parts(parts) => parts
|
||||||
crate::types::ContentPart::Text { text } => count(text),
|
.iter()
|
||||||
crate::types::ContentPart::ImageUrl { .. } => 85,
|
.map(|p| match p {
|
||||||
}).sum(),
|
ContentPart::Text { text } => count(text),
|
||||||
|
ContentPart::ImageUrl { .. } => 85,
|
||||||
|
})
|
||||||
|
.sum(),
|
||||||
|
});
|
||||||
|
let tools = msg.tool_calls.as_ref().map_or(0, |calls| {
|
||||||
|
calls
|
||||||
|
.iter()
|
||||||
|
.map(|c| count(&c.function.arguments) + count(&c.function.name))
|
||||||
|
.sum()
|
||||||
});
|
});
|
||||||
let tools = msg.tool_calls.as_ref().map_or(0, |calls| calls.iter().map(|c| count(&c.function.arguments) + count(&c.function.name)).sum());
|
|
||||||
content + tools
|
content + tools
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_msg_timestamp(msg: &Message) -> Option<DateTime<Utc>> {
|
/// Count the token footprint of a message using BPE tokenization.
|
||||||
msg.timestamp.as_ref().and_then(|ts| DateTime::parse_from_rfc3339(ts).ok()).map(|dt| dt.with_timezone(&Utc))
|
pub fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize {
|
||||||
|
msg_token_count_fn(msg, &|s| tokenizer.encode_with_special_tokens(s).len())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Detect context window overflow errors from the API.
|
||||||
pub fn is_context_overflow(err: &anyhow::Error) -> bool {
|
pub fn is_context_overflow(err: &anyhow::Error) -> bool {
|
||||||
let msg = err.to_string().to_lowercase();
|
let msg = err.to_string().to_lowercase();
|
||||||
msg.contains("context length") || msg.contains("token limit") || msg.contains("too many tokens")
|
msg.contains("context length")
|
||||||
|| msg.contains("maximum context") || msg.contains("prompt is too long") || msg.contains("request too large")
|
|| msg.contains("token limit")
|
||||||
|| msg.contains("input validation error") || msg.contains("content length limit")
|
|| msg.contains("too many tokens")
|
||||||
|
|| msg.contains("maximum context")
|
||||||
|
|| msg.contains("prompt is too long")
|
||||||
|
|| msg.contains("request too large")
|
||||||
|
|| msg.contains("input validation error")
|
||||||
|
|| msg.contains("content length limit")
|
||||||
|| (msg.contains("400") && msg.contains("tokens"))
|
|| (msg.contains("400") && msg.contains("tokens"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Detect model/provider errors delivered inside the SSE stream.
|
||||||
pub fn is_stream_error(err: &anyhow::Error) -> bool {
|
pub fn is_stream_error(err: &anyhow::Error) -> bool {
|
||||||
err.to_string().contains("model stream error")
|
err.to_string().contains("model stream error")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize {
|
fn parse_msg_timestamp(msg: &Message) -> Option<DateTime<Utc>> {
|
||||||
msg_token_count_fn(msg, &|s| tokenizer.encode_with_special_tokens(s).len())
|
msg.timestamp
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|ts| DateTime::parse_from_rfc3339(ts).ok())
|
||||||
|
.map(|dt| dt.with_timezone(&Utc))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ mod agent;
|
||||||
mod api;
|
mod api;
|
||||||
mod cli;
|
mod cli;
|
||||||
mod config;
|
mod config;
|
||||||
|
mod context;
|
||||||
mod dmn;
|
mod dmn;
|
||||||
mod journal;
|
mod journal;
|
||||||
mod log;
|
mod log;
|
||||||
|
|
@ -66,13 +67,13 @@ use crate::ui_channel::{ContextInfo, StatusInfo, StreamTarget, UiMessage};
|
||||||
/// Hard compaction threshold — context is rebuilt immediately.
|
/// Hard compaction threshold — context is rebuilt immediately.
|
||||||
/// Uses config percentage of model context window.
|
/// Uses config percentage of model context window.
|
||||||
fn compaction_threshold(model: &str, app: &AppConfig) -> u32 {
|
fn compaction_threshold(model: &str, app: &AppConfig) -> u32 {
|
||||||
(agent::model_context_window(model) as u32) * app.compaction.hard_threshold_pct / 100
|
(context::model_context_window(model) as u32) * app.compaction.hard_threshold_pct / 100
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Soft threshold — nudge the model to journal before compaction.
|
/// Soft threshold — nudge the model to journal before compaction.
|
||||||
/// Fires once; the hard threshold handles the actual rebuild.
|
/// Fires once; the hard threshold handles the actual rebuild.
|
||||||
fn pre_compaction_threshold(model: &str, app: &AppConfig) -> u32 {
|
fn pre_compaction_threshold(model: &str, app: &AppConfig) -> u32 {
|
||||||
(agent::model_context_window(model) as u32) * app.compaction.soft_threshold_pct / 100
|
(context::model_context_window(model) as u32) * app.compaction.soft_threshold_pct / 100
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue