WIP: ContextEntry/ContextSection data structures for incremental token counting
New types — not yet wired to callers: - ContextEntry: wraps ConversationEntry with cached token count and timestamp - ContextSection: named group of entries with cached token total. Private entries/tokens, read via entries()/tokens(). Mutation via push(entry), set(index, entry), del(index). - ContextState: system/identity/journal/conversation sections + working_stack - ConversationEntry::System variant for system prompt entries Token counting happens once at push time. Sections maintain their totals incrementally via push/set/del. No more recomputing from scratch on every budget check. Does not compile — callers need updating. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
776ac527f1
commit
62996e27d7
10 changed files with 450 additions and 403 deletions
|
|
@ -10,20 +10,130 @@ use serde::{Deserialize, Serialize};
|
|||
use tiktoken_rs::CoreBPE;
|
||||
use crate::agent::tools::working_stack;
|
||||
|
||||
/// A section of the context window, possibly with children.
|
||||
// --- Context state types ---
|
||||
|
||||
/// Conversation entry — either a regular message or memory content.
|
||||
/// Memory entries preserve the original message for KV cache round-tripping.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum ConversationEntry {
|
||||
/// System prompt or system-level instruction.
|
||||
System(Message),
|
||||
Message(Message),
|
||||
Memory { key: String, message: Message, score: Option<f64> },
|
||||
/// DMN heartbeat/autonomous prompt — evicted aggressively during compaction.
|
||||
Dmn(Message),
|
||||
/// Debug/status log line — written to conversation log for tracing,
|
||||
/// skipped on read-back.
|
||||
Log(String),
|
||||
}
|
||||
|
||||
/// Entry in the context window — wraps a ConversationEntry with cached metadata.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ContextEntry {
|
||||
pub entry: ConversationEntry,
|
||||
/// Cached token count (0 for Log entries).
|
||||
pub tokens: usize,
|
||||
/// When this entry was added to the context.
|
||||
pub timestamp: Option<DateTime<Utc>>,
|
||||
}
|
||||
|
||||
/// A named section of the context window with cached token total.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ContextSection {
|
||||
pub name: String,
|
||||
pub tokens: usize,
|
||||
pub content: String,
|
||||
pub children: Vec<ContextSection>,
|
||||
/// Cached sum of entry tokens.
|
||||
tokens: usize,
|
||||
entries: Vec<ContextEntry>,
|
||||
}
|
||||
|
||||
/// A single journal entry with its timestamp and content.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct JournalEntry {
|
||||
pub timestamp: DateTime<Utc>,
|
||||
pub content: String,
|
||||
impl ContextSection {
|
||||
pub fn new(name: impl Into<String>) -> Self {
|
||||
Self { name: name.into(), tokens: 0, entries: Vec::new() }
|
||||
}
|
||||
|
||||
pub fn entries(&self) -> &[ContextEntry] { &self.entries }
|
||||
pub fn tokens(&self) -> usize { self.tokens }
|
||||
pub fn len(&self) -> usize { self.entries.len() }
|
||||
pub fn is_empty(&self) -> bool { self.entries.is_empty() }
|
||||
|
||||
/// Push an entry, updating the cached token total.
|
||||
pub fn push(&mut self, entry: ContextEntry) {
|
||||
self.tokens += entry.tokens;
|
||||
self.entries.push(entry);
|
||||
}
|
||||
|
||||
/// Replace an entry at `index`, adjusting the token total.
|
||||
pub fn set(&mut self, index: usize, entry: ContextEntry) {
|
||||
self.tokens -= self.entries[index].tokens;
|
||||
self.tokens += entry.tokens;
|
||||
self.entries[index] = entry;
|
||||
}
|
||||
|
||||
/// Remove an entry at `index`, adjusting the token total.
|
||||
pub fn del(&mut self, index: usize) -> ContextEntry {
|
||||
let removed = self.entries.remove(index);
|
||||
self.tokens -= removed.tokens;
|
||||
removed
|
||||
}
|
||||
|
||||
/// Replace the message inside an entry, recomputing its token count.
|
||||
pub fn set_message(&mut self, index: usize, tokenizer: &CoreBPE, msg: Message) {
|
||||
let old_tokens = self.entries[index].tokens;
|
||||
*self.entries[index].entry.message_mut() = msg;
|
||||
let new_tokens = msg_token_count(tokenizer, self.entries[index].entry.api_message());
|
||||
self.entries[index].tokens = new_tokens;
|
||||
self.tokens = self.tokens - old_tokens + new_tokens;
|
||||
}
|
||||
|
||||
/// Set the score on a Memory entry. No token change.
|
||||
pub fn set_score(&mut self, index: usize, score: Option<f64>) {
|
||||
if let ConversationEntry::Memory { score: s, .. } = &mut self.entries[index].entry {
|
||||
*s = score;
|
||||
}
|
||||
}
|
||||
|
||||
/// Bulk replace all entries, recomputing token total.
|
||||
pub fn set_entries(&mut self, entries: Vec<ContextEntry>) {
|
||||
self.tokens = entries.iter().map(|e| e.tokens).sum();
|
||||
self.entries = entries;
|
||||
}
|
||||
|
||||
/// Dedup and trim entries to fit within context budget.
|
||||
pub fn trim(&mut self, budget: &ContextBudget, tokenizer: &CoreBPE) {
|
||||
let result = trim_entries(&self.entries, tokenizer, budget);
|
||||
self.entries = result;
|
||||
self.tokens = self.entries.iter().map(|e| e.tokens).sum();
|
||||
}
|
||||
|
||||
/// Clear all entries.
|
||||
pub fn clear(&mut self) {
|
||||
self.entries.clear();
|
||||
self.tokens = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ContextState {
|
||||
pub system: ContextSection,
|
||||
pub identity: ContextSection,
|
||||
pub journal: ContextSection,
|
||||
pub conversation: ContextSection,
|
||||
/// Working stack — separate from identity because it's managed
|
||||
/// by its own tool, not loaded from personality files.
|
||||
pub working_stack: Vec<String>,
|
||||
}
|
||||
|
||||
impl ContextState {
|
||||
/// Total tokens across all sections.
|
||||
pub fn total_tokens(&self) -> usize {
|
||||
self.system.tokens() + self.identity.tokens()
|
||||
+ self.journal.tokens() + self.conversation.tokens()
|
||||
}
|
||||
|
||||
/// All sections as a slice for iteration.
|
||||
pub fn sections(&self) -> [&ContextSection; 4] {
|
||||
[&self.system, &self.identity, &self.journal, &self.conversation]
|
||||
}
|
||||
}
|
||||
|
||||
/// Context window size in tokens (from config).
|
||||
|
|
@ -44,41 +154,39 @@ fn context_budget_tokens() -> usize {
|
|||
/// corresponding assistant tool_call message).
|
||||
/// 2. Trim: drop oldest entries until the conversation fits, snapping
|
||||
/// to user message boundaries.
|
||||
pub fn trim_entries(
|
||||
entries: &[ConversationEntry],
|
||||
tokenizer: &CoreBPE,
|
||||
fn trim_entries(
|
||||
entries: &[ContextEntry],
|
||||
_tokenizer: &CoreBPE,
|
||||
budget: &ContextBudget,
|
||||
) -> Vec<ConversationEntry> {
|
||||
) -> Vec<ContextEntry> {
|
||||
let fixed_tokens = budget.system + budget.identity + budget.journal;
|
||||
// --- Phase 1: dedup memory entries by key (keep last) ---
|
||||
let mut seen_keys: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
|
||||
let mut drop_indices: std::collections::HashSet<usize> = std::collections::HashSet::new();
|
||||
|
||||
for (i, entry) in entries.iter().enumerate() {
|
||||
if let ConversationEntry::Memory { key, .. } = entry {
|
||||
for (i, ce) in entries.iter().enumerate() {
|
||||
if let ConversationEntry::Memory { key, .. } = &ce.entry {
|
||||
if let Some(prev) = seen_keys.insert(key.as_str(), i) {
|
||||
drop_indices.insert(prev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let deduped: Vec<ConversationEntry> = entries.iter().enumerate()
|
||||
let deduped: Vec<ContextEntry> = entries.iter().enumerate()
|
||||
.filter(|(i, _)| !drop_indices.contains(i))
|
||||
.map(|(_, e)| e.clone())
|
||||
.collect();
|
||||
|
||||
// --- Phase 2: trim to fit context budget ---
|
||||
let max_tokens = context_budget_tokens();
|
||||
let count_msg = |m: &Message| msg_token_count(tokenizer, m);
|
||||
|
||||
let msg_costs: Vec<usize> = deduped.iter()
|
||||
.map(|e| if e.is_log() { 0 } else { count_msg(e.api_message()) }).collect();
|
||||
let msg_costs: Vec<usize> = deduped.iter().map(|e| e.tokens).collect();
|
||||
let entry_total: usize = msg_costs.iter().sum();
|
||||
let total: usize = fixed_tokens + entry_total;
|
||||
|
||||
let mem_tokens: usize = deduped.iter().zip(&msg_costs)
|
||||
.filter(|(e, _)| e.is_memory())
|
||||
.map(|(_, &c)| c).sum();
|
||||
let mem_tokens: usize = deduped.iter()
|
||||
.filter(|ce| ce.entry.is_memory())
|
||||
.map(|ce| ce.tokens).sum();
|
||||
let conv_tokens: usize = entry_total - mem_tokens;
|
||||
|
||||
dbglog!("[trim] max_tokens={} fixed={} mem={} conv={} total={} entries={}",
|
||||
|
|
@ -90,7 +198,7 @@ pub fn trim_entries(
|
|||
let mut cur_mem = mem_tokens;
|
||||
|
||||
for i in 0..deduped.len() {
|
||||
if deduped[i].is_dmn() {
|
||||
if deduped[i].entry.is_dmn() {
|
||||
drop[i] = true;
|
||||
trimmed -= msg_costs[i];
|
||||
}
|
||||
|
|
@ -99,14 +207,14 @@ pub fn trim_entries(
|
|||
// Phase 2b: if memories > 50% of context, evict lowest-scored first
|
||||
if cur_mem > conv_tokens && trimmed > max_tokens {
|
||||
let mut mem_indices: Vec<usize> = (0..deduped.len())
|
||||
.filter(|&i| !drop[i] && deduped[i].is_memory())
|
||||
.filter(|&i| !drop[i] && deduped[i].entry.is_memory())
|
||||
.collect();
|
||||
mem_indices.sort_by(|&a, &b| {
|
||||
let sa = match &deduped[a] {
|
||||
let sa = match &deduped[a].entry {
|
||||
ConversationEntry::Memory { score, .. } => score.unwrap_or(0.0),
|
||||
_ => 0.0,
|
||||
};
|
||||
let sb = match &deduped[b] {
|
||||
let sb = match &deduped[b].entry {
|
||||
ConversationEntry::Memory { score, .. } => score.unwrap_or(0.0),
|
||||
_ => 0.0,
|
||||
};
|
||||
|
|
@ -130,16 +238,16 @@ pub fn trim_entries(
|
|||
}
|
||||
|
||||
// Walk forward to include complete conversation boundaries
|
||||
let mut result: Vec<ConversationEntry> = Vec::new();
|
||||
let mut result: Vec<ContextEntry> = Vec::new();
|
||||
let mut skipping = true;
|
||||
for (i, entry) in deduped.into_iter().enumerate() {
|
||||
for (i, ce) in deduped.into_iter().enumerate() {
|
||||
if skipping {
|
||||
if drop[i] { continue; }
|
||||
// Snap to user message boundary
|
||||
if entry.message().role != Role::User { continue; }
|
||||
if ce.entry.message().role != Role::User { continue; }
|
||||
skipping = false;
|
||||
}
|
||||
result.push(entry);
|
||||
result.push(ce);
|
||||
}
|
||||
|
||||
dbglog!("[trim] result={} trimmed_total={}", result.len(), trimmed);
|
||||
|
|
@ -186,28 +294,13 @@ pub fn is_stream_error(err: &anyhow::Error) -> bool {
|
|||
err.to_string().contains("model stream error")
|
||||
}
|
||||
|
||||
// --- Context state types ---
|
||||
|
||||
/// Conversation entry — either a regular message or memory content.
|
||||
/// Memory entries preserve the original message for KV cache round-tripping.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum ConversationEntry {
|
||||
Message(Message),
|
||||
Memory { key: String, message: Message, score: Option<f64> },
|
||||
/// DMN heartbeat/autonomous prompt — evicted aggressively during compaction.
|
||||
Dmn(Message),
|
||||
/// Debug/status log line — written to conversation log for tracing,
|
||||
/// skipped on read-back.
|
||||
Log(String),
|
||||
}
|
||||
|
||||
// Custom serde: serialize Memory with a "memory_key" field added to the message,
|
||||
// plain messages serialize as-is. This keeps the conversation log readable.
|
||||
impl Serialize for ConversationEntry {
|
||||
fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
|
||||
use serde::ser::SerializeMap;
|
||||
match self {
|
||||
Self::Message(m) | Self::Dmn(m) => m.serialize(s),
|
||||
Self::System(m) | Self::Message(m) | Self::Dmn(m) => m.serialize(s),
|
||||
Self::Memory { key, message, score } => {
|
||||
let json = serde_json::to_value(message).map_err(serde::ser::Error::custom)?;
|
||||
let mut map = s.serialize_map(None)?;
|
||||
|
|
@ -259,7 +352,7 @@ impl ConversationEntry {
|
|||
/// Panics on Log entries (which should be filtered before API calls).
|
||||
pub fn api_message(&self) -> &Message {
|
||||
match self {
|
||||
Self::Message(m) | Self::Dmn(m) => m,
|
||||
Self::System(m) | Self::Message(m) | Self::Dmn(m) => m,
|
||||
Self::Memory { message, .. } => message,
|
||||
Self::Log(_) => panic!("Log entries have no API message"),
|
||||
}
|
||||
|
|
@ -281,7 +374,7 @@ impl ConversationEntry {
|
|||
/// Panics on Log entries.
|
||||
pub fn message(&self) -> &Message {
|
||||
match self {
|
||||
Self::Message(m) | Self::Dmn(m) => m,
|
||||
Self::System(m) | Self::Message(m) | Self::Dmn(m) => m,
|
||||
Self::Memory { message, .. } => message,
|
||||
Self::Log(_) => panic!("Log entries have no message"),
|
||||
}
|
||||
|
|
@ -291,38 +384,36 @@ impl ConversationEntry {
|
|||
/// Panics on Log entries.
|
||||
pub fn message_mut(&mut self) -> &mut Message {
|
||||
match self {
|
||||
Self::Message(m) | Self::Dmn(m) => m,
|
||||
Self::System(m) | Self::Message(m) | Self::Dmn(m) => m,
|
||||
Self::Memory { message, .. } => message,
|
||||
Self::Log(_) => panic!("Log entries have no message"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ContextState {
|
||||
pub system_prompt: String,
|
||||
pub personality: Vec<(String, String)>,
|
||||
pub journal: Vec<JournalEntry>,
|
||||
pub working_stack: Vec<String>,
|
||||
/// Conversation entries — messages and memory, interleaved in order.
|
||||
/// Does NOT include system prompt, personality, or journal.
|
||||
pub entries: Vec<ConversationEntry>,
|
||||
}
|
||||
|
||||
pub fn render_journal(entries: &[JournalEntry]) -> String {
|
||||
if entries.is_empty() { return String::new(); }
|
||||
let mut text = String::from("[Earlier — from your journal]\n\n");
|
||||
for entry in entries {
|
||||
use std::fmt::Write;
|
||||
writeln!(text, "## {}\n{}\n", entry.timestamp.format("%Y-%m-%dT%H:%M"), entry.content).ok();
|
||||
}
|
||||
text
|
||||
}
|
||||
|
||||
impl ContextState {
|
||||
/// Render journal entries into a single text block.
|
||||
pub fn render_journal(&self) -> String {
|
||||
if self.journal.is_empty() { return String::new(); }
|
||||
let mut text = String::from("[Earlier — from your journal]\n\n");
|
||||
for e in self.journal.entries() {
|
||||
use std::fmt::Write;
|
||||
if let Some(ts) = &e.timestamp {
|
||||
writeln!(text, "## {}\n{}\n",
|
||||
ts.format("%Y-%m-%dT%H:%M"),
|
||||
e.entry.message().content_text()).ok();
|
||||
} else {
|
||||
text.push_str(&e.entry.message().content_text());
|
||||
text.push_str("\n\n");
|
||||
}
|
||||
}
|
||||
text
|
||||
}
|
||||
|
||||
/// Render identity files + working stack into a single user message.
|
||||
pub fn render_context_message(&self) -> String {
|
||||
let mut parts: Vec<String> = self.personality.iter()
|
||||
.map(|(name, content)| format!("## {}\n\n{}", name, content))
|
||||
let mut parts: Vec<String> = self.identity.entries().iter()
|
||||
.map(|e| e.entry.message().content_text().to_string())
|
||||
.collect();
|
||||
let instructions = std::fs::read_to_string(working_stack::instructions_path()).unwrap_or_default();
|
||||
let mut stack_section = instructions;
|
||||
|
|
|
|||
376
src/agent/mod.rs
376
src/agent/mod.rs
|
|
@ -24,7 +24,7 @@ use tiktoken_rs::CoreBPE;
|
|||
|
||||
use api::{ApiClient, ToolCall};
|
||||
use api::{ContentPart, Message, MessageContent, Role};
|
||||
use context::{ConversationEntry, ContextState};
|
||||
use context::{ConversationEntry, ContextEntry, ContextState};
|
||||
use tools::{summarize_args, working_stack};
|
||||
|
||||
use crate::mind::log::ConversationLog;
|
||||
|
|
@ -195,12 +195,27 @@ impl Agent {
|
|||
let tokenizer = tiktoken_rs::cl100k_base()
|
||||
.expect("failed to load cl100k_base tokenizer");
|
||||
|
||||
let mut system = ContextSection::new("System prompt");
|
||||
system.push(ContextEntry {
|
||||
entry: ConversationEntry::System(Message::system(&system_prompt)),
|
||||
tokens: context::msg_token_count(&tokenizer, &Message::system(&system_prompt)),
|
||||
timestamp: None,
|
||||
});
|
||||
let mut identity = ContextSection::new("Identity");
|
||||
for (_name, content) in &personality {
|
||||
let msg = Message::user(content);
|
||||
identity.push(ContextEntry {
|
||||
tokens: context::msg_token_count(&tokenizer, &msg),
|
||||
entry: ConversationEntry::Message(msg),
|
||||
timestamp: None,
|
||||
});
|
||||
}
|
||||
let context = ContextState {
|
||||
system_prompt: system_prompt.clone(),
|
||||
personality,
|
||||
journal: Vec::new(),
|
||||
system,
|
||||
identity,
|
||||
journal: ContextSection::new("Journal"),
|
||||
conversation: ContextSection::new("Conversation"),
|
||||
working_stack: Vec::new(),
|
||||
entries: Vec::new(),
|
||||
};
|
||||
let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
|
||||
let mut agent = Self {
|
||||
|
|
@ -274,18 +289,24 @@ impl Agent {
|
|||
/// System prompt + personality context + journal + conversation messages.
|
||||
pub fn assemble_api_messages(&self) -> Vec<Message> {
|
||||
let mut msgs = Vec::new();
|
||||
msgs.push(Message::system(&self.context.system_prompt));
|
||||
// System section
|
||||
for e in self.context.system.entries() {
|
||||
msgs.push(e.entry.api_message().clone());
|
||||
}
|
||||
// Identity — render personality files + working stack into one user message
|
||||
let ctx = self.context.render_context_message();
|
||||
if !ctx.is_empty() {
|
||||
msgs.push(Message::user(ctx));
|
||||
}
|
||||
let jnl = context::render_journal(&self.context.journal);
|
||||
// Journal — render into one user message
|
||||
let jnl = self.context.render_journal();
|
||||
if !jnl.is_empty() {
|
||||
msgs.push(Message::user(jnl));
|
||||
}
|
||||
msgs.extend(self.context.entries.iter()
|
||||
.filter(|e| !e.is_log())
|
||||
.map(|e| e.api_message().clone()));
|
||||
// Conversation entries
|
||||
msgs.extend(self.context.conversation.entries().iter()
|
||||
.filter(|e| !e.entry.is_log())
|
||||
.map(|e| e.entry.api_message().clone()));
|
||||
msgs
|
||||
}
|
||||
|
||||
|
|
@ -303,50 +324,64 @@ impl Agent {
|
|||
eprintln!("warning: failed to log entry: {:#}", e);
|
||||
}
|
||||
}
|
||||
self.context.entries.push(entry);
|
||||
let tokens = if entry.is_log() { 0 } else {
|
||||
context::msg_token_count(&self.tokenizer, entry.api_message())
|
||||
};
|
||||
self.context.conversation.push(ContextEntry {
|
||||
entry, tokens, timestamp: Some(chrono::Utc::now()),
|
||||
});
|
||||
|
||||
self.changed.notify_one();
|
||||
}
|
||||
|
||||
fn streaming_entry(&mut self) -> Option<&mut Message> {
|
||||
for entry in self.context.entries.iter_mut().rev() {
|
||||
let m = entry.message_mut();
|
||||
if m.role == Role::Assistant {
|
||||
return if m.timestamp.is_none() { Some(m) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
/// Find the index of the in-progress streaming entry (unstamped assistant message).
|
||||
fn streaming_index(&self) -> Option<usize> {
|
||||
self.context.conversation.entries().iter().rposition(|ce| {
|
||||
let m = ce.entry.message();
|
||||
m.role == Role::Assistant && m.timestamp.is_none()
|
||||
})
|
||||
}
|
||||
|
||||
/// Append streaming text to the last entry (creating a partial
|
||||
/// assistant entry if needed). Called by collect_stream per token batch.
|
||||
fn append_streaming(&mut self, text: &str) {
|
||||
if let Some(m) = self.streaming_entry() {
|
||||
m.append_content(text);
|
||||
if let Some(idx) = self.streaming_index() {
|
||||
let mut msg = self.context.conversation.entries()[idx].entry.message().clone();
|
||||
msg.append_content(text);
|
||||
self.context.conversation.set_message(idx, &self.tokenizer, msg);
|
||||
} else {
|
||||
// No streaming entry — create without timestamp so finalize can find it
|
||||
self.context.entries.push(ConversationEntry::Message(Message {
|
||||
let msg = Message {
|
||||
role: Role::Assistant,
|
||||
content: Some(MessageContent::Text(text.to_string())),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
name: None,
|
||||
timestamp: None,
|
||||
}));
|
||||
};
|
||||
let tokens = context::msg_token_count(&self.tokenizer, &msg);
|
||||
self.context.conversation.push(ContextEntry {
|
||||
entry: ConversationEntry::Message(msg),
|
||||
tokens,
|
||||
timestamp: None,
|
||||
});
|
||||
}
|
||||
|
||||
self.changed.notify_one();
|
||||
}
|
||||
|
||||
/// Finalize the streaming entry with the complete response message.
|
||||
/// Finds the unstamped assistant entry, updates it in place, and logs it.
|
||||
/// Finds the unstamped assistant entry, replaces it via set() with proper token count.
|
||||
fn finalize_streaming(&mut self, msg: Message) {
|
||||
if let Some(m) = self.streaming_entry() {
|
||||
*m = msg.clone();
|
||||
m.stamp();
|
||||
if let Some(i) = self.streaming_index() {
|
||||
let mut stamped = msg.clone();
|
||||
stamped.stamp();
|
||||
let tokens = context::msg_token_count(&self.tokenizer, &stamped);
|
||||
self.context.conversation.set(i, ContextEntry {
|
||||
entry: ConversationEntry::Message(stamped),
|
||||
tokens,
|
||||
timestamp: Some(chrono::Utc::now()),
|
||||
});
|
||||
} else {
|
||||
// No streaming entry found — push as new (this logs via push_message)
|
||||
self.push_message(msg.clone());
|
||||
}
|
||||
|
||||
|
|
@ -655,173 +690,32 @@ impl Agent {
|
|||
self.push_message(Message::tool_result(&call.id, &output));
|
||||
}
|
||||
|
||||
/// Token budget by category — cheap, no formatting. Used for compaction decisions.
|
||||
/// Token budget by category — just reads cached section totals.
|
||||
pub fn context_budget(&self) -> context::ContextBudget {
|
||||
let count = |m: &Message| context::msg_token_count(&self.tokenizer, m);
|
||||
|
||||
let system = count(&Message::system(&self.context.system_prompt));
|
||||
let identity = count(&Message::user(&self.context.render_context_message()));
|
||||
let journal_rendered = context::render_journal(&self.context.journal);
|
||||
let journal = if journal_rendered.is_empty() { 0 } else {
|
||||
count(&Message::user(&journal_rendered))
|
||||
};
|
||||
let memory: usize = self.context.entries.iter()
|
||||
.filter(|e| e.is_memory())
|
||||
.map(|e| count(e.message()))
|
||||
let memory: usize = self.context.conversation.entries().iter()
|
||||
.filter(|e| e.entry.is_memory())
|
||||
.map(|e| e.tokens)
|
||||
.sum();
|
||||
let conversation: usize = self.context.entries.iter()
|
||||
.filter(|e| !e.is_memory() && !e.is_log())
|
||||
.map(|e| count(e.api_message()))
|
||||
.sum();
|
||||
|
||||
context::ContextBudget { system, identity, journal, memory, conversation }
|
||||
}
|
||||
|
||||
/// Build context state summary for the debug screen.
|
||||
pub fn context_state_summary(&self) -> Vec<ContextSection> {
|
||||
let count_msg = |m: &Message| context::msg_token_count(&self.tokenizer, m);
|
||||
|
||||
let mut sections = Vec::new();
|
||||
|
||||
// System prompt — counted as the actual message sent
|
||||
let system_msg = Message::system(&self.context.system_prompt);
|
||||
sections.push(ContextSection {
|
||||
name: "System prompt".into(),
|
||||
tokens: count_msg(&system_msg),
|
||||
content: self.context.system_prompt.clone(),
|
||||
children: Vec::new(),
|
||||
});
|
||||
|
||||
// Context message (personality + working stack) — counted as the
|
||||
// single user message that assemble_api_messages sends
|
||||
let context_rendered = self.context.render_context_message();
|
||||
let context_msg = Message::user(&context_rendered);
|
||||
sections.push(ContextSection {
|
||||
name: format!("Identity ({} files + stack)", self.context.personality.len()),
|
||||
tokens: count_msg(&context_msg),
|
||||
content: context_rendered,
|
||||
children: Vec::new(),
|
||||
});
|
||||
|
||||
// Journal — counted as the single rendered message sent
|
||||
let journal_rendered = context::render_journal(&self.context.journal);
|
||||
let journal_msg = Message::user(&journal_rendered);
|
||||
sections.push(ContextSection {
|
||||
name: format!("Journal ({} entries)", self.context.journal.len()),
|
||||
tokens: if journal_rendered.is_empty() { 0 } else { count_msg(&journal_msg) },
|
||||
content: journal_rendered,
|
||||
children: Vec::new(),
|
||||
});
|
||||
|
||||
// Memory nodes — extracted from Memory entries in the conversation
|
||||
let memory_entries: Vec<&ConversationEntry> = self.context.entries.iter()
|
||||
.filter(|e| e.is_memory())
|
||||
.collect();
|
||||
if !memory_entries.is_empty() {
|
||||
let node_children: Vec<ContextSection> = memory_entries.iter()
|
||||
.map(|entry| {
|
||||
let (key, score) = match entry {
|
||||
ConversationEntry::Memory { key, score, .. } => (key.as_str(), *score),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let label = match score {
|
||||
Some(s) => format!("{} (score:{:.1})", key, s),
|
||||
None => key.to_string(),
|
||||
};
|
||||
ContextSection {
|
||||
name: label,
|
||||
tokens: count_msg(entry.message()),
|
||||
content: String::new(),
|
||||
children: Vec::new(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let node_tokens: usize = node_children.iter().map(|c| c.tokens).sum();
|
||||
sections.push(ContextSection {
|
||||
name: format!("Memory nodes ({} loaded)", memory_entries.len()),
|
||||
tokens: node_tokens,
|
||||
content: String::new(),
|
||||
children: node_children,
|
||||
});
|
||||
let conv_total = self.context.conversation.tokens();
|
||||
context::ContextBudget {
|
||||
system: self.context.system.tokens(),
|
||||
identity: self.context.identity.tokens(),
|
||||
journal: self.context.journal.tokens(),
|
||||
memory,
|
||||
conversation: conv_total - memory,
|
||||
}
|
||||
|
||||
// Conversation — memories excluded (counted in their own section above)
|
||||
let conv_children = self.entry_sections(&count_msg, 0, false);
|
||||
let conv_tokens: usize = conv_children.iter().map(|c| c.tokens).sum();
|
||||
sections.push(ContextSection {
|
||||
name: format!("Conversation ({} messages)", conv_children.len()),
|
||||
tokens: conv_tokens,
|
||||
content: String::new(),
|
||||
children: conv_children,
|
||||
});
|
||||
|
||||
sections
|
||||
}
|
||||
|
||||
/// Build ContextSection nodes for conversation entries starting at `from`.
|
||||
/// When `include_memories` is false, memory entries are excluded (they get
|
||||
/// their own section in context_state_summary to avoid double-counting).
|
||||
fn entry_sections(
|
||||
&self,
|
||||
count_msg: &dyn Fn(&Message) -> usize,
|
||||
from: usize,
|
||||
include_memories: bool,
|
||||
) -> Vec<ContextSection> {
|
||||
let cfg = crate::config::get();
|
||||
self.context.entries.iter().enumerate()
|
||||
.skip(from)
|
||||
.filter(|(_, e)| !e.is_log() && (include_memories || !e.is_memory()))
|
||||
.map(|(i, entry)| {
|
||||
let m = entry.message();
|
||||
let text = m.content.as_ref()
|
||||
.map(|c| c.as_text().to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
let (role_name, label) = if let ConversationEntry::Memory { key, score, .. } = entry {
|
||||
let label = match score {
|
||||
Some(s) => format!("[memory: {} score:{:.1}]", key, s),
|
||||
None => format!("[memory: {}]", key),
|
||||
};
|
||||
("mem".to_string(), label)
|
||||
} else {
|
||||
let tool_info = m.tool_calls.as_ref().map(|tc| {
|
||||
tc.iter()
|
||||
.map(|c| c.function.name.clone())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
});
|
||||
let label = match &tool_info {
|
||||
Some(tools) => format!("[tool_call: {}]", tools),
|
||||
None => {
|
||||
let preview: String = text.chars().take(60).collect();
|
||||
let preview = preview.replace('\n', " ");
|
||||
if text.len() > 60 { format!("{}...", preview) } else { preview }
|
||||
}
|
||||
};
|
||||
let role_name = match m.role {
|
||||
Role::Assistant => cfg.assistant_name.clone(),
|
||||
Role::User => cfg.user_name.clone(),
|
||||
Role::Tool => "tool".to_string(),
|
||||
Role::System => "system".to_string(),
|
||||
};
|
||||
(role_name, label)
|
||||
};
|
||||
|
||||
ContextSection {
|
||||
name: format!("[{}] {}: {}", i, role_name, label),
|
||||
tokens: count_msg(entry.api_message()),
|
||||
content: text,
|
||||
children: Vec::new(),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
/// Context state sections — just returns references to the live data.
|
||||
pub fn context_sections(&self) -> [&ContextSection; 4] {
|
||||
self.context.sections()
|
||||
}
|
||||
|
||||
/// Context sections for entries from `from` onward — used by the
|
||||
/// Conversation entries from `from` onward — used by the
|
||||
/// subconscious debug screen to show forked agent conversations.
|
||||
pub fn conversation_sections_from(&self, from: usize) -> Vec<ContextSection> {
|
||||
let count_msg = |m: &Message| context::msg_token_count(&self.tokenizer, m);
|
||||
self.entry_sections(&count_msg, from, true)
|
||||
pub fn conversation_entries_from(&self, from: usize) -> &[ContextEntry] {
    // A start index at or beyond the end yields an empty slice instead
    // of panicking on an out-of-range slice.
    self.context.conversation.entries().get(from..).unwrap_or(&[])
}
|
||||
|
||||
/// Load recent journal entries at startup for orientation.
|
||||
|
|
@ -876,35 +770,38 @@ impl Agent {
|
|||
dbg_log!("[journal] cutoff_idx={}", cutoff_idx);
|
||||
|
||||
// Walk backwards from cutoff, accumulating entries within 15% of context
|
||||
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
||||
let context_window = crate::agent::context::context_window();
|
||||
let journal_budget = context_window * 15 / 100;
|
||||
dbg_log!("[journal] budget={} tokens ({}*15%)", journal_budget, context_window);
|
||||
|
||||
let mut entries = Vec::new();
|
||||
let mut journal_entries = Vec::new();
|
||||
let mut total_tokens = 0;
|
||||
|
||||
for node in journal_nodes[..cutoff_idx].iter().rev() {
|
||||
let tokens = count(&node.content);
|
||||
if total_tokens + tokens > journal_budget && !entries.is_empty() {
|
||||
let msg = Message::user(&node.content);
|
||||
let tokens = context::msg_token_count(&self.tokenizer, &msg);
|
||||
if total_tokens + tokens > journal_budget && !journal_entries.is_empty() {
|
||||
break;
|
||||
}
|
||||
entries.push(context::JournalEntry {
|
||||
timestamp: chrono::DateTime::from_timestamp(node.created_at, 0)
|
||||
.unwrap_or_default(),
|
||||
content: node.content.clone(),
|
||||
journal_entries.push(ContextEntry {
|
||||
entry: ConversationEntry::Message(msg),
|
||||
tokens,
|
||||
timestamp: chrono::DateTime::from_timestamp(node.created_at, 0),
|
||||
});
|
||||
total_tokens += tokens;
|
||||
}
|
||||
entries.reverse();
|
||||
dbg_log!("[journal] loaded {} entries, {} tokens", entries.len(), total_tokens);
|
||||
journal_entries.reverse();
|
||||
dbg_log!("[journal] loaded {} entries, {} tokens", journal_entries.len(), total_tokens);
|
||||
|
||||
if entries.is_empty() {
|
||||
if journal_entries.is_empty() {
|
||||
dbg_log!("[journal] no entries!");
|
||||
return;
|
||||
}
|
||||
|
||||
self.context.journal = entries;
|
||||
self.context.journal.clear();
|
||||
for entry in journal_entries {
|
||||
self.context.journal.push(entry);
|
||||
}
|
||||
dbg_log!("[journal] context.journal now has {} entries", self.context.journal.len());
|
||||
}
|
||||
|
||||
|
|
@ -923,10 +820,10 @@ impl Agent {
|
|||
/// The tool result message before each image records what was loaded.
|
||||
pub fn age_out_images(&mut self) {
|
||||
// Find image entries newest-first, skip 1 (caller is about to add another)
|
||||
let to_age: Vec<usize> = self.context.entries.iter().enumerate()
|
||||
let to_age: Vec<usize> = self.context.conversation.entries().iter().enumerate()
|
||||
.rev()
|
||||
.filter(|(_, e)| {
|
||||
if let Some(MessageContent::Parts(parts)) = &e.message().content {
|
||||
.filter(|(_, ce)| {
|
||||
if let Some(MessageContent::Parts(parts)) = &ce.entry.message().content {
|
||||
parts.iter().any(|p| matches!(p, ContentPart::ImageUrl { .. }))
|
||||
} else { false }
|
||||
})
|
||||
|
|
@ -935,7 +832,9 @@ impl Agent {
|
|||
.collect();
|
||||
|
||||
for i in to_age {
|
||||
let msg = self.context.entries[i].message_mut();
|
||||
// Build replacement entry with image data stripped
|
||||
let old = &self.context.conversation.entries()[i];
|
||||
let msg = old.entry.message();
|
||||
if let Some(MessageContent::Parts(parts)) = &msg.content {
|
||||
let mut replacement = String::new();
|
||||
for part in parts {
|
||||
|
|
@ -950,7 +849,14 @@ impl Agent {
|
|||
}
|
||||
}
|
||||
}
|
||||
msg.content = Some(MessageContent::Text(replacement));
|
||||
let mut new_msg = msg.clone();
|
||||
new_msg.content = Some(MessageContent::Text(replacement));
|
||||
let tokens = context::msg_token_count(&self.tokenizer, &new_msg);
|
||||
self.context.conversation.set(i, ContextEntry {
|
||||
entry: ConversationEntry::Message(new_msg),
|
||||
tokens,
|
||||
timestamp: old.timestamp,
|
||||
});
|
||||
}
|
||||
}
|
||||
self.generation += 1;
|
||||
|
|
@ -968,16 +874,30 @@ impl Agent {
|
|||
// Reload identity from config
|
||||
match crate::config::reload_for_model(&self.app_config, &self.prompt_file) {
|
||||
Ok((system_prompt, personality)) => {
|
||||
self.context.system_prompt = system_prompt;
|
||||
self.context.personality = personality;
|
||||
self.context.system.clear();
|
||||
self.context.system.push(ContextEntry {
|
||||
entry: ConversationEntry::System(Message::system(&system_prompt)),
|
||||
tokens: context::msg_token_count(&self.tokenizer, &Message::system(&system_prompt)),
|
||||
timestamp: None,
|
||||
});
|
||||
self.context.identity.clear();
|
||||
for (_name, content) in &personality {
|
||||
let msg = Message::user(content);
|
||||
self.context.identity.push(ContextEntry {
|
||||
tokens: context::msg_token_count(&self.tokenizer, &msg),
|
||||
entry: ConversationEntry::Message(msg),
|
||||
timestamp: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("warning: failed to reload identity: {:#}", e);
|
||||
}
|
||||
}
|
||||
|
||||
let before = self.context.entries.len();
|
||||
let before_mem = self.context.entries.iter().filter(|e| e.is_memory()).count();
|
||||
let before = self.context.conversation.len();
|
||||
let before_mem = self.context.conversation.entries().iter()
|
||||
.filter(|e| e.entry.is_memory()).count();
|
||||
let before_conv = before - before_mem;
|
||||
|
||||
// Age out images before trimming — they're huge in the request payload
|
||||
|
|
@ -988,15 +908,11 @@ impl Agent {
|
|||
|
||||
// Dedup memory, trim to budget
|
||||
let budget = self.context_budget();
|
||||
let entries = self.context.entries.clone();
|
||||
self.context.entries = crate::agent::context::trim_entries(
|
||||
&entries,
|
||||
&self.tokenizer,
|
||||
&budget,
|
||||
);
|
||||
self.context.conversation.trim(&budget, &self.tokenizer);
|
||||
|
||||
let after = self.context.entries.len();
|
||||
let after_mem = self.context.entries.iter().filter(|e| e.is_memory()).count();
|
||||
let after = self.context.conversation.len();
|
||||
let after_mem = self.context.conversation.entries().iter()
|
||||
.filter(|e| e.entry.is_memory()).count();
|
||||
let after_conv = after - after_mem;
|
||||
|
||||
dbglog!("[compact] entries: {} → {} (mem: {} → {}, conv: {} → {})",
|
||||
|
|
@ -1022,14 +938,26 @@ impl Agent {
|
|||
};
|
||||
|
||||
// Load extra — compact() will dedup, trim, reload identity + journal
|
||||
let all: Vec<_> = entries.into_iter()
|
||||
let all: Vec<ContextEntry> = entries.into_iter()
|
||||
.filter(|e| !e.is_log() && e.message().role != Role::System)
|
||||
.map(|e| {
|
||||
let tokens = if e.is_log() { 0 } else {
|
||||
context::msg_token_count(&self.tokenizer, e.api_message())
|
||||
};
|
||||
let timestamp = if e.is_log() { None } else {
|
||||
e.message().timestamp.as_ref().and_then(|ts| {
|
||||
chrono::DateTime::parse_from_rfc3339(ts).ok()
|
||||
.map(|dt| dt.with_timezone(&chrono::Utc))
|
||||
})
|
||||
};
|
||||
ContextEntry { entry: e, tokens, timestamp }
|
||||
})
|
||||
.collect();
|
||||
let mem_count = all.iter().filter(|e| e.is_memory()).count();
|
||||
let mem_count = all.iter().filter(|e| e.entry.is_memory()).count();
|
||||
let conv_count = all.len() - mem_count;
|
||||
dbglog!("[restore] loaded {} entries from log (mem: {}, conv: {})",
|
||||
all.len(), mem_count, conv_count);
|
||||
self.context.entries = all;
|
||||
self.context.conversation.set_entries(all);
|
||||
self.compact();
|
||||
// Estimate prompt tokens so status bar isn't 0 on startup
|
||||
self.last_prompt_tokens = self.context_budget().total() as u32;
|
||||
|
|
@ -1046,9 +974,9 @@ impl Agent {
|
|||
&self.client.model
|
||||
}
|
||||
|
||||
/// Get the conversation entries for persistence.
|
||||
pub fn entries(&self) -> &[ConversationEntry] {
|
||||
&self.context.entries
|
||||
/// Get the conversation entries.
|
||||
pub fn entries(&self) -> &[ContextEntry] {
|
||||
self.context.conversation.entries()
|
||||
}
|
||||
|
||||
/// Mutable access to conversation entries (for /retry).
|
||||
|
|
|
|||
|
|
@ -93,8 +93,7 @@ impl Backend {
|
|||
match self {
|
||||
Backend::Standalone { messages, .. } => messages.push(msg),
|
||||
Backend::Forked(agent) => {
|
||||
agent.lock().await.context.entries.push(
|
||||
super::context::ConversationEntry::Message(msg));
|
||||
agent.lock().await.push_message(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue