Fix context budgeting and compaction
- Budget now counts exact message tokens matching what assemble_api_messages sends, not raw string content. Eliminates undercounting from formatting overhead (journal headers, personality separators, working stack).
- Load journal before trimming so trim accounts for journal cost.
- Compact before every turn, not just after turn completion. Prevents agent_cycle surfaced memories from pushing context over budget.
- Move agent_cycle orchestration from Agent::turn to Mind::start_turn — surfaced memories and reflections now precede the user message.
- Move AgentCycleState from Agent to Mind — it's orchestration, not per-agent state. memory_scoring_in_flight and memory_scores stay on Agent where they belong.
- Tag DMN entries as ConversationEntry::Dmn — compaction evicts them first since they're ephemeral. Compaction also prefers evicting memories over conversation when memories exceed 50% of entry tokens.
- Kill /retry slash command.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
c22b8c3a6f
commit
d5e6f55da9
5 changed files with 194 additions and 170 deletions
167
src/agent/mod.rs
167
src/agent/mod.rs
|
|
@ -160,7 +160,7 @@ pub struct Agent {
|
|||
pub pending_model_switch: Option<String>,
|
||||
pub pending_dmn_pause: bool,
|
||||
/// Persistent conversation log — append-only record of all messages.
|
||||
conversation_log: Option<ConversationLog>,
|
||||
pub conversation_log: Option<ConversationLog>,
|
||||
/// BPE tokenizer for token counting (cl100k_base — close enough
|
||||
/// for Claude and Qwen budget allocation, ~85-90% count accuracy).
|
||||
tokenizer: CoreBPE,
|
||||
|
|
@ -175,24 +175,16 @@ pub struct Agent {
|
|||
pub session_id: String,
|
||||
/// Incremented on compaction — UI uses this to detect resets.
|
||||
pub generation: u64,
|
||||
/// Agent orchestration state (surface-observe, journal, reflect).
|
||||
/// TODO: move to Session — it's session-level, not agent-level.
|
||||
pub agent_cycles: crate::subconscious::subconscious::AgentCycleState,
|
||||
/// Whether incremental memory scoring is currently running.
|
||||
pub memory_scoring_in_flight: bool,
|
||||
/// Latest per-memory scores from incremental scoring.
|
||||
pub memory_scores: Vec<(String, f64)>,
|
||||
/// Shared active tools — Agent writes, TUI reads.
|
||||
pub active_tools: tools::SharedActiveTools,
|
||||
/// Fires when agent state changes — UI wakes on this instead of polling.
|
||||
pub changed: Arc<tokio::sync::Notify>,
|
||||
}
|
||||
|
||||
fn render_journal(entries: &[context::JournalEntry]) -> String {
|
||||
if entries.is_empty() { return String::new(); }
|
||||
let mut text = String::from("[Earlier — from your journal]\n\n");
|
||||
for entry in entries {
|
||||
use std::fmt::Write;
|
||||
writeln!(text, "## {}\n{}\n", entry.timestamp.format("%Y-%m-%dT%H:%M"), entry.content).ok();
|
||||
}
|
||||
text
|
||||
}
|
||||
|
||||
impl Agent {
|
||||
pub fn new(
|
||||
|
|
@ -216,7 +208,6 @@ impl Agent {
|
|||
entries: Vec::new(),
|
||||
};
|
||||
let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
|
||||
let agent_cycles = crate::subconscious::subconscious::AgentCycleState::new(&session_id);
|
||||
let mut agent = Self {
|
||||
client,
|
||||
tools: tools::tools(),
|
||||
|
|
@ -238,7 +229,8 @@ impl Agent {
|
|||
prompt_file,
|
||||
session_id,
|
||||
generation: 0,
|
||||
agent_cycles,
|
||||
memory_scoring_in_flight: false,
|
||||
memory_scores: Vec::new(),
|
||||
active_tools,
|
||||
changed: Arc::new(tokio::sync::Notify::new()),
|
||||
};
|
||||
|
|
@ -258,7 +250,7 @@ impl Agent {
|
|||
if !ctx.is_empty() {
|
||||
msgs.push(Message::user(ctx));
|
||||
}
|
||||
let jnl = render_journal(&self.context.journal);
|
||||
let jnl = context::render_journal(&self.context.journal);
|
||||
if !jnl.is_empty() {
|
||||
msgs.push(Message::user(jnl));
|
||||
}
|
||||
|
|
@ -267,21 +259,6 @@ impl Agent {
|
|||
}
|
||||
|
||||
/// Run agent orchestration cycle, returning structured output.
|
||||
fn run_agent_cycle(&mut self) -> crate::subconscious::subconscious::AgentCycleOutput {
|
||||
let transcript_path = self.conversation_log.as_ref()
|
||||
.map(|l| l.path().to_string_lossy().to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
let session = crate::session::HookSession::from_fields(
|
||||
self.session_id.clone(),
|
||||
transcript_path,
|
||||
"UserPromptSubmit".into(),
|
||||
);
|
||||
|
||||
self.agent_cycles.trigger(&session);
|
||||
std::mem::take(&mut self.agent_cycles.last_output)
|
||||
}
|
||||
|
||||
/// Push a conversation message — stamped and logged.
|
||||
pub fn push_message(&mut self, mut msg: Message) {
|
||||
msg.stamp();
|
||||
|
|
@ -289,7 +266,7 @@ impl Agent {
|
|||
self.push_entry(entry);
|
||||
}
|
||||
|
||||
fn push_entry(&mut self, entry: ConversationEntry) {
|
||||
pub fn push_entry(&mut self, entry: ConversationEntry) {
|
||||
if let Some(ref log) = self.conversation_log {
|
||||
if let Err(e) = log.append(&entry) {
|
||||
eprintln!("warning: failed to log entry: {:#}", e);
|
||||
|
|
@ -328,31 +305,11 @@ impl Agent {
|
|||
pub async fn turn(
|
||||
agent: Arc<tokio::sync::Mutex<Agent>>,
|
||||
) -> Result<TurnResult> {
|
||||
// --- Pre-loop setup (lock 1): agent cycle, memories, user input ---
|
||||
// --- Pre-loop setup (lock 1): collect finished tools ---
|
||||
let active_tools = {
|
||||
let mut finished = Vec::new();
|
||||
let tools = {
|
||||
let mut me = agent.lock().await;
|
||||
|
||||
let cycle = me.run_agent_cycle();
|
||||
for key in &cycle.surfaced_keys {
|
||||
if let Some(rendered) = crate::cli::node::render_node(
|
||||
&crate::store::Store::load().unwrap_or_default(), key,
|
||||
) {
|
||||
let mut msg = Message::user(format!(
|
||||
"<system-reminder>\n--- {} (surfaced) ---\n{}\n</system-reminder>",
|
||||
key, rendered,
|
||||
));
|
||||
msg.stamp();
|
||||
me.push_entry(ConversationEntry::Memory { key: key.clone(), message: msg });
|
||||
}
|
||||
}
|
||||
if let Some(ref reflection) = cycle.reflection {
|
||||
me.push_message(Message::user(format!(
|
||||
"<system-reminder>\n--- subconscious reflection ---\n{}\n</system-reminder>",
|
||||
reflection.trim(),
|
||||
)));
|
||||
}
|
||||
let me = agent.lock().await;
|
||||
|
||||
// Collect completed background tool handles — remove from active list
|
||||
// but don't await yet (MutexGuard isn't Send).
|
||||
|
|
@ -647,83 +604,38 @@ impl Agent {
|
|||
|
||||
/// Build context state summary for the debug screen.
|
||||
pub fn context_state_summary(&self, memory_scores: Option<&learn::MemoryScore>) -> Vec<ContextSection> {
|
||||
let count = |s: &str| self.tokenizer.encode_with_special_tokens(s).len();
|
||||
let count_msg = |m: &Message| context::msg_token_count(&self.tokenizer, m);
|
||||
|
||||
let mut sections = Vec::new();
|
||||
|
||||
// System prompt
|
||||
// System prompt — counted as the actual message sent
|
||||
let system_msg = Message::system(&self.context.system_prompt);
|
||||
sections.push(ContextSection {
|
||||
name: "System prompt".into(),
|
||||
tokens: count(&self.context.system_prompt),
|
||||
tokens: count_msg(&system_msg),
|
||||
content: self.context.system_prompt.clone(),
|
||||
children: Vec::new(),
|
||||
});
|
||||
|
||||
// Personality — parent with file children
|
||||
let personality_children: Vec<ContextSection> = self.context.personality.iter()
|
||||
.map(|(name, content)| ContextSection {
|
||||
name: name.clone(),
|
||||
tokens: count(content),
|
||||
content: content.clone(),
|
||||
children: Vec::new(),
|
||||
})
|
||||
.collect();
|
||||
let personality_tokens: usize = personality_children.iter().map(|c| c.tokens).sum();
|
||||
// Context message (personality + working stack) — counted as the
|
||||
// single user message that assemble_api_messages sends
|
||||
let context_rendered = self.context.render_context_message();
|
||||
let context_msg = Message::user(&context_rendered);
|
||||
sections.push(ContextSection {
|
||||
name: format!("Personality ({} files)", personality_children.len()),
|
||||
tokens: personality_tokens,
|
||||
content: String::new(),
|
||||
children: personality_children,
|
||||
name: format!("Identity ({} files + stack)", self.context.personality.len()),
|
||||
tokens: count_msg(&context_msg),
|
||||
content: context_rendered,
|
||||
children: Vec::new(),
|
||||
});
|
||||
|
||||
// Journal
|
||||
{
|
||||
let journal_children: Vec<ContextSection> = self.context.journal.iter()
|
||||
.map(|entry| {
|
||||
let preview: String = entry.content.lines()
|
||||
.find(|l| !l.trim().is_empty())
|
||||
.unwrap_or("").chars().take(60).collect();
|
||||
ContextSection {
|
||||
name: format!("{}: {}", entry.timestamp.format("%Y-%m-%dT%H:%M"), preview),
|
||||
tokens: count(&entry.content),
|
||||
content: entry.content.clone(),
|
||||
children: Vec::new(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let journal_tokens: usize = journal_children.iter().map(|c| c.tokens).sum();
|
||||
sections.push(ContextSection {
|
||||
name: format!("Journal ({} entries)", journal_children.len()),
|
||||
tokens: journal_tokens,
|
||||
content: String::new(),
|
||||
children: journal_children,
|
||||
});
|
||||
}
|
||||
|
||||
// Working stack — instructions + items as children
|
||||
let instructions = std::fs::read_to_string(working_stack::instructions_path())
|
||||
.unwrap_or_default();
|
||||
let mut stack_children = vec![ContextSection {
|
||||
name: "Instructions".into(),
|
||||
tokens: count(&instructions),
|
||||
content: instructions,
|
||||
children: Vec::new(),
|
||||
}];
|
||||
for (i, item) in self.context.working_stack.iter().enumerate() {
|
||||
let marker = if i == self.context.working_stack.len() - 1 { "→" } else { " " };
|
||||
stack_children.push(ContextSection {
|
||||
name: format!("{} [{}] {}", marker, i, item),
|
||||
tokens: count(item),
|
||||
content: String::new(),
|
||||
children: Vec::new(),
|
||||
});
|
||||
}
|
||||
let stack_tokens: usize = stack_children.iter().map(|c| c.tokens).sum();
|
||||
// Journal — counted as the single rendered message sent
|
||||
let journal_rendered = context::render_journal(&self.context.journal);
|
||||
let journal_msg = Message::user(&journal_rendered);
|
||||
sections.push(ContextSection {
|
||||
name: format!("Working stack ({} items)", self.context.working_stack.len()),
|
||||
tokens: stack_tokens,
|
||||
content: String::new(),
|
||||
children: stack_children,
|
||||
name: format!("Journal ({} entries)", self.context.journal.len()),
|
||||
tokens: if journal_rendered.is_empty() { 0 } else { count_msg(&journal_msg) },
|
||||
content: journal_rendered,
|
||||
children: Vec::new(),
|
||||
});
|
||||
|
||||
// Memory nodes — extracted from Memory entries in the conversation
|
||||
|
|
@ -737,7 +649,6 @@ impl Agent {
|
|||
ConversationEntry::Memory { key, .. } => key.as_str(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let text = entry.message().content_text();
|
||||
// Show node weight from graph (updated by incremental scorer)
|
||||
let graph_weight = crate::hippocampus::store::Store::load().ok()
|
||||
.and_then(|s| s.nodes.get(key).map(|n| n.weight));
|
||||
|
|
@ -754,7 +665,7 @@ impl Agent {
|
|||
};
|
||||
ContextSection {
|
||||
name: label,
|
||||
tokens: count(text),
|
||||
tokens: count_msg(entry.message()),
|
||||
content: String::new(),
|
||||
children: Vec::new(),
|
||||
}
|
||||
|
|
@ -769,9 +680,9 @@ impl Agent {
|
|||
});
|
||||
}
|
||||
|
||||
// Conversation — each message as a child
|
||||
let conv_messages = &self.context.entries;
|
||||
let conv_children: Vec<ContextSection> = conv_messages.iter().enumerate()
|
||||
// Conversation — non-memory entries only (memories counted above)
|
||||
let conv_children: Vec<ContextSection> = self.context.entries.iter().enumerate()
|
||||
.filter(|(_, e)| !e.is_memory())
|
||||
.map(|(i, entry)| {
|
||||
let m = entry.message();
|
||||
let text = m.content.as_ref()
|
||||
|
|
@ -797,7 +708,7 @@ impl Agent {
|
|||
}
|
||||
}
|
||||
};
|
||||
let tokens = count(&text);
|
||||
let tokens = count_msg(entry.api_message());
|
||||
let cfg = crate::config::get();
|
||||
let role_name = if entry.is_memory() { "mem".to_string() } else {
|
||||
match m.role {
|
||||
|
|
@ -1017,7 +928,10 @@ impl Agent {
|
|||
let before_mem = self.context.entries.iter().filter(|e| e.is_memory()).count();
|
||||
let before_conv = before - before_mem;
|
||||
|
||||
// Dedup memory, trim to budget, reload journal
|
||||
// Load journal BEFORE trimming so trim accounts for journal cost
|
||||
self.load_startup_journal();
|
||||
|
||||
// Dedup memory, trim to budget
|
||||
let entries = self.context.entries.clone();
|
||||
self.context.entries = crate::agent::context::trim_entries(
|
||||
&self.context,
|
||||
|
|
@ -1031,9 +945,6 @@ impl Agent {
|
|||
|
||||
dbglog!("[compact] entries: {} → {} (mem: {} → {}, conv: {} → {})",
|
||||
before, after, before_mem, after_mem, before_conv, after_conv);
|
||||
|
||||
|
||||
self.load_startup_journal();
|
||||
self.generation += 1;
|
||||
self.last_prompt_tokens = 0;
|
||||
self.publish_context_state();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue