Fix context budgeting and compaction

- Budget now counts the exact tokens of the messages that assemble_api_messages
  sends, not the raw string content. This eliminates undercounting caused by
  formatting overhead (journal headers, personality separators, working stack).

- Load journal before trimming so trim accounts for journal cost.

- Compact before every turn, not just after turn completion. This prevents
  memories surfaced by agent_cycle from pushing context over budget.

- Move agent_cycle orchestration from Agent::turn to Mind::start_turn —
  surfaced memories and reflections now precede the user message.

- Move AgentCycleState from Agent to Mind — it's orchestration, not
  per-agent state. memory_scoring_in_flight and memory_scores stay on
  Agent where they belong.

- Tag DMN entries as ConversationEntry::Dmn — compaction evicts them
  first since they're ephemeral. Compaction also prefers evicting
  memories over conversation when memories exceed 50% of entry tokens.

- Kill /retry slash command.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-06 21:48:12 -04:00
parent c22b8c3a6f
commit d5e6f55da9
5 changed files with 194 additions and 170 deletions

View file

@ -199,6 +199,7 @@ pub struct Mind {
pub agent: Arc<tokio::sync::Mutex<Agent>>,
pub shared: Arc<SharedMindState>,
pub config: SessionConfig,
agent_cycles: tokio::sync::Mutex<crate::subconscious::subconscious::AgentCycleState>,
turn_tx: mpsc::Sender<(Result<TurnResult>, StreamTarget)>,
turn_watch: tokio::sync::watch::Sender<bool>,
bg_tx: mpsc::UnboundedSender<BgEvent>,
@ -219,7 +220,7 @@ impl Mind {
config.session_dir.join("conversation.jsonl"),
).ok();
let agent = Arc::new(tokio::sync::Mutex::new(Agent::new(
let ag = Agent::new(
client,
config.system_prompt.clone(),
config.context_parts.clone(),
@ -228,7 +229,9 @@ impl Mind {
conversation_log,
shared_context,
shared_active_tools,
)));
);
let agent_cycles = crate::subconscious::subconscious::AgentCycleState::new(&ag.session_id);
let agent = Arc::new(tokio::sync::Mutex::new(ag));
let shared = Arc::new(std::sync::Mutex::new(MindState::new(config.app.dmn.max_turns)));
let (turn_watch, _) = tokio::sync::watch::channel(false);
@ -238,7 +241,8 @@ impl Mind {
sup.load_config();
sup.ensure_running();
Self { agent, shared, config, turn_tx, turn_watch, bg_tx,
Self { agent, shared, config, agent_cycles: tokio::sync::Mutex::new(agent_cycles),
turn_tx, turn_watch, bg_tx,
bg_rx: std::sync::Mutex::new(Some(bg_rx)), _supervisor: sup }
}
@ -319,8 +323,8 @@ impl Mind {
tokio::spawn(async move {
let (context, client) = {
let mut ag = agent.lock().await;
if ag.agent_cycles.memory_scoring_in_flight { return; }
ag.agent_cycles.memory_scoring_in_flight = true;
if ag.memory_scoring_in_flight { return; }
ag.memory_scoring_in_flight = true;
(ag.context.clone(), ag.client_clone())
};
let result = learn::score_memories_incremental(
@ -328,8 +332,8 @@ impl Mind {
).await;
{
let mut ag = agent.lock().await;
ag.agent_cycles.memory_scoring_in_flight = false;
if let Ok(ref scores) = result { ag.agent_cycles.memory_scores = scores.clone(); }
ag.memory_scoring_in_flight = false;
if let Ok(ref scores) = result { ag.memory_scores = scores.clone(); }
}
let _ = bg_tx.send(BgEvent::ScoringDone);
});
@ -342,7 +346,63 @@ impl Mind {
async fn start_turn(&self, text: &str, target: StreamTarget) {
{
let mut ag = self.agent.lock().await;
ag.push_message(crate::agent::api::types::Message::user(text));
// Run agent cycle — surface memories and reflection before the user message
let transcript_path = ag.conversation_log.as_ref()
.map(|l| l.path().to_string_lossy().to_string())
.unwrap_or_default();
let session = crate::session::HookSession::from_fields(
ag.session_id.clone(),
transcript_path,
"UserPromptSubmit".into(),
);
let mut cycles = self.agent_cycles.lock().await;
cycles.trigger(&session);
let cycle = std::mem::take(&mut cycles.last_output);
drop(cycles);
for key in &cycle.surfaced_keys {
if let Some(rendered) = crate::cli::node::render_node(
&crate::store::Store::load().unwrap_or_default(), &key,
) {
let mut msg = crate::agent::api::types::Message::user(format!(
"<system-reminder>\n--- {} (surfaced) ---\n{}\n</system-reminder>",
key, rendered,
));
msg.stamp();
ag.push_entry(crate::agent::context::ConversationEntry::Memory {
key: key.clone(), message: msg,
});
}
}
if let Some(ref reflection) = cycle.reflection {
ag.push_message(crate::agent::api::types::Message::user(format!(
"<system-reminder>\n--- subconscious reflection ---\n{}\n</system-reminder>",
reflection.trim(),
)));
}
match target {
StreamTarget::Conversation => {
ag.push_message(crate::agent::api::types::Message::user(text));
}
StreamTarget::Autonomous => {
let mut msg = crate::agent::api::types::Message::user(text);
msg.stamp();
ag.push_entry(crate::agent::context::ConversationEntry::Dmn(msg));
}
}
// Compact if over budget before sending
let threshold = compaction_threshold(&self.config.app) as usize;
ag.publish_context_state();
let used = {
let sections = ag.shared_context.read().map(|s| s.clone()).unwrap_or_default();
crate::agent::context::sections_used(&sections)
};
if used > threshold {
ag.compact();
ag.notify("compacted");
}
}
self.shared.lock().unwrap().turn_active = true;
let _ = self.turn_watch.send(true);