From bf503b571eca2f063f36e393c34c703b0a6525f8 Mon Sep 17 00:00:00 2001
From: ProofOfConcept
Date: Thu, 9 Apr 2026 20:38:33 -0400
Subject: [PATCH] Wire vLLM priority scheduling through all agent paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The priority field existed in agent definitions and was serialized
into vLLM requests, but was never actually set — every request went
out with no priority, so vLLM treated them equally. This meant
background graph maintenance agents could preempt the main
conversation.

Add priority to AgentState and set it at each call site:
  0 = interactive (main conversation)
  1 = surface agent (needs to feed memories promptly)
  2 = other subconscious agents
  10 = unconscious/standalone agents (batch)

Co-Authored-By: Proof of Concept
---
 src/agent/mod.rs         | 7 ++++++-
 src/agent/oneshot.rs     | 1 +
 src/mind/subconscious.rs | 8 +++++++-
 src/mind/unconscious.rs  | 1 +
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/agent/mod.rs b/src/agent/mod.rs
index 695569f..54bc418 100644
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@@ -163,6 +163,9 @@ pub struct AgentState {
     pub generation: u64,
     pub memory_scoring_in_flight: bool,
     pub active_tools: tools::ActiveTools,
+    /// vLLM scheduling priority (lower = higher priority).
+    /// 0 = interactive, 1 = surface agent, 2 = other subconscious, 10 = unconscious.
+    pub priority: Option,
     /// Forked agents should not compact on overflow — it blows the
     /// KV cache prefix and evicts the step prompts.
     pub no_compact: bool,
@@ -225,6 +228,7 @@ impl Agent {
                 generation: 0,
                 memory_scoring_in_flight: false,
                 active_tools,
+                priority: Some(0),
                 no_compact: false,
                 changed: Arc::new(tokio::sync::Notify::new()),
             }),
@@ -261,6 +265,7 @@ impl Agent {
                 generation: 0,
                 memory_scoring_in_flight: false,
                 active_tools: tools::ActiveTools::new(),
+                priority: None,
                 no_compact: true,
                 changed: Arc::new(tokio::sync::Notify::new()),
             }),
@@ -318,7 +323,7 @@ impl Agent {
                     top_p: st.top_p,
                     top_k: st.top_k,
                 },
-                None,
+                st.priority,
             )
         };
 
diff --git a/src/agent/oneshot.rs b/src/agent/oneshot.rs
index 24117bc..81bcc91 100644
--- a/src/agent/oneshot.rs
+++ b/src/agent/oneshot.rs
@@ -143,6 +143,7 @@ impl AutoAgent {
             let mut st = agent.state.lock().await;
             st.provenance = format!("standalone:{}", self.name);
             st.tools = self.tools.clone();
+            st.priority = Some(10);
         }
 
         let mut backend = Backend(agent);
diff --git a/src/mind/subconscious.rs b/src/mind/subconscious.rs
index a35d586..7ea9ae4 100644
--- a/src/mind/subconscious.rs
+++ b/src/mind/subconscious.rs
@@ -593,7 +593,13 @@ impl Subconscious {
 
             let forked = agent.fork(auto.tools.clone()).await;
             let prov = format!("agent:{}", auto.name);
-            forked.state.lock().await.provenance = prov.clone();
+            {
+                let mut st = forked.state.lock().await;
+                st.provenance = prov.clone();
+                // Surface agent gets near-interactive priority;
+                // other subconscious agents get lower priority.
+                st.priority = Some(if auto.name == "surface" { 1 } else { 2 });
+            }
             let fork_point = forked.context.lock().await.conversation().len();
             self.agents[idx].forked_agent = Some(forked.clone());
 
diff --git a/src/mind/unconscious.rs b/src/mind/unconscious.rs
index 2baa5aa..eb7f854 100644
--- a/src/mind/unconscious.rs
+++ b/src/mind/unconscious.rs
@@ -292,6 +292,7 @@ impl Unconscious {
                 let mut st = agent.state.lock().await;
                 st.provenance = format!("unconscious:{}", auto.name);
                 st.tools = auto.tools.clone();
+                st.priority = Some(10);
             }
 
             self.agents[idx].agent = Some(agent.clone());