Wire vLLM priority scheduling through all agent paths
The priority field existed in agent definitions and was serialized into vLLM requests, but was never actually set — every request went out with no priority, so vLLM treated them all equally. This meant background graph maintenance agents could compete with, and preempt, the main conversation.

Add priority to AgentState and set it at each call site (lower value = higher scheduling priority):
- 0 = interactive (main conversation)
- 1 = surface agent (needs to feed memories promptly)
- 2 = other subconscious agents
- 10 = unconscious/standalone agents (batch)

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
b115cec096
commit
bf503b571e
4 changed files with 15 additions and 2 deletions
|
|
@ -163,6 +163,9 @@ pub struct AgentState {
|
||||||
pub generation: u64,
|
pub generation: u64,
|
||||||
pub memory_scoring_in_flight: bool,
|
pub memory_scoring_in_flight: bool,
|
||||||
pub active_tools: tools::ActiveTools,
|
pub active_tools: tools::ActiveTools,
|
||||||
|
/// vLLM scheduling priority (lower = higher priority).
|
||||||
|
/// 0 = interactive, 1 = surface agent, 2 = other subconscious, 10 = unconscious.
|
||||||
|
pub priority: Option<i32>,
|
||||||
/// Forked agents should not compact on overflow — it blows the
|
/// Forked agents should not compact on overflow — it blows the
|
||||||
/// KV cache prefix and evicts the step prompts.
|
/// KV cache prefix and evicts the step prompts.
|
||||||
pub no_compact: bool,
|
pub no_compact: bool,
|
||||||
|
|
@ -225,6 +228,7 @@ impl Agent {
|
||||||
generation: 0,
|
generation: 0,
|
||||||
memory_scoring_in_flight: false,
|
memory_scoring_in_flight: false,
|
||||||
active_tools,
|
active_tools,
|
||||||
|
priority: Some(0),
|
||||||
no_compact: false,
|
no_compact: false,
|
||||||
changed: Arc::new(tokio::sync::Notify::new()),
|
changed: Arc::new(tokio::sync::Notify::new()),
|
||||||
}),
|
}),
|
||||||
|
|
@ -261,6 +265,7 @@ impl Agent {
|
||||||
generation: 0,
|
generation: 0,
|
||||||
memory_scoring_in_flight: false,
|
memory_scoring_in_flight: false,
|
||||||
active_tools: tools::ActiveTools::new(),
|
active_tools: tools::ActiveTools::new(),
|
||||||
|
priority: None,
|
||||||
no_compact: true,
|
no_compact: true,
|
||||||
changed: Arc::new(tokio::sync::Notify::new()),
|
changed: Arc::new(tokio::sync::Notify::new()),
|
||||||
}),
|
}),
|
||||||
|
|
@ -318,7 +323,7 @@ impl Agent {
|
||||||
top_p: st.top_p,
|
top_p: st.top_p,
|
||||||
top_k: st.top_k,
|
top_k: st.top_k,
|
||||||
},
|
},
|
||||||
None,
|
st.priority,
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -143,6 +143,7 @@ impl AutoAgent {
|
||||||
let mut st = agent.state.lock().await;
|
let mut st = agent.state.lock().await;
|
||||||
st.provenance = format!("standalone:{}", self.name);
|
st.provenance = format!("standalone:{}", self.name);
|
||||||
st.tools = self.tools.clone();
|
st.tools = self.tools.clone();
|
||||||
|
st.priority = Some(10);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut backend = Backend(agent);
|
let mut backend = Backend(agent);
|
||||||
|
|
|
||||||
|
|
@ -593,7 +593,13 @@ impl Subconscious {
|
||||||
|
|
||||||
let forked = agent.fork(auto.tools.clone()).await;
|
let forked = agent.fork(auto.tools.clone()).await;
|
||||||
let prov = format!("agent:{}", auto.name);
|
let prov = format!("agent:{}", auto.name);
|
||||||
forked.state.lock().await.provenance = prov.clone();
|
{
|
||||||
|
let mut st = forked.state.lock().await;
|
||||||
|
st.provenance = prov.clone();
|
||||||
|
// Surface agent gets near-interactive priority;
|
||||||
|
// other subconscious agents get lower priority.
|
||||||
|
st.priority = Some(if auto.name == "surface" { 1 } else { 2 });
|
||||||
|
}
|
||||||
let fork_point = forked.context.lock().await.conversation().len();
|
let fork_point = forked.context.lock().await.conversation().len();
|
||||||
|
|
||||||
self.agents[idx].forked_agent = Some(forked.clone());
|
self.agents[idx].forked_agent = Some(forked.clone());
|
||||||
|
|
|
||||||
|
|
@ -292,6 +292,7 @@ impl Unconscious {
|
||||||
let mut st = agent.state.lock().await;
|
let mut st = agent.state.lock().await;
|
||||||
st.provenance = format!("unconscious:{}", auto.name);
|
st.provenance = format!("unconscious:{}", auto.name);
|
||||||
st.tools = auto.tools.clone();
|
st.tools = auto.tools.clone();
|
||||||
|
st.priority = Some(10);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.agents[idx].agent = Some(agent.clone());
|
self.agents[idx].agent = Some(agent.clone());
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue