forked from kent/consciousness
vLLM priority scheduling for agents
Thread request priority through the API call chain to vLLM's priority scheduler. Lower value = higher priority, with preemption.

Priority is set per-agent in the .agent header:
- interactive (runner): 0 (default, highest)
- surface-observe: 1 (near-realtime, watches conversation)
- all other agents: 10 (batch, default if not specified)

Requires vLLM started with --scheduling-policy priority.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
503e2995c1
commit
c72eb4d528
8 changed files with 27 additions and 7 deletions
|
|
@@ -35,6 +35,7 @@ pub async fn call_api_with_tools(
|
|||
prompts: &[String],
|
||||
phases: &[String],
|
||||
temperature: Option<f32>,
|
||||
priority: i32,
|
||||
tools: &[String],
|
||||
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
||||
log: &dyn Fn(&str),
|
||||
|
|
@@ -82,6 +83,7 @@ pub async fn call_api_with_tools(
|
|||
&ui_tx,
|
||||
&reasoning,
|
||||
temperature,
|
||||
Some(priority),
|
||||
).await {
|
||||
Ok((msg, usage)) => {
|
||||
msg_opt = Some(msg);
|
||||
|
|
@@ -233,6 +235,7 @@ pub fn call_api_with_tools_sync(
|
|||
prompts: &[String],
|
||||
phases: &[String],
|
||||
temperature: Option<f32>,
|
||||
priority: i32,
|
||||
tools: &[String],
|
||||
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
|
|
@@ -244,7 +247,7 @@ pub fn call_api_with_tools_sync(
|
|||
.build()
|
||||
.map_err(|e| format!("tokio runtime: {}", e))?;
|
||||
rt.block_on(
|
||||
call_api_with_tools(agent, prompts, phases, temperature, tools, bail_fn, log)
|
||||
call_api_with_tools(agent, prompts, phases, temperature, priority, tools, bail_fn, log)
|
||||
)
|
||||
}).join().unwrap()
|
||||
})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue