vLLM priority scheduling for agents
Thread request priority through the API call chain to vLLM's priority scheduler. A lower value means higher priority, with preemption.

Priority is set per-agent in the .agent header:
- interactive (runner): 0 (default, highest)
- surface-observe: 1 (near-realtime, watches conversation)
- all other agents: 10 (batch, default if not specified)

Requires vLLM started with --scheduling-policy priority.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
503e2995c1
commit
c72eb4d528
8 changed files with 27 additions and 7 deletions
|
|
@ -22,7 +22,7 @@ pub(crate) fn call_simple(caller: &str, prompt: &str) -> Result<String, String>
|
|||
|
||||
let prompts = vec![prompt.to_string()];
|
||||
let phases = vec![];
|
||||
super::api::call_api_with_tools_sync(caller, &prompts, &phases, None, &[], None, &log)
|
||||
super::api::call_api_with_tools_sync(caller, &prompts, &phases, None, 10, &[], None, &log)
|
||||
}
|
||||
|
||||
/// Call a model using an agent definition's configuration (multi-step).
|
||||
|
|
@ -34,7 +34,7 @@ pub(crate) fn call_for_def_multi(
|
|||
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
) -> Result<String, String> {
|
||||
super::api::call_api_with_tools_sync(&def.agent, prompts, phases, def.temperature, &def.tools, bail_fn, log)
|
||||
super::api::call_api_with_tools_sync(&def.agent, prompts, phases, def.temperature, def.priority, &def.tools, bail_fn, log)
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue