Thread request priority through the API call chain to vLLM's priority scheduler. Lower value = higher priority, with preemption.

Priority is set per-agent in the .agent header:
- interactive (runner): 0 (default, highest)
- surface-observe: 1 (near-realtime, watches conversation)
- all other agents: 10 (batch, default if not specified)

Requires vLLM started with --scheduling-policy priority.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
49 lines
1.6 KiB
Rust
49 lines
1.6 KiB
Rust
// LLM utilities: model invocation via direct API
|
|
|
|
use crate::store::Store;
|
|
use std::fs;
|
|
|
|
/// Simple LLM call for non-agent uses (audit, digest, compare).
|
|
/// Logs to llm-logs/{caller}/ file.
|
|
pub(crate) fn call_simple(caller: &str, prompt: &str) -> Result<String, String> {
|
|
let log_dir = dirs::home_dir().unwrap_or_default()
|
|
.join(".consciousness/logs/llm").join(caller);
|
|
fs::create_dir_all(&log_dir).ok();
|
|
let log_path = log_dir.join(format!("{}.txt", crate::store::compact_timestamp()));
|
|
|
|
use std::io::Write;
|
|
let log = move |msg: &str| {
|
|
if let Ok(mut f) = fs::OpenOptions::new()
|
|
.create(true).append(true).open(&log_path)
|
|
{
|
|
let _ = writeln!(f, "{}", msg);
|
|
}
|
|
};
|
|
|
|
let prompts = vec![prompt.to_string()];
|
|
let phases = vec![];
|
|
super::api::call_api_with_tools_sync(caller, &prompts, &phases, None, 10, &[], None, &log)
|
|
}
|
|
|
|
/// Call a model using an agent definition's configuration (multi-step).
|
|
/// Optional bail_fn is called between steps — return Err to stop the pipeline.
|
|
pub(crate) fn call_for_def_multi(
|
|
def: &super::defs::AgentDef,
|
|
prompts: &[String],
|
|
phases: &[String],
|
|
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
|
log: &(dyn Fn(&str) + Sync),
|
|
) -> Result<String, String> {
|
|
super::api::call_api_with_tools_sync(&def.agent, prompts, phases, def.temperature, def.priority, &def.tools, bail_fn, log)
|
|
}
|
|
|
|
|
|
/// Get all keys for prompt context.
|
|
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
|
|
let mut keys: Vec<String> = store.nodes.keys()
|
|
.cloned()
|
|
.collect();
|
|
keys.sort();
|
|
keys.truncate(200);
|
|
keys
|
|
}
|