agent: add sampling parameters (temperature, top_p, top_k)

Move temperature from a per-call parameter to an Agent field,
add top_p and top_k. All three are sent to the API via a new
SamplingParams struct and are displayed on the F5 thalamus screen.

Defaults: temperature=0.6, top_p=0.95, top_k=20 (Qwen3.5 defaults).

Also adds top_p and top_k to ChatRequest so they're sent in the
API payload. Previously only temperature was sent.

UI controls for adjusting these at runtime are not yet implemented.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
ProofOfConcept 2026-04-04 13:48:24 -04:00 committed by Kent Overstreet
parent 22f955ad9f
commit dd009742ef
7 changed files with 53 additions and 8 deletions

View file

@ -29,6 +29,14 @@ impl Drop for AbortOnDrop {
}
}
/// Sampling parameters for model generation.
///
/// Bundled into one struct so backends receive all knobs together
/// instead of a growing list of per-call arguments.
#[derive(Debug, Clone, Copy)]
pub struct SamplingParams {
    /// Softmax temperature; higher values produce more random output.
    pub temperature: f32,
    /// Nucleus-sampling probability mass cutoff.
    pub top_p: f32,
    /// Number of highest-probability tokens considered at each step.
    pub top_k: u32,
}

impl Default for SamplingParams {
    /// Defaults matching the model defaults this agent targets
    /// (temperature=0.6, top_p=0.95, top_k=20).
    fn default() -> Self {
        Self {
            temperature: 0.6,
            top_p: 0.95,
            top_k: 20,
        }
    }
}
// ─────────────────────────────────────────────────────────────
// Stream events — yielded by backends, consumed by the runner
// ─────────────────────────────────────────────────────────────
@ -93,7 +101,7 @@ impl ApiClient {
tools: Option<&[ToolDef]>,
ui_tx: &UiSender,
reasoning_effort: &str,
temperature: Option<f32>,
sampling: SamplingParams,
priority: Option<i32>,
) -> (mpsc::UnboundedReceiver<StreamEvent>, AbortOnDrop) {
let (tx, rx) = mpsc::unbounded_channel();
@ -110,7 +118,7 @@ impl ApiClient {
let result = openai::stream_events(
&client, &base_url, &api_key, &model,
&messages, tools.as_deref(), &tx, &ui_tx,
&reasoning_effort, temperature, priority,
&reasoning_effort, sampling, priority,
).await;
if let Err(e) = result {
let _ = tx.send(StreamEvent::Error(e.to_string()));
@ -126,11 +134,11 @@ impl ApiClient {
tools: Option<&[ToolDef]>,
ui_tx: &UiSender,
reasoning_effort: &str,
temperature: Option<f32>,
sampling: SamplingParams,
priority: Option<i32>,
) -> Result<(Message, Option<Usage>)> {
// Use the event stream and accumulate into a message.
let (mut rx, _handle) = self.start_stream(messages, tools, ui_tx, reasoning_effort, temperature, priority);
let (mut rx, _handle) = self.start_stream(messages, tools, ui_tx, reasoning_effort, sampling, priority);
let mut content = String::new();
let mut tool_calls: Vec<ToolCall> = Vec::new();
let mut usage = None;