agent: add sampling parameters (temperature, top_p, top_k)
Move temperature from a per-call parameter to an Agent field, add top_p and top_k. All three are sent to the API via a new SamplingParams struct, displayed on the F5 thalamus screen. Defaults: temperature=0.6, top_p=0.95, top_k=20 (Qwen3.5 defaults). Also adds top_p and top_k to ChatRequest so they're sent in the API payload. Previously only temperature was sent. UI controls for adjusting these at runtime are not yet implemented. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
22f955ad9f
commit
dd009742ef
7 changed files with 53 additions and 8 deletions
|
|
@ -29,6 +29,14 @@ impl Drop for AbortOnDrop {
|
|||
}
|
||||
}
|
||||
|
||||
/// Sampling parameters for model generation.
///
/// Groups the three knobs (temperature, top_p, top_k) into one value so
/// they can be threaded through the streaming API as a unit instead of
/// as separate per-call arguments.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct SamplingParams {
    /// Softmax temperature; higher values produce more random output.
    pub temperature: f32,
    /// Nucleus-sampling threshold — presumably the cumulative-probability
    /// cutoff forwarded to the backend; verify against the API payload.
    pub top_p: f32,
    /// Limit sampling to the `top_k` most likely tokens.
    pub top_k: u32,
}

impl Default for SamplingParams {
    /// Qwen3.5 recommended defaults (temperature=0.6, top_p=0.95,
    /// top_k=20), as documented when these parameters were introduced.
    fn default() -> Self {
        Self {
            temperature: 0.6,
            top_p: 0.95,
            top_k: 20,
        }
    }
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// Stream events — yielded by backends, consumed by the runner
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
|
|
@ -93,7 +101,7 @@ impl ApiClient {
|
|||
tools: Option<&[ToolDef]>,
|
||||
ui_tx: &UiSender,
|
||||
reasoning_effort: &str,
|
||||
temperature: Option<f32>,
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
) -> (mpsc::UnboundedReceiver<StreamEvent>, AbortOnDrop) {
|
||||
let (tx, rx) = mpsc::unbounded_channel();
|
||||
|
|
@ -110,7 +118,7 @@ impl ApiClient {
|
|||
let result = openai::stream_events(
|
||||
&client, &base_url, &api_key, &model,
|
||||
&messages, tools.as_deref(), &tx, &ui_tx,
|
||||
&reasoning_effort, temperature, priority,
|
||||
&reasoning_effort, sampling, priority,
|
||||
).await;
|
||||
if let Err(e) = result {
|
||||
let _ = tx.send(StreamEvent::Error(e.to_string()));
|
||||
|
|
@ -126,11 +134,11 @@ impl ApiClient {
|
|||
tools: Option<&[ToolDef]>,
|
||||
ui_tx: &UiSender,
|
||||
reasoning_effort: &str,
|
||||
temperature: Option<f32>,
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
) -> Result<(Message, Option<Usage>)> {
|
||||
// Use the event stream and accumulate into a message.
|
||||
let (mut rx, _handle) = self.start_stream(messages, tools, ui_tx, reasoning_effort, temperature, priority);
|
||||
let (mut rx, _handle) = self.start_stream(messages, tools, ui_tx, reasoning_effort, sampling, priority);
|
||||
let mut content = String::new();
|
||||
let mut tool_calls: Vec<ToolCall> = Vec::new();
|
||||
let mut usage = None;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue