agent: add sampling parameters (temperature, top_p, top_k)

Move temperature from a per-call parameter to an Agent field,
add top_p and top_k. All three are sent to the API via a new
SamplingParams struct, displayed on the F5 thalamus screen.

Defaults: temperature=0.6, top_p=0.95, top_k=20 (Qwen3.5 defaults).

Also adds top_p and top_k to ChatRequest so they're sent in the
API payload. Previously only temperature was sent.

UI controls for adjusting these at runtime are not yet implemented.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
ProofOfConcept 2026-04-04 13:48:24 -04:00 committed by Kent Overstreet
parent 22f955ad9f
commit dd009742ef
7 changed files with 53 additions and 8 deletions

View file

@@ -77,6 +77,10 @@ pub struct Agent {
last_prompt_tokens: u32,
/// Current reasoning effort level ("none", "low", "high").
pub reasoning_effort: String,
/// Sampling parameters — adjustable at runtime from the thalamus screen.
pub temperature: f32,
pub top_p: f32,
pub top_k: u32,
/// Persistent conversation log — append-only record of all messages.
conversation_log: Option<ConversationLog>,
/// BPE tokenizer for token counting (cl100k_base — close enough
@@ -137,6 +141,9 @@ impl Agent {
tool_defs,
last_prompt_tokens: 0,
reasoning_effort: "none".to_string(),
temperature: 0.6,
top_p: 0.95,
top_k: 20,
conversation_log,
tokenizer,
context,
@@ -288,12 +295,17 @@ impl Agent {
let (mut rx, _stream_guard) = {
let me = agent.lock().await;
let api_messages = me.assemble_api_messages();
let sampling = api::SamplingParams {
temperature: me.temperature,
top_p: me.top_p,
top_k: me.top_k,
};
me.client.start_stream(
&api_messages,
Some(&me.tool_defs),
ui_tx,
&me.reasoning_effort,
None,
sampling,
None,
)
};