agent: add sampling parameters (temperature, top_p, top_k)

Move temperature from a per-call parameter to an Agent field,
add top_p and top_k. All three are sent to the API via a new
SamplingParams struct, displayed on the F5 thalamus screen.

Defaults: temperature=0.6, top_p=0.95, top_k=20 (Qwen3.5 defaults).

Also adds top_p and top_k to ChatRequest so they're sent in the
API payload. Previously only temperature was sent.

UI controls for adjusting these at runtime are not yet implemented.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
ProofOfConcept 2026-04-04 13:48:24 -04:00 committed by Kent Overstreet
parent 22f955ad9f
commit dd009742ef
7 changed files with 53 additions and 8 deletions

View file

@@ -77,6 +77,10 @@ pub struct Agent {
last_prompt_tokens: u32,
/// Current reasoning effort level ("none", "low", "high").
pub reasoning_effort: String,
/// Sampling parameters — adjustable at runtime from the thalamus screen.
pub temperature: f32,
pub top_p: f32,
pub top_k: u32,
/// Persistent conversation log — append-only record of all messages.
conversation_log: Option<ConversationLog>,
/// BPE tokenizer for token counting (cl100k_base — close enough
@@ -137,6 +141,9 @@ impl Agent {
tool_defs,
last_prompt_tokens: 0,
reasoning_effort: "none".to_string(),
temperature: 0.6,
top_p: 0.95,
top_k: 20,
conversation_log,
tokenizer,
context,
@@ -288,12 +295,17 @@ impl Agent {
let (mut rx, _stream_guard) = {
let me = agent.lock().await;
let api_messages = me.assemble_api_messages();
let sampling = api::SamplingParams {
temperature: me.temperature,
top_p: me.top_p,
top_k: me.top_k,
};
me.client.start_stream(
&api_messages,
Some(&me.tool_defs),
ui_tx,
&me.reasoning_effort,
None,
sampling,
None,
)
};