agent: add sampling parameters (temperature, top_p, top_k)
Move temperature from a per-call parameter to an Agent field, add top_p and top_k. All three are sent to the API via a new SamplingParams struct, displayed on the F5 thalamus screen. Defaults: temperature=0.6, top_p=0.95, top_k=20 (Qwen3.5 defaults). Also adds top_p and top_k to ChatRequest so they're sent in the API payload. Previously only temperature was sent. UI controls for adjusting these at runtime are not yet implemented. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
22f955ad9f
commit
dd009742ef
7 changed files with 53 additions and 8 deletions
|
|
@@ -77,6 +77,10 @@ pub struct Agent {
|
|||
last_prompt_tokens: u32,
|
||||
/// Current reasoning effort level ("none", "low", "high").
|
||||
pub reasoning_effort: String,
|
||||
/// Sampling parameters — displayed on the thalamus screen (F5); runtime adjustment is not yet implemented.
|
||||
pub temperature: f32,
|
||||
pub top_p: f32,
|
||||
pub top_k: u32,
|
||||
/// Persistent conversation log — append-only record of all messages.
|
||||
conversation_log: Option<ConversationLog>,
|
||||
/// BPE tokenizer for token counting (cl100k_base — close enough
|
||||
|
|
@@ -137,6 +141,9 @@ impl Agent {
|
|||
tool_defs,
|
||||
last_prompt_tokens: 0,
|
||||
reasoning_effort: "none".to_string(),
|
||||
temperature: 0.6,
|
||||
top_p: 0.95,
|
||||
top_k: 20,
|
||||
conversation_log,
|
||||
tokenizer,
|
||||
context,
|
||||
|
|
@@ -288,12 +295,17 @@ impl Agent {
|
|||
let (mut rx, _stream_guard) = {
|
||||
let me = agent.lock().await;
|
||||
let api_messages = me.assemble_api_messages();
|
||||
let sampling = api::SamplingParams {
|
||||
temperature: me.temperature,
|
||||
top_p: me.top_p,
|
||||
top_k: me.top_k,
|
||||
};
|
||||
me.client.start_stream(
|
||||
&api_messages,
|
||||
Some(&me.tool_defs),
|
||||
ui_tx,
|
||||
&me.reasoning_effort,
|
||||
None,
|
||||
sampling,
|
||||
None,
|
||||
)
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue