agent: add sampling parameters (temperature, top_p, top_k)
Move temperature from a per-call parameter to an Agent field, and add top_p and top_k. All three are carried in a new SamplingParams struct, sent to the API, and displayed on the F5 thalamus screen. Defaults: temperature=0.6, top_p=0.95, top_k=20 (Qwen3.5 defaults). Also adds top_p and top_k to ChatRequest so they're included in the API payload; previously only temperature was sent. UI controls for adjusting these at runtime are not yet implemented. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
22f955ad9f
commit
dd009742ef
7 changed files with 53 additions and 8 deletions
|
|
@ -26,7 +26,7 @@ pub(super) async fn stream_events(
|
|||
tx: &mpsc::UnboundedSender<StreamEvent>,
|
||||
ui_tx: &UiSender,
|
||||
reasoning_effort: &str,
|
||||
temperature: Option<f32>,
|
||||
sampling: super::SamplingParams,
|
||||
priority: Option<i32>,
|
||||
) -> Result<()> {
|
||||
let request = ChatRequest {
|
||||
|
|
@ -35,7 +35,9 @@ pub(super) async fn stream_events(
|
|||
tool_choice: tools.map(|_| "auto".to_string()),
|
||||
tools: tools.map(|t| t.to_vec()),
|
||||
max_tokens: Some(16384),
|
||||
temperature: Some(temperature.unwrap_or(0.6)),
|
||||
temperature: Some(sampling.temperature),
|
||||
top_p: Some(sampling.top_p),
|
||||
top_k: Some(sampling.top_k),
|
||||
stream: Some(true),
|
||||
reasoning: if reasoning_effort != "none" && reasoning_effort != "default" {
|
||||
Some(ReasoningConfig {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue