agent: add sampling parameters (temperature, top_p, top_k)
Move temperature from a per-call parameter to an Agent field, add top_p and top_k. All three are sent to the API via a new SamplingParams struct, displayed on the F5 thalamus screen. Defaults: temperature=0.6, top_p=0.95, top_k=20 (Qwen3.5 defaults). Also adds top_p and top_k to ChatRequest so they're sent in the API payload. Previously only temperature was sent. UI controls for adjusting these at runtime are not yet implemented. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
22f955ad9f
commit
dd009742ef
7 changed files with 53 additions and 8 deletions
|
|
@ -29,6 +29,14 @@ impl Drop for AbortOnDrop {
|
|||
}
|
||||
}
|
||||
|
||||
/// Sampling parameters for model generation.
///
/// Groups the three knobs (temperature, top_p, top_k) into one value so
/// they can be threaded through the streaming API as a unit instead of
/// as separate per-call arguments.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct SamplingParams {
    /// Softmax temperature; higher values produce more random output.
    pub temperature: f32,
    /// Nucleus-sampling threshold — presumably the cumulative-probability
    /// cutoff forwarded to the backend; verify against the API payload.
    pub top_p: f32,
    /// Limit sampling to the `top_k` most likely tokens.
    pub top_k: u32,
}

impl Default for SamplingParams {
    /// Qwen3.5 recommended defaults (temperature=0.6, top_p=0.95,
    /// top_k=20), as documented when these parameters were introduced.
    fn default() -> Self {
        Self {
            temperature: 0.6,
            top_p: 0.95,
            top_k: 20,
        }
    }
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// Stream events — yielded by backends, consumed by the runner
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
|
|
@ -93,7 +101,7 @@ impl ApiClient {
|
|||
tools: Option<&[ToolDef]>,
|
||||
ui_tx: &UiSender,
|
||||
reasoning_effort: &str,
|
||||
temperature: Option<f32>,
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
) -> (mpsc::UnboundedReceiver<StreamEvent>, AbortOnDrop) {
|
||||
let (tx, rx) = mpsc::unbounded_channel();
|
||||
|
|
@ -110,7 +118,7 @@ impl ApiClient {
|
|||
let result = openai::stream_events(
|
||||
&client, &base_url, &api_key, &model,
|
||||
&messages, tools.as_deref(), &tx, &ui_tx,
|
||||
&reasoning_effort, temperature, priority,
|
||||
&reasoning_effort, sampling, priority,
|
||||
).await;
|
||||
if let Err(e) = result {
|
||||
let _ = tx.send(StreamEvent::Error(e.to_string()));
|
||||
|
|
@ -126,11 +134,11 @@ impl ApiClient {
|
|||
tools: Option<&[ToolDef]>,
|
||||
ui_tx: &UiSender,
|
||||
reasoning_effort: &str,
|
||||
temperature: Option<f32>,
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
) -> Result<(Message, Option<Usage>)> {
|
||||
// Use the event stream and accumulate into a message.
|
||||
let (mut rx, _handle) = self.start_stream(messages, tools, ui_tx, reasoning_effort, temperature, priority);
|
||||
let (mut rx, _handle) = self.start_stream(messages, tools, ui_tx, reasoning_effort, sampling, priority);
|
||||
let mut content = String::new();
|
||||
let mut tool_calls: Vec<ToolCall> = Vec::new();
|
||||
let mut usage = None;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue