Add /v1/completions streaming path with raw token IDs
New stream_completions() in openai.rs sends the prompt as token IDs to the completions endpoint instead of JSON messages to chat/completions. It handles <think> tags in the response (splitting them into Reasoning events) and stops on the <|im_end|> token. start_stream_completions() on ApiClient provides the same interface as start_stream() but takes token IDs instead of Messages. The turn loop in Agent::turn() uses completions when the tokenizer is initialized, falling back to the chat API otherwise. This allows gradual migration — consciousness uses completions (Qwen tokenizer), while the Claude Code hook still uses the chat API (Anthropic). Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
e9765799c4
commit
f458af6dec
3 changed files with 188 additions and 8 deletions
|
|
@@ -483,19 +483,28 @@ impl Agent {
|
|||
let _thinking = start_activity(&agent, "thinking...").await;
|
||||
let (mut rx, _stream_guard) = {
|
||||
let me = agent.lock().await;
|
||||
let api_messages = me.assemble_api_messages();
|
||||
let sampling = api::SamplingParams {
|
||||
temperature: me.temperature,
|
||||
top_p: me.top_p,
|
||||
top_k: me.top_k,
|
||||
};
|
||||
me.client.start_stream(
|
||||
&api_messages,
|
||||
&me.tools,
|
||||
&me.reasoning_effort,
|
||||
sampling,
|
||||
None,
|
||||
)
|
||||
if tokenizer::is_initialized() {
|
||||
let prompt_tokens = me.assemble_prompt_tokens();
|
||||
me.client.start_stream_completions(
|
||||
&prompt_tokens,
|
||||
sampling,
|
||||
None,
|
||||
)
|
||||
} else {
|
||||
let api_messages = me.assemble_api_messages();
|
||||
me.client.start_stream(
|
||||
&api_messages,
|
||||
&me.tools,
|
||||
&me.reasoning_effort,
|
||||
sampling,
|
||||
None,
|
||||
)
|
||||
}
|
||||
};
|
||||
// --- Lock released ---
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue