// api/ — LLM API client (OpenAI-compatible) // // Works with any provider that implements the OpenAI chat completions // API: OpenRouter, vLLM, llama.cpp, Fireworks, Together, etc. // // Diagnostics: anomalies always logged to debug panel. // Set POC_DEBUG=1 for verbose per-turn logging. pub mod http; pub mod salience; use std::time::Duration; use anyhow::Result; use tokio::sync::mpsc; use serde::Deserialize; use http::HttpClient; #[derive(Debug, Clone, Deserialize)] pub struct Usage { pub prompt_tokens: u32, pub completion_tokens: u32, pub total_tokens: u32, } /// Concept-readout manifest returned by the vLLM server's /// `/v1/readout/manifest` endpoint. Maps the nameless tensor indices /// in streaming `readout` fields back to concept names and layer /// indices. #[derive(Debug, Clone, Deserialize)] pub struct ReadoutManifest { pub concepts: Vec, pub layers: Vec, } /// Per-token per-layer concept projections streamed alongside each /// sampled token. Shape `[n_layers][n_concepts]`. Named values come /// from pairing with the manifest fetched at startup. pub type TokenReadout = Vec>; /// A JoinHandle that aborts its task when dropped. pub(crate) struct AbortOnDrop(tokio::task::JoinHandle<()>); impl Drop for AbortOnDrop { fn drop(&mut self) { self.0.abort(); } } /// Sampling parameters for model generation. #[derive(Clone, Copy)] #[allow(dead_code)] // fields used once Generate RPC lands in a later step pub(crate) struct SamplingParams { pub temperature: f32, pub top_p: f32, pub top_k: u32, } // ───────────────────────────────────────────────────────────── // Stream events — yielded by backends, consumed by the runner // ───────────────────────────────────────────────────────────── /// One token from the streaming completions API. pub enum StreamToken { /// A sampled token, optionally with its per-layer concept readout. /// `readout` is `None` when the server has readout disabled or /// returned no readout for this chunk. Token { id: u32, readout: Option }, Done { usage: Option }, Error(String), } #[derive(Clone)] pub struct ApiClient { client: HttpClient, api_key: String, pub model: String, base_url: String, /// Cached readout manifest — fetched once per process and shared /// across ApiClient clones (every Agent/fork gets the same cell). /// `None` after fetch means the server has readout disabled (404). manifest: std::sync::Arc>>, } impl ApiClient { pub fn new(base_url: &str, api_key: &str, model: &str) -> Self { let client = HttpClient::builder() .connect_timeout(Duration::from_secs(30)) .timeout(Duration::from_secs(600)) .build(); Self { client, api_key: api_key.to_string(), model: model.to_string(), base_url: base_url.trim_end_matches('/').to_string(), manifest: std::sync::Arc::new(tokio::sync::OnceCell::new()), } } /// Stream generation via a gRPC session. Stubbed during the /// unary-rewrite transition — the Generate RPC is wired in a /// later step of this series. Until then, callers that reach /// this path get a StreamToken::Error. pub(crate) fn stream_session_mm( &self, _session_lock: std::sync::Arc>>, _prompt_tokens: &[u32], _images: &[super::context::WireImage], _sampling: SamplingParams, _priority: Option, ) -> (mpsc::UnboundedReceiver, AbortOnDrop) { let (tx, rx) = mpsc::unbounded_channel(); let handle = tokio::spawn(async move { let _ = tx.send(StreamToken::Error( "Generate RPC not yet wired after protocol rewrite — see \ proto/salience.proto; AppendImage / Generate land next." .into(), )); }); (rx, AbortOnDrop(handle)) } pub fn base_url(&self) -> &str { &self.base_url } pub fn api_key(&self) -> &str { &self.api_key } /// Fetch `/v1/readout/manifest` — returns `Ok(Some(..))` if /// readout is enabled on the server, `Ok(None)` on 404 (disabled), /// or an error on any other failure. /// /// First call performs the HTTP fetch; subsequent calls (including /// across ApiClient clones sharing the same cell) return the /// cached result. The manifest doesn't change during a server run. pub async fn fetch_readout_manifest(&self) -> Result> { let manifest = self.manifest.get_or_try_init(|| async { let url = format!("{}/readout/manifest", self.base_url); let auth = format!("Bearer {}", self.api_key); let response = self .client .get_with_headers(&url, &[("Authorization", &auth)]) .await .map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?; let status = response.status(); if status.as_u16() == 404 { return Ok::<_, anyhow::Error>(None); } if !status.is_success() { let body = response.text().await.unwrap_or_default(); let n = body.floor_char_boundary(body.len().min(500)); anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]); } Ok(Some(response.json().await?)) }).await?; Ok(manifest.clone()) } }