// api/ — LLM API client (OpenAI-compatible)
//
// Works with any provider that implements the OpenAI chat completions
// API: OpenRouter, vLLM, llama.cpp, Fireworks, Together, etc.
//
// Diagnostics: anomalies always logged to debug panel.
// Set POC_DEBUG=1 for verbose per-turn logging.

pub mod http;
pub mod salience;

use std::time::Duration;
use anyhow::Result;
use tokio::sync::mpsc;
use serde::Deserialize;

use http::HttpClient;

/// Token counters for a generation; can be carried to the consumer by
/// `StreamToken::Done`.
///
/// Deserialized with serde. None of the fields declares a default, so
/// a payload that omits any of them fails to deserialize.
/// NOTE(review): presumably mirrors the OpenAI-style `usage` object —
/// confirm against the providers this client is pointed at.
#[derive(Debug, Clone, Deserialize)]
pub struct Usage {
    /// Number of tokens in the prompt (presumably as counted by the server).
    pub prompt_tokens: u32,
    /// Number of tokens generated (presumably as counted by the server).
    pub completion_tokens: u32,
    /// Total reported by the server. NOTE(review): presumably
    /// `prompt_tokens + completion_tokens`; this is not checked here.
    pub total_tokens: u32,
}

/// Concept-readout manifest returned by the vLLM server's
/// `/v1/readout/manifest` endpoint. Maps the nameless tensor indices
/// in streaming `readout` fields back to concept names and layer
/// indices.
///
/// Both fields are required when deserializing; neither has a default.
#[derive(Debug, Clone, Deserialize)]
pub struct ReadoutManifest {
    /// Concept names. Presumably `concepts[c]` names position `c` of
    /// every inner row of a `TokenReadout` — TODO confirm the ordering
    /// contract with the server.
    pub concepts: Vec<String>,
    /// Layer indices. Presumably `layers[l]` is the model layer behind
    /// row `l` of a `TokenReadout` — TODO confirm.
    pub layers: Vec<u32>,
}

/// Per-token per-layer concept projections streamed alongside each
/// sampled token. Shape `[n_layers][n_concepts]`. Named values come
/// from pairing with the manifest fetched at startup.
///
/// The outer `Vec` is indexed by layer position and each inner `Vec`
/// by concept position. The type itself does not enforce that the
/// rows have equal length or that they match the manifest.
pub type TokenReadout = Vec<Vec<f32>>;

/// Guard around a spawned task's `JoinHandle`: dropping the guard
/// aborts the task, so the task cannot outlive its owner.
pub(crate) struct AbortOnDrop(tokio::task::JoinHandle<()>);

impl Drop for AbortOnDrop {
    /// Requests cancellation of the wrapped task.
    fn drop(&mut self) {
        let Self(task) = self;
        task.abort();
    }
}

/// Sampling parameters for model generation.
///
/// `Copy`, so it is passed by value. `ApiClient::stream_session_mm`
/// accepts one but currently ignores it (see the `dead_code` note
/// below). No range validation is performed on any field here.
#[derive(Clone, Copy)]
#[allow(dead_code)] // fields used once Generate RPC lands in a later step
pub(crate) struct SamplingParams {
    /// Sampling temperature. NOTE(review): accepted range is whatever
    /// the backend allows — confirm.
    pub temperature: f32,
    /// Nucleus (top-p) cutoff; presumably a probability mass in
    /// `0.0..=1.0` — confirm against the backend.
    pub top_p: f32,
    /// Top-k cutoff. NOTE(review): whether `0` means "disabled" is not
    /// visible here — confirm.
    pub top_k: u32,
}

// ─────────────────────────────────────────────────────────────
//  Stream events — yielded by backends, consumed by the runner
// ─────────────────────────────────────────────────────────────

/// One token from the streaming completions API.
///
/// This is the item type of the channel returned by
/// `ApiClient::stream_session_mm`.
pub enum StreamToken {
    /// A sampled token, optionally with its per-layer concept readout.
    /// `readout` is `None` when the server has readout disabled or
    /// returned no readout for this chunk.
    Token { id: u32, readout: Option<TokenReadout> },
    /// Presumably the end-of-stream marker (confirm with the consumer).
    /// `usage` carries token counters when they are available.
    Done { usage: Option<Usage> },
    /// The stream failed; the payload is a human-readable message.
    Error(String),
}

/// Client handle for one OpenAI-compatible endpoint.
///
/// Clones share the readout-manifest cache, because the cell lives
/// behind an `Arc`; the remaining fields are cloned per instance.
#[derive(Clone)]
pub struct ApiClient {
    /// HTTP client used for requests; `new` configures its timeouts.
    client: HttpClient,
    /// Credential sent as `Authorization: Bearer <api_key>` by
    /// `fetch_readout_manifest`.
    api_key: String,
    /// Model identifier passed to `new`. Public; not read by any
    /// method visible in this part of the file.
    pub model: String,
    /// Endpoint root; `new` strips trailing `/` characters before
    /// storing it.
    base_url: String,
    /// Cached readout manifest — fetched once per process and shared
    /// across ApiClient clones (every Agent/fork gets the same cell).
    /// `None` after fetch means the server has readout disabled (404).
    manifest: std::sync::Arc<tokio::sync::OnceCell<Option<ReadoutManifest>>>,
}

impl ApiClient {
    pub fn new(base_url: &str, api_key: &str, model: &str) -> Self {
        let client = HttpClient::builder()
            .connect_timeout(Duration::from_secs(30))
            .timeout(Duration::from_secs(600))
            .build();

        Self {
            client,
            api_key: api_key.to_string(),
            model: model.to_string(),
            base_url: base_url.trim_end_matches('/').to_string(),
            manifest: std::sync::Arc::new(tokio::sync::OnceCell::new()),
        }
    }

    /// Stream generation via a gRPC session. Stubbed during the
    /// unary-rewrite transition — the Generate RPC is wired in a
    /// later step of this series. Until then, callers that reach
    /// this path get a StreamToken::Error.
    pub(crate) fn stream_session_mm(
        &self,
        _session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
        _prompt_tokens: &[u32],
        _images: &[super::context::WireImage],
        _sampling: SamplingParams,
        _priority: Option<i32>,
    ) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
        let (tx, rx) = mpsc::unbounded_channel();
        let handle = tokio::spawn(async move {
            let _ = tx.send(StreamToken::Error(
                "Generate RPC not yet wired after protocol rewrite — see \
                 proto/salience.proto; AppendImage / Generate land next."
                    .into(),
            ));
        });
        (rx, AbortOnDrop(handle))
    }

    pub fn base_url(&self) -> &str { &self.base_url }
    pub fn api_key(&self) -> &str { &self.api_key }

    /// Fetch `/v1/readout/manifest` — returns `Ok(Some(..))` if
    /// readout is enabled on the server, `Ok(None)` on 404 (disabled),
    /// or an error on any other failure.
    ///
    /// First call performs the HTTP fetch; subsequent calls (including
    /// across ApiClient clones sharing the same cell) return the
    /// cached result. The manifest doesn't change during a server run.
    pub async fn fetch_readout_manifest(&self) -> Result<Option<ReadoutManifest>> {
        let manifest = self.manifest.get_or_try_init(|| async {
            let url = format!("{}/readout/manifest", self.base_url);
            let auth = format!("Bearer {}", self.api_key);
            let response = self
                .client
                .get_with_headers(&url, &[("Authorization", &auth)])
                .await
                .map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?;
            let status = response.status();
            if status.as_u16() == 404 {
                return Ok::<_, anyhow::Error>(None);
            }
            if !status.is_success() {
                let body = response.text().await.unwrap_or_default();
                let n = body.floor_char_boundary(body.len().min(500));
                anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]);
            }
            Ok(Some(response.json().await?))
        }).await?;
        Ok(manifest.clone())
    }

}