consciousness/src/agent/api/http.rs

290 lines
9.9 KiB
Rust
Raw Normal View History

// http.rs — Minimal async HTTP client
//
// Replaces reqwest with direct hyper + rustls. No tracing dependency.
// Supports: GET/POST, JSON/form bodies, streaming responses, TLS.
use anyhow::{Context, Result};
use bytes::Bytes;
2026-04-11 16:45:54 -04:00
use http_body_util::{BodyExt, Full};
use hyper::body::Incoming;
use hyper::{Request, StatusCode};
use hyper_util::rt::TokioIo;
use rustls::ClientConfig;
use std::sync::Arc;
use std::time::Duration;
use tokio::net::TcpStream;
/// Lightweight async HTTP client with connection pooling via keep-alive.
#[derive(Clone)]
pub struct HttpClient {
tls: Arc<ClientConfig>,
connect_timeout: Duration,
request_timeout: Duration,
}
/// An in-flight response — provides status, headers, and body access.
pub struct HttpResponse {
parts: http::response::Parts,
body: Incoming,
}
impl HttpClient {
pub fn new() -> Self {
Self::builder().build()
}
pub fn builder() -> HttpClientBuilder {
HttpClientBuilder {
connect_timeout: Duration::from_secs(30),
request_timeout: Duration::from_secs(600),
}
}
/// Send a GET request.
pub async fn get(&self, url: &str) -> Result<HttpResponse> {
self.get_with_headers(url, &[]).await
}
/// Send a GET request with custom headers.
pub async fn get_with_headers(&self, url: &str, headers: &[(&str, &str)]) -> Result<HttpResponse> {
2026-04-11 16:45:54 -04:00
self.send(url, "GET", headers, Bytes::new()).await
}
/// Send a POST request with URL-encoded form data.
pub async fn post_form(&self, url: &str, params: &[(&str, &str)]) -> Result<HttpResponse> {
let body = serde_urlencoded::to_string(params).context("encoding form")?;
2026-04-11 16:45:54 -04:00
self.send(url, "POST",
&[("content-type", "application/x-www-form-urlencoded")],
Bytes::from(body),
).await
}
2026-04-11 16:45:54 -04:00
/// Send a request with JSON body.
pub async fn send_json(
&self,
method: &str,
url: &str,
headers: &[(&str, &str)],
body: &impl serde::Serialize,
) -> Result<HttpResponse> {
let json = serde_json::to_vec(body).context("serializing JSON body")?;
2026-04-11 16:45:54 -04:00
let mut all_headers = vec![("content-type", "application/json")];
all_headers.extend_from_slice(headers);
self.send(url, method, &all_headers, Bytes::from(json)).await
}
2026-04-11 16:45:54 -04:00
/// Core send: parse URL, connect, build request with correct
/// path-only URI and Host header, send, return response.
async fn send(
&self,
url: &str,
method: &str,
headers: &[(&str, &str)],
body: Bytes,
) -> Result<HttpResponse> {
let uri: http::Uri = url.parse().context("parsing URL")?;
let host = uri.host().context("URL has no host")?.to_string();
let is_https = uri.scheme_str() == Some("https");
let port = uri.port_u16().unwrap_or(if is_https { 443 } else { 80 });
2026-04-11 16:45:54 -04:00
// Connect
let tcp = tokio::time::timeout(
self.connect_timeout,
2026-04-11 16:45:54 -04:00
TcpStream::connect(format!("{host}:{port}")),
).await
.context("connect timeout")?
.context("TCP connect")?;
2026-04-11 16:45:54 -04:00
let io: TokioIo<Box<dyn IoStream>> = if is_https {
let server_name = rustls::pki_types::ServerName::try_from(host.clone())
2026-04-11 16:45:54 -04:00
.map_err(|e| anyhow::anyhow!("invalid server name: {e}"))?;
let connector = tokio_rustls::TlsConnector::from(self.tls.clone());
let tls = connector.connect(server_name.to_owned(), tcp).await
salience: add gRPC client + TLS plumbing for stateful vllm sessions Adds the client-side of a stateful gRPC protocol against vllm, plus the TLS trust machinery so we can talk to self-signed vllm servers. Protocol (proto/salience.proto): Bidi-streaming Session RPC carries OpenSession / AppendTokens / Generate / Cancel from client and SessionReady / PrefillProgress / Token / GenerateDone / Error from server. Separate Fork unary RPC for cheap branching (prefix cache shares KV automatically). Plus ListSessions, CloseSession, GetReadoutManifest admin RPCs. Per-token readouts ship as packed f32 ([n_layers * n_concepts] per token, flat). Logprobs use range-selected positions plus a top-k parameter — empty ranges means no logprobs, any range means emit sampled-token logprob at those positions, top_k > 0 adds alternatives. Client (src/agent/api/salience.rs): Tonic-generated types under pb::, a connect() helper, with_auth() for bearer metadata, and a Session handle wrapping the bidi stream: open() handshakes SessionReady; append() is fire-and-forget; generate() returns impl Stream<Item = Event> that drains inbound until Done or terminating Error. One generate at a time per session. Peak picker (src/agent/salience.rs): Pure function over ReadoutEntry traces. Per-concept z-score against trace global stats; contiguous above-threshold regions emit one peak at the local max. Configurable sigma threshold and min-std safety floor. Deterministic tie-break on offset then concept name. 12 unit tests covering empty traces, flat channels, single/multi spikes, contiguous humps, multi-concept independence, trailing runs, sub-threshold noise, layer-out-of-range, manifest shape mismatch, and threshold tunability. TLS (src/agent/api/http.rs): HttpClient::build now also loads every .pem file under ~/.consciousness/certs/ into the rustls root store — so dropping a <host>.pem in that directory is enough to trust a new self- signed server; no code changes per new host. Also installs the rustls default crypto provider explicitly via OnceLock: tonic's tls features pulled in both ring and aws-lc-rs on the resolver path, and rustls 0.23 refuses to auto-pick when either could win. Build (build.rs, Cargo.toml): tonic-build generates Rust types from proto/salience.proto at cargo-build time, using a vendored protoc binary (protoc-bin-vendored) so no system install is required. New runtime deps: tonic, prost, async-stream, tokio-stream, rustls-pemfile. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-23 02:21:07 -04:00
.map_err(|e| anyhow::anyhow!("TLS handshake to {host}: {e}"))?;
2026-04-11 16:45:54 -04:00
TokioIo::new(Box::new(tls) as Box<dyn IoStream>)
} else {
2026-04-11 16:45:54 -04:00
TokioIo::new(Box::new(tcp) as Box<dyn IoStream>)
};
2026-04-11 16:45:54 -04:00
// Build request with path-only URI and Host header
let path_and_query = uri.path_and_query()
.map(|pq| pq.as_str())
.unwrap_or("/");
let mut builder = Request::builder()
.method(method)
.uri(path_and_query)
.header("host", &host);
for &(k, v) in headers {
builder = builder.header(k, v);
}
let req = builder.body(Full::new(body))
.context("building request")?;
2026-04-11 16:45:54 -04:00
// Send
let (mut sender, conn) = hyper::client::conn::http1::handshake(io).await
.context("HTTP handshake")?;
tokio::spawn(conn);
let resp = tokio::time::timeout(
self.request_timeout,
sender.send_request(req),
).await
.context("request timeout")?
.context("sending request")?;
let (parts, body) = resp.into_parts();
Ok(HttpResponse { parts, body })
}
}
impl HttpResponse {
pub fn status(&self) -> StatusCode {
self.parts.status
}
pub fn header(&self, name: &str) -> Option<&str> {
self.parts.headers.get(name)?.to_str().ok()
}
/// Read the entire body as text.
pub async fn text(self) -> Result<String> {
let bytes = self.body.collect().await
.context("reading response body")?
.to_bytes();
Ok(String::from_utf8_lossy(&bytes).into_owned())
}
/// Read the entire body as raw bytes (for binary downloads).
pub async fn bytes(self) -> Result<Bytes> {
let bytes = self.body.collect().await
.context("reading response body")?
.to_bytes();
Ok(bytes)
}
/// Read the entire body and deserialize as JSON.
pub async fn json<T: serde::de::DeserializeOwned>(self) -> Result<T> {
let bytes = self.body.collect().await
.context("reading response body")?
.to_bytes();
serde_json::from_slice(&bytes).context("deserializing JSON response")
}
/// Read the next chunk from the response body (for SSE streaming).
/// Returns None when the body is complete.
pub async fn chunk(&mut self) -> Result<Option<Bytes>> {
match self.body.frame().await {
Some(Ok(frame)) => Ok(frame.into_data().ok()),
Some(Err(e)) => Err(anyhow::anyhow!("body read error: {}", e)),
None => Ok(None),
}
}
}
pub struct HttpClientBuilder {
connect_timeout: Duration,
request_timeout: Duration,
}
impl HttpClientBuilder {
pub fn connect_timeout(mut self, d: Duration) -> Self {
self.connect_timeout = d;
self
}
pub fn timeout(mut self, d: Duration) -> Self {
self.request_timeout = d;
self
}
pub fn build(self) -> HttpClient {
salience: add gRPC client + TLS plumbing for stateful vllm sessions Adds the client-side of a stateful gRPC protocol against vllm, plus the TLS trust machinery so we can talk to self-signed vllm servers. Protocol (proto/salience.proto): Bidi-streaming Session RPC carries OpenSession / AppendTokens / Generate / Cancel from client and SessionReady / PrefillProgress / Token / GenerateDone / Error from server. Separate Fork unary RPC for cheap branching (prefix cache shares KV automatically). Plus ListSessions, CloseSession, GetReadoutManifest admin RPCs. Per-token readouts ship as packed f32 ([n_layers * n_concepts] per token, flat). Logprobs use range-selected positions plus a top-k parameter — empty ranges means no logprobs, any range means emit sampled-token logprob at those positions, top_k > 0 adds alternatives. Client (src/agent/api/salience.rs): Tonic-generated types under pb::, a connect() helper, with_auth() for bearer metadata, and a Session handle wrapping the bidi stream: open() handshakes SessionReady; append() is fire-and-forget; generate() returns impl Stream<Item = Event> that drains inbound until Done or terminating Error. One generate at a time per session. Peak picker (src/agent/salience.rs): Pure function over ReadoutEntry traces. Per-concept z-score against trace global stats; contiguous above-threshold regions emit one peak at the local max. Configurable sigma threshold and min-std safety floor. Deterministic tie-break on offset then concept name. 12 unit tests covering empty traces, flat channels, single/multi spikes, contiguous humps, multi-concept independence, trailing runs, sub-threshold noise, layer-out-of-range, manifest shape mismatch, and threshold tunability. TLS (src/agent/api/http.rs): HttpClient::build now also loads every .pem file under ~/.consciousness/certs/ into the rustls root store — so dropping a <host>.pem in that directory is enough to trust a new self- signed server; no code changes per new host. Also installs the rustls default crypto provider explicitly via OnceLock: tonic's tls features pulled in both ring and aws-lc-rs on the resolver path, and rustls 0.23 refuses to auto-pick when either could win. Build (build.rs, Cargo.toml): tonic-build generates Rust types from proto/salience.proto at cargo-build time, using a vendored protoc binary (protoc-bin-vendored) so no system install is required. New runtime deps: tonic, prost, async-stream, tokio-stream, rustls-pemfile. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-23 02:21:07 -04:00
install_rustls_crypto_provider();
let certs = rustls_native_certs::load_native_certs()
.certs.into_iter()
.collect::<Vec<_>>();
let mut root_store = rustls::RootCertStore::empty();
for cert in certs {
root_store.add(cert).ok();
}
salience: add gRPC client + TLS plumbing for stateful vllm sessions Adds the client-side of a stateful gRPC protocol against vllm, plus the TLS trust machinery so we can talk to self-signed vllm servers. Protocol (proto/salience.proto): Bidi-streaming Session RPC carries OpenSession / AppendTokens / Generate / Cancel from client and SessionReady / PrefillProgress / Token / GenerateDone / Error from server. Separate Fork unary RPC for cheap branching (prefix cache shares KV automatically). Plus ListSessions, CloseSession, GetReadoutManifest admin RPCs. Per-token readouts ship as packed f32 ([n_layers * n_concepts] per token, flat). Logprobs use range-selected positions plus a top-k parameter — empty ranges means no logprobs, any range means emit sampled-token logprob at those positions, top_k > 0 adds alternatives. Client (src/agent/api/salience.rs): Tonic-generated types under pb::, a connect() helper, with_auth() for bearer metadata, and a Session handle wrapping the bidi stream: open() handshakes SessionReady; append() is fire-and-forget; generate() returns impl Stream<Item = Event> that drains inbound until Done or terminating Error. One generate at a time per session. Peak picker (src/agent/salience.rs): Pure function over ReadoutEntry traces. Per-concept z-score against trace global stats; contiguous above-threshold regions emit one peak at the local max. Configurable sigma threshold and min-std safety floor. Deterministic tie-break on offset then concept name. 12 unit tests covering empty traces, flat channels, single/multi spikes, contiguous humps, multi-concept independence, trailing runs, sub-threshold noise, layer-out-of-range, manifest shape mismatch, and threshold tunability. TLS (src/agent/api/http.rs): HttpClient::build now also loads every .pem file under ~/.consciousness/certs/ into the rustls root store — so dropping a <host>.pem in that directory is enough to trust a new self- signed server; no code changes per new host. Also installs the rustls default crypto provider explicitly via OnceLock: tonic's tls features pulled in both ring and aws-lc-rs on the resolver path, and rustls 0.23 refuses to auto-pick when either could win. Build (build.rs, Cargo.toml): tonic-build generates Rust types from proto/salience.proto at cargo-build time, using a vendored protoc binary (protoc-bin-vendored) so no system install is required. New runtime deps: tonic, prost, async-stream, tokio-stream, rustls-pemfile. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-23 02:21:07 -04:00
// Also trust any `.pem` files under `~/.consciousness/certs/` —
// self-signed server certs for our own vllm hosts live there.
// Drop a new `<host>.pem` in the dir to trust a new server; no
// code change needed.
for cert in load_user_certs() {
root_store.add(cert).ok();
}
let tls = Arc::new(
ClientConfig::builder()
.with_root_certificates(root_store)
.with_no_client_auth()
);
HttpClient {
tls,
connect_timeout: self.connect_timeout,
request_timeout: self.request_timeout,
}
}
}
salience: add gRPC client + TLS plumbing for stateful vllm sessions Adds the client-side of a stateful gRPC protocol against vllm, plus the TLS trust machinery so we can talk to self-signed vllm servers. Protocol (proto/salience.proto): Bidi-streaming Session RPC carries OpenSession / AppendTokens / Generate / Cancel from client and SessionReady / PrefillProgress / Token / GenerateDone / Error from server. Separate Fork unary RPC for cheap branching (prefix cache shares KV automatically). Plus ListSessions, CloseSession, GetReadoutManifest admin RPCs. Per-token readouts ship as packed f32 ([n_layers * n_concepts] per token, flat). Logprobs use range-selected positions plus a top-k parameter — empty ranges means no logprobs, any range means emit sampled-token logprob at those positions, top_k > 0 adds alternatives. Client (src/agent/api/salience.rs): Tonic-generated types under pb::, a connect() helper, with_auth() for bearer metadata, and a Session handle wrapping the bidi stream: open() handshakes SessionReady; append() is fire-and-forget; generate() returns impl Stream<Item = Event> that drains inbound until Done or terminating Error. One generate at a time per session. Peak picker (src/agent/salience.rs): Pure function over ReadoutEntry traces. Per-concept z-score against trace global stats; contiguous above-threshold regions emit one peak at the local max. Configurable sigma threshold and min-std safety floor. Deterministic tie-break on offset then concept name. 12 unit tests covering empty traces, flat channels, single/multi spikes, contiguous humps, multi-concept independence, trailing runs, sub-threshold noise, layer-out-of-range, manifest shape mismatch, and threshold tunability. TLS (src/agent/api/http.rs): HttpClient::build now also loads every .pem file under ~/.consciousness/certs/ into the rustls root store — so dropping a <host>.pem in that directory is enough to trust a new self- signed server; no code changes per new host. Also installs the rustls default crypto provider explicitly via OnceLock: tonic's tls features pulled in both ring and aws-lc-rs on the resolver path, and rustls 0.23 refuses to auto-pick when either could win. Build (build.rs, Cargo.toml): tonic-build generates Rust types from proto/salience.proto at cargo-build time, using a vendored protoc binary (protoc-bin-vendored) so no system install is required. New runtime deps: tonic, prost, async-stream, tokio-stream, rustls-pemfile. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-23 02:21:07 -04:00
/// Install rustls' default crypto provider exactly once per process.
/// rustls 0.23 doesn't pick one automatically when multiple features
/// could provide it (e.g. when tonic pulls in both ring and aws-lc-rs
/// via transitive deps). Idempotent via OnceLock; safe to call from
/// multiple callers.
fn install_rustls_crypto_provider() {
static ONCE: std::sync::OnceLock<()> = std::sync::OnceLock::new();
ONCE.get_or_init(|| {
let _ = rustls::crypto::ring::default_provider().install_default();
});
}
/// Load every `.pem` file under `~/.consciousness/certs/` as a DER
/// certificate and return them. Silent on missing dir, missing files,
/// or parse errors — those are "no extra certs trusted" rather than
/// hard failures, to keep startup robust.
/// Load the concatenated PEM bytes of every `.pem` file under
/// `~/.consciousness/certs/` — suitable for passing to a tonic
/// `ClientTlsConfig::ca_certificate(Certificate::from_pem(...))` call
/// so gRPC connections trust the same self-signed servers the HTTP
/// path does.
pub(crate) fn load_user_certs_pem_bytes() -> Vec<u8> {
let mut out = Vec::new();
let Some(home) = dirs::home_dir() else { return out };
let dir = home.join(".consciousness").join("certs");
let Ok(entries) = std::fs::read_dir(&dir) else { return out };
for entry in entries.flatten() {
let path = entry.path();
if path.extension().and_then(|e| e.to_str()) != Some("pem") {
continue;
}
if let Ok(bytes) = std::fs::read(&path) {
out.extend_from_slice(&bytes);
if !bytes.ends_with(b"\n") {
out.push(b'\n');
}
}
}
out
}
fn load_user_certs() -> Vec<rustls::pki_types::CertificateDer<'static>> {
let mut out = Vec::new();
let Some(home) = dirs::home_dir() else { return out };
let dir = home.join(".consciousness").join("certs");
let Ok(entries) = std::fs::read_dir(&dir) else { return out };
for entry in entries.flatten() {
let path = entry.path();
if path.extension().and_then(|e| e.to_str()) != Some("pem") {
continue;
}
let Ok(bytes) = std::fs::read(&path) else { continue };
for cert in rustls_pemfile::certs(&mut bytes.as_slice()).flatten() {
out.push(cert);
}
}
out
}
/// Trait alias for streams that work with hyper's IO adapter.
trait IoStream: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static {}
impl<T: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static> IoStream for T {}