amygdala: F8 screen for live concept-readout projections

Per-token residual-stream projections from the vLLM server's readout
pipeline surfaced as a TUI bar chart. Flow:

* agent/readout.rs — SharedReadoutBuffer (manifest + ring of last ~200
  token entries). Lives on Agent and is shared across forks (single
  stream, one landing pad).
* agent/mod.rs — Agent::new now probes /v1/readout/manifest at startup
  (non-fatal; 404 leaves manifest None, which disables the screen).
* agent/context.rs — the streaming token handler pushes every token
  with attached readout onto the shared buffer.
* user/amygdala.rs — F8 screen. Top-K concepts by |value| as
  horizontal bars (green positive, red negative), plus a 4-line
  recent-tokens panel showing each token's top concept at the selected
  layer. Keys: 1..9 select layer, t toggles current/mean-over-recent.

Disabled state renders a hint pointing at VLLM_READOUT_MANIFEST /
VLLM_READOUT_VECTORS so users can tell the feature apart from
"server up but no tokens yet".

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-18 01:20:30 -04:00
parent 0f1c4cf1de
commit c8976660f4
5 changed files with 410 additions and 2 deletions

View file

@ -682,7 +682,12 @@ impl ResponseParser {
let mut full_text = String::new();
while let Some(event) = stream.recv().await {
match event {
super::api::StreamToken::Token { id, readout: _ } => {
super::api::StreamToken::Token { id, readout } => {
if let Some(r) = readout {
if let Ok(mut buf) = agent.readout.lock() {
buf.push(id, r);
}
}
let text = super::tokenizer::decode(&[id]);
full_text.push_str(&text);
let mut ctx = agent.context.lock().await;

View file

@ -16,6 +16,7 @@
pub mod api;
pub mod context;
pub mod oneshot;
pub mod readout;
pub mod tokenizer;
pub mod tools;
@ -142,6 +143,11 @@ pub struct Agent {
pub session_id: String,
pub context: crate::Mutex<ContextState>,
pub state: crate::Mutex<AgentState>,
/// Shared landing pad for per-token concept-readout projections
/// streamed from the vLLM server. Populated by the streaming
/// token handler, read by UI screens (amygdala). Manifest is
/// `None` when the server has readout disabled.
pub readout: readout::SharedReadoutBuffer,
}
/// Mutable agent state — behind its own mutex.
@ -214,11 +220,13 @@ impl Agent {
}
let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
let readout = readout::new_shared();
let agent = Arc::new(Self {
client,
app_config,
session_id,
context: crate::Mutex::new(context),
readout,
state: crate::Mutex::new(AgentState {
tools: agent_tools,
mcp_tools: McpToolAccess::All,
@ -244,6 +252,32 @@ impl Agent {
});
agent.load_startup_journal().await;
// Probe the vLLM server for its readout manifest. Non-fatal:
// if readout isn't enabled the server returns 404 and we
// leave the manifest as None, which disables the amygdala
// screen gracefully.
match agent.client.fetch_readout_manifest().await {
Ok(Some(m)) => {
dbglog!(
"readout manifest: {} concepts, layers={:?}",
m.concepts.len(),
m.layers,
);
if let Ok(mut buf) = agent.readout.lock() {
buf.set_manifest(Some(m));
}
}
Ok(None) => {
dbglog!(
"readout manifest: server has readout disabled (404)"
);
}
Err(e) => {
dbglog!("readout manifest fetch failed: {}", e);
}
}
agent
}
@ -256,6 +290,10 @@ impl Agent {
app_config: self.app_config.clone(),
session_id: self.session_id.clone(),
context: crate::Mutex::new(ctx),
// Forks share the parent's readout buffer — it's a
// single-stream phenomenon; the fork is driven by the
// same vLLM server's responses.
readout: self.readout.clone(),
state: crate::Mutex::new(AgentState {
tools,
mcp_tools: McpToolAccess::None,

75
src/agent/readout.rs Normal file
View file

@ -0,0 +1,75 @@
// agent/readout.rs — live buffer of concept-readout projections.
//
// The vLLM server projects residual-stream activations onto a fixed
// matrix of concept directions during each decode step and ships the
// result back on every streamed chunk (see
// vllm/docs/features/readout.md). This module owns the client-side
// landing pad: a ring of the last N token projections plus the
// concept/layer mapping fetched from `/v1/readout/manifest` at
// startup.
//
// Readers (UI screens) lock briefly, read a snapshot, release. Writers
// (the streaming token handler) push one entry per token. Intentionally
// a simple Mutex<VecDeque> rather than lock-free — the UI ticks at
// ~15 Hz and the stream at token-rate, contention is nil.
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};
use super::api::{ReadoutManifest, TokenReadout};
/// Default ring length — at ~30 tok/s this is ~6 seconds of history,
/// enough for the amygdala screen's scrolling display.
const DEFAULT_RING_LEN: usize = 200;
/// One entry in the readout ring: the sampled token and its per-layer
/// concept projection vector.
#[derive(Debug, Clone)]
pub struct ReadoutEntry {
    // Vocabulary id of the sampled token (decoded for display by the UI).
    pub token_id: u32,
    /// Shape `[n_layers][n_concepts]`.
    pub readout: TokenReadout,
}
/// Shared buffer of recent per-token concept projections plus the
/// manifest that names the layer/concept indices. `manifest` is `None`
/// when the server has readout disabled or the fetch failed — callers
/// should treat that as "readout unavailable" and skip rendering.
/// Shared buffer of recent per-token concept projections plus the
/// manifest that names the layer/concept indices. `manifest` is `None`
/// when the server has readout disabled or the fetch failed — callers
/// should treat that as "readout unavailable" and skip rendering.
pub struct ReadoutBuffer {
    /// Concept/layer mapping fetched from `/v1/readout/manifest`;
    /// `None` means readout is unavailable.
    pub manifest: Option<ReadoutManifest>,
    /// Ring of the most recent token entries, oldest first.
    pub recent: VecDeque<ReadoutEntry>,
    /// Ring capacity; once reached, pushes evict the oldest entry.
    pub max_len: usize,
}

impl ReadoutBuffer {
    /// Empty buffer with the default ring length and no manifest
    /// (readout stays disabled until `set_manifest` installs one).
    pub fn new() -> Self {
        Self {
            manifest: None,
            recent: VecDeque::with_capacity(DEFAULT_RING_LEN),
            max_len: DEFAULT_RING_LEN,
        }
    }

    /// Install (or clear) the manifest fetched at startup.
    pub fn set_manifest(&mut self, manifest: Option<ReadoutManifest>) {
        self.manifest = manifest;
    }

    /// Append one token's projections, evicting the oldest entry when
    /// the ring is already at capacity.
    pub fn push(&mut self, token_id: u32, readout: TokenReadout) {
        if self.recent.len() >= self.max_len {
            self.recent.pop_front();
        }
        self.recent.push_back(ReadoutEntry { token_id, readout });
    }

    /// Whether the server advertises readout (a manifest was fetched).
    pub fn is_enabled(&self) -> bool {
        self.manifest.is_some()
    }
}

// Bug fix: the previous `#[derive(Default)]` produced `max_len: 0`,
// which made a Default-constructed buffer silently retain at most ONE
// entry (every push evicts before inserting) — inconsistent with
// `new()`'s ring of DEFAULT_RING_LEN. Delegate so the two agree.
impl Default for ReadoutBuffer {
    fn default() -> Self {
        Self::new()
    }
}
/// Thread-safe handle to a [`ReadoutBuffer`], shared between the
/// streaming token handler (writer) and UI screens (readers).
pub type SharedReadoutBuffer = Arc<Mutex<ReadoutBuffer>>;

/// Build a fresh, empty shared readout buffer.
pub fn new_shared() -> SharedReadoutBuffer {
    let buffer = ReadoutBuffer::new();
    Arc::new(Mutex::new(buffer))
}