amygdala: F8 screen for live concept-readout projections

Per-token residual-stream projections from the vLLM server's readout
pipeline surfaced as a TUI bar chart. Flow:

* agent/readout.rs — SharedReadoutBuffer (manifest + ring of last ~200
  token entries). Lives on Agent and is shared across forks (single
  stream, one landing pad).
* agent/mod.rs — Agent::new now probes /v1/readout/manifest at startup
  (non-fatal; 404 leaves manifest None, which disables the screen).
* agent/context.rs — the streaming token handler pushes every token
  with attached readout onto the shared buffer.
* user/amygdala.rs — F8 screen. Top-K concepts by |value| as
  horizontal bars (green positive, red negative), plus a 4-line
  recent-tokens panel showing each token's top concept at the selected
  layer. Keys: 1..9 select layer, t toggles current/mean-over-recent.

Disabled state renders a hint pointing at VLLM_READOUT_MANIFEST /
VLLM_READOUT_VECTORS so users can tell the feature apart from
"server up but no tokens yet".

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-18 01:20:30 -04:00
parent 0f1c4cf1de
commit c8976660f4
5 changed files with 410 additions and 2 deletions

View file

@ -682,7 +682,12 @@ impl ResponseParser {
let mut full_text = String::new();
while let Some(event) = stream.recv().await {
match event {
super::api::StreamToken::Token { id, readout: _ } => {
super::api::StreamToken::Token { id, readout } => {
if let Some(r) = readout {
if let Ok(mut buf) = agent.readout.lock() {
buf.push(id, r);
}
}
let text = super::tokenizer::decode(&[id]);
full_text.push_str(&text);
let mut ctx = agent.context.lock().await;

View file

@ -16,6 +16,7 @@
pub mod api;
pub mod context;
pub mod oneshot;
pub mod readout;
pub mod tokenizer;
pub mod tools;
@ -142,6 +143,11 @@ pub struct Agent {
pub session_id: String,
pub context: crate::Mutex<ContextState>,
pub state: crate::Mutex<AgentState>,
/// Shared landing pad for per-token concept-readout projections
/// streamed from the vLLM server. Populated by the streaming
/// token handler, read by UI screens (amygdala). Manifest is
/// `None` when the server has readout disabled.
pub readout: readout::SharedReadoutBuffer,
}
/// Mutable agent state — behind its own mutex.
@ -214,11 +220,13 @@ impl Agent {
}
let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
let readout = readout::new_shared();
let agent = Arc::new(Self {
client,
app_config,
session_id,
context: crate::Mutex::new(context),
readout,
state: crate::Mutex::new(AgentState {
tools: agent_tools,
mcp_tools: McpToolAccess::All,
@ -244,6 +252,32 @@ impl Agent {
});
agent.load_startup_journal().await;
// Probe the vLLM server for its readout manifest. Non-fatal:
// if readout isn't enabled the server returns 404 and we
// leave the manifest as None, which disables the amygdala
// screen gracefully.
match agent.client.fetch_readout_manifest().await {
Ok(Some(m)) => {
dbglog!(
"readout manifest: {} concepts, layers={:?}",
m.concepts.len(),
m.layers,
);
if let Ok(mut buf) = agent.readout.lock() {
buf.set_manifest(Some(m));
}
}
Ok(None) => {
dbglog!(
"readout manifest: server has readout disabled (404)"
);
}
Err(e) => {
dbglog!("readout manifest fetch failed: {}", e);
}
}
agent
}
@ -256,6 +290,10 @@ impl Agent {
app_config: self.app_config.clone(),
session_id: self.session_id.clone(),
context: crate::Mutex::new(ctx),
// Forks share the parent's readout buffer — it's a
// single-stream phenomenon; the fork is driven by the
// same vLLM server's responses.
readout: self.readout.clone(),
state: crate::Mutex::new(AgentState {
tools,
mcp_tools: McpToolAccess::None,

75
src/agent/readout.rs Normal file
View file

@ -0,0 +1,75 @@
// agent/readout.rs — live buffer of concept-readout projections.
//
// The vLLM server projects residual-stream activations onto a fixed
// matrix of concept directions during each decode step and ships the
// result back on every streamed chunk (see
// vllm/docs/features/readout.md). This module owns the client-side
// landing pad: a ring of the last N token projections plus the
// concept/layer mapping fetched from `/v1/readout/manifest` at
// startup.
//
// Readers (UI screens) lock briefly, read a snapshot, release. Writers
// (the streaming token handler) push one entry per token. Intentionally
// a simple Mutex<VecDeque> rather than lock-free — the UI ticks at
// ~15 Hz and the stream at token-rate, contention is nil.
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};
use super::api::{ReadoutManifest, TokenReadout};
/// Default ring length — at ~30 tok/s this is ~6 seconds of history,
/// enough for the amygdala screen's scrolling display.
const DEFAULT_RING_LEN: usize = 200;
/// One entry in the readout ring: the sampled token and its per-layer
/// concept projection vector.
#[derive(Debug, Clone)]
pub struct ReadoutEntry {
    // Vocabulary id of the sampled token (decoded for display by the UI).
    pub token_id: u32,
    /// Shape `[n_layers][n_concepts]`.
    pub readout: TokenReadout,
}
/// Shared buffer of recent per-token concept projections plus the
/// manifest that names the layer/concept indices. `manifest` is `None`
/// when the server has readout disabled or the fetch failed — callers
/// should treat that as "readout unavailable" and skip rendering.
/// Shared buffer of recent per-token concept projections plus the
/// manifest that names the layer/concept indices. `manifest` is `None`
/// when the server has readout disabled or the fetch failed — callers
/// should treat that as "readout unavailable" and skip rendering.
pub struct ReadoutBuffer {
    /// Concept/layer mapping fetched from `/v1/readout/manifest`;
    /// `None` means readout is unavailable.
    pub manifest: Option<ReadoutManifest>,
    /// Ring of the most recent token entries, oldest first.
    pub recent: VecDeque<ReadoutEntry>,
    /// Ring capacity; once reached, pushes evict the oldest entry.
    pub max_len: usize,
}

impl ReadoutBuffer {
    /// Empty buffer with the default ring length and no manifest
    /// (readout stays disabled until `set_manifest` installs one).
    pub fn new() -> Self {
        Self {
            manifest: None,
            recent: VecDeque::with_capacity(DEFAULT_RING_LEN),
            max_len: DEFAULT_RING_LEN,
        }
    }

    /// Install (or clear) the manifest fetched at startup.
    pub fn set_manifest(&mut self, manifest: Option<ReadoutManifest>) {
        self.manifest = manifest;
    }

    /// Append one token's projections, evicting the oldest entry when
    /// the ring is already at capacity.
    pub fn push(&mut self, token_id: u32, readout: TokenReadout) {
        if self.recent.len() >= self.max_len {
            self.recent.pop_front();
        }
        self.recent.push_back(ReadoutEntry { token_id, readout });
    }

    /// Whether the server advertises readout (a manifest was fetched).
    pub fn is_enabled(&self) -> bool {
        self.manifest.is_some()
    }
}

// Bug fix: the previous `#[derive(Default)]` produced `max_len: 0`,
// which made a Default-constructed buffer silently retain at most ONE
// entry (every push evicts before inserting) — inconsistent with
// `new()`'s ring of DEFAULT_RING_LEN. Delegate so the two agree.
impl Default for ReadoutBuffer {
    fn default() -> Self {
        Self::new()
    }
}
/// Thread-safe handle to a [`ReadoutBuffer`], shared between the
/// streaming token handler (writer) and UI screens (readers).
pub type SharedReadoutBuffer = Arc<Mutex<ReadoutBuffer>>;

/// Build a fresh, empty shared readout buffer.
pub fn new_shared() -> SharedReadoutBuffer {
    let buffer = ReadoutBuffer::new();
    Arc::new(Mutex::new(buffer))
}