amygdala: F8 screen for live concept-readout projections

Per-token residual-stream projections from the vLLM server's readout
pipeline surfaced as a TUI bar chart. Flow:

* agent/readout.rs — SharedReadoutBuffer (manifest + ring of last ~200
  token entries). Lives on Agent and is shared across forks (single
  stream, one landing pad).
* agent/mod.rs — Agent::new now probes /v1/readout/manifest at startup
  (non-fatal; 404 leaves manifest None, which disables the screen).
* agent/context.rs — the streaming token handler pushes every token
  with attached readout onto the shared buffer.
* user/amygdala.rs — F8 screen. Top-K concepts by |value| as
  horizontal bars (green positive, red negative), plus a 4-line
  recent-tokens panel showing each token's top concept at the selected
  layer. Keys: 1..9 select layer, t toggles current/mean-over-recent.

Disabled state renders a hint pointing at VLLM_READOUT_MANIFEST /
VLLM_READOUT_VECTORS so users can tell the feature apart from
"server up but no tokens yet".

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-18 01:20:30 -04:00
parent 0f1c4cf1de
commit c8976660f4
5 changed files with 410 additions and 2 deletions

View file

@ -682,7 +682,12 @@ impl ResponseParser {
let mut full_text = String::new(); let mut full_text = String::new();
while let Some(event) = stream.recv().await { while let Some(event) = stream.recv().await {
match event { match event {
super::api::StreamToken::Token { id, readout: _ } => { super::api::StreamToken::Token { id, readout } => {
if let Some(r) = readout {
if let Ok(mut buf) = agent.readout.lock() {
buf.push(id, r);
}
}
let text = super::tokenizer::decode(&[id]); let text = super::tokenizer::decode(&[id]);
full_text.push_str(&text); full_text.push_str(&text);
let mut ctx = agent.context.lock().await; let mut ctx = agent.context.lock().await;

View file

@ -16,6 +16,7 @@
pub mod api; pub mod api;
pub mod context; pub mod context;
pub mod oneshot; pub mod oneshot;
pub mod readout;
pub mod tokenizer; pub mod tokenizer;
pub mod tools; pub mod tools;
@ -142,6 +143,11 @@ pub struct Agent {
pub session_id: String, pub session_id: String,
pub context: crate::Mutex<ContextState>, pub context: crate::Mutex<ContextState>,
pub state: crate::Mutex<AgentState>, pub state: crate::Mutex<AgentState>,
/// Shared landing pad for per-token concept-readout projections
/// streamed from the vLLM server. Populated by the streaming
/// token handler, read by UI screens (amygdala). Manifest is
/// `None` when the server has readout disabled.
pub readout: readout::SharedReadoutBuffer,
} }
/// Mutable agent state — behind its own mutex. /// Mutable agent state — behind its own mutex.
@ -214,11 +220,13 @@ impl Agent {
} }
let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S")); let session_id = format!("consciousness-{}", chrono::Utc::now().format("%Y%m%d-%H%M%S"));
let readout = readout::new_shared();
let agent = Arc::new(Self { let agent = Arc::new(Self {
client, client,
app_config, app_config,
session_id, session_id,
context: crate::Mutex::new(context), context: crate::Mutex::new(context),
readout,
state: crate::Mutex::new(AgentState { state: crate::Mutex::new(AgentState {
tools: agent_tools, tools: agent_tools,
mcp_tools: McpToolAccess::All, mcp_tools: McpToolAccess::All,
@ -244,6 +252,32 @@ impl Agent {
}); });
agent.load_startup_journal().await; agent.load_startup_journal().await;
// Probe the vLLM server for its readout manifest. Non-fatal:
// if readout isn't enabled the server returns 404 and we
// leave the manifest as None, which disables the amygdala
// screen gracefully.
match agent.client.fetch_readout_manifest().await {
Ok(Some(m)) => {
dbglog!(
"readout manifest: {} concepts, layers={:?}",
m.concepts.len(),
m.layers,
);
if let Ok(mut buf) = agent.readout.lock() {
buf.set_manifest(Some(m));
}
}
Ok(None) => {
dbglog!(
"readout manifest: server has readout disabled (404)"
);
}
Err(e) => {
dbglog!("readout manifest fetch failed: {}", e);
}
}
agent agent
} }
@ -256,6 +290,10 @@ impl Agent {
app_config: self.app_config.clone(), app_config: self.app_config.clone(),
session_id: self.session_id.clone(), session_id: self.session_id.clone(),
context: crate::Mutex::new(ctx), context: crate::Mutex::new(ctx),
// Forks share the parent's readout buffer — it's a
// single-stream phenomenon; the fork is driven by the
// same vLLM server's responses.
readout: self.readout.clone(),
state: crate::Mutex::new(AgentState { state: crate::Mutex::new(AgentState {
tools, tools,
mcp_tools: McpToolAccess::None, mcp_tools: McpToolAccess::None,

75
src/agent/readout.rs Normal file
View file

@ -0,0 +1,75 @@
// agent/readout.rs — live buffer of concept-readout projections.
//
// The vLLM server projects residual-stream activations onto a fixed
// matrix of concept directions during each decode step and ships the
// result back on every streamed chunk (see
// vllm/docs/features/readout.md). This module owns the client-side
// landing pad: a ring of the last N token projections plus the
// concept/layer mapping fetched from `/v1/readout/manifest` at
// startup.
//
// Readers (UI screens) lock briefly, read a snapshot, release. Writers
// (the streaming token handler) push one entry per token. Intentionally
// a simple Mutex<VecDeque> rather than lock-free — the UI ticks at
// ~15 Hz and the stream at token-rate, contention is nil.
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};
use super::api::{ReadoutManifest, TokenReadout};
/// Default ring length — at ~30 tok/s this is ~6 seconds of history,
/// enough for the amygdala screen's scrolling display. Used by
/// `ReadoutBuffer::new` for both the initial capacity and `max_len`.
const DEFAULT_RING_LEN: usize = 200;
/// One entry in the readout ring: the sampled token and its per-layer
/// concept projection vector.
#[derive(Debug, Clone)]
pub struct ReadoutEntry {
    /// Token id pushed by the streaming token handler alongside the
    /// readout attached to that streamed chunk.
    pub token_id: u32,
    /// Shape `[n_layers][n_concepts]`.
    pub readout: TokenReadout,
}
/// Shared buffer of recent per-token concept projections plus the
/// manifest that names the layer/concept indices. `manifest` is `None`
/// when the server has readout disabled or the fetch failed — callers
/// should treat that as "readout unavailable" and skip rendering.
pub struct ReadoutBuffer {
    /// Concept/layer naming fetched from `/v1/readout/manifest`;
    /// `None` means readout is unavailable.
    pub manifest: Option<ReadoutManifest>,
    /// Ring of the most recent token projections, oldest first.
    pub recent: VecDeque<ReadoutEntry>,
    /// Maximum ring length; `push` evicts the oldest entries past it.
    pub max_len: usize,
}

impl Default for ReadoutBuffer {
    /// Delegate to `new()`. A `#[derive(Default)]` would set `max_len`
    /// to 0 — a degenerate one-entry ring — leaving `default()` and
    /// `new()` with different ring lengths.
    fn default() -> Self {
        Self::new()
    }
}

impl ReadoutBuffer {
    /// Empty buffer with the default ring length and no manifest.
    pub fn new() -> Self {
        Self {
            manifest: None,
            recent: VecDeque::with_capacity(DEFAULT_RING_LEN),
            max_len: DEFAULT_RING_LEN,
        }
    }

    /// Install (or clear) the concept/layer manifest.
    pub fn set_manifest(&mut self, manifest: Option<ReadoutManifest>) {
        self.manifest = manifest;
    }

    /// Append one token's projection, evicting the oldest entries so
    /// the ring never exceeds `max_len`. A `while` (not `if`) keeps
    /// the invariant even if `max_len` was shrunk since the last push.
    pub fn push(&mut self, token_id: u32, readout: TokenReadout) {
        while self.recent.len() >= self.max_len.max(1) {
            self.recent.pop_front();
        }
        self.recent.push_back(ReadoutEntry { token_id, readout });
    }

    /// True when a manifest is present, i.e. the server has readout
    /// enabled and screens can render projections.
    pub fn is_enabled(&self) -> bool {
        self.manifest.is_some()
    }
}
/// A thread-safe handle.
pub type SharedReadoutBuffer = Arc<Mutex<ReadoutBuffer>>;

/// Build a fresh, empty buffer already wrapped for sharing between
/// the streaming token handler and the UI screens.
pub fn new_shared() -> SharedReadoutBuffer {
    let buffer = ReadoutBuffer::new();
    Arc::new(Mutex::new(buffer))
}

288
src/user/amygdala.rs Normal file
View file

@ -0,0 +1,288 @@
// amygdala.rs — F8 amygdala screen: live per-token concept-readout
// projections from the vLLM server's readout.safetensors.
//
// Left panel: top-K concepts by magnitude at the currently-selected
// layer, as horizontal bars. The concept names come from the manifest
// fetched at agent startup; the values come from the per-token readout
// pushed onto agent.readout by the streaming token handler.
//
// Bottom: scrolling history of the last few tokens' top concept.
//
// Keys:
// 1..9 select layer index (1 = first layer in the manifest)
// t toggle between "current" (last token) and "mean over recent"
use ratatui::{
layout::{Constraint, Direction, Layout, Rect},
style::{Color, Modifier, Style},
text::{Line, Span},
widgets::{Block, Borders, Gauge, Paragraph, Wrap},
Frame,
};
use ratatui::crossterm::event::{Event, KeyCode};
use super::{App, ScreenView};
use crate::agent::api::ReadoutManifest;
use crate::agent::readout::ReadoutEntry;
// Number of concept bars shown in the "top concepts" panel.
const TOP_K: usize = 20;

// State for the F8 amygdala screen: which manifest layer is selected
// and whether the bars show the last token or a mean over the ring.
pub(crate) struct AmygdalaScreen {
    // Index into the manifest's `layers` list (0-based; keys 1..9
    // select, and out-of-range selections reset to 0 on render).
    selected_layer: usize,
    // Current display mode; toggled with the `t` key.
    mode: DisplayMode,
}

#[derive(Clone, Copy, PartialEq)]
enum DisplayMode {
    /// Values from the single most recent token.
    Current,
    /// Mean over all tokens currently in the ring buffer.
    MeanRecent,
}
impl AmygdalaScreen {
pub fn new() -> Self {
Self {
selected_layer: 0,
mode: DisplayMode::Current,
}
}
}
impl ScreenView for AmygdalaScreen {
    fn label(&self) -> &'static str { "amygdala" }

    /// Render one frame: apply key events, snapshot the shared readout
    /// buffer under a short lock, then draw header / bars / recent.
    fn tick(&mut self, frame: &mut Frame, area: Rect,
            events: &[Event], app: &mut App) {
        // Key handling: 1..9 selects a layer, t toggles the mode.
        for event in events {
            if let Event::Key(key) = event {
                match key.code {
                    KeyCode::Char(c) if c.is_ascii_digit() && c != '0' => {
                        // '1' maps to layer index 0; an out-of-range
                        // selection is clamped below once the manifest
                        // is known.
                        self.selected_layer = (c as u8 - b'1') as usize;
                    }
                    KeyCode::Char('t') => {
                        self.mode = match self.mode {
                            DisplayMode::Current => DisplayMode::MeanRecent,
                            DisplayMode::MeanRecent => DisplayMode::Current,
                        };
                    }
                    _ => {}
                }
            }
        }

        // Snapshot the shared buffer with a short lock. Pattern-match
        // the manifest instead of `is_enabled()` + `unwrap()`, and
        // drop the guard before any rendering happens (the original
        // rendered the disabled view while still holding the mutex).
        // A poisoned lock is treated the same as "readout disabled".
        let snapshot = match app.agent.readout.lock() {
            Ok(buf) => buf.manifest.clone().map(|manifest| {
                let entries: Vec<ReadoutEntry> =
                    buf.recent.iter().cloned().collect();
                (manifest, entries)
            }),
            Err(_) => None,
        };
        let Some((manifest, entries)) = snapshot else {
            render_disabled(frame, area);
            return;
        };

        // Bound the selected layer to what the manifest actually has.
        if self.selected_layer >= manifest.layers.len() {
            self.selected_layer = 0;
        }

        // Compute the values to display: either the latest token's row
        // for the selected layer, or the mean across recent tokens.
        let values: Option<Vec<f32>> = match self.mode {
            DisplayMode::Current => entries
                .last()
                .and_then(|e| e.readout.get(self.selected_layer).cloned()),
            DisplayMode::MeanRecent => mean_layer(&entries, self.selected_layer),
        };

        let layout = Layout::default()
            .direction(Direction::Vertical)
            .constraints([
                Constraint::Length(3), // header
                Constraint::Min(10),   // bars
                Constraint::Length(6), // recent tokens
            ])
            .split(area);

        render_header(frame, layout[0], &manifest, self.selected_layer,
                      self.mode, entries.len());
        match values {
            Some(v) => render_bars(frame, layout[1], &manifest.concepts, &v),
            None => render_empty_bars(frame, layout[1]),
        }
        render_recent(frame, layout[2], &entries, self.selected_layer,
                      &manifest.concepts);
    }
}
/// Fallback view when no manifest is available: tells the user how to
/// enable readout on the server side, so "feature off" is
/// distinguishable from "server up but no tokens yet".
fn render_disabled(frame: &mut Frame, area: Rect) {
    let dim = Style::default().fg(Color::DarkGray);
    let env_var = Style::default().fg(Color::Yellow);
    let line = Line::from(vec![
        Span::raw("readout disabled — server did not return a manifest. "),
        Span::styled("Start vLLM with ", dim),
        Span::styled("VLLM_READOUT_MANIFEST", env_var),
        Span::styled(" + ", dim),
        Span::styled("VLLM_READOUT_VECTORS", env_var),
        Span::styled(".", dim),
    ]);
    let widget = Paragraph::new(line)
        .wrap(Wrap { trim: true })
        .block(Block::default().borders(Borders::ALL).title("amygdala"));
    frame.render_widget(widget, area);
}
fn render_header(frame: &mut Frame, area: Rect, manifest: &ReadoutManifest,
selected: usize, mode: DisplayMode, n_tokens: usize) {
let mode_str = match mode {
DisplayMode::Current => "current",
DisplayMode::MeanRecent => "mean(recent)",
};
let layer = manifest.layers.get(selected).copied().unwrap_or(0);
let mut spans = vec![
Span::styled("layer ", Style::default().fg(Color::DarkGray)),
Span::styled(
format!("{}/{} ", selected + 1, manifest.layers.len()),
Style::default().add_modifier(Modifier::BOLD),
),
Span::styled("(index ", Style::default().fg(Color::DarkGray)),
Span::styled(format!("{}", layer), Style::default().fg(Color::Cyan)),
Span::styled(") ", Style::default().fg(Color::DarkGray)),
Span::styled("mode ", Style::default().fg(Color::DarkGray)),
Span::styled(mode_str, Style::default().fg(Color::Yellow)),
Span::styled(" ", Style::default()),
Span::styled(
format!("{} toks in ring", n_tokens),
Style::default().fg(Color::DarkGray),
),
];
spans.push(Span::raw(" "));
spans.push(Span::styled(
format!("[1-{}] layer [t] toggle mode", manifest.layers.len().min(9)),
Style::default().fg(Color::DarkGray),
));
let para = Paragraph::new(Line::from(spans))
.block(Block::default().borders(Borders::ALL).title("amygdala"));
frame.render_widget(para, area);
}
/// Horizontal bar chart of the top-K concepts by |value|: green for
/// positive, red for negative, bar length normalized to the largest
/// |value| on screen so bars are comparable across concepts.
fn render_bars(frame: &mut Frame, area: Rect,
               concepts: &[String], values: &[f32]) {
    // Rank concept indices by |value| descending. `total_cmp` gives a
    // total order, so a NaN in the projections can no longer scramble
    // the sort (the old `partial_cmp(..).unwrap_or(Equal)` could).
    let mut indexed: Vec<(usize, f32)> =
        values.iter().copied().enumerate().collect();
    indexed.sort_by(|a, b| b.1.abs().total_cmp(&a.1.abs()));
    indexed.truncate(TOP_K.min(concepts.len()));

    let inner = Block::default().borders(Borders::ALL)
        .title("top concepts");
    let inner_area = inner.inner(area);
    frame.render_widget(inner, area);
    if inner_area.height == 0 || indexed.is_empty() {
        return;
    }

    // Max absolute value for normalization; the floor avoids a
    // divide-by-zero on an all-zero row.
    let max_abs = indexed.iter().map(|(_, v)| v.abs())
        .fold(0.0_f32, f32::max)
        .max(1e-6);

    // One terminal row per bar, capped by available height.
    let rows = (inner_area.height as usize).min(indexed.len());
    let chunks = Layout::default()
        .direction(Direction::Vertical)
        .constraints(vec![Constraint::Length(1); rows])
        .split(inner_area);

    for (i, (c_idx, v)) in indexed.iter().take(rows).enumerate() {
        let label = concepts.get(*c_idx).cloned()
            .unwrap_or_else(|| format!("c{}", c_idx));
        let ratio = (v.abs() / max_abs).clamp(0.0, 1.0);
        let color = if *v >= 0.0 { Color::Green } else { Color::Red };
        let gauge = Gauge::default()
            .ratio(ratio as f64)
            .gauge_style(Style::default().fg(color).bg(Color::Reset))
            .label(format!("{:<26} {:+.3}", truncate_name(&label, 26), v));
        frame.render_widget(gauge, chunks[i]);
    }
}
/// Placeholder for the bars panel before any token has arrived.
fn render_empty_bars(frame: &mut Frame, area: Rect) {
    let message = Span::styled(
        "waiting for tokens…",
        Style::default().fg(Color::DarkGray),
    );
    let widget = Paragraph::new(Line::from(message))
        .block(Block::default().borders(Borders::ALL).title("top concepts"));
    frame.render_widget(widget, area);
}
/// Scrolling panel: for each of the last few tokens, the single
/// concept with the largest |value| at the selected layer.
fn render_recent(frame: &mut Frame, area: Rect, entries: &[ReadoutEntry],
                 layer: usize, concepts: &[String]) {
    let mut lines: Vec<Line> = Vec::new();
    for entry in entries.iter().rev().take(4) {
        let row = match entry.readout.get(layer) {
            Some(r) => r,
            None => continue,
        };
        // Top concept at this layer for this token; first occurrence
        // wins on ties (strictly-greater replacement). An empty row
        // yields None and is skipped — the old fold seeded (0, 0.0)
        // and would fabricate a bogus "c0 +0.000" line for it.
        let mut best: Option<(usize, f32)> = None;
        for (i, v) in row.iter().copied().enumerate() {
            let replace = match best {
                Some((_, bv)) => v.abs() > bv.abs(),
                None => true,
            };
            if replace {
                best = Some((i, v));
            }
        }
        let Some((best_idx, best_val)) = best else { continue };
        let name = concepts.get(best_idx).cloned()
            .unwrap_or_else(|| format!("c{}", best_idx));
        let tok_str = format!("t{:>5}", entry.token_id);
        lines.push(Line::from(vec![
            Span::styled(tok_str, Style::default().fg(Color::DarkGray)),
            Span::raw(" "),
            Span::styled(
                format!("{:<24}", truncate_name(&name, 24)),
                Style::default().fg(
                    if best_val >= 0.0 { Color::Green } else { Color::Red },
                ),
            ),
            Span::styled(
                format!(" {:+.3}", best_val),
                Style::default().add_modifier(Modifier::BOLD),
            ),
        ]));
    }
    let para = Paragraph::new(lines)
        .block(Block::default().borders(Borders::ALL).title("recent tokens — top concept"));
    frame.render_widget(para, area);
}
/// Mean projection vector over all ring entries at `layer`, or `None`
/// when no entry has a row for that layer. Output length follows the
/// first row.
fn mean_layer(entries: &[ReadoutEntry], layer: usize) -> Option<Vec<f32>> {
    let rows: Vec<&Vec<f32>> = entries.iter()
        .filter_map(|e| e.readout.get(layer))
        .collect();
    if rows.is_empty() {
        return None;
    }
    let mut acc = vec![0.0_f32; rows[0].len()];
    for r in &rows {
        // zip stops at the shorter side: a row longer than the first
        // (e.g. after a manifest change mid-ring) no longer panics
        // with an out-of-bounds `acc[i]`, and a shorter row simply
        // contributes nothing to the tail.
        for (slot, v) in acc.iter_mut().zip(r.iter()) {
            *slot += *v;
        }
    }
    let n = rows.len() as f32;
    for v in &mut acc {
        *v /= n;
    }
    Some(acc)
}
/// Truncate a concept name for display, counting characters rather
/// than bytes. The original byte-sliced with `&s[..max-1]`, which
/// panics when the cut lands inside a multi-byte UTF-8 character
/// (concept names come from the server manifest and need not be
/// ASCII). ASCII behavior is unchanged: names at or under `max`
/// pass through, longer ones keep the first `max - 1` characters.
fn truncate_name(s: &str, max: usize) -> String {
    if s.chars().count() <= max {
        s.to_string()
    } else {
        s.chars().take(max.saturating_sub(1)).collect()
    }
}

View file

@ -3,6 +3,7 @@
// TUI, UI channel, parsing. The cognitive layer (session state // TUI, UI channel, parsing. The cognitive layer (session state
// machine, DMN, identity) lives in mind/. // machine, DMN, identity) lives in mind/.
pub(crate) mod amygdala;
pub(crate) mod chat; pub(crate) mod chat;
pub(crate) mod compare; pub(crate) mod compare;
mod context; mod context;
@ -383,7 +384,7 @@ async fn run(
} }
let notify_rx = crate::thalamus::channels::subscribe_all(); let notify_rx = crate::thalamus::channels::subscribe_all();
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn, F7=compare // F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn, F7=compare, F8=amygdala
let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![ let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![
Box::new(crate::user::chat::InteractScreen::new( Box::new(crate::user::chat::InteractScreen::new(
mind.agent.clone(), mind.shared.clone(), mind_tx.clone(), mind.agent.clone(), mind.shared.clone(), mind_tx.clone(),
@ -394,6 +395,7 @@ async fn run(
Box::new(crate::user::thalamus::ThalamusScreen::new()), Box::new(crate::user::thalamus::ThalamusScreen::new()),
Box::new(crate::user::learn::LearnScreen::new(mind_tx.clone())), Box::new(crate::user::learn::LearnScreen::new(mind_tx.clone())),
Box::new(crate::user::compare::CompareScreen::new(mind_tx.clone())), Box::new(crate::user::compare::CompareScreen::new(mind_tx.clone())),
Box::new(crate::user::amygdala::AmygdalaScreen::new()),
]; ];
let mut active_screen: usize = 1; // F-key number let mut active_screen: usize = 1; // F-key number
tui::set_screen_legend(tui::screen_legend_from(&*screens)); tui::set_screen_legend(tui::screen_legend_from(&*screens));