From f4fb6db1eece9f7a3e1655fbeda3e205c2c2db52 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 18 Apr 2026 20:52:50 -0400
Subject: [PATCH] amygdala: fix device mismatch in quality-report W_down
 handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_compute_quality_report's single-neuron alignment was computing
cos(W_down.T, diff_l) with W_down on CUDA (inherited from the loaded
model) while diff_l lives on CPU (per_layer_vectors are kept on CPU
throughout training). Move W_down to CPU on extraction.

Surfaced during the first real training run on b200 — training itself
completed cleanly (95 concepts x layer 63 in ~8s), but quality-report
crashed at the first single-neuron alignment check.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
---
 training/amygdala_training/train_steering_vectors.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/training/amygdala_training/train_steering_vectors.py b/training/amygdala_training/train_steering_vectors.py
index 5253186..33244c8 100644
--- a/training/amygdala_training/train_steering_vectors.py
+++ b/training/amygdala_training/train_steering_vectors.py
@@ -464,13 +464,14 @@ def _compute_quality_report(
     report: dict = {}
     n_layers = per_layer_vectors.shape[0]
 
-    # Pre-compute per-layer W_down for single-neuron alignment.
+    # Pre-compute per-layer W_down for single-neuron alignment. Keep on
+    # CPU to match the per_layer_vectors tensor.
     w_down: dict[int, torch.Tensor] = {}
     for target_l in target_layers:
         w = _find_mlp_down_proj(model, target_l)
         if w is not None:
             # Unit-normalize each column (one per MLP neuron).
-            w = w.to(torch.float32)
+            w = w.to(torch.float32).cpu()
             norms = w.norm(dim=0, keepdim=True).clamp_min(1e-6)
             w_down[target_l] = w / norms  # [hidden, mlp_inner]