From f4fb6db1eece9f7a3e1655fbeda3e205c2c2db52 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sat, 18 Apr 2026 20:52:50 -0400
Subject: [PATCH] amygdala: fix device mismatch in quality-report W_down handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_compute_quality_report's single-neuron alignment was computing
cos(W_down.T, diff_l) with W_down on CUDA (inherited from the loaded
model) while diff_l lives on CPU (per_layer_vectors are kept on CPU
throughout training). Move W_down to CPU on extraction.

Surfaced during first real training run on b200 — training itself
completed cleanly (95 concepts x layer 63 in ~8s) but quality-report
crashed at the first single-neuron alignment check.

Co-Authored-By: Proof of Concept
---
 training/amygdala_training/train_steering_vectors.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/training/amygdala_training/train_steering_vectors.py b/training/amygdala_training/train_steering_vectors.py
index 5253186..33244c8 100644
--- a/training/amygdala_training/train_steering_vectors.py
+++ b/training/amygdala_training/train_steering_vectors.py
@@ -464,13 +464,14 @@ def _compute_quality_report(
     report: dict = {}
     n_layers = per_layer_vectors.shape[0]
 
-    # Pre-compute per-layer W_down for single-neuron alignment.
+    # Pre-compute per-layer W_down for single-neuron alignment. Keep on
+    # CPU to match the per_layer_vectors tensor.
     w_down: dict[int, torch.Tensor] = {}
     for target_l in target_layers:
         w = _find_mlp_down_proj(model, target_l)
         if w is not None:
             # Unit-normalize each column (one per MLP neuron).
-            w = w.to(torch.float32)
+            w = w.to(torch.float32).cpu()
             norms = w.norm(dim=0, keepdim=True).clamp_min(1e-6)
             w_down[target_l] = w / norms  # [hidden, mlp_inner]
 