From 24119257006ca36bc5b17bf3b70ba7ca700183ab Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sat, 18 Apr 2026 21:41:32 -0400
Subject: [PATCH] amygdala: default subspace-k to full per-story rank
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kent: 'we have the memory to just take the big hammer approach'. Uncap
k so each story's V_i spans its entire token-activation rowspace
(clamped to min(n_tokens, hidden)). Memory is ~1.1GB total — fine.

Co-Authored-By: Proof of Concept
---
 training/amygdala_training/train_steering_vectors.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/training/amygdala_training/train_steering_vectors.py b/training/amygdala_training/train_steering_vectors.py
index 54603ab..353ebb0 100644
--- a/training/amygdala_training/train_steering_vectors.py
+++ b/training/amygdala_training/train_steering_vectors.py
@@ -850,12 +850,13 @@ def main() -> None:
     ap.add_argument(
         "--subspace-k",
         type=int,
-        default=512,
+        default=99999,
         help="Max top-k right singular vectors per story for subspace method "
-        "(clamped to n_tokens per story). Default 512 is enough to span "
-        "each story's full natural subspace including per-attention-head "
-        "contributions on a hidden_dim=5120 residual stream. Smaller "
-        "values (e.g. 20) discard per-head discriminability.",
+        "(clamped to min(n_tokens, hidden_dim) per story). Default is "
+        "effectively 'keep full per-story subspace' — each story's V_i "
+        "spans its entire natural row space. On a hidden_dim=5120 "
+        "residual and ~500-token stories, that's ~500 vectors per story. "
+        "Memory is fine: 112 × 5120 × 500 × 4 bytes ≈ 1.1 GB.",
     )
     ap.add_argument(
         "--quality-report",