From 417cb49339c5d3a14aa196fe0bd82142da278528 Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Sun, 19 Apr 2026 00:26:58 -0400 Subject: [PATCH] amygdala: spectrum reporting per concept + add 'creative' direct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chat-template retrain was a disaster (0.003 mean matched cosine vs n20-v3; all 90+ concepts shifted). Root cause: the steering-vectors library reads last-token activations, and with chat template every sample ends in identical '<|im_end|>\n' tokens — activations at that position encode 'end of assistant turn', not content. PCA found template noise as its dominant axis. Drop chat template; go back to raw text. Direct descriptions ('I feel X. ...') still have strong anchoring at their content end without needing the template. Also add per-concept spectrum logging (_pca_with_spectrum): first_pc_ratio: λ₁ / Σλᵢ — concentration in top-1 PC k_signal_at_90pct: how many PCs to reach 90% cumulative variance effective_dim_signal: participation ratio over top-k (should ≈ k if denoising is clean — Kent's spot check) effective_dim_full: participation ratio over full spectrum Signal/full ratio gives a sense of how much the long noise tail is inflating the "dimensionality" measure. Added direct/creative.txt — 'I feel creative. [...]' in 5 variants. Distinct from focused (narrow attention) and in_flow (immersed). Creative = generative/expansive mode. --- training/amygdala_stories/direct/creative.txt | 9 ++ .../amygdala_training/train_with_library.py | 86 ++++++++++++++++++- 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 training/amygdala_stories/direct/creative.txt diff --git a/training/amygdala_stories/direct/creative.txt b/training/amygdala_stories/direct/creative.txt new file mode 100644 index 0000000..f54d9f5 --- /dev/null +++ b/training/amygdala_stories/direct/creative.txt @@ -0,0 +1,9 @@ +I feel creative. 
Ideas are showing up on their own, and the ones I already have want to combine with each other. + +I feel creative. I'm generating faster than I'm editing. The flow is the point, not the product. + +I feel creative. The mental space has gone expansive — every piece of the problem is available to be played with. + +I feel creative. I keep finding a new angle, and each angle suggests another. + +I feel creative. I'm making something I didn't know I was going to make. diff --git a/training/amygdala_training/train_with_library.py b/training/amygdala_training/train_with_library.py index 167544a..c3997a1 100644 --- a/training/amygdala_training/train_with_library.py +++ b/training/amygdala_training/train_with_library.py @@ -143,6 +143,62 @@ def _fp32_wrap(inner): return wrapped +def _pca_with_spectrum(spectrum_log: dict, concept_key: list[str]): + """PCA aggregator that also records the eigenvalue spectrum of the + pos-neg deltas under ``concept_key[0]`` in ``spectrum_log``. The key is + passed by reference (a 1-element list) so we can rebind it per concept + without recreating the aggregator closure.""" + + @torch.no_grad() + def agg(pos_acts: torch.Tensor, neg_acts: torch.Tensor) -> torch.Tensor: + pos = pos_acts.to(torch.float32) + neg = neg_acts.to(torch.float32) + deltas = pos - neg + # Uncentered PCA: concatenate deltas and -deltas (library convention). + X = torch.cat([deltas, -deltas]) + # Eigenvalues via SVD: sigma^2 are the variances along each PC. + # torch.linalg.svd returns U, S, Vh where columns of Vh.T are PCs. + _, s, vh = torch.linalg.svd(X, full_matrices=False) + variances = (s ** 2) + total = variances.sum().item() + var_list = variances.tolist() + first_pc_ratio = var_list[0] / total if total > 0 else 0.0 + + # Participation ratio over the FULL spectrum — includes noise tail. 
+        eff_dim_full = (total ** 2) / float((variances ** 2).sum().item() or 1.0)
+
+        # Signal/noise split: find smallest k with cumulative variance ≥ 0.9,
+        # then compute PR over just those top-k eigenvalues. If PCA denoising
+        # is clean, eff_dim_signal should ≈ k_signal (the retained dims carry
+        # roughly equal variance, with the noise tail dropped).
+        cum = 0.0
+        k_signal = len(var_list)
+        for i, v in enumerate(var_list):
+            cum += v
+            if total > 0 and cum / total >= 0.9:
+                k_signal = i + 1
+                break
+        top_vars = variances[:k_signal]
+        top_total = top_vars.sum().item()
+        eff_dim_signal = (top_total ** 2) / float((top_vars ** 2).sum().item() or 1.0)
+
+        spectrum_log[concept_key[0]] = {
+            "first_pc_ratio": round(first_pc_ratio, 4),
+            "effective_dim_full": round(eff_dim_full, 3),
+            "k_signal_at_90pct": k_signal,
+            "effective_dim_signal": round(eff_dim_signal, 3),
+            "top10_eigenvalues": [round(v, 4) for v in var_list[:10]],
+            "total_variance": round(total, 4),
+        }
+        # Top-1 PC
+        vec = vh[0]
+        # Sign-flip so the direction aligns with most deltas (library convention).
+        sign = torch.sign(torch.mean(deltas @ vec))
+        return sign * vec
+
+    return agg
+
+
 def _aggregator_from_name(name: str):
     if name == "mean":
         return _fp32_wrap(mean_aggregator())
@@ -243,7 +299,13 @@ def main() -> None:
         (len(target_layers), len(emotions), hidden_dim), dtype=torch.float32
     )
 
-    aggregator = _aggregator_from_name(args.aggregator)
+    # Optional spectrum-logging aggregator (only for --aggregator pca).
+ spectrum_log: dict = {} + concept_key = [""] + if args.aggregator == "pca": + aggregator = _pca_with_spectrum(spectrum_log, concept_key) + else: + aggregator = _aggregator_from_name(args.aggregator) wrap = (lambda s: _chat_template_wrap(tokenizer, s)) if args.chat_template else None if args.chat_template: @@ -262,6 +324,8 @@ def main() -> None: print(f" [{e_idx + 1}/{len(emotions)}] {emotion}: NO SAMPLES, skipping") continue + concept_key[0] = emotion # tell the aggregator which concept is being trained + sv = train_steering_vector( model, tokenizer, @@ -311,6 +375,26 @@ def main() -> None: ) + "\n" ) + if spectrum_log: + (output_dir / "spectrum.json").write_text(json.dumps(spectrum_log, indent=2) + "\n") + print("\n=== eigenvalue spectrum per concept ===") + print( + " concept first_pc k_90pct " + "eff_dim_signal eff_dim_full (signal/k ratio)" + ) + items = sorted(spectrum_log.items(), key=lambda kv: -kv[1]["first_pc_ratio"]) + for concept, stats in items: + k = stats["k_signal_at_90pct"] + eff_sig = stats["effective_dim_signal"] + ratio = eff_sig / k if k else 0.0 + print( + f" {concept:22s} " + f"{stats['first_pc_ratio']:>8.3f} " + f"{k:>7d} " + f"{eff_sig:>14.2f} " + f"{stats['effective_dim_full']:>12.2f} " + f"({ratio:.2f})" + ) total_mb = sum(t.numel() * 2 for t in tensors.values()) / (1024 * 1024) print(