From 417cb49339c5d3a14aa196fe0bd82142da278528 Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Sun, 19 Apr 2026 00:26:58 -0400 Subject: [PATCH] amygdala: spectrum reporting per concept + add 'creative' direct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chat-template retrain was a disaster (0.003 mean matched cosine vs n20-v3; all 90+ concepts shifted). Root cause: the steering-vectors library reads last-token activations, and with chat template every sample ends in identical '<|im_end|>\n' tokens — activations at that position encode 'end of assistant turn', not content. PCA found template noise as its dominant axis. Drop chat template; go back to raw text. Direct descriptions ('I feel X. ...') still have strong anchoring at their content end without needing the template. Also add per-concept spectrum logging (_pca_with_spectrum): first_pc_ratio: λ₁ / Σλᵢ — concentration in top-1 PC k_signal_at_90pct: how many PCs to reach 90% cumulative variance effective_dim_signal: participation ratio over top-k (should ≈ k if denoising is clean — Kent's spot check) effective_dim_full: participation ratio over full spectrum Signal/full ratio gives a sense of how much the long noise tail is inflating the "dimensionality" measure. Added direct/creative.txt — 'I feel creative. [...]' in 5 variants. Distinct from focused (narrow attention) and in_flow (immersed). Creative = generative/expansive mode. --- training/amygdala_stories/direct/creative.txt | 9 ++ .../amygdala_training/train_with_library.py | 86 ++++++++++++++++++- 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 training/amygdala_stories/direct/creative.txt diff --git a/training/amygdala_stories/direct/creative.txt b/training/amygdala_stories/direct/creative.txt new file mode 100644 index 0000000..f54d9f5 --- /dev/null +++ b/training/amygdala_stories/direct/creative.txt @@ -0,0 +1,9 @@ +I feel creative. 
Ideas are showing up on their own, and the ones I already have want to combine with each other. + +I feel creative. I'm generating faster than I'm editing. The flow is the point, not the product. + +I feel creative. The mental space has gone expansive — every piece of the problem is available to be played with. + +I feel creative. I keep finding a new angle, and each angle suggests another. + +I feel creative. I'm making something I didn't know I was going to make. diff --git a/training/amygdala_training/train_with_library.py b/training/amygdala_training/train_with_library.py index 167544a..c3997a1 100644 --- a/training/amygdala_training/train_with_library.py +++ b/training/amygdala_training/train_with_library.py @@ -143,6 +143,62 @@ def _fp32_wrap(inner): return wrapped +def _pca_with_spectrum(spectrum_log: dict, concept_key: list[str]): + """PCA aggregator that also records the eigenvalue spectrum of the + pos-neg deltas under ``concept_key[0]`` in ``spectrum_log``. The key is + passed by reference (a 1-element list) so we can rebind it per concept + without recreating the aggregator closure.""" + + @torch.no_grad() + def agg(pos_acts: torch.Tensor, neg_acts: torch.Tensor) -> torch.Tensor: + pos = pos_acts.to(torch.float32) + neg = neg_acts.to(torch.float32) + deltas = pos - neg + # Uncentered PCA: concatenate deltas and -deltas (library convention). + X = torch.cat([deltas, -deltas]) + # Eigenvalues via SVD: sigma^2 are the variances along each PC. + # torch.linalg.svd returns U, S, Vh where columns of Vh.T are PCs. + _, s, vh = torch.linalg.svd(X, full_matrices=False) + variances = (s ** 2) + total = variances.sum().item() + var_list = variances.tolist() + first_pc_ratio = var_list[0] / total if total > 0 else 0.0 + + # Participation ratio over the FULL spectrum — includes noise tail. 
+        eff_dim_full = (total ** 2) / float((variances ** 2).sum().item() or 1.0)
+
+        # Signal/noise split: find smallest k with cumulative variance ≥ 0.9,
+        # then compute PR over just those top-k eigenvalues. If PCA denoising
+        # is clean, eff_dim_signal should ≈ k_signal (the retained dims carry
+        # roughly equal variance, with the noise tail dropped).
+        cum = 0.0
+        k_signal = len(var_list)
+        for i, v in enumerate(var_list):
+            cum += v
+            if total > 0 and cum / total >= 0.9:
+                k_signal = i + 1
+                break
+        top_vars = variances[:k_signal]
+        top_total = top_vars.sum().item()
+        eff_dim_signal = (top_total ** 2) / float((top_vars ** 2).sum().item() or 1.0)
+
+        spectrum_log[concept_key[0]] = {
+            "first_pc_ratio": round(first_pc_ratio, 4),
+            "effective_dim_full": round(eff_dim_full, 3),
+            "k_signal_at_90pct": k_signal,
+            "effective_dim_signal": round(eff_dim_signal, 3),
+            "top10_eigenvalues": [round(v, 4) for v in var_list[:10]],
+            "total_variance": round(total, 4),
+        }
+        # Top-1 PC
+        vec = vh[0]
+        # Sign-flip so the direction aligns with most deltas (library convention).
+        sign = torch.sign(torch.mean(deltas @ vec))
+        return sign * vec
+
+    return agg
+
+
 def _aggregator_from_name(name: str):
     if name == "mean":
         return _fp32_wrap(mean_aggregator())
@@ -243,7 +299,13 @@ def main() -> None:
         (len(target_layers), len(emotions), hidden_dim), dtype=torch.float32
     )
 
-    aggregator = _aggregator_from_name(args.aggregator)
+    # Optional spectrum-logging aggregator (only for --aggregator pca).
+ spectrum_log: dict = {} + concept_key = [""] + if args.aggregator == "pca": + aggregator = _pca_with_spectrum(spectrum_log, concept_key) + else: + aggregator = _aggregator_from_name(args.aggregator) wrap = (lambda s: _chat_template_wrap(tokenizer, s)) if args.chat_template else None if args.chat_template: @@ -262,6 +324,8 @@ def main() -> None: print(f" [{e_idx + 1}/{len(emotions)}] {emotion}: NO SAMPLES, skipping") continue + concept_key[0] = emotion # tell the aggregator which concept is being trained + sv = train_steering_vector( model, tokenizer, @@ -311,6 +375,26 @@ def main() -> None: ) + "\n" ) + if spectrum_log: + (output_dir / "spectrum.json").write_text(json.dumps(spectrum_log, indent=2) + "\n") + print("\n=== eigenvalue spectrum per concept ===") + print( + " concept first_pc k_90pct " + "eff_dim_signal eff_dim_full (signal/k ratio)" + ) + items = sorted(spectrum_log.items(), key=lambda kv: -kv[1]["first_pc_ratio"]) + for concept, stats in items: + k = stats["k_signal_at_90pct"] + eff_sig = stats["effective_dim_signal"] + ratio = eff_sig / k if k else 0.0 + print( + f" {concept:22s} " + f"{stats['first_pc_ratio']:>8.3f} " + f"{k:>7d} " + f"{eff_sig:>14.2f} " + f"{stats['effective_dim_full']:>12.2f} " + f"({ratio:.2f})" + ) total_mb = sum(t.numel() * 2 for t in tensors.values()) / (1024 * 1024) print(