consciousness/sa-schedule-gamma-directions.py

122 lines
4.4 KiB
Python
Raw Normal View History

"""Pull input_layernorm.γ vectors from a model and analyze direction
structure across layers.
Question: is γ just scalar magnitude (isotropic SA) or does each layer
have a preferred direction (anisotropic SA / geometry-aware)?
Decomposition: γ_L = ||γ_L|| · γ_L̂
- ||γ_L|| is what our scalar Kirkpatrick fit captured
- γ_L̂ is the unit direction.
If all layers share one direction, the stacked γ matrix is rank-1 up to
per-layer scaling (classical isotropic). If directions differ per layer, γ
encodes a per-layer preferred axis (anisotropic).
We also look at:
- pairwise cos-sim between γ_L̂ across layers
- principal components of [γ_L̂]_L (stacked matrix)
- per-phase structure: is Phase E more anisotropic than Phase C?
"""
import argparse
import numpy as np
import torch
from transformers import AutoModelForCausalLM
def main() -> None:
    """Extract per-layer input_layernorm γ vectors and report their directions.

    Command-line arguments:
        --model: model identifier passed to ``from_pretrained``
            (default ``Qwen/Qwen3-32B``).
        --out: path of the JSON summary to write
            (default ``/tmp/gamma-dirs.json``).

    Prints per-layer norms, a PCA of the unit-normalized γ vectors,
    per-phase cosine statistics and adjacent-layer cosine similarity, then
    writes norms, top-1 projections, explained variance and adjacent
    cosines to the ``--out`` file.

    Raises:
        ValueError: if any layer's γ vector has zero norm, since its unit
            direction is undefined.
    """
    # Function-scope import, hoisted so it runs before the expensive load.
    import json

    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="Qwen/Qwen3-32B")
    ap.add_argument("--out", default="/tmp/gamma-dirs.json")
    args = ap.parse_args()

    print(f"Loading {args.model} (CPU, layernorm params only)...", flush=True)
    # NOTE: the model object is built by from_pretrained in float32 on CPU;
    # only the input_layernorm weights are read from it below.
    # SECURITY: trust_remote_code=True allows code shipped in the model
    # repository to run during loading; only use --model values you trust.
    m = AutoModelForCausalLM.from_pretrained(
        args.model, torch_dtype=torch.float32, device_map="cpu",
        trust_remote_code=True,
    )
    num_layers = m.config.num_hidden_layers
    hidden = m.config.hidden_size
    print(f" L={num_layers}, hidden={hidden}", flush=True)

    gammas = np.stack([
        m.model.layers[L].input_layernorm.weight.detach().float().cpu().numpy()
        for L in range(num_layers)
    ])  # (L, hidden)
    del m  # drop the model before the analysis; only gammas is needed now

    norms = np.linalg.norm(gammas, axis=1)
    zero_layers = np.flatnonzero(norms == 0)
    if zero_layers.size:
        # Dividing by a zero norm would give NaN rows that poison the SVD
        # and end up as invalid-JSON NaN values in the output file.
        raise ValueError(
            "input_layernorm weight has zero norm at layer(s) "
            f"{zero_layers.tolist()}; unit direction is undefined"
        )
    units = gammas / norms[:, None]

    # Pairwise cos-sim of unit γ
    cos_mat = units @ units.T  # (L, L)

    # PCA on unit vectors
    centered = units - units.mean(axis=0, keepdims=True)
    _, S, Vt = np.linalg.svd(centered, full_matrices=False)
    total_var = (S**2).sum()
    # With no variance at all (e.g. a single layer) avoid 0/0 -> NaN.
    explained = S**2 / total_var if total_var > 0 else np.zeros_like(S)

    # How much of each γ_L unit is explained by top-1 direction (shared)?
    # NOTE(review): top1 comes from the mean-centered matrix but is projected
    # onto the uncentered unit vectors -- confirm this is the intended
    # "shared direction" measure.
    top1 = Vt[0]  # (hidden,)
    proj_top1 = units @ top1  # (L,)
    # max(..., 0) clamps tiny negative round-off before the sqrt.
    residual_after_top1 = np.sqrt(np.maximum(1 - proj_top1**2, 0))

    # Per-phase summary (Qwen3-32B boundaries). These layer cut-offs are
    # hard-coded, so any other --model gets the same labels regardless.
    phase_upper_bounds = ((6, "A"), (9, "B"), (31, "C"), (46, "D"), (58, "E"))

    def phase(L):
        """Return the phase label for layer index L."""
        for upper, label in phase_upper_bounds:
            if L <= upper:
                return label
        return "tail"

    # Group layer indices by phase, in order of first appearance.
    phase_ls = {}
    for L in range(num_layers):
        phase_ls.setdefault(phase(L), []).append(L)

    print("\n=== ||γ_L|| per layer (scalar magnitude) ===")
    for L in range(num_layers):
        print(f" L={L:>2} phase={phase(L):>5} ||γ||={norms[L]:>8.3f} "
              f"proj_top1={proj_top1[L]:>+.4f} resid={residual_after_top1[L]:>.4f}")

    print("\n=== PCA of unit γ vectors (direction structure) ===")
    print(" Explained variance, top 10 components:")
    for i in range(min(10, len(S))):
        print(f" PC{i}: {explained[i]:.4f} (singular_val={S[i]:.4f})")
    print(f" Top-3 explain: {explained[:3].sum():.4f}")
    print(f" Top-10 explain: {explained[:10].sum():.4f}")

    print("\n=== Per-phase direction statistics ===")
    print(f" {'phase':>6} {'N':>3} {'||γ||_mean':>10} {'||γ||_std':>9} "
          f"{'intra_cos':>9} {'vs_other_cos':>12}")
    for ph, Ls in phase_ls.items():
        u = units[Ls]
        # Mean cos-sim over distinct pairs inside the phase (upper triangle);
        # a single-layer phase has no pairs and is reported as 1.0.
        intra = (u @ u.T)[np.triu_indices(len(Ls), k=1)]
        intra_mean = intra.mean() if intra.size else 1.0
        # Vs other phases: every layer that is not in this phase.
        other_mask = np.ones(num_layers, dtype=bool)
        other_mask[Ls] = False
        if other_mask.any():
            vs_mean = (u @ units[other_mask].T).mean()
        else:
            vs_mean = 0.0
        print(f" {ph:>6} {len(Ls):>3} {norms[Ls].mean():>10.3f} "
              f"{norms[Ls].std():>9.3f} {intra_mean:>+9.4f} {vs_mean:>+12.4f}")

    print("\n=== Adjacent-pair unit-γ cos-sim ===")
    for L in range(num_layers - 1):
        # The separator keeps the two indices apart (previously "L=1011").
        print(f" L={L:>2}→{L+1:>2} phase={phase(L):>5} cos={cos_mat[L, L+1]:>+.4f}")

    with open(args.out, "w", encoding="utf-8") as f:
        json.dump({
            "model": args.model,
            "num_layers": num_layers,
            "norms": norms.tolist(),
            "proj_top1": proj_top1.tolist(),
            "explained_var": explained.tolist(),
            "cos_adjacent": [float(cos_mat[L, L+1]) for L in range(num_layers - 1)],
        }, f, indent=2)
    print(f"\nSaved: {args.out}")
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()