forked from kent/consciousness
122 lines
4.4 KiB
Python
122 lines
4.4 KiB
Python
|
|
"""Pull input_layernorm.γ vectors from a model and analyze direction
structure across layers.

Question: is γ just scalar magnitude (isotropic SA) or does each layer
have a preferred direction (anisotropic SA / geometry-aware)?

Decomposition: γ_L = ||γ_L|| · γ_L̂
  - ||γ_L|| is what our scalar Kirkpatrick fit captured
  - γ_L̂ is unit direction — if layers share direction, γ is rank-1 +
    scaling (classical isotropic). If directions differ per layer, γ
    encodes per-layer preferred axis (anisotropic).

We also look at:
  - pairwise cos-sim between γ_L̂ across layers
  - principal components of [γ_L̂]_L (stacked matrix)
  - per-phase structure: is Phase E more anisotropic than Phase C?
"""
|
|||
|
|
import argparse
|
|||
|
|
import numpy as np
|
|||
|
|
import torch
|
|||
|
|
from transformers import AutoModelForCausalLM
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Inclusive last layer index of each phase (Qwen3-32B boundaries). Layers past
# the final entry are labelled "tail".
# NOTE(review): these boundaries are applied to whatever --model is given;
# for other models the phase labels are presumably not meaningful — confirm.
_PHASE_LAST_LAYER = (("A", 6), ("B", 9), ("C", 31), ("D", 46), ("E", 58))


def _phase(layer):
    """Return the phase label ("A".."E" or "tail") for a 0-based layer index."""
    for label, last in _PHASE_LAST_LAYER:
        if layer <= last:
            return label
    return "tail"


def _load_gammas(model_name):
    """Load `model_name` on CPU and return its stacked input_layernorm weights.

    The whole model is instantiated through ``AutoModelForCausalLM`` in
    float32; only each layer's ``input_layernorm.weight`` is kept.

    Returns:
        A NumPy array with one row per layer, i.e. (num_layers, hidden).
    """
    # NOTE(review): trust_remote_code=True allows the model repository to run
    # its own Python code at load time — only point --model at trusted repos.
    m = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float32, device_map="cpu",
        trust_remote_code=True,
    )
    num_layers = m.config.num_hidden_layers
    hidden = m.config.hidden_size
    print(f" L={num_layers}, hidden={hidden}", flush=True)

    return np.stack([
        m.model.layers[L].input_layernorm.weight.detach().float().cpu().numpy()
        for L in range(num_layers)
    ])  # (L, hidden)


def _analyze_directions(gammas):
    """Decompose each γ row into norm and unit direction, then run PCA.

    Args:
        gammas: 2-D array, one γ vector per row.

    Returns:
        Tuple ``(norms, units, cos_mat, S, explained, proj_top1, residual)``:
        per-row norms, unit rows, pairwise cosine matrix of the unit rows,
        singular values of the mean-centred unit rows, their explained
        variance fractions, projection of each unit row on the first right
        singular vector, and the norm of what remains after that projection.

    Raises:
        ValueError: if any row has a zero or NaN norm, since its unit
            direction is undefined.
    """
    norms = np.linalg.norm(gammas, axis=1)
    # Fail early with a clear message instead of propagating NaN into the SVD.
    if not np.all(norms > 0):
        raise ValueError(
            "input_layernorm weight with zero or NaN norm; "
            "unit direction is undefined"
        )
    units = gammas / norms[:, None]

    # Rows are unit length, so the Gram matrix is the pairwise cosine matrix.
    cos_mat = units @ units.T  # (L, L)

    # PCA on unit vectors.
    centered = units - units.mean(axis=0, keepdims=True)
    _, S, Vt = np.linalg.svd(centered, full_matrices=False)
    variance = S**2
    total = variance.sum()
    # All-identical directions give zero total variance; report zeros rather
    # than NaN (which would also produce invalid JSON downstream).
    explained = variance / total if total > 0 else np.zeros_like(variance)

    # NOTE(review): top1 is the leading direction of variation *about the
    # mean* (PCA on centred rows), but it is projected against the uncentred
    # unit vectors. If "shared direction" is meant to be the mean direction,
    # this is not it — confirm the intent. The sign of top1 is arbitrary.
    top1 = Vt[0]  # (hidden,)
    proj_top1 = units @ top1  # (L,)
    # Unit rows: what is left orthogonal to top1 has norm sqrt(1 - proj^2);
    # clamp at 0 to absorb rounding error.
    residual = np.sqrt(np.maximum(1 - proj_top1**2, 0))

    return norms, units, cos_mat, S, explained, proj_top1, residual


def main():
    """Analyze the direction structure of a model's input_layernorm γ vectors.

    Command-line options:
        --model: model identifier passed to ``from_pretrained``
            (default "Qwen/Qwen3-32B").
        --out: path of the JSON summary (default "/tmp/gamma-dirs.json").

    Prints per-layer norms, PCA explained variance, per-phase cosine
    statistics and adjacent-layer cosines, then writes a JSON file with the
    keys ``model``, ``num_layers``, ``norms``, ``proj_top1``,
    ``explained_var`` and ``cos_adjacent``.
    """
    import json

    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="Qwen/Qwen3-32B")
    ap.add_argument("--out", default="/tmp/gamma-dirs.json")
    args = ap.parse_args()

    # The full model is loaded; only the layernorm weights are retained.
    print(f"Loading {args.model} (CPU, layernorm params only)...", flush=True)
    gammas = _load_gammas(args.model)
    num_layers = gammas.shape[0]

    (norms, units, cos_mat, S, explained,
     proj_top1, residual_after_top1) = _analyze_directions(gammas)

    # Group layer indices by phase, preserving first-seen phase order.
    phase_ls = {}
    for L in range(num_layers):
        phase_ls.setdefault(_phase(L), []).append(L)

    print("\n=== ||γ_L|| per layer (scalar magnitude) ===")
    for L in range(num_layers):
        print(f" L={L:>2} phase={_phase(L):>5} ||γ||={norms[L]:>8.3f} "
              f"proj_top1={proj_top1[L]:>+.4f} resid={residual_after_top1[L]:>.4f}")

    print("\n=== PCA of unit γ vectors (direction structure) ===")
    print(" Explained variance, top 10 components:")
    for i in range(min(10, len(S))):
        print(f" PC{i}: {explained[i]:.4f} (singular_val={S[i]:.4f})")
    print(f" Top-3 explain: {explained[:3].sum():.4f}")
    print(f" Top-10 explain: {explained[:10].sum():.4f}")

    print("\n=== Per-phase direction statistics ===")
    print(f" {'phase':>6} {'N':>3} {'||γ||_mean':>10} {'||γ||_std':>9} "
          f"{'intra_cos':>9} {'vs_other_cos':>12}")
    for ph, Ls in phase_ls.items():
        u = units[Ls]
        # Mean cosine over distinct pairs inside the phase; a single-layer
        # phase has no pairs and is reported as 1.0.
        intra = (u @ u.T)[np.triu_indices(len(Ls), k=1)]
        intra_mean = intra.mean() if len(intra) > 0 else 1.0

        # Mean cosine against every layer outside the phase. A boolean mask
        # avoids the quadratic `L not in Ls` list scan.
        outside = np.ones(num_layers, dtype=bool)
        outside[Ls] = False
        if outside.any():
            vs_mean = (u @ units[outside].T).mean()
        else:
            vs_mean = 0.0
        print(f" {ph:>6} {len(Ls):>3} {norms[Ls].mean():>10.3f} "
              f"{norms[Ls].std():>9.3f} {intra_mean:>+9.4f} {vs_mean:>+12.4f}")

    print("\n=== Adjacent-pair unit-γ cos-sim ===")
    for L in range(num_layers - 1):
        print(f" L={L:>2}→{L+1:>2} phase={_phase(L):>5} cos={cos_mat[L, L+1]:>+.4f}")

    with open(args.out, "w", encoding="utf-8") as f:
        json.dump({
            "model": args.model,
            "num_layers": num_layers,
            "norms": norms.tolist(),
            "proj_top1": proj_top1.tolist(),
            "explained_var": explained.tolist(),
            "cos_adjacent": [float(cos_mat[L, L+1]) for L in range(num_layers - 1)],
        }, f, indent=2)
    print(f"\nSaved: {args.out}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Script entry point: run the analysis only on direct execution,
    # not when this module is imported.
    main()
|