# Provenance: forked from kent/consciousness (237 lines, 9 KiB, Python).
"""Null test: before any fitting, how similar are adjacent layers in the
|
||
|
|
raw weight-matrix sense?
|
||
|
|
|
||
|
|
For each adjacent layer pair (L, L+1) and each parameter family:
|
||
|
|
1. Normalize each matrix by its Frobenius norm (unit sphere).
|
||
|
|
2. Compute cos-sim = <W_L, W_{L+1}> / (||W_L|| ||W_{L+1}||).
|
||
|
|
3. Compute residual Δ = W_{L+1,norm} - W_{L,norm}; report ||Δ||_F
|
||
|
|
(null-if-orthogonal = sqrt(2) ≈ 1.414; null-if-identical = 0).
|
||
|
|
4. Report effective rank of Δ (via entropy of normalized spectrum).
|
||
|
|
|
||
|
|
Whole network, not just middle block. Plots cos-sim and residual-rank
|
||
|
|
trajectories across depth.
|
||
|
|
"""
|
||
|
|
import argparse
import json

import numpy as np
import torch
from transformers import AutoModelForCausalLM
|
||
|
|
|
||
|
|
|
||
|
|
def spec_entropy(singvals, eps=1e-12):
    """Return the Shannon entropy (in nats) of a normalized spectrum.

    The squared singular values are divided by their sum so they form a
    probability distribution ``p``; the result is ``-sum(p * log(p))``.

    Args:
        singvals: Singular values as a NumPy array or any array-like
            (list, tuple). The input is converted to float64 first, so
            plain Python sequences work as well as arrays.
        eps: Floor used for the normalizer and for each probability, so an
            all-zero spectrum or individual zeros never reach ``log(0)``.

    Returns:
        The entropy as a plain Python ``float``.
    """
    energy = np.asarray(singvals, dtype=np.float64) ** 2
    # Guard the division: an all-zero spectrum has zero total energy.
    p = energy / max(energy.sum(), eps)
    # Clip so that log() is finite for exactly-zero entries.
    p = np.clip(p, eps, 1.0)
    return float(-(p * np.log(p)).sum())
|
||
|
|
|
||
|
|
|
||
|
|
def frob(x):
    """Return the norm of ``x`` as a plain Python float.

    Uses ``np.linalg.norm`` with its default arguments, which is the
    Frobenius norm for a 2-D matrix and the Euclidean norm for a vector.
    """
    magnitude = np.linalg.norm(x)
    return float(magnitude)
|
||
|
|
|
||
|
|
|
||
|
|
def norm_mat(x, eps=1e-12):
    """Scale ``x`` so that its norm (as computed by ``frob``) is one.

    The divisor is floored at ``eps`` so a (near-)zero input is returned
    almost unchanged instead of triggering a division by zero.
    """
    scale = frob(x)
    if scale < eps:
        scale = eps
    return x / scale
|
||
|
|
|
||
|
|
|
||
|
|
def null_test_pair(A_dict, B_dict, family_names, num_heads, num_kv_heads, head_dim):
    """Compare two layers family-by-family via cosine similarity.

    For every family present in both dicts with matching shapes and non-zero
    norm, the flattened arrays are compared as vectors.

    Args:
        A_dict: Mapping family name -> NumPy array for the first layer.
        B_dict: Mapping family name -> NumPy array for the second layer.
        family_names: Iterable of family names to examine, in output order.
        num_heads: Unused; kept so existing callers keep working.
        num_kv_heads: Unused; kept so existing callers keep working.
        head_dim: Unused; kept so existing callers keep working.

    Returns:
        Dict mapping each compared family to a dict with keys:
          * ``"cos"``: cosine similarity, guaranteed to lie in [-1, 1]
            (or NaN if the inputs contain NaN).
          * ``"resid_norm"``: Frobenius norm of the difference of the two
            unit-normalized arrays, i.e. ``sqrt(2 - 2*cos)``.
          * ``"resid_eff_rank"`` / ``"resid_spec_entropy"``: always ``None``;
            the residual SVD is intentionally skipped (see comment below).
        Families that are missing from either dict, have mismatched shapes,
        or have a norm below 1e-12 are omitted.
    """
    out = {}
    for fam in family_names:
        Wa = A_dict.get(fam)
        Wb = B_dict.get(fam)
        if Wa is None or Wb is None:
            continue
        if Wa.shape != Wb.shape:
            continue

        # Accumulate norms and inner product the same way, in float64, so
        # that cos of (near-)identical float32 matrices does not drift
        # outside [-1, 1] through inconsistent low-precision rounding.
        fa = float(np.sqrt(np.sum(np.square(Wa), dtype=np.float64)))
        fb = float(np.sqrt(np.sum(np.square(Wb), dtype=np.float64)))
        if fa < 1e-12 or fb < 1e-12:
            continue
        inner = float(np.sum(Wa * Wb, dtype=np.float64))
        # np.clip (unlike min/max) propagates NaN instead of hiding it.
        cos = float(np.clip(inner / (fa * fb), -1.0, 1.0))

        # || Wa/||Wa|| - Wb/||Wb|| ||_F^2 = 2 - 2*cos, which is >= 0 after
        # the clamp above.
        resid_norm = float(np.sqrt(2.0 - 2.0 * cos))

        # Skip residual SVD — was bottleneck on large matrices; cos-sim
        # + scalar fit give us the main signal. Can add back selectively.
        out[fam] = {
            "cos": cos,
            "resid_norm": resid_norm,
            "resid_eff_rank": None,
            "resid_spec_entropy": None,
        }
    return out
|
||
|
|
|
||
|
|
|
||
|
|
def _weights_as_numpy(param):
    """Detach a parameter tensor, upcast it to float32, and return a NumPy array."""
    return param.detach().float().numpy()


def _extract_layer_families(layer):
    """Collect the weight arrays of one decoder layer, keyed by family name."""
    attn = layer.self_attn
    mlp = layer.mlp
    entry = {
        "q_proj": _weights_as_numpy(attn.q_proj.weight),
        "k_proj": _weights_as_numpy(attn.k_proj.weight),
        "v_proj": _weights_as_numpy(attn.v_proj.weight),
        "o_proj": _weights_as_numpy(attn.o_proj.weight),
        "gate_proj": _weights_as_numpy(mlp.gate_proj.weight),
        "up_proj": _weights_as_numpy(mlp.up_proj.weight),
        "down_proj": _weights_as_numpy(mlp.down_proj.weight),
        "input_ln": _weights_as_numpy(layer.input_layernorm.weight),
        "post_attn_ln": _weights_as_numpy(layer.post_attention_layernorm.weight),
    }
    # q_norm / k_norm are looked up defensively: the attention module may
    # not define them, in which case the family is simply absent.
    for name in ("q_norm", "k_norm"):
        norm_module = getattr(attn, name, None)
        if norm_module is not None:
            entry[name] = _weights_as_numpy(norm_module.weight)
    return entry


@torch.no_grad()
def main():
    """Run the adjacent-layer null test on a causal LM and save the results.

    Steps:
      1. Parse ``--model`` (checkpoint name/path) and ``--out`` (JSON path).
      2. Load the model on CPU in bfloat16 and copy every per-layer weight
         family out as a float32 NumPy array, then drop the model.
      3. Record each family's Frobenius norm per layer (the "scale track").
      4. Run ``null_test_pair`` on every adjacent layer pair.
      5. Print the cos-sim table, a per-family summary, the scale track and
         a PCA (via SVD) of the centered log-scale track.
      6. Write all results to ``--out`` as JSON.

    Returns:
        None. Results go to stdout and to the JSON file.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="Qwen/Qwen3-4B")
    ap.add_argument("--out", default="/tmp/sa-null-residual.json")
    args = ap.parse_args()

    print(f"Loading {args.model} ...", flush=True)
    # NOTE(security): trust_remote_code=True allows the checkpoint repository
    # to execute its own Python code at load time. Only pass --model values
    # from sources you trust.
    model = AutoModelForCausalLM.from_pretrained(
        args.model,
        torch_dtype=torch.bfloat16,  # halve memory vs fp32
        device_map="cpu",
        trust_remote_code=True,
        attn_implementation="eager",
    )
    cfg = model.config
    num_layers = cfg.num_hidden_layers
    num_heads = cfg.num_attention_heads
    num_kv_heads = getattr(cfg, "num_key_value_heads", num_heads)
    hidden = cfg.hidden_size
    head_dim = getattr(cfg, "head_dim", hidden // num_heads)
    intermediate = cfg.intermediate_size
    print(f" L={num_layers} H={num_heads} kv={num_kv_heads} hd={head_dim} "
          f"hidden={hidden} ff={intermediate}", flush=True)

    families = ["q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
                "input_ln", "post_attn_ln", "q_norm", "k_norm"]

    layers = {}
    for L in range(num_layers):
        layers[L] = _extract_layer_families(model.model.layers[L])

    # The float32 copies above are all we need from here on.
    del model

    # Also record per-layer scale (Frobenius norm) for the scale-track PCA.
    # A family missing on a layer is recorded as None.
    scales = {
        fam: [frob(layers[L][fam]) if fam in layers[L] else None
              for L in range(num_layers)]
        for fam in families
    }

    # Pairwise null test
    pair_results = []
    for L in range(num_layers - 1):
        r = null_test_pair(layers[L], layers[L + 1], families,
                           num_heads, num_kv_heads, head_dim)
        pair_results.append({"L": L, "L_next": L + 1, "families": r})

    # Report
    print("\n=== Adjacent-layer raw cos-sim per family ===")
    print(" null interpretation: 1.0 = identical matrices up to scale, 0 = orthogonal")
    # Decide the column set once so header and rows always line up (rows
    # must not emit cells for families that have no header column).
    cos_columns = [fam for fam in families
                   if any(fam in pr["families"] for pr in pair_results)]
    print(f"\n {'L':>3}", end="")
    for fam in cos_columns:
        print(f" {fam:>12}", end="")
    print()
    for pr in pair_results:
        print(f" {pr['L']:>3}", end="")
        for fam in cos_columns:
            if fam in pr["families"]:
                print(f" {pr['families'][fam]['cos']:>+12.4f}", end="")
            else:
                print(f" {'':>12}", end="")
        print()

    # Summary per family + scalar-T fit comparison
    # raw_resid = sqrt(2 - 2*cos); scalar_fit = sqrt(1 - cos²) = sin(angle).
    # random_baseline = sqrt(2) ≈ 1.414.
    print("\n=== Per-family summary (across all adjacent pairs) ===")
    print(" random baseline = sqrt(2) ≈ 1.414 (what we'd see with no relationship)")
    print(f"\n {'family':>14} {'mean_cos':>10} {'median_cos':>11} "
          f"{'raw_resid':>10} {'scalar_fit':>11} {'improve_frac':>13} {'mean_SE':>8}")
    for fam in families:
        fam_stats = [pr["families"][fam] for pr in pair_results
                     if fam in pr["families"]]
        cs = [s["cos"] for s in fam_stats if s.get("cos") is not None]
        ses = [s["resid_spec_entropy"] for s in fam_stats
               if s.get("resid_spec_entropy") is not None]
        if not cs:
            continue
        cos_arr = np.array(cs)
        raw = np.sqrt(np.maximum(2.0 - 2.0 * cos_arr, 0.0)).mean()
        scalar_fit = np.sqrt(np.maximum(1.0 - cos_arr ** 2, 0.0)).mean()
        # Improvement fraction: (raw - scalar_fit) / (raw - 0) normalized
        # to [0, 1] where 0 = scalar does nothing, 1 = scalar reconstructs.
        improve_frac = (raw - scalar_fit) / max(raw, 1e-12)
        # Spectral entropy may not have been computed at all; show "n/a"
        # rather than a made-up 0.0000, which would look like a real value.
        se_cell = f"{np.mean(ses):>8.4f}" if ses else f"{'n/a':>8}"
        print(f" {fam:>14} {np.mean(cs):>+10.4f} {np.median(cs):>+11.4f} "
              f"{raw:>10.4f} {scalar_fit:>11.4f} {improve_frac:>13.4f} "
              f"{se_cell}")

    # Scale-track: Frobenius norm of each family across layers
    print("\n=== Scale track: ||W_family||_F across layers ===")
    scale_columns = [fam for fam in families
                     if any(s is not None for s in scales[fam])]
    print(f" {'L':>3}", end="")
    for fam in scale_columns:
        print(f" {fam:>12}", end="")
    print()
    for L in range(num_layers):
        print(f" {L:>3}", end="")
        for fam in scale_columns:
            if scales[fam][L] is not None:
                print(f" {scales[fam][L]:>12.4f}", end="")
            else:
                print(f" {'':>12}", end="")
        print()

    # PCA of log-scale-track to see dimensionality of schedule
    print("\n=== PCA of log-scale-track (dimensionality of schedule) ===")
    log_tracks = []
    fam_used = []
    for fam in families:
        vals = scales[fam]
        # Need a value on every layer, and strictly positive so log() is
        # finite (a zero or NaN norm would poison the SVD).
        if all(v is not None and v > 0.0 for v in vals):
            log_tracks.append(np.log(np.array(vals)))
            fam_used.append(fam)

    if fam_used:
        scale_matrix = np.array(log_tracks)  # (num_families, L)
        # Center per-family
        sm_c = scale_matrix - scale_matrix.mean(axis=1, keepdims=True)
        # SVD: columns are layers, rows are families
        U, S, Vh = np.linalg.svd(sm_c, full_matrices=False)
        total = (S ** 2).sum()
        print(" explained variance by mode:")
        for i, s in enumerate(S):
            pct = float(s ** 2 / max(total, 1e-20)) * 100
            loadings = ", ".join(f"{name}={weight:+.2f}"
                                 for name, weight in zip(fam_used, U[:, i]))
            print(f" mode {i+1:>2}: {pct:>6.2f}% "
                  f"(loadings per family: {loadings})")
        pca_singvals = S.tolist()
        pca_loadings = U.tolist()
        pca_scores = (np.diag(S) @ Vh).tolist()
    else:
        # Nothing to decompose; keep the JSON schema but with empty lists.
        print(" no family has a positive scale on every layer; PCA skipped")
        pca_singvals, pca_loadings, pca_scores = [], [], []

    # Save
    with open(args.out, "w", encoding="utf-8") as f:
        json.dump({
            "model": args.model,
            "pair_results": pair_results,
            "scales": scales,
            "scale_pca_singvals": pca_singvals,
            "scale_pca_loadings": pca_loadings,
            "scale_pca_scores": pca_scores,
            "fam_used": fam_used,
        }, f, indent=2)
    print(f"\nSaved: {args.out}")


if __name__ == "__main__":
    main()
|