consciousness/sa-schedule-null-residual.py

237 lines
9 KiB
Python
Raw Normal View History

"""Null test: before any fitting, how similar are adjacent layers in the
raw weight-matrix sense?
For each adjacent layer pair (L, L+1) and each parameter family:
1. Normalize each matrix by its Frobenius norm (unit sphere).
2. Compute cos-sim = <W_L, W_{L+1}> / (||W_L|| ||W_{L+1}||).
3. Compute residual Δ = W_{L+1,norm} - W_{L,norm}; report ||Δ||_F
(null-if-orthogonal = sqrt(2) ≈ 1.414; null-if-identical = 0).
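4. Report effective rank of Δ (via entropy of normalized spectrum).
NOTE: this step is currently skipped (the residual SVD was the bottleneck
on large matrices); the corresponding output fields are written as null.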
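Covers the whole network, not just the middle block. Prints cos-sim and
per-family scale trajectories across depth as text tables and saves the
results as JSON (this script does not produce plots itself).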
"""
import argparse
import json
import numpy as np
import torch
from transformers import AutoModelForCausalLM
def spec_entropy(singvals, eps=1e-12):
    """Return the Shannon entropy (in nats) of a normalized squared spectrum.

    The squared singular values are divided by their sum (floored at ``eps``
    so an all-zero spectrum does not divide by zero), each resulting
    probability is clipped into ``[eps, 1.0]`` so the logarithm stays finite,
    and ``-sum(p * log(p))`` is returned as a Python float.
    """
    energy = singvals ** 2
    total = max(energy.sum(), eps)
    probs = np.clip(energy / total, eps, 1.0)
    return float(-np.sum(probs * np.log(probs)))
def frob(x):
    """Return ``np.linalg.norm(x)`` with default arguments as a Python float.

    With the default ``ord`` this is the Frobenius norm for a matrix and the
    Euclidean 2-norm for a vector.
    """
    magnitude = np.linalg.norm(x)
    return float(magnitude)
def norm_mat(x, eps=1e-12):
    """Scale ``x`` to unit Frobenius norm.

    The divisor is floored at ``eps`` so that an (almost) all-zero input is
    divided by ``eps`` rather than by zero.
    """
    scale = frob(x)
    if scale < eps:
        scale = eps
    return x / scale
def null_test_pair(A_dict, B_dict, family_names, num_heads, num_kv_heads, head_dim):
    """Compare two layers family by family in the raw-weight sense.

    ``A_dict`` and ``B_dict`` map family name -> weight array for the two
    layers. A family is skipped when it is missing from either dict, when the
    two arrays have different shapes, or when either norm is below 1e-12.
    ``num_heads``, ``num_kv_heads`` and ``head_dim`` are accepted but not
    used by this function.

    Returns a dict ``family -> {"cos", "resid_norm", "resid_eff_rank",
    "resid_spec_entropy"}``; the last two entries are always ``None``.
    """
    stats = {}
    for name in family_names:
        if not (name in A_dict and name in B_dict):
            continue
        left, right = A_dict[name], B_dict[name]
        if left.shape != right.shape:
            continue

        norm_left = frob(left)
        norm_right = frob(right)
        if norm_left < 1e-12 or norm_right < 1e-12:
            continue

        # Cosine similarity: elementwise inner product over the norm product.
        inner = (left * right).sum()
        cosine = float(inner / (norm_left * norm_right))

        # For unit-norm matrices, ||A/||A|| - B/||B||||_F^2 = 2 - 2*cos, so
        # the normalized residual needs no explicit subtraction. Clamp at 0
        # to absorb round-off when cos lands marginally above 1.
        gap_sq = 2.0 - 2.0 * cosine
        gap = float(np.sqrt(max(gap_sq, 0.0)))

        stats[name] = {
            "cos": cosine,
            "resid_norm": gap,
            # The residual SVD is skipped (it was the bottleneck on large
            # matrices), so the rank/entropy fields are placeholders.
            "resid_eff_rank": None,
            "resid_spec_entropy": None,
        }
    return stats
def _extract_layer_weights(layer):
    """Return one decoder layer's weights as float32 NumPy arrays, keyed by family.

    ``q_norm`` / ``k_norm`` are included only when the attention module
    exposes them.
    """
    attn = layer.self_attn
    mlp = layer.mlp
    modules = {
        "q_proj": attn.q_proj,
        "k_proj": attn.k_proj,
        "v_proj": attn.v_proj,
        "o_proj": attn.o_proj,
        "gate_proj": mlp.gate_proj,
        "up_proj": mlp.up_proj,
        "down_proj": mlp.down_proj,
        "input_ln": layer.input_layernorm,
        "post_attn_ln": layer.post_attention_layernorm,
    }
    for optional in ("q_norm", "k_norm"):
        module = getattr(attn, optional, None)
        if module is not None:
            modules[optional] = module
    # Upcast to fp32 before the NumPy conversion (NumPy has no bfloat16 dtype).
    return {name: module.weight.detach().float().numpy()
            for name, module in modules.items()}


def _scale_track_pca(scales, families):
    """SVD of the per-family centered log-norm track.

    Only families with a finite, strictly positive norm at every layer are
    used: a missing entry cannot be placed in the matrix, and a zero or NaN
    norm would put -inf/NaN into the SVD input.

    Returns ``(fam_used, U, S, Vh)``; the arrays are empty when no family
    qualifies.
    """
    fam_used = []
    rows = []
    for fam in families:
        vals = scales[fam]
        if vals and all(v is not None and 0.0 < v < float("inf") for v in vals):
            rows.append(np.log(np.array(vals)))
            fam_used.append(fam)
    if not rows:
        return fam_used, np.zeros((0, 0)), np.zeros(0), np.zeros((0, 0))
    scale_matrix = np.array(rows)  # (num_families, L)
    # Center per family so the modes describe variation across depth.
    centered = scale_matrix - scale_matrix.mean(axis=1, keepdims=True)
    # Rows are families, columns are layers.
    U, S, Vh = np.linalg.svd(centered, full_matrices=False)
    return fam_used, U, S, Vh


@torch.no_grad()
def main():
    """Run the adjacent-layer null test for one model and save the results.

    Command-line flags: ``--model`` (model id or path, default
    ``Qwen/Qwen3-4B``) and ``--out`` (JSON output path). The script loads
    the model on CPU, copies every layer's weight families to float32 NumPy
    arrays, prints the per-pair cos-sim table, the per-family summary, the
    Frobenius-norm scale track and its log-scale PCA, then writes everything
    to ``--out``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="Qwen/Qwen3-4B")
    ap.add_argument("--out", default="/tmp/sa-null-residual.json")
    args = ap.parse_args()

    print(f"Loading {args.model} ...", flush=True)
    # NOTE(review): trust_remote_code=True lets the model repository run its
    # own Python code at load time; only pass --model values you trust.
    model = AutoModelForCausalLM.from_pretrained(
        args.model,
        torch_dtype=torch.bfloat16,  # halve memory vs fp32
        device_map="cpu",
        trust_remote_code=True,
        attn_implementation="eager",
    )
    cfg = model.config
    num_layers = cfg.num_hidden_layers
    num_heads = cfg.num_attention_heads
    num_kv_heads = getattr(cfg, "num_key_value_heads", num_heads)
    hidden = cfg.hidden_size
    head_dim = getattr(cfg, "head_dim", hidden // num_heads)
    intermediate = cfg.intermediate_size
    print(f" L={num_layers} H={num_heads} kv={num_kv_heads} hd={head_dim} "
          f"hidden={hidden} ff={intermediate}", flush=True)

    families = ["q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
                "input_ln", "post_attn_ln", "q_norm", "k_norm"]

    # fp32 copies of every layer's weights are held at once; the bf16 model
    # itself is dropped right after extraction.
    layers = [_extract_layer_weights(model.model.layers[idx])
              for idx in range(num_layers)]
    del model

    # Also record per-layer scale (Frobenius norm) for the scale-track PCA
    scales = {
        fam: [frob(entry[fam]) if fam in entry else None for entry in layers]
        for fam in families
    }

    # Pairwise null test
    pair_results = []
    for L in range(num_layers - 1):
        r = null_test_pair(layers[L], layers[L + 1], families,
                           num_heads, num_kv_heads, head_dim)
        pair_results.append({"L": L, "L_next": L + 1, "families": r})

    # Report
    print("\n=== Adjacent-layer raw cos-sim per family ===")
    print(" null interpretation: 1.0 = identical matrices up to scale, 0 = orthogonal")
    # Header and rows iterate the same column list so they stay aligned even
    # when a family is absent from every pair.
    cos_cols = [fam for fam in families
                if any(fam in pr["families"] for pr in pair_results)]
    print(f"\n {'L':>3}" + "".join(f" {fam:>12}" for fam in cos_cols))
    for pr in pair_results:
        cells = "".join(
            f" {pr['families'][fam]['cos']:>+12.4f}" if fam in pr["families"]
            else f" {'':>12}"
            for fam in cos_cols
        )
        print(f" {pr['L']:>3}{cells}")

    # Summary per family + scalar-T fit comparison
    # raw_resid = sqrt(2 - 2*cos); scalar_fit = sqrt(1 - cos²) = sin(angle).
    # random_baseline = sqrt(2) ≈ 1.414.
    print("\n=== Per-family summary (across all adjacent pairs) ===")
    print(" random baseline = sqrt(2) ≈ 1.414 (what we'd see with no relationship)")
    print(f"\n {'family':>14} {'mean_cos':>10} {'median_cos':>11} "
          f"{'raw_resid':>10} {'scalar_fit':>11} {'improve_frac':>13} {'mean_SE':>8}")
    for fam in families:
        fam_stats = [pr["families"][fam] for pr in pair_results
                     if fam in pr["families"]]
        if not fam_stats:
            continue
        cs = np.array([st["cos"] for st in fam_stats])
        ses = [st["resid_spec_entropy"] for st in fam_stats
               if st["resid_spec_entropy"] is not None]
        raw = np.sqrt(np.maximum(2.0 - 2.0 * cs, 0.0)).mean()
        scalar_fit = np.sqrt(np.maximum(1.0 - cs ** 2, 0.0)).mean()
        # Improvement fraction: (raw - scalar_fit) / (raw - 0) normalized
        # to [0, 1] where 0 = scalar does nothing, 1 = scalar reconstructs.
        improve_frac = (raw - scalar_fit) / max(raw, 1e-12)
        # mean_SE prints 0 when no spectral entropy was computed for the family.
        print(f" {fam:>14} {np.mean(cs):>+10.4f} {np.median(cs):>+11.4f} "
              f"{raw:>10.4f} {scalar_fit:>11.4f} {improve_frac:>13.4f} "
              f"{np.mean(ses) if ses else 0:>8.4f}")

    # Scale-track: Frobenius norm of each family across layers
    print("\n=== Scale track: ||W_family||_F across layers ===")
    scale_cols = [fam for fam in families
                  if any(s is not None for s in scales[fam])]
    print(f" {'L':>3}" + "".join(f" {fam:>12}" for fam in scale_cols))
    for L in range(num_layers):
        cells = "".join(
            f" {scales[fam][L]:>12.4f}" if scales[fam][L] is not None
            else f" {'':>12}"
            for fam in scale_cols
        )
        print(f" {L:>3}{cells}")

    # PCA of log-scale-track to see dimensionality of schedule
    print("\n=== PCA of log-scale-track (dimensionality of schedule) ===")
    fam_used, U, S, Vh = _scale_track_pca(scales, families)
    total = (S ** 2).sum()
    print(" explained variance by mode:")
    for i, s in enumerate(S):
        pct = float(s ** 2 / max(total, 1e-20)) * 100
        loadings = ", ".join(f"{fam}={U[j, i]:+.2f}"
                             for j, fam in enumerate(fam_used))
        print(f" mode {i+1:>2}: {pct:>6.2f}% "
              f"(loadings per family: {loadings})")

    # Save
    with open(args.out, "w", encoding="utf-8") as f:
        json.dump({
            "model": args.model,
            "pair_results": pair_results,
            "scales": scales,
            "scale_pca_singvals": S.tolist(),
            "scale_pca_loadings": U.tolist(),
            "scale_pca_scores": (np.diag(S) @ Vh).tolist(),
            "fam_used": fam_used,
        }, f, indent=2)
    print(f"\nSaved: {args.out}")
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()