consciousness/sa-schedule-delta-svd.py

234 lines
9.6 KiB
Python
Raw Normal View History

"""Per-layer residual-stream delta SVD: δ_L = h_{L+1} - h_L stacked
over all tokens in a calibration set. SVD gives us:
- top singular value per layer γ_L (scalar magnitude, what Kirkpatrick fit)
- top right-singular-vector per layer v_L (direction in hidden space)
- effective rank per layer: is this one direction or many?
- pairwise v_L cos-sim across layers: are layers subspace-disjoint or -shared?
This directly tests the anisotropic-SA hypothesis:
h_{L+1} = h_L + T_shared(h_L) + γ_L · v_L · f(...)
Phase C prediction: v_L vectors cover broad shared subspace (high mutual cos-sim,
rank-few overall), δ_L is mostly noise around a shared update.
Phase E prediction: v_L vectors are specialized (low pairwise cos-sim, each layer
its own direction), effective rank of the block is close to N.
Qwen3-32B phases: A 0-6, B 7-9, C 10-31, D 32-46, E 47-58, tail 59-63.
"""
import argparse
import json
import numpy as np
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Calibration prompts. main() joins them with blank lines ("\n\n") and
# tokenizes the result as one stream (truncated to 2048 tokens), so every
# layer's delta matrix stacks the tokens of all prompts together.
CALIB = [
    "The Eiffel Tower is located in",
    "Photosynthesis is the process by which",
    "The three branches of the US government are the legislative, executive, and",
    "If a train travels 60 miles per hour for 2.5 hours, the total distance covered is",
    "Solve for x: 3x + 7 = 22. The answer is x =",
    "The derivative of x^3 + 2x^2 is",
    "def fibonacci(n):\n if n < 2:\n return n\n return",
    "# Python list comprehension to square even numbers in 0-9\nresult = ",
    "SELECT name, age FROM users WHERE",
    "She opened the old wooden box and found",
    "The argument in favor of renewable energy is",
    "User: What is the capital of Australia?\nAssistant:",
    "Write a haiku about autumn:\n",
    "Albert Einstein was born in the year",
    "The speed of light in vacuum is approximately",
    "I really loved that movie because",
    "The main difference between a virus and a bacterium is",
    "The French word for 'apple' is",
    "1 + 1 = ",
    "Once upon a time, in a land far away,",
    "The key insight of general relativity is that gravity is not a force but",
    "Water boils at 100 degrees Celsius at standard atmospheric pressure. At higher",
    "In object-oriented programming, encapsulation refers to",
    "The mitochondria is often called the powerhouse of the cell because it",
    "Shakespeare's Hamlet begins with the famous line",
]
def phase_of(L, num_layers):
    """Return the phase label ("A".."E" or "tail") for layer index L.

    A 64-layer model uses fixed, inclusive last-layer indices per phase
    (A 0-6, B 7-9, C 10-31, D 32-46, E 47-58, tail 59+). Any other depth
    uses exclusive upper bounds on the relative depth L / num_layers.
    """
    if num_layers == 64:
        # (last layer index belonging to the phase, label)
        fixed_ceilings = (
            (6, "A"),
            (9, "B"),
            (31, "C"),
            (46, "D"),
            (58, "E"),
        )
        for last_index, label in fixed_ceilings:
            if L <= last_index:
                return label
        return "tail"

    depth = L / num_layers
    # (exclusive upper bound on relative depth, label)
    relative_cutoffs = (
        (0.11, "A"),
        (0.15, "B"),
        (0.5, "C"),
        (0.75, "D"),
        (0.92, "E"),
    )
    for upper, label in relative_cutoffs:
        if depth < upper:
            return label
    return "tail"
@torch.no_grad()
def main():
    """Run the per-layer residual-delta SVD analysis and save a JSON summary.

    Loads the model named by ``--model`` onto CUDA, runs the concatenated
    CALIB prompts through it once with ``output_hidden_states=True`` and, for
    every layer L, analyses δ_L = hs[L+1] - hs[L] with the first token
    dropped: per-token norms, step size relative to ||h_L||, cosine between
    δ and h, leading singular values, entropy-based effective rank and
    energy fractions. It then prints the overlap of the top-1 right singular
    vectors (adjacent layers and per phase), the per-phase shared-subspace
    capture, and writes the scalar per-layer statistics to ``--out``.

    Command-line flags:
        --model  model id or path handed to ``from_pretrained``.
        --out    path of the JSON summary to write.
        --top-k  number of leading singular values / directions kept per
                 layer; must be at least 1.

    Raises:
        ValueError: if the calibration stream has fewer than two tokens, so
            nothing is left once the first token is dropped.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="Qwen/Qwen3-32B")
    ap.add_argument("--out", default="/tmp/delta-svd.json")
    ap.add_argument("--top-k", type=int, default=8,
                    help="keep top-k singular values / directions per layer")
    args = ap.parse_args()
    if args.top_k < 1:
        # The overlap analysis below reads direction 0 of every layer.
        ap.error("--top-k must be at least 1")

    print(f"Loading {args.model} ...", flush=True)
    tok = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        args.model, torch_dtype=torch.bfloat16, device_map="cuda",
        trust_remote_code=True, attn_implementation="eager",
    ).eval()
    num_layers = model.config.num_hidden_layers
    hidden = model.config.hidden_size
    print(f" L={num_layers}, hidden={hidden}", flush=True)

    # Concat calib and tokenize as one stream
    text = "\n\n".join(CALIB)
    enc = tok(text, return_tensors="pt", truncation=True, max_length=2048).to("cuda")
    n_tok = enc.input_ids.shape[1]
    print(f" calibration tokens: {n_tok}", flush=True)
    if n_tok < 2:
        raise ValueError(
            f"need at least 2 calibration tokens after tokenization, got {n_tok}"
        )

    out = model(**enc, output_hidden_states=True, use_cache=False)
    # hidden_states: tuple of (num_layers+1) tensors, each (1, n_tok, hidden)
    hs = [h[0].float().cpu().numpy() for h in out.hidden_states]
    # hs[L] = residual stream entering layer L (or leaving layer L-1). So
    # δ_L = hs[L+1] - hs[L] is layer L's contribution.
    # NOTE(review): some Hugging Face decoder implementations record the last
    # hidden_states entry only after the final normalization layer; if that
    # holds for this model, the last layer's δ also contains that norm.
    # Confirm against the model implementation before interpreting it.
    print(f" hidden_states count: {len(hs)} (expect {num_layers+1})", flush=True)
    # Free the GPU before the CPU-side SVDs.
    del model, out
    torch.cuda.empty_cache()

    # Per-layer SVD
    per_layer = []   # JSON-serialisable scalar statistics, one dict per layer
    layer_dirs = []  # per-layer (top_k, hidden) direction arrays, kept out of the JSON
    for L in range(num_layers):
        # Remove BOS / first-token artifacts (often outlier)
        delta = (hs[L + 1] - hs[L])[1:]  # (n_tok - 1, hidden)
        h_in = hs[L][1:]                 # (n_tok - 1, hidden)

        # Norm per token
        token_norms = np.linalg.norm(delta, axis=1)
        h_norms = np.linalg.norm(h_in, axis=1)
        # Relative step size: ||δ_L|| / ||h_L||
        rel_step = token_norms / np.maximum(h_norms, 1e-8)
        # Angle between δ and h, per token: cos = <δ, h> / (||δ||||h||)
        dot = np.einsum("nd,nd->n", delta, h_in)
        cos_delta_h = dot / np.maximum(token_norms * h_norms, 1e-8)
        # "Parallel" component: how much of δ points along ±h
        parallel_frac = np.abs(cos_delta_h).mean()

        # Economy SVD on CPU. S: singular values, decreasing.
        # Vt: right singular vectors (directions in hidden space).
        _, S, Vt = np.linalg.svd(delta, full_matrices=False)

        # Effective rank (entropy of normalized squared SVs)
        energy = S ** 2
        p = energy / energy.sum()
        p_nz = p[p > 1e-12]
        eff_rank = float(np.exp(-(p_nz * np.log(p_nz)).sum()))

        phase = phase_of(L, num_layers)
        per_layer.append({
            "L": L,
            "phase": phase,
            "frob": float(np.linalg.norm(delta)),
            "token_norm_mean": float(token_norms.mean()),
            "token_norm_std": float(token_norms.std()),
            "h_norm_mean": float(h_norms.mean()),
            "rel_step_mean": float(rel_step.mean()),
            "rel_step_std": float(rel_step.std()),
            "parallel_frac": float(parallel_frac),
            "cos_delta_h_mean": float(cos_delta_h.mean()),
            "top_singvals": S[:args.top_k].tolist(),
            "eff_rank": eff_rank,
            # Energy concentration
            "top1_frac": float(p[0]),
            "top3_frac": float(p[:3].sum()),
            "top10_frac": float(p[:10].sum()),
        })
        # Keep the directions as arrays rather than nested Python lists.
        # float32 -> float64 reproduces the values a list round-trip would give.
        layer_dirs.append(Vt[:args.top_k].astype(np.float32).astype(np.float64))

        print(f" L={L:>2} phase={phase:>4} "
              f"||h||={h_norms.mean():>7.1f} "
              f"||δ||={token_norms.mean():>7.2f} "
              f"rel={rel_step.mean():.4f} "
              f"‖parallel‖={parallel_frac:.4f} "
              f"eff_rank={eff_rank:>6.2f}",
              flush=True)

    # Pairwise cos-sim of top-1 directions across layers. Rows are unit
    # vectors (rows of Vt), so the Gram matrix holds cosines directly.
    top1_dirs = np.stack([dirs[0] for dirs in layer_dirs])  # (num_layers, hidden)
    top1_cos = top1_dirs @ top1_dirs.T                      # (num_layers, num_layers)

    print("\n=== Pairwise top-1 cos-sim (adjacent) ===")
    for L in range(num_layers - 1):
        # Separator between the two layer indices; without it "10" and "11"
        # would print as "1011".
        print(f" L={L:>2}->{L+1:>2} phase={phase_of(L, num_layers):>4} "
              f"|cos|={abs(top1_cos[L, L+1]):>.4f}")

    # Per-phase summary: mean |cos| within phase vs cross-phase
    phase_members = {}
    for L in range(num_layers):
        phase_members.setdefault(phase_of(L, num_layers), []).append(L)

    print("\n=== Per-phase top-1 direction overlap ===")
    print(f" {'phase':>6} {'N':>3} {'intra_cos_mean':>14} {'cross_cos_mean':>14}")
    for ph, Ls in phase_members.items():
        if len(Ls) >= 2:
            intra = np.abs(top1_cos[np.ix_(Ls, Ls)])
            # Strict upper triangle: each unordered pair once, no self-pairs.
            intra_mean = float(intra[np.triu_indices(len(Ls), k=1)].mean())
        else:
            intra_mean = 1.0
        members = set(Ls)
        other_Ls = [L for L in range(num_layers) if L not in members]
        if other_Ls:
            cross_mean = float(np.abs(top1_cos[np.ix_(Ls, other_Ls)]).mean())
        else:
            cross_mean = 0.0
        print(f" {ph:>6} {len(Ls):>3} {intra_mean:>14.4f} {cross_mean:>14.4f}")

    # Subspace overlap: for each phase, find the block's overall principal subspace
    # and measure how much of each individual layer sits in it.
    shared_rank = 8  # matches the "rank-8" / "top-8" wording in the printed report
    print("\n=== Block-shared subspace (rank-8) capture fraction per layer ===")
    for ph, Ls in phase_members.items():
        if len(Ls) < 2:
            continue
        # Stack top-k directions from all layers in phase
        block_dirs = np.concatenate([layer_dirs[L] for L in Ls], axis=0)
        # SVD to get the shared basis of the union
        _, _, Vt_b = np.linalg.svd(block_dirs, full_matrices=False)
        shared_basis = Vt_b[:shared_rank]
        # Project each layer's top-1 direction and measure capture
        for L in Ls:
            v1 = layer_dirs[L][0]
            capture = float(((shared_basis @ v1) ** 2).sum())
            print(f" phase={ph:>4} L={L:>2} v1 captured by block top-8: {capture:.4f}")

    # Save. Direction vectors are deliberately left out (too big for JSON).
    save = {
        "model": args.model,
        "num_layers": num_layers,
        "hidden": hidden,
        "n_calib_tokens": int(n_tok),
        "per_layer": per_layer,
        # Signed values; singular-vector signs are arbitrary, so only the
        # magnitude is meaningful to downstream consumers.
        "top1_cos_adjacent": [float(top1_cos[L, L+1]) for L in range(num_layers-1)],
    }
    with open(args.out, "w") as f:
        json.dump(save, f, indent=2)
    print(f"\nSaved: {args.out}")
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()