"""Analyze operator-level inter-layer alignment from the grams + eigdirs files. Input: qwen3-4b-grams.json (gram[L,L',h], fro_sq[L,h]) qwen3-4b-grams-eigdirs.pt (eig_dirs[L,h,topk,hidden], sym_eigs[L,h,2*head_dim]) Questions: (a) Operator cos-sim between layers. cos(g_L^h, g_L'^h) = gram / √(fro_sq fro_sq'). If ~1 → same operator up to scalar. If low → distinct operators. (b) Scalar-rescale residual using full operator (not spectrum): optimal T = gram / fro_sq', residual_frac = √(1 - cos²). (c) Curvature-sign alignment. For each (L, anchor) pair, what fraction of top-k signed eigenvalues share sign with the anchor's? (d) Top-k eigensubspace alignment. Principal angles between span{eig_dirs_L} and span{eig_dirs_anchor}. Compare: operator cos-sim vs spectral cos-sim (from prior analysis). The sheaf-rs finding was that spectral shape converges across layers while eigenvectors don't. We want to confirm/refute that within QK in Qwen3-4B. """ import argparse import json import numpy as np import torch def main(): ap = argparse.ArgumentParser() ap.add_argument("gram_json") ap.add_argument("--anchor", type=int, default=-1, help="anchor layer index; -1 = last") args = ap.parse_args() with open(args.gram_json) as f: d = json.load(f) num_layers = d["num_layers"] num_heads = d["num_heads"] head_dim = d["head_dim"] hidden = d["hidden_size"] gram = np.array(d["gram"]) # (L, L, H) # NOTE: fro_sq from the json is ||W_K W_Q^T||_F^2 (the measure.py # shortcut), NOT ||g||_F^2 = ||W_K^T W_Q||_F^2 which is what the gram # diagonal gives. Different objects. Use gram diagonal for normalization. diag_sq = np.array([[gram[L, L, h] for h in range(num_heads)] for L in range(num_layers)]) # (L, H) diag = np.sqrt(np.maximum(diag_sq, 1e-20)) # ||g_L^h||_F pt = torch.load(d.get("eigdirs_path", args.gram_json.replace(".json", "-eigdirs.pt")), weights_only=True) eig_dirs = pt["eig_dirs"].double().numpy() # (L, H, topk, hidden) sym_eigs = pt["sym_eigs"].double().numpy() # (L, H, 2*head_dim) topk = eig_dirs.shape[2] anchor = args.anchor if args.anchor >= 0 else num_layers - 1 # ========================================================== # (a) Operator cos-sim matrix, averaged over heads # ========================================================== cos_mat = np.zeros((num_layers, num_layers)) for L in range(num_layers): for Lp in range(num_layers): denom = diag[L] * diag[Lp] per_h = gram[L, Lp] / np.maximum(denom, 1e-20) cos_mat[L, Lp] = per_h.mean() print(f"=== (a) Operator cos-sim between layers, averaged over {num_heads} heads ===") print(f" diagonal (should be 1.0): mean {np.diag(cos_mat).mean():.4f}") # Adjacent-layer cos-sim adj = np.array([cos_mat[L, L+1] for L in range(num_layers-1)]) print(f" adjacent layers cos-sim: mean {adj.mean():.4f} min {adj.min():.4f} max {adj.max():.4f}") # Layer-to-anchor cos-sim to_anchor = cos_mat[:, anchor] print(f" layer -> anchor L={anchor} cos-sim:") print(f" {'L':>3} {'cos':>7} {'T_opt':>7} {'resid_frac':>10}") for L in range(num_layers): c = to_anchor[L] T = float(np.mean(gram[L, anchor] / np.maximum(diag_sq[anchor], 1e-20))) r = float(np.sqrt(max(1.0 - c**2, 0.0))) print(f" {L:>3} {c:+.4f} {T:+7.3f} {r:>10.4f}") # Long-range cos-sim (L=0 to L=35 vs L=17 to L=35 etc.) print(f"\n long-range: cos(L=0, last) = {cos_mat[0, -1]:+.3f} " f"cos(L=midish, last) = {cos_mat[num_layers//2, -1]:+.3f}") # ========================================================== # (b) Full scalar-rescale residual using the gram # ========================================================== print(f"\n=== (b) Operator-level scalar rescale to anchor L={anchor} ===") # residual_frac² = 1 - cos²(g_L, g_anchor) (per head) print(f" {'L':>3} {'mean_cos':>9} {'mean_resid':>10}") for L in range(num_layers): per_h_cos = gram[L, anchor] / np.maximum(diag[L] * diag[anchor], 1e-20) per_h_resid = np.sqrt(np.clip(1.0 - per_h_cos**2, 0.0, 1.0)) print(f" {L:>3} {per_h_cos.mean():>+9.4f} {per_h_resid.mean():>10.4f}") # ========================================================== # (c) Curvature-sign alignment # ========================================================== print(f"\n=== (c) Curvature-sign alignment vs anchor L={anchor} ===") # Look at top-k eigenvalues by magnitude (already sorted that way in measure). # Fraction of top-k (L, h) whose sign matches the anchor's i-th eigenvalue. for k_use in [2, 4, 8, 16, 32, 64, 128, 256]: if k_use > sym_eigs.shape[-1]: continue # sign of top-k_use eigenvalues at layer L vs at anchor, per (L, h) sign_L = np.sign(sym_eigs[:, :, :k_use]) # (L, H, k_use) sign_a = np.sign(sym_eigs[anchor, :, :k_use]) # (H, k_use) agree = (sign_L == sign_a[None, :, :]).mean(axis=-1) # (L, H) print(f" top-{k_use:>3} signs: mean agree = {agree.mean():.3f} " f"by layer range: early {agree[:12].mean():.3f} " f"mid {agree[12:24].mean():.3f} late {agree[24:].mean():.3f}") # Also: distribution of sign-balance per layer (fraction positive eigenvalues) frac_pos = (sym_eigs[:, :, :2 * head_dim] > 0).mean(axis=(1, 2)) print(f"\n fraction positive eigenvalues per layer:") for L in range(num_layers): print(f" L={L:2} frac+ = {frac_pos[L]:.3f}") # ========================================================== # (d) Eigenspace principal angles # ========================================================== print(f"\n=== (d) Top-{topk} eigensubspace principal angles vs anchor L={anchor} ===") # Per-head: cos of principal angles between row-spans of eig_dirs[L, h] # and eig_dirs[anchor, h]. Report mean cos angle per layer. print(f" {'L':>3} {'meanCosPA':>10} {'minCosPA':>10} {'max_top1':>10}") for L in range(num_layers): mean_cos_pa_per_h = [] min_cos_pa_per_h = [] top1_overlap = [] for h in range(num_heads): A = eig_dirs[L, h] # (topk, hidden) rows are unit vectors B = eig_dirs[anchor, h] # (topk, hidden) # Orthonormalize rows (they're close-to-orthonormal but not exactly) Qa, _ = np.linalg.qr(A.T) # hidden × topk Qb, _ = np.linalg.qr(B.T) M = Qa.T @ Qb # topk × topk s = np.linalg.svd(M, compute_uv=False) mean_cos_pa_per_h.append(s.mean()) min_cos_pa_per_h.append(s.min()) # ||² — top-1 eigenvector overlap top1_overlap.append(float((A[0] @ B[0]) ** 2)) print(f" {L:>3} {np.mean(mean_cos_pa_per_h):>10.4f} " f"{np.mean(min_cos_pa_per_h):>10.4f} " f"{np.mean(top1_overlap):>10.4f}") # ========================================================== # Verdict # ========================================================== to_anchor_per_head = np.array([ (gram[L, anchor] / np.maximum(diag[L] * diag[anchor], 1e-20)).mean() for L in range(num_layers) ]) mean_cos_to_anchor = to_anchor_per_head.mean() print(f"\n=== Verdict ===") print(f" mean operator cos-sim to anchor: {mean_cos_to_anchor:+.4f}") adj_mean = adj.mean() print(f" mean operator cos-sim adjacent layers: {adj_mean:+.4f}") if mean_cos_to_anchor > 0.9: print(" STRONG: same operator up to scalar across all layers.") elif mean_cos_to_anchor > 0.5: print(" MEDIUM: substantial shared operator, but layer-specific drift.") elif mean_cos_to_anchor > 0.1: print(" WEAK: some alignment; far from single-operator interpretation.") else: print(" REJECTED: operators are effectively orthogonal across layers.") if __name__ == "__main__": main()