# consciousness/sa-schedule-null-residual.py

"""Null test: before any fitting, how similar are adjacent layers in the
raw weight-matrix sense?

For each adjacent layer pair (L, L+1) and each parameter family:
  1. Normalize each matrix by its Frobenius norm (unit sphere).
  2. Compute cos-sim = <W_L, W_{L+1}> / (||W_L|| ||W_{L+1}||).
  3. Compute residual Δ = W_{L+1,norm} - W_{L,norm}; report ||Δ||_F
     (null-if-orthogonal = sqrt(2) ≈ 1.414; null-if-identical = 0).
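  4. Effective rank of Δ (via entropy of normalized spectrum) is currently
     NOT computed: the residual SVD was the bottleneck on large matrices,
     so the corresponding fields are emitted as null.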

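Covers the whole network, not just the middle block. Prints per-layer
cos-sim tables, a per-family summary, and the Frobenius-norm scale track
(plus a PCA of its log) across depth, then saves the results as JSON.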
"""
import argparse
import json
import numpy as np
import torch
from transformers import AutoModelForCausalLM


def spec_entropy(singvals, eps=1e-12):
    """Return the Shannon entropy (in nats) of a singular-value spectrum.

    The squared singular values are normalized into a distribution (the
    normalizer is floored at ``eps``), every entry is clipped into
    ``[eps, 1]`` so the logarithm stays finite, and ``-sum(p * log p)`` is
    returned as a Python float.
    """
    energy = singvals ** 2
    normalizer = max(energy.sum(), eps)
    probs = np.clip(energy / normalizer, eps, 1.0)
    weighted_logs = probs * np.log(probs)
    return float(-weighted_logs.sum())


def frob(x):
    """Return ``np.linalg.norm(x)`` (default order) as a plain Python float.

    With the default order this is the Frobenius norm for a 2-D array and
    the Euclidean norm for a 1-D array.
    """
    magnitude = np.linalg.norm(x)
    return float(magnitude)


def norm_mat(x, eps=1e-12):
    """Return ``x`` divided by its Frobenius norm.

    The divisor is floored at ``eps`` so an all-zero input is returned as
    zeros instead of triggering a division by zero.
    """
    denom = frob(x)
    if denom < eps:
        denom = eps
    return x / denom


def null_test_pair(A_dict, B_dict, family_names, num_heads, num_kv_heads, head_dim):
    """Compare the same parameter families of two adjacent layers.

    For every family present in both dicts (with equal shapes and
    non-degenerate norms) the arrays are treated as flat vectors and two
    quantities are computed:

    * ``cos``: cosine similarity ``<Wa, Wb> / (||Wa|| ||Wb||)``, clipped to
      ``[-1, 1]``.
    * ``resid_norm``: Frobenius norm of the difference between the two
      unit-normalized arrays, obtained in closed form as
      ``sqrt(2 - 2 * cos)`` (0 if identical, sqrt(2) if orthogonal).

    Args:
        A_dict: Mapping family name -> NumPy array for layer L.
        B_dict: Same mapping for layer L + 1.
        family_names: Iterable of family names to compare.
        num_heads: Unused; kept so existing call sites keep working.
        num_kv_heads: Unused; kept so existing call sites keep working.
        head_dim: Unused; kept so existing call sites keep working.

    Returns:
        Dict mapping family name -> ``{"cos", "resid_norm",
        "resid_eff_rank", "resid_spec_entropy"}``. The last two are always
        ``None`` because the residual SVD is skipped. Families that are
        missing from either dict, differ in shape, or have a Frobenius norm
        below 1e-12 are omitted.
    """
    out = {}
    for fam in family_names:
        if fam not in A_dict or fam not in B_dict:
            continue
        Wa = A_dict[fam]
        Wb = B_dict[fam]
        if Wa.shape != Wb.shape:
            continue
        fa = frob(Wa)
        fb = frob(Wb)
        if fa < 1e-12 or fb < 1e-12:
            continue

        # Accumulate the inner product in float64 so the result does not
        # depend on the input dtype's accumulator precision.
        inner = np.sum(Wa * Wb, dtype=np.float64)
        # Rounding can push the ratio a hair outside [-1, 1] for (nearly)
        # identical or opposite matrices; clip so the reported cosine is
        # always a valid one. np.clip propagates NaN, so bad weights stay
        # visible instead of being masked.
        cos = float(np.clip(inner / (fa * fb), -1.0, 1.0))
        # || Wa/||Wa|| - Wb/||Wb|| ||_F^2 = 2 - 2 * cos
        resid_norm_sq = 2.0 - 2.0 * cos
        resid_norm = float(np.sqrt(max(resid_norm_sq, 0.0)))

        # Residual SVD is skipped: it was the bottleneck on large matrices,
        # and cos-sim plus the scalar fit carry the main signal. The keys
        # are kept (as None) so the output schema stays stable.
        out[fam] = {
            "cos": cos,
            "resid_norm": resid_norm,
            "resid_eff_rank": None,
            "resid_spec_entropy": None,
        }
    return out


def _layer_weights(layer):
    """Return one decoder layer's weights as float32 NumPy arrays, keyed by family.

    ``q_norm`` / ``k_norm`` are included only when the attention module
    exposes them.
    """
    attn = layer.self_attn
    mlp = layer.mlp
    modules = {
        "q_proj": attn.q_proj,
        "k_proj": attn.k_proj,
        "v_proj": attn.v_proj,
        "o_proj": attn.o_proj,
        "gate_proj": mlp.gate_proj,
        "up_proj": mlp.up_proj,
        "down_proj": mlp.down_proj,
        "input_ln": layer.input_layernorm,
        "post_attn_ln": layer.post_attention_layernorm,
        "q_norm": getattr(attn, "q_norm", None),
        "k_norm": getattr(attn, "k_norm", None),
    }
    return {
        fam: mod.weight.detach().float().numpy()
        for fam, mod in modules.items()
        if mod is not None
    }


def _log_scale_pca(scales, families):
    """SVD of the per-family-centered log Frobenius-norm track.

    Only families with a strictly positive norm at every layer are used,
    because a missing or zero norm has no finite logarithm. Returns
    ``(fam_used, U, S, Vh)``; when no family qualifies the arrays are empty.
    """
    fam_used = [
        fam for fam in families
        if scales[fam] and all(v is not None and v > 0.0 for v in scales[fam])
    ]
    if not fam_used:
        return fam_used, np.zeros((0, 0)), np.zeros(0), np.zeros((0, 0))
    log_track = np.log(np.array([scales[fam] for fam in fam_used]))  # (families, layers)
    centered = log_track - log_track.mean(axis=1, keepdims=True)
    U, S, Vh = np.linalg.svd(centered, full_matrices=False)
    return fam_used, U, S, Vh


@torch.no_grad()
def main():
    """Run the adjacent-layer null test for a causal LM and save the results.

    Command-line options:
        --model: model name or path passed to ``from_pretrained``.
        --out:   path of the JSON file to write.

    Prints the per-pair cosine table, a per-family summary, the
    Frobenius-norm scale track and a PCA of its log, then dumps everything
    to ``--out``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="Qwen/Qwen3-4B")
    ap.add_argument("--out", default="/tmp/sa-null-residual.json")
    args = ap.parse_args()

    print(f"Loading {args.model} ...", flush=True)
    # NOTE(review): trust_remote_code=True executes Python shipped with the
    # model repository; only point --model at sources you trust.
    model = AutoModelForCausalLM.from_pretrained(
        args.model,
        torch_dtype=torch.bfloat16,   # halve memory vs fp32
        device_map="cpu",
        trust_remote_code=True,
        attn_implementation="eager",
    )
    cfg = model.config
    num_layers = cfg.num_hidden_layers
    num_heads = cfg.num_attention_heads
    num_kv_heads = getattr(cfg, "num_key_value_heads", num_heads)
    hidden = cfg.hidden_size
    head_dim = getattr(cfg, "head_dim", hidden // num_heads)
    intermediate = cfg.intermediate_size
    print(f"  L={num_layers} H={num_heads} kv={num_kv_heads} hd={head_dim} "
          f"hidden={hidden} ff={intermediate}", flush=True)

    families = ["q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
                "input_ln", "post_attn_ln", "q_norm", "k_norm"]

    # Stream through the layers keeping only two fp32 copies alive at a
    # time, instead of materializing an fp32 copy of the whole network next
    # to the bf16 model. Per-layer Frobenius norms feed the scale-track PCA.
    scales = {fam: [] for fam in families}
    pair_results = []
    prev_weights = None
    for L in range(num_layers):
        weights = _layer_weights(model.model.layers[L])
        for fam in families:
            scales[fam].append(frob(weights[fam]) if fam in weights else None)
        if prev_weights is not None:
            stats = null_test_pair(prev_weights, weights, families,
                                   num_heads, num_kv_heads, head_dim)
            pair_results.append({"L": L - 1, "L_next": L, "families": stats})
        prev_weights = weights

    del model
    prev_weights = None

    def collect(fam, key):
        """Non-None values of ``key`` for ``fam`` across all layer pairs."""
        vals = (pr["families"].get(fam, {}).get(key) for pr in pair_results)
        return [v for v in vals if v is not None]

    # Report
    print("\n=== Adjacent-layer raw cos-sim per family ===")
    print("   null interpretation: 1.0 = identical matrices up to scale, 0 = orthogonal")
    # Header and rows share one column list so the table stays aligned even
    # when a family is absent from every pair.
    cos_cols = [fam for fam in families
                if any(fam in pr["families"] for pr in pair_results)]
    print(f"\n  {'L':>3}" + "".join(f"  {fam:>12}" for fam in cos_cols))
    for pr in pair_results:
        cells = [
            f"  {pr['families'][fam]['cos']:>+12.4f}" if fam in pr["families"]
            else f"  {'':>12}"
            for fam in cos_cols
        ]
        print(f"  {pr['L']:>3}" + "".join(cells))

    # Summary per family + scalar-T fit comparison
    # raw_resid = sqrt(2 - 2*cos); scalar_fit = sqrt(1 - cos²) = sin(angle).
    # random_baseline = sqrt(2) ≈ 1.414.
    print("\n=== Per-family summary (across all adjacent pairs) ===")
    print("   random baseline = sqrt(2) ≈ 1.414  (what we'd see with no relationship)")
    print(f"\n  {'family':>14}  {'mean_cos':>10}  {'median_cos':>11}  "
          f"{'raw_resid':>10}  {'scalar_fit':>11}  {'improve_frac':>13}  {'mean_SE':>8}")
    for fam in families:
        cs = np.array(collect(fam, "cos"))
        if cs.size == 0:
            continue
        ses = collect(fam, "resid_spec_entropy")
        raw = np.sqrt(np.maximum(2.0 - 2.0 * cs, 0.0)).mean()
        scalar_fit = np.sqrt(np.maximum(1.0 - cs ** 2, 0.0)).mean()
        # Improvement fraction: (raw - scalar_fit) / raw, where
        # 0 = scalar does nothing, 1 = scalar reconstructs.
        improve_frac = (raw - scalar_fit) / max(raw, 1e-12)
        # Spectral entropy is only meaningful when it was actually computed;
        # show "n/a" rather than a misleading 0.0000 otherwise.
        se_cell = f"{np.mean(ses):>8.4f}" if ses else f"{'n/a':>8}"
        print(f"  {fam:>14}  {cs.mean():>+10.4f}  {np.median(cs):>+11.4f}  "
              f"{raw:>10.4f}  {scalar_fit:>11.4f}  {improve_frac:>13.4f}  "
              f"{se_cell}")

    # Scale-track: Frobenius norm of each family across layers
    print("\n=== Scale track: ||W_family||_F across layers ===")
    scale_cols = [fam for fam in families
                  if any(s is not None for s in scales[fam])]
    print(f"  {'L':>3}" + "".join(f"  {fam:>12}" for fam in scale_cols))
    for L in range(num_layers):
        cells = [
            f"  {scales[fam][L]:>12.4f}" if scales[fam][L] is not None
            else f"  {'':>12}"
            for fam in scale_cols
        ]
        print(f"  {L:>3}" + "".join(cells))

    # PCA of log-scale-track to see dimensionality of schedule
    # (rows are families, columns are layers).
    print("\n=== PCA of log-scale-track (dimensionality of schedule) ===")
    fam_used, U, S, Vh = _log_scale_pca(scales, families)
    total = (S ** 2).sum()
    print("  explained variance by mode:")
    if not fam_used:
        print("    (no family has a positive norm at every layer; PCA skipped)")
    for i, s in enumerate(S):
        pct = float(s ** 2 / max(total, 1e-20)) * 100
        loadings = ", ".join(f"{fam}={U[j, i]:+.2f}"
                             for j, fam in enumerate(fam_used))
        print(f"    mode {i+1:>2}: {pct:>6.2f}%    "
              f"(loadings per family: {loadings})")

    # Save
    with open(args.out, "w", encoding="utf-8") as f:
        json.dump({
            "model": args.model,
            "pair_results": pair_results,
            "scales": scales,
            "scale_pca_singvals": S.tolist(),
            "scale_pca_loadings": U.tolist(),
            # Scores = diag(S) @ Vh, written as a row-wise scaling.
            "scale_pca_scores": (S[:, None] * Vh).tolist(),
            "fam_used": fam_used,
        }, f, indent=2)
    print(f"\nSaved: {args.out}")


# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()