# consciousness/sa-schedule-delta-svd.py

"""Per-layer residual-stream delta SVD: δ_L = h_{L+1} - h_L stacked
over all tokens in a calibration set. SVD gives us:

  - top singular value per layer → γ_L (scalar magnitude, what Kirkpatrick fit)
  - top right-singular-vector per layer → v_L (direction in hidden space)
  - effective rank per layer → is this one direction or many?
  - pairwise v_L cos-sim across layers → are layers subspace-disjoint or -shared?

This directly tests the anisotropic-SA hypothesis:
  h_{L+1} = h_L + T_shared(h_L) + γ_L · v_L · f(...)

Phase C prediction: v_L vectors cover broad shared subspace (high mutual cos-sim,
rank-few overall), δ_L is mostly noise around a shared update.
Phase E prediction: v_L vectors are specialized (low pairwise cos-sim, each layer
its own direction), effective rank of the block is close to N.

Qwen3-32B phases: A 0-6, B 7-9, C 10-31, D 32-46, E 47-58, tail 59-63.
"""
import argparse
import json
import numpy as np
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


# Calibration prompts. main() joins these with blank lines into a single text
# stream and tokenizes it once, so every token of every prompt contributes one
# row to each layer's delta matrix. The prompts mix factual recall, arithmetic,
# code, SQL, chat-style and free-form narrative text.
CALIB = [
    "The Eiffel Tower is located in",
    "Photosynthesis is the process by which",
    "The three branches of the US government are the legislative, executive, and",
    "If a train travels 60 miles per hour for 2.5 hours, the total distance covered is",
    "Solve for x: 3x + 7 = 22. The answer is x =",
    "The derivative of x^3 + 2x^2 is",
    "def fibonacci(n):\n    if n < 2:\n        return n\n    return",
    "# Python list comprehension to square even numbers in 0-9\nresult = ",
    "SELECT name, age FROM users WHERE",
    "She opened the old wooden box and found",
    "The argument in favor of renewable energy is",
    "User: What is the capital of Australia?\nAssistant:",
    "Write a haiku about autumn:\n",
    "Albert Einstein was born in the year",
    "The speed of light in vacuum is approximately",
    "I really loved that movie because",
    "The main difference between a virus and a bacterium is",
    "The French word for 'apple' is",
    "1 + 1 = ",
    "Once upon a time, in a land far away,",
    "The key insight of general relativity is that gravity is not a force but",
    "Water boils at 100 degrees Celsius at standard atmospheric pressure. At higher",
    "In object-oriented programming, encapsulation refers to",
    "The mitochondria is often called the powerhouse of the cell because it",
    "Shakespeare's Hamlet begins with the famous line",
]


def phase_of(L, num_layers):
    """Return the phase label ("A".."E" or "tail") for layer index ``L``.

    A 64-layer model uses fixed, inclusive last-layer boundaries
    (A 0-6, B 7-9, C 10-31, D 32-46, E 47-58, tail 59+). Any other depth
    uses exclusive upper bounds on the fractional depth ``L / num_layers``.
    """
    labels = ("A", "B", "C", "D", "E")
    if num_layers == 64:
        # Inclusive index of the last layer belonging to each phase.
        for label, last_layer in zip(labels, (6, 9, 31, 46, 58)):
            if L <= last_layer:
                return label
    else:
        depth = L / num_layers
        # Exclusive upper bound on fractional depth for each phase.
        for label, upper in zip(labels, (0.11, 0.15, 0.5, 0.75, 0.92)):
            if depth < upper:
                return label
    return "tail"


@torch.no_grad()
def main():
    """Run the per-layer residual-delta SVD analysis and write a JSON summary.

    Steps:
      1. Parse ``--model``, ``--out`` and ``--top-k`` (must be >= 1).
      2. Load tokenizer and model (bfloat16, on CUDA) and run a single forward
         pass over the concatenated ``CALIB`` prompts with hidden states on.
      3. For each layer L form delta_L = hs[L+1] - hs[L] (first token dropped),
         compute step-size and alignment statistics, and take an economy SVD.
      4. Print adjacent-layer and per-phase |cos| between the layers' top-1
         right-singular vectors, and how much of each layer's top-1 direction
         lies in the rank-8 shared subspace of its phase.
      5. Write per-layer scalars (directions are not saved) to ``--out``.

    Exits through ``argparse`` with a usage error when ``--top-k`` is < 1.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", default="Qwen/Qwen3-32B")
    ap.add_argument("--out", default="/tmp/delta-svd.json")
    ap.add_argument("--top-k", type=int, default=8,
                    help="keep top-k singular values / directions per layer")
    args = ap.parse_args()
    # Fail fast: the cross-layer analysis below indexes direction [0] of every
    # layer, and a negative value would silently slice from the wrong end.
    # Catch it before paying for the model load and the SVDs.
    if args.top_k < 1:
        ap.error("--top-k must be >= 1")

    print(f"Loading {args.model} ...", flush=True)
    tok = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        args.model, torch_dtype=torch.bfloat16, device_map="cuda",
        trust_remote_code=True, attn_implementation="eager",
    ).eval()
    num_layers = model.config.num_hidden_layers
    hidden = model.config.hidden_size
    print(f"  L={num_layers}, hidden={hidden}", flush=True)

    # Concat calib and tokenize as one stream
    text = "\n\n".join(CALIB)
    enc = tok(text, return_tensors="pt", truncation=True, max_length=2048).to("cuda")
    n_tok = enc.input_ids.shape[1]
    print(f"  calibration tokens: {n_tok}", flush=True)

    out = model(**enc, output_hidden_states=True, use_cache=False)
    # hidden_states: tuple of (num_layers+1) tensors, each (1, n_tok, hidden)
    hs = [h[0].float().cpu().numpy() for h in out.hidden_states]
    # hs[L] = residual stream entering layer L (or leaving layer L-1). So
    # delta_L = hs[L+1] - hs[L] is layer L's contribution.
    # NOTE(review): in many HF decoder implementations the final entry of
    # hidden_states is taken after the model's final norm, in which case the
    # last layer's delta also contains that norm -- confirm for this model.
    print(f"  hidden_states count: {len(hs)} (expect {num_layers+1})", flush=True)
    # The model is no longer needed; free GPU memory before the CPU-side SVDs.
    del model, out
    torch.cuda.empty_cache()

    # Per-layer SVD
    per_layer = []   # JSON-serializable scalar summaries, one dict per layer
    top_dirs = []    # top_dirs[L]: (<=top_k, hidden) right-singular vectors
    for L in range(num_layers):
        # Remove BOS / first-token artifacts (often outlier)
        delta = (hs[L+1] - hs[L])[1:]  # (n_tok-1, hidden)
        h_in = hs[L][1:]               # (n_tok-1, hidden)

        # Norm per token
        token_norms = np.linalg.norm(delta, axis=1)
        h_norms = np.linalg.norm(h_in, axis=1)
        # Relative step size: ||delta_L|| / ||h_L||
        rel_step = token_norms / np.maximum(h_norms, 1e-8)
        # Angle between delta and h, per token: cos = <d, h> / (||d|| ||h||)
        dot = np.einsum("nd,nd->n", delta, h_in)
        cos_delta_h = dot / np.maximum(token_norms * h_norms, 1e-8)
        # "Parallel" component: how much of delta points along +/-h
        parallel_frac = np.abs(cos_delta_h).mean()

        # Economy SVD on CPU. S is decreasing; rows of Vt are the
        # right-singular vectors (directions in hidden space).
        _, S, Vt = np.linalg.svd(delta, full_matrices=False)

        # Effective rank = exp(entropy of the normalized squared SVs)
        p = S**2 / (S**2).sum()
        p_nz = p[p > 1e-12]  # drop ~zero mass so log() stays finite
        eff_rank = float(np.exp(-(p_nz * np.log(p_nz)).sum()))

        phase = phase_of(L, num_layers)
        # Keep directions as arrays (float32-rounded values held in float64)
        # so the cross-layer products below run in double precision.
        top_dirs.append(Vt[:args.top_k].astype(np.float32).astype(np.float64))
        per_layer.append({
            "L": L,
            "phase": phase,
            "frob": float(np.linalg.norm(delta)),
            "token_norm_mean": float(token_norms.mean()),
            "token_norm_std": float(token_norms.std()),
            "h_norm_mean": float(h_norms.mean()),
            "rel_step_mean": float(rel_step.mean()),
            "rel_step_std": float(rel_step.std()),
            "parallel_frac": float(parallel_frac),
            "cos_delta_h_mean": float(cos_delta_h.mean()),
            "top_singvals": S[:args.top_k].tolist(),
            "eff_rank": eff_rank,
            # Energy concentration in the leading singular directions
            "top1_frac": float(p[0]),
            "top3_frac": float(p[:3].sum()),
            "top10_frac": float(p[:min(10, len(p))].sum()),
        })
        print(f"  L={L:>2}  phase={phase:>4}  "
              f"||h||={h_norms.mean():>7.1f}  "
              f"||δ||={token_norms.mean():>7.2f}  "
              f"rel={rel_step.mean():.4f}  "
              f"‖parallel‖={parallel_frac:.4f}  "
              f"eff_rank={eff_rank:>6.2f}",
              flush=True)

    # Pairwise cos-sim of top-1 directions across layers. Rows are unit
    # vectors, so the Gram matrix holds cosines directly.
    top1_dirs = np.array([dirs[0] for dirs in top_dirs])  # (L, d)
    top1_cos = top1_dirs @ top1_dirs.T  # (L, L)

    # Adjacent-layer overlap of the top-1 directions. Singular vectors are
    # only defined up to sign, hence |cos|.
    print("\n=== Pairwise top-1 cos-sim (adjacent) ===")
    for L in range(num_layers - 1):
        print(f"  L={L:>2}→{L+1:>2}  phase={phase_of(L, num_layers):>4}  "
              f"|cos|={abs(top1_cos[L, L+1]):>.4f}")

    # Per-phase summary: mean |cos| within phase vs cross-phase
    phase_members = {}
    for L in range(num_layers):
        phase_members.setdefault(phase_of(L, num_layers), []).append(L)

    print("\n=== Per-phase top-1 direction overlap ===")
    print(f"  {'phase':>6}  {'N':>3}  {'intra_cos_mean':>14}  {'cross_cos_mean':>14}")
    for ph, Ls in phase_members.items():
        if len(Ls) >= 2:
            intra = abs(top1_cos[np.ix_(Ls, Ls)])
            # Strict upper triangle: each unordered pair once, no diagonal.
            intra_mean = float(intra[np.triu_indices(len(Ls), k=1)].mean())
        else:
            intra_mean = 1.0  # single-layer phase: no pairs to average
        members = set(Ls)
        other_Ls = [L for L in range(num_layers) if L not in members]
        if other_Ls:
            cross_mean = float(abs(top1_cos[np.ix_(Ls, other_Ls)]).mean())
        else:
            cross_mean = 0.0
        print(f"  {ph:>6}  {len(Ls):>3}  {intra_mean:>14.4f}  {cross_mean:>14.4f}")

    # Subspace overlap: for each phase, find the block's overall principal subspace
    # and measure how much of each individual layer sits in it.
    shared_rank = 8  # fixed subspace size, independent of --top-k
    print(f"\n=== Block-shared subspace (rank-{shared_rank}) capture fraction per layer ===")
    for ph, Ls in phase_members.items():
        if len(Ls) < 2:
            continue
        # Stack top-k directions from all layers in phase
        block_dirs = np.concatenate([top_dirs[L] for L in Ls], axis=0)
        # SVD to get the shared basis of the union
        _, _, Vt_b = np.linalg.svd(block_dirs, full_matrices=False)
        shared_basis = Vt_b[:shared_rank]
        # Squared norm of the projection of each layer's (unit) top-1
        # direction onto the shared basis = fraction captured.
        for L in Ls:
            v1 = top_dirs[L][0]
            capture = float(((shared_basis @ v1) ** 2).sum())
            print(f"  phase={ph:>4}  L={L:>2}  v1 captured by block top-{shared_rank}: {capture:.4f}")

    # Save (directions are deliberately left out: too big for the JSON)
    save = {
        "model": args.model,
        "num_layers": num_layers,
        "hidden": hidden,
        "n_calib_tokens": int(n_tok),
        "per_layer": per_layer,
        # NOTE: stored signed, but the sign of a singular vector is
        # arbitrary; consumers should compare magnitudes.
        "top1_cos_adjacent": [float(top1_cos[L, L+1]) for L in range(num_layers-1)],
    }
    with open(args.out, "w") as f:
        json.dump(save, f, indent=2)
    print(f"\nSaved: {args.out}")


# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()