consciousness/sa-schedule-analyze-aligned.py
Kent Overstreet 4225294d16 replace try_lock() with lock_blocking() across UI thread
Add lock_blocking() to TrackedMutex: blocks current thread using
block_in_place + futures::executor::block_on, safe for sync contexts.

Replace all try_lock() calls with lock_blocking() in slash commands,
UI rendering, and status reads. Lock hold times are fast enough that
blocking briefly is fine, and this eliminates the spurious 'lock
unavailable' paths that were never actually hit.

Kept rx_mutex.try_lock() in mod.rs (std::sync::Mutex for stderr rx).
2026-04-25 15:35:14 -04:00

157 lines
5.3 KiB
Python

"""Analyze aligned_variation output to answer the training-artifact vs
specialization question.

Inputs: qwen3-*-null.json (raw cos-sim) + qwen3-*-aligned.json (aligned cos-sim)

For each layer pair where aligned data exists, compare:
    raw_cos(L)     — before Procrustes alignment
    aligned_cos(L) — after Procrustes alignment
    delta = aligned_cos - raw_cos

If delta is substantial (aligned much larger than raw), rotation gauge
was hiding shared structure → training-artifact hypothesis supported.
If delta ≈ 0, specialization is real (rotation can't find shared
structure because there isn't any).

Stratify by phase to test prediction that LATE layers have LARGER delta
(more rotation-gauge noise, less real specialization).
"""
import argparse
import json
import numpy as np
def phase_of(L, num_layers):
    """Return the coarse phase label for layer index ``L``.

    Labels are "A", "B", "C", "D", "E" or "tail". Two depths have
    hand-set inclusive boundaries (per the original note: rough
    assignments based on measured 32B entropy boundaries); the 36-layer
    table has no "E" phase. Any other depth falls back to cut-offs on
    the relative depth ``L / num_layers``, on the expectation that the
    shape is similar — refit per model for anything precise.
    """
    if num_layers == 64:  # Qwen3-32B
        last_layer_of_phase = ((6, "A"), (9, "B"), (31, "C"), (46, "D"), (58, "E"))
    elif num_layers == 36:  # Qwen3-4B
        last_layer_of_phase = ((6, "A"), (9, "B"), (23, "C"), (33, "D"))
    else:
        last_layer_of_phase = None

    if last_layer_of_phase is not None:
        # Fixed tables: each bound is the last layer index still in that phase.
        for last_layer, label in last_layer_of_phase:
            if L <= last_layer:
                return label
        return "tail"

    # Unknown depth: classify by relative depth; upper bounds are exclusive.
    depth_fraction = L / num_layers
    exclusive_upper_bounds = (
        (0.11, "A"),
        (0.15, "B"),
        (0.5, "C"),
        (0.75, "D"),
        (0.92, "E"),
    )
    for upper, label in exclusive_upper_bounds:
        if depth_fraction < upper:
            return label
    return "tail"
def _load_json(path):
    """Parse the JSON document at ``path``, closing the file before returning."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def _cos_cell(value):
    """Format one cos-sim as a 10-character cell; ``None`` renders as N/A."""
    # Right-align the placeholder to the same width as the numeric cells so
    # table columns stay lined up when a value is missing.
    return f"{value:+10.4f}" if value is not None else f"{'N/A':>10}"


def main():
    """Print a raw-vs-aligned cos-sim comparison for adjacent layer pairs.

    Command-line arguments:
        null_json: output of null_residual.py. Read for ``pair_results``,
            a list of ``{L, L_next, families: {family: {cos, ...}}}``.
        aligned_json: output of aligned_variation.py. Read for
            ``num_layers`` and ``aligned_cos`` (``family -> {L: cos}``).

    Writes three sections to stdout: a per-pair table of raw and aligned
    cos-sim for each projection family, the per-phase mean of
    ``aligned - raw`` per family, and a fixed interpretation guide.

    Raises:
        KeyError: if a required top-level key is missing from either file.
        OSError / json.JSONDecodeError: if a file cannot be read or parsed.
    """
    families = ("q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj")

    ap = argparse.ArgumentParser()
    ap.add_argument("null_json", help="output of null_residual.py")
    ap.add_argument("aligned_json", help="output of aligned_variation.py")
    args = ap.parse_args()

    null = _load_json(args.null_json)
    aligned = _load_json(args.aligned_json)

    num_layers = aligned["num_layers"]
    aligned_cos = aligned["aligned_cos"]  # dict: family -> {L: cos}
    pair_results = null["pair_results"]  # list of {L, L_next, families: {...}}

    # Build raw_cos from the null output, keyed by integer layer index so the
    # lookups below work even if the producer serialized L as a string.
    raw_cos = {fam: {} for fam in families}
    for pr in pair_results:
        layer = int(pr["L"])
        for fam in families:
            if fam in pr["families"]:
                raw_cos[fam][layer] = pr["families"][fam]["cos"]

    # JSON object keys are always strings; collect every layer that has
    # aligned data in any family instead of assuming q_proj is present
    # and complete.
    layer_keys = sorted(
        {int(key) for fam in families for key in aligned_cos.get(fam, {})}
    )

    print(f"=== Aligned vs Raw cos-sim comparison ({args.aligned_json}) ===")
    print(f" {num_layers} layers total; aligned data for "
          f"{len(layer_keys)} pairs\n")

    # Per-pair table: L, phase, family cos-sims raw and aligned
    header = f" {'L':>3} {'phase':>5}"
    header += "".join(f" {fam + '_raw':>10} {fam + '_ali':>10}" for fam in families)
    print(header)

    # One pass prints the row and accumulates (aligned - raw) per phase/family.
    phase_deltas = {}
    for layer in layer_keys:
        phase = phase_of(layer, num_layers)
        row = f" {layer:>3} {phase:>5}"
        for fam in families:
            r = raw_cos[fam].get(layer)
            a = aligned_cos.get(fam, {}).get(str(layer))
            row += f" {_cos_cell(r)} {_cos_cell(a)}"
            if r is not None and a is not None:
                phase_deltas.setdefault(phase, {}).setdefault(fam, []).append(a - r)
        print(row)

    # Aggregate by phase: mean (aligned - raw) per family per phase
    print("\n=== Per-phase mean delta (aligned_cos - raw_cos) by family ===")
    print(" Large positive delta = rotation alignment revealed shared")
    print(" structure. Small delta = specialization is gauge-independent.\n")

    print(f" {'phase':>6}" + "".join(f" {fam:>10}" for fam in families))
    # Plain string sort gives A..E then "tail" (uppercase sorts before lowercase).
    for ph in sorted(phase_deltas):
        line = f" {ph:>6}"
        for fam in families:
            vals = phase_deltas[ph].get(fam, [])
            if vals:
                line += f" {np.mean(vals):+10.4f}"
            else:
                line += f" {'':>10}"
        print(line)

    # Interpretation
    print("\n=== Interpretation ===")
    print(" Prediction under training-artifact hypothesis:")
    print(" delta(Phase E) > delta(Phase C) for projection families")
    print(" → late layers have more rotation-gauge-hidden structure")
    print(" → specialization is partly training noise, not structural")
    print("")
    print(" Prediction under real-specialization hypothesis:")
    print(" delta ~ 0 everywhere")
    print(" → layers genuinely point in different directions, gauge irrelevant")
# Run the analysis only when executed as a script, so importing this module
# (e.g. to reuse phase_of) does not parse sys.argv or read any files.
if __name__ == "__main__":
    main()