forked from kent/consciousness
replace try_lock() with lock_blocking() across UI thread
Add lock_blocking() to TrackedMutex: blocks current thread using block_in_place + futures::executor::block_on, safe for sync contexts. Replace all try_lock() calls with lock_blocking() in slash commands, UI rendering, and status reads. Lock hold times are fast enough that blocking briefly is fine, and this eliminates the spurious 'lock unavailable' paths that were never actually hit. Kept rx_mutex.try_lock() in mod.rs (std::sync::Mutex for stderr rx).
This commit is contained in:
parent
5210f7dd66
commit
4225294d16
28 changed files with 4199 additions and 67 deletions
157
sa-schedule-analyze-aligned.py
Normal file
157
sa-schedule-analyze-aligned.py
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
"""Analyze aligned_variation output to answer the training-artifact vs
|
||||
specialization question.
|
||||
|
||||
Inputs: qwen3-*-null.json (raw cos-sim) + qwen3-*-aligned.json (aligned cos-sim)
|
||||
|
||||
For each layer pair where aligned data exists, compare:
|
||||
raw_cos(L) — before Procrustes alignment
|
||||
aligned_cos(L) — after Procrustes alignment
|
||||
delta = aligned_cos - raw_cos
|
||||
|
||||
If delta is substantial (aligned much larger than raw), rotation gauge
|
||||
was hiding shared structure → training-artifact hypothesis supported.
|
||||
If delta ≈ 0, specialization is real (rotation can't find shared
|
||||
structure because there isn't any).
|
||||
|
||||
Stratify by phase to test the prediction that LATE layers have LARGER delta
|
||||
(more rotation-gauge noise, less real specialization).
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
|
||||
def phase_of(L, num_layers):
    """Map layer index ``L`` to a coarse phase label.

    The boundaries are rough, taken from entropy measurements on the
    32B model; other depths would ideally be refit, but the overall
    shape is expected to be similar.

    Args:
        L: layer index.
        num_layers: total number of layers in the model.

    Returns:
        One of "A", "B", "C", "D", "E" or "tail". The 36-layer table has
        no "E" phase.
    """
    # Hand-placed inclusive upper bounds for the two known depths.
    if num_layers == 64:  # Qwen3-32B
        inclusive_bounds = ((6, "A"), (9, "B"), (31, "C"), (46, "D"), (58, "E"))
    elif num_layers == 36:  # Qwen3-4B
        inclusive_bounds = ((6, "A"), (9, "B"), (23, "C"), (33, "D"))
    else:
        inclusive_bounds = None

    if inclusive_bounds is not None:
        for upper, label in inclusive_bounds:
            if L <= upper:
                return label
        return "tail"

    # Unknown depth: fall back to exclusive cut-offs on relative depth.
    depth = L / num_layers
    for cutoff, label in ((0.11, "A"), (0.15, "B"), (0.5, "C"),
                          (0.75, "D"), (0.92, "E")):
        if depth < cutoff:
            return label
    return "tail"
|
||||
|
||||
|
||||
def main():
    """Print a raw-vs-aligned cos-sim comparison report.

    Command-line arguments:
        null_json: JSON file holding a "pair_results" list; each entry has
            an "L" layer index and a "families" mapping from family name to
            a dict with a "cos" value (the raw cos-sim).
        aligned_json: JSON file holding "num_layers" and an "aligned_cos"
            mapping from family name to {layer index (string key): cos}.

    Output (stdout):
        1. A per-layer table of raw and aligned cos-sim for each family.
        2. The per-phase mean of (aligned - raw) for each family.
        3. A fixed interpretation guide for the two hypotheses.

    The set of layers shown is taken from the "q_proj" entry of
    "aligned_cos", so that entry must be present.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("null_json", help="output of null_residual.py")
    ap.add_argument("aligned_json", help="output of aligned_variation.py")
    args = ap.parse_args()

    # Context managers so both input files are closed deterministically.
    with open(args.null_json, encoding="utf-8") as fh:
        null = json.load(fh)
    with open(args.aligned_json, encoding="utf-8") as fh:
        aligned = json.load(fh)

    num_layers = aligned["num_layers"]
    aligned_cos = aligned["aligned_cos"]  # dict: family -> {L: cos}
    pair_results = null["pair_results"]   # list of {L, L_next, families: {family: {cos, ...}}}

    # Single source of truth for the family columns, in display order.
    families = ["q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj"]

    # Build raw_cos dict from null output. Layer indices are coerced to int
    # so they compare equal to the int layer ids derived from aligned_cos.
    raw_cos = {fam: {} for fam in families}
    for pr in pair_results:
        layer = int(pr["L"])
        for fam in families:
            if fam in pr["families"]:
                raw_cos[fam][layer] = pr["families"][fam]["cos"]

    print(f"=== Aligned vs Raw cos-sim comparison ({args.aligned_json}) ===")
    print(f" {num_layers} layers total; aligned data for "
          f"{len(aligned_cos['q_proj'])} pairs\n")

    # Per-pair table: L, phase, family cos-sims raw and aligned
    print(f" {'L':>3} {'phase':>5}", end="")
    for fam in families:
        print(f" {fam+'_raw':>10} {fam+'_ali':>10}", end="")
    print()

    # Placeholder padded to the same width as the numeric cells so that
    # rows with missing data stay column-aligned.
    missing = f"{'N/A':>10}"

    # JSON object keys are strings; sort numerically, not lexically.
    layer_ids = sorted(int(key) for key in aligned_cos["q_proj"])

    # phase -> family -> list of (aligned - raw); filled while printing rows.
    phase_deltas = {}
    for layer in layer_ids:
        key = str(layer)
        phase = phase_of(layer, num_layers)
        row = f" {layer:>3} {phase:>5}"
        for fam in families:
            r = raw_cos[fam].get(layer)
            # A family missing from the aligned file is shown as N/A, the
            # same as a family missing from the raw file.
            a = aligned_cos.get(fam, {}).get(key)
            rstr = f"{r:+10.4f}" if r is not None else missing
            astr = f"{a:+10.4f}" if a is not None else missing
            row += f" {rstr} {astr}"
            if r is not None and a is not None:
                phase_deltas.setdefault(phase, {}).setdefault(fam, []).append(a - r)
        print(row)

    # Aggregate by phase: mean (aligned - raw) per family per phase
    print("\n=== Per-phase mean delta (aligned_cos - raw_cos) by family ===")
    print(" Large positive delta = rotation alignment revealed shared")
    print(" structure. Small delta = specialization is gauge-independent.\n")

    print(f" {'phase':>6}", end="")
    for fam in families:
        print(f" {fam:>10}", end="")
    print()
    for ph in sorted(phase_deltas):
        print(f" {ph:>6}", end="")
        for fam in families:
            vals = phase_deltas[ph].get(fam, [])
            if vals:
                print(f" {np.mean(vals):+10.4f}", end="")
            else:
                print(f" {'—':>10}", end="")
        print()

    # Interpretation
    print("\n=== Interpretation ===")
    print(" Prediction under training-artifact hypothesis:")
    print(" delta(Phase E) > delta(Phase C) for projection families")
    print(" → late layers have more rotation-gauge-hidden structure")
    print(" → specialization is partly training noise, not structural")
    print("")
    print(" Prediction under real-specialization hypothesis:")
    print(" delta ~ 0 everywhere")
    print(" → layers genuinely point in different directions, gauge irrelevant")
||||
|
||||
|
||||
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue