amygdala: report eigenvalue spectrum for subspace method
When --method subspace is selected, record the top-20 eigenvalues of (M_pos - M_base) per concept per layer. They are added to quality.json as 'subspace_eigvals'. The spectrum tells us whether the concept lives in a single dominant direction (λ_0 >> λ_1, so the top eigenvector is enough) or is spread across several shared directions (λ_0 ≈ λ_1, so keeping only the top-1 eigenvector loses signal). Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
fe0fb8253a
commit
974c6c7fd2
1 changed file with 19 additions and 1 deletion
|
|
@ -1001,6 +1001,9 @@ def main() -> None:
|
||||||
# and use sum-of-projection-operators per concept. --------------------
|
# and use sum-of-projection-operators per concept. --------------------
|
||||||
pos_subspaces: list[dict[int, torch.Tensor]] | None = None
|
pos_subspaces: list[dict[int, torch.Tensor]] | None = None
|
||||||
base_subspaces: list[dict[int, torch.Tensor]] | None = None
|
base_subspaces: list[dict[int, torch.Tensor]] | None = None
|
||||||
|
# Per (concept, layer): top-20 eigenvalues of (M_pos - M_base), descending.
|
||||||
|
# Populated only when --method subspace.
|
||||||
|
subspace_eigvals: dict[str, dict[int, list[float]]] = {}
|
||||||
if args.method == "subspace":
|
if args.method == "subspace":
|
||||||
print("\nCollecting per-story subspaces (SVD, top-k right singular "
|
print("\nCollecting per-story subspaces (SVD, top-k right singular "
|
||||||
f"vectors, k={args.subspace_k})...")
|
f"vectors, k={args.subspace_k})...")
|
||||||
|
|
@ -1035,10 +1038,14 @@ def main() -> None:
|
||||||
pos_V = [pos_subspaces[j][target_l] for j in pos_rows]
|
pos_V = [pos_subspaces[j][target_l] for j in pos_rows]
|
||||||
base_V = [pos_subspaces[j][target_l] for j in neg_rows]
|
base_V = [pos_subspaces[j][target_l] for j in neg_rows]
|
||||||
base_V += [bs[target_l] for bs in (base_subspaces or [])]
|
base_V += [bs[target_l] for bs in (base_subspaces or [])]
|
||||||
top_vec, _eigvals = _subspace_concept_direction(
|
top_vec, eigvals = _subspace_concept_direction(
|
||||||
pos_V, base_V, hidden=hidden_dim,
|
pos_V, base_V, hidden=hidden_dim,
|
||||||
)
|
)
|
||||||
per_layer_vectors[l_idx, e_idx] = top_vec
|
per_layer_vectors[l_idx, e_idx] = top_vec
|
||||||
|
# Keep the top-20 eigenvalues for quality-report diagnostics.
|
||||||
|
subspace_eigvals.setdefault(emotion, {})[target_l] = (
|
||||||
|
eigvals[-20:].flip(0).tolist()
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
pos = positive_acts[pos_rows] # [n_pos, n_layers, hidden]
|
pos = positive_acts[pos_rows] # [n_pos, n_layers, hidden]
|
||||||
neg = positive_acts[neg_rows] # [n_neg, n_layers, hidden]
|
neg = positive_acts[neg_rows] # [n_neg, n_layers, hidden]
|
||||||
|
|
@ -1149,6 +1156,17 @@ def main() -> None:
|
||||||
"per-head analysis skipped."
|
"per-head analysis skipped."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Eigenvalue spectrum from the subspace method — if populated, report
|
||||||
|
# the top-20 eigenvalues per concept per layer. Tells us whether the
|
||||||
|
# concept direction lives in a single dominant dimension (λ_0 >> λ_1)
|
||||||
|
# or a spread of common directions (λ_0 ≈ λ_1 ≈ ...).
|
||||||
|
if subspace_eigvals:
|
||||||
|
for emotion, per_l in subspace_eigvals.items():
|
||||||
|
if emotion in report:
|
||||||
|
report[emotion]["subspace_eigvals"] = {
|
||||||
|
str(l): vals for l, vals in per_l.items()
|
||||||
|
}
|
||||||
|
|
||||||
# Linear combinations — for each concept, how much of its direction
|
# Linear combinations — for each concept, how much of its direction
|
||||||
# is explained by a ridge regression on the others. R² > 0.9 flags
|
# is explained by a ridge regression on the others. R² > 0.9 flags
|
||||||
# concepts that are essentially linear combinations of their peers
|
# concepts that are essentially linear combinations of their peers
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue