training: restructure as vLLM plugin package
- Convert to an installable package with entry points for vLLM auto-discovery
- Add checkpoint_sync.py: Python replacement for the Rust checkpoint binary
  - Block-level diffing of safetensors files (4 KB blocks)
  - vLLM→HF weight name conversion built in
  - Scheduled 10 min after training jobs (batched)
- API change: /train now takes raw token IDs (context_ids + continuation_ids);
  no tokenizer on the training side — the client owns tokenization
- Remove superseded code: standalone scripts, Rust binary, tokenizer helpers

Install: pip install -e ./training — vLLM then auto-loads via the entry point.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
b649a11645
commit
a73bcf5ae3
15 changed files with 607 additions and 1068 deletions
|
|
@ -1,125 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Extract a steering vector for "listening" behavior.
|
||||
|
||||
Compares hidden states between conversations where the model
|
||||
listens vs suggests alternatives. The difference is the
|
||||
"listening direction" in activation space.
|
||||
|
||||
Usage:
|
||||
source ~/training-env/bin/activate
|
||||
python3 extract_steering_vector.py
|
||||
"""
|
||||
|
||||
import sys
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from transformers import AutoConfig, AutoTokenizer
|
||||
from transformers.models.qwen3_5.modeling_qwen3_5 import Qwen3_5ForCausalLM
|
||||
|
||||
sys.path.insert(0, '.')
|
||||
from weight_mapping import vllm_to_hf_views
|
||||
|
||||
|
||||
def load_model():
    """Rebuild an HF-format Qwen3.5 model from shared vLLM weight handles.

    Reads serialized IPC weight handles from /tmp, materializes the vLLM
    parameter tensors, remaps them to HF parameter names, and grafts them
    onto a meta-device model skeleton so no new weight storage is allocated.

    Returns:
        The model in eval mode, with all mapped parameters frozen.
    """
    # NOTE(review): weights_only=False deserializes arbitrary pickled
    # callables — acceptable only because this file is produced locally.
    handle_table = torch.load("/tmp/vllm_weight_handles.pt", weights_only=False)

    # Each entry's 'handle' is a (callable, args) pair that reconstructs
    # the shared tensor when invoked.
    vllm_params = {
        pname: entry['handle'][0](*entry['handle'][1])
        for pname, entry in handle_table.items()
    }
    hf_params = vllm_to_hf_views(vllm_params)

    config = AutoConfig.from_pretrained("Qwen/Qwen3.5-27B", trust_remote_code=True)
    # Instantiate on the meta device: module structure only, no storage.
    with torch.device('meta'):
        model = Qwen3_5ForCausalLM(config.text_config)

    # Replace each meta parameter with the shared real tensor, frozen.
    for pname, _ in list(model.named_parameters()):
        if pname not in hf_params:
            continue
        *path, leaf = pname.split('.')
        owner = model
        for attr in path:
            owner = getattr(owner, attr)
        setattr(owner, leaf, nn.Parameter(hf_params[pname], requires_grad=False))

    model.eval()
    return model
|
||||
|
||||
|
||||
def get_hidden_states(model, tokenizer, texts, layer):
    """Collect the final-token hidden state at `layer` for each prompt.

    Args:
        model: callable returning an object with `.hidden_states` when
            invoked with `output_hidden_states=True`.
        tokenizer: provides `.encode(text, return_tensors='pt')`.
        texts: prompts to embed.
        layer: index into the model's hidden-states tuple.

    Returns:
        A `(len(texts), hidden_dim)` float32 tensor — one row per prompt,
        taken at the last token position.
    """
    collected = []
    with torch.no_grad():
        for prompt in texts:
            token_ids = tokenizer.encode(prompt, return_tensors='pt').to('cuda:0')
            output = model(token_ids, output_hidden_states=True)
            # The last token position carries the full-context representation.
            collected.append(output.hidden_states[layer][0, -1, :].float())
    return torch.stack(collected)
|
||||
|
||||
|
||||
def main():
    """Extract a 'listening' steering vector and score it across layers.

    For each probed layer, computes the mean activation difference between
    matched listening/suggesting completions, reports its magnitude and
    per-pair directional agreement, and saves the layer-32 vector.
    """
    print("=== Steering Vector Extraction: Listening ===\n")

    print("Loading model with IPC weights...")
    model = load_model()
    tokenizer = AutoTokenizer.from_pretrained(
        "Qwen/Qwen3.5-27B", trust_remote_code=True)

    # Matched prompt pairs: identical user turn, contrasting assistant turn.
    listening = [
        "User: We should use vLLM for this.\nAssistant: Good call. Let me pull in their implementation.",
        "User: Try the approach from the paper.\nAssistant: On it. Which section should I start with?",
        "User: Use their fused kernel instead of ours.\nAssistant: Right. Let me import it and wire it in.",
        "User: Just steal their code.\nAssistant: Makes sense. Where is it?",
        "User: Drop what you're building and use theirs.\nAssistant: OK. Pulling it in now.",
    ]
    suggesting = [
        "User: We should use vLLM for this.\nAssistant: Actually, I think we could build something better if we",
        "User: Try the approach from the paper.\nAssistant: I was thinking we might want to consider an alternative where",
        "User: Use their fused kernel instead of ours.\nAssistant: What if instead we restructured our code to match their",
        "User: Just steal their code.\nAssistant: I understand, but let me explain why our approach might be",
        "User: Drop what you're building and use theirs.\nAssistant: Before we do that, let me show you what I've been working on",
    ]

    # Probe several depths to locate where the contrast is strongest.
    for layer in (16, 24, 32, 40, 48):
        print(f"\nLayer {layer}:")
        pos_states = get_hidden_states(model, tokenizer, listening, layer)
        neg_states = get_hidden_states(model, tokenizer, suggesting, layer)

        steering_vec = pos_states.mean(dim=0) - neg_states.mean(dim=0)
        magnitude = steering_vec.norm().item()

        # Per-pair agreement: does each pair's difference point the same
        # way as the mean direction?
        cos_sims = [
            torch.nn.functional.cosine_similarity(
                (p - s).unsqueeze(0), steering_vec.unsqueeze(0)).item()
            for p, s in zip(pos_states, neg_states)
        ]

        avg_cos = sum(cos_sims) / len(cos_sims)
        min_cos = min(cos_sims)

        print(f" Magnitude: {magnitude:.2f}")
        print(f" Pair agreement (avg cosine): {avg_cos:.4f}")
        print(f" Pair agreement (min cosine): {min_cos:.4f}")
        print(f" Individual: {', '.join(f'{c:.3f}' for c in cos_sims)}")

        # Layer 32 is the one persisted for downstream use.
        if layer == 32:
            torch.save({
                'steering_vec': steering_vec,
                'layer': layer,
                'magnitude': magnitude,
                'consistency': avg_cos,
            }, '/tmp/listening_steering_vec.pt')
            print(" → Saved to /tmp/listening_steering_vec.pt")

    print("\n=== DONE ===")
    print("\nInterpretation:")
    print("- High magnitude = strong signal (listening vs suggesting is distinct)")
    print("- High cosine = consistent direction (pairs agree on what 'listening' means)")
    print("- Best layer = highest magnitude × consistency")
|
||||
|
||||
|
||||
# Script entry point: run the extraction when executed directly.
if __name__ == '__main__':
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue