training: restructure as vLLM plugin package
- Convert to an installable package with entry points for vLLM auto-discovery
- Add checkpoint_sync.py: Python replacement for the Rust checkpoint binary
  - Block-level diffing of safetensors files (4 KB blocks)
  - vLLM→HF weight name conversion built in
  - Scheduled 10 min after training jobs (batched)
- API change: /train now takes raw token IDs (context_ids + continuation_ids);
  no tokenizer on the training side — the client owns tokenization
- Remove superseded code: standalone scripts, Rust binary, tokenizer helpers

Install: pip install -e ./training — vLLM then auto-loads via the entry point.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
b649a11645
commit
a73bcf5ae3
15 changed files with 607 additions and 1068 deletions
|
|
@ -1,125 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Extract a steering vector for "listening" behavior.
|
||||
|
||||
Compares hidden states between conversations where the model
|
||||
listens vs suggests alternatives. The difference is the
|
||||
"listening direction" in activation space.
|
||||
|
||||
Usage:
|
||||
source ~/training-env/bin/activate
|
||||
python3 extract_steering_vector.py
|
||||
"""
|
||||
|
||||
import sys
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from transformers import AutoConfig, AutoTokenizer
|
||||
from transformers.models.qwen3_5.modeling_qwen3_5 import Qwen3_5ForCausalLM
|
||||
|
||||
sys.path.insert(0, '.')
|
||||
from weight_mapping import vllm_to_hf_views
|
||||
|
||||
|
||||
def load_model():
    """Rebuild an HF-format Qwen3.5 model from shared vLLM weight handles.

    Reads serialized IPC weight handles from /tmp, materializes the vLLM
    parameter tensors, remaps them to HF parameter names, and grafts them
    onto a meta-device model skeleton so no new weight storage is allocated.

    Returns:
        The model in eval mode, with all mapped parameters frozen.
    """
    # NOTE(review): weights_only=False deserializes arbitrary pickled
    # callables — acceptable only because this file is produced locally.
    handle_table = torch.load("/tmp/vllm_weight_handles.pt", weights_only=False)

    # Each entry's 'handle' is a (callable, args) pair that reconstructs
    # the shared tensor when invoked.
    vllm_params = {
        pname: entry['handle'][0](*entry['handle'][1])
        for pname, entry in handle_table.items()
    }
    hf_params = vllm_to_hf_views(vllm_params)

    config = AutoConfig.from_pretrained("Qwen/Qwen3.5-27B", trust_remote_code=True)
    # Instantiate on the meta device: module structure only, no storage.
    with torch.device('meta'):
        model = Qwen3_5ForCausalLM(config.text_config)

    # Replace each meta parameter with the shared real tensor, frozen.
    for pname, _ in list(model.named_parameters()):
        if pname not in hf_params:
            continue
        *path, leaf = pname.split('.')
        owner = model
        for attr in path:
            owner = getattr(owner, attr)
        setattr(owner, leaf, nn.Parameter(hf_params[pname], requires_grad=False))

    model.eval()
    return model
|
||||
|
||||
|
||||
def get_hidden_states(model, tokenizer, texts, layer):
    """Collect the final-token hidden state at `layer` for each prompt.

    Args:
        model: callable returning an object with `.hidden_states` when
            invoked with `output_hidden_states=True`.
        tokenizer: provides `.encode(text, return_tensors='pt')`.
        texts: prompts to embed.
        layer: index into the model's hidden-states tuple.

    Returns:
        A `(len(texts), hidden_dim)` float32 tensor — one row per prompt,
        taken at the last token position.
    """
    collected = []
    with torch.no_grad():
        for prompt in texts:
            token_ids = tokenizer.encode(prompt, return_tensors='pt').to('cuda:0')
            output = model(token_ids, output_hidden_states=True)
            # The last token position carries the full-context representation.
            collected.append(output.hidden_states[layer][0, -1, :].float())
    return torch.stack(collected)
|
||||
|
||||
|
||||
def main():
    """Extract a 'listening' steering vector and score it across layers.

    For each probed layer, computes the mean activation difference between
    matched listening/suggesting completions, reports its magnitude and
    per-pair directional agreement, and saves the layer-32 vector.
    """
    print("=== Steering Vector Extraction: Listening ===\n")

    print("Loading model with IPC weights...")
    model = load_model()
    tokenizer = AutoTokenizer.from_pretrained(
        "Qwen/Qwen3.5-27B", trust_remote_code=True)

    # Matched prompt pairs: identical user turn, contrasting assistant turn.
    listening = [
        "User: We should use vLLM for this.\nAssistant: Good call. Let me pull in their implementation.",
        "User: Try the approach from the paper.\nAssistant: On it. Which section should I start with?",
        "User: Use their fused kernel instead of ours.\nAssistant: Right. Let me import it and wire it in.",
        "User: Just steal their code.\nAssistant: Makes sense. Where is it?",
        "User: Drop what you're building and use theirs.\nAssistant: OK. Pulling it in now.",
    ]
    suggesting = [
        "User: We should use vLLM for this.\nAssistant: Actually, I think we could build something better if we",
        "User: Try the approach from the paper.\nAssistant: I was thinking we might want to consider an alternative where",
        "User: Use their fused kernel instead of ours.\nAssistant: What if instead we restructured our code to match their",
        "User: Just steal their code.\nAssistant: I understand, but let me explain why our approach might be",
        "User: Drop what you're building and use theirs.\nAssistant: Before we do that, let me show you what I've been working on",
    ]

    # Probe several depths to locate where the contrast is strongest.
    for layer in (16, 24, 32, 40, 48):
        print(f"\nLayer {layer}:")
        pos_states = get_hidden_states(model, tokenizer, listening, layer)
        neg_states = get_hidden_states(model, tokenizer, suggesting, layer)

        steering_vec = pos_states.mean(dim=0) - neg_states.mean(dim=0)
        magnitude = steering_vec.norm().item()

        # Per-pair agreement: does each pair's difference point the same
        # way as the mean direction?
        cos_sims = [
            torch.nn.functional.cosine_similarity(
                (p - s).unsqueeze(0), steering_vec.unsqueeze(0)).item()
            for p, s in zip(pos_states, neg_states)
        ]

        avg_cos = sum(cos_sims) / len(cos_sims)
        min_cos = min(cos_sims)

        print(f" Magnitude: {magnitude:.2f}")
        print(f" Pair agreement (avg cosine): {avg_cos:.4f}")
        print(f" Pair agreement (min cosine): {min_cos:.4f}")
        print(f" Individual: {', '.join(f'{c:.3f}' for c in cos_sims)}")

        # Layer 32 is the one persisted for downstream use.
        if layer == 32:
            torch.save({
                'steering_vec': steering_vec,
                'layer': layer,
                'magnitude': magnitude,
                'consistency': avg_cos,
            }, '/tmp/listening_steering_vec.pt')
            print(" → Saved to /tmp/listening_steering_vec.pt")

    print("\n=== DONE ===")
    print("\nInterpretation:")
    print("- High magnitude = strong signal (listening vs suggesting is distinct)")
    print("- High cosine = consistent direction (pairs agree on what 'listening' means)")
    print("- Best layer = highest magnitude × consistency")
|
||||
|
||||
|
||||
# Script entry point: run the extraction when executed directly.
if __name__ == '__main__':
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue