#!/usr/bin/env python3
"""Export vLLM's live model weight IPC handles for the training process.

Connects to a running vLLM instance, iterates over model parameters, and
exports CUDA IPC handles that allow another process to access the same GPU
memory without copying.

Usage:
    # Run after vLLM is serving:
    python3 export_weights.py --output /tmp/vllm_weight_handles.pt

    # Or via vLLM's API (future):
    curl -X POST http://localhost:8000/export_weights
"""
import argparse
import subprocess
import sys
from pathlib import Path

import torch


def export_from_model(model, output_path: str):
    """Export IPC handles for all of *model*'s parameters.

    Args:
        model: Any object exposing ``named_parameters()`` (e.g. a
            ``torch.nn.Module``).
        output_path: Path that the handle dict is ``torch.save``-d to.

    Returns:
        Dict mapping parameter name -> ``{'handle', 'shape', 'dtype'}``,
        where ``'handle'`` is the tuple produced by
        ``torch.multiprocessing.reductions.reduce_tensor``. For CUDA
        tensors this embeds a CUDA IPC handle, so another process can
        rebuild the tensor and address the same GPU memory without a copy.
    """
    from torch.multiprocessing.reductions import reduce_tensor

    handles = {}
    total_bytes = 0
    for name, param in model.named_parameters():
        # reduce_tensor() yields (rebuild_fn, args); for CUDA storage the
        # args carry the IPC handle of the *live* allocation.
        handle = reduce_tensor(param.data)
        handles[name] = {
            'handle': handle,
            'shape': list(param.shape),
            'dtype': str(param.dtype),
        }
        total_bytes += param.nelement() * param.element_size()

    torch.save(handles, output_path)
    print(f"Exported {len(handles)} parameters ({total_bytes / 1e9:.1f} GB)")
    print(f"Saved to {output_path}")
    return handles


def _detect_vllm_model_path():
    """Scan ``ps aux`` for a running vLLM process and return its model arg.

    Handles both ``--model <path>`` and ``--model=<path>`` forms. Returns
    the first match found, or None when no vLLM process with a ``--model``
    argument is visible. (The original inline version only broke out of the
    inner token loop, so a later matching ps line could overwrite an
    already-detected path; returning immediately fixes that.)
    """
    result = subprocess.run(['ps', 'aux'], capture_output=True, text=True)
    for line in result.stdout.split('\n'):
        if 'vllm' not in line or '--model' not in line:
            continue
        parts = line.split()
        for i, p in enumerate(parts):
            # "--model <path>" form: the value is the next token.
            if p == '--model' and i + 1 < len(parts):
                return parts[i + 1]
            # "--model=<path>" form: the value follows the '='.
            if p.startswith('--model='):
                return p.split('=', 1)[1]
    return None


def main():
    """CLI entry point: detect the running vLLM model (export TBD)."""
    parser = argparse.ArgumentParser(description="Export vLLM weight IPC handles")
    parser.add_argument("--output", "-o", default="/tmp/vllm_weight_handles.pt",
                        help="Output path for IPC handles")
    parser.add_argument("--vllm-pid", type=int, default=None,
                        help="vLLM worker PID (auto-detected if not specified)")
    args = parser.parse_args()

    # For now: load the model directly and export.
    # TODO: connect to running vLLM process instead.
    print("Note: This currently loads the model separately.")
    print("Full integration will export from the running vLLM process.")
    print()

    # Detect model path from the running vLLM process list.
    model_path = _detect_vllm_model_path()

    if model_path:
        print(f"Detected vLLM model: {model_path}")
    else:
        print("Could not detect running vLLM model. Specify manually.")
        sys.exit(1)


if __name__ == '__main__':
    main()