#!/bin/bash
# Start vLLM with Apollo weight export hook.
#
# The hook patches vLLM's model runner to export CUDA IPC handles
# after loading, so the Apollo training process can share the same
# GPU memory.
#
# Usage: <this script> [vllm serve arguments...]
# Every argument is forwarded unchanged after the "serve" subcommand.

set -euo pipefail

# Absolute directory containing this script. It is placed first on Python's
# sys.path below so that `import vllm_export_hook` resolves there.
# CDPATH is cleared for this command: with CDPATH set, `cd` on a relative
# path may land in a different directory and also prints the destination,
# which would corrupt the captured value.
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" || {
  printf 'error: cannot resolve script directory from %s\n' "$0" >&2
  exit 1
}

# The Python program is single-quoted so the shell performs no interpolation
# inside it; the hook directory is passed as an argument instead of being
# spliced into the source, so paths containing quotes or backslashes are safe.
# With `python3 -c`, sys.argv is ["-c", <hook dir>, "serve", <user args>...].
exec python3 -c '
import sys

hook_dir = sys.argv[1]
sys.path.insert(0, hook_dir)
import vllm_export_hook  # patches model runner before vLLM loads

# Drop "-c" and the hook directory; present the rest as a "vllm" CLI call.
sys.argv = ["vllm"] + sys.argv[2:]
from vllm.entrypoints.cli.main import main
main()
' "$SCRIPT_DIR" serve "$@"