vllm launcher with apollo hook

This commit is contained in:
ProofOfConcept 2026-03-30 22:24:02 -04:00
parent 0402a9333c
commit 5f41898bb8

View file

@ -0,0 +1,18 @@
#!/bin/bash
# Start vLLM with Apollo weight export hook.
#
# The hook patches vLLM's model runner to export CUDA IPC handles
# after loading, so the Apollo training process can share the same
# GPU memory.
#
# Usage: <this script> [vllm serve arguments...]
#
# Every argument is forwarded unchanged to `vllm serve`. The module
# vllm_export_hook is imported from the directory containing this script.
set -euo pipefail

# Resolve the directory this script lives in.
#  - CDPATH is cleared so `cd` cannot print a directory name of its own and
#    pollute the captured output.
#  - `--` protects against a script path that starts with a dash.
if ! SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)"; then
  printf '%s: cannot determine script directory\n' "$0" >&2
  exit 1
fi

# The Python program is single-quoted so the shell never interpolates into
# it; the hook directory travels as an argv element instead. That keeps
# paths containing quotes, backslashes or other special characters from
# breaking (or injecting into) the Python source.
#
# With `python3 -c`, sys.argv starts as ['-c', SCRIPT_DIR, 'serve', ...].
# The hook directory is popped off, then argv[0] is replaced so the vLLM
# CLI sees ['vllm', 'serve', ...].
exec python3 -c '
import sys
hook_dir = sys.argv.pop(1)
sys.path.insert(0, hook_dir)
import vllm_export_hook  # patches model runner before vLLM loads
sys.argv = ["vllm"] + sys.argv[1:]
from vllm.entrypoints.cli.main import main
main()
' "$SCRIPT_DIR" serve "$@"