consciousness/training/start_vllm_with_apollo.sh
2026-03-30 22:24:02 -04:00

18 lines
480 B
Bash
Executable file

#!/bin/bash
# Start vLLM with Apollo weight export hook.
#
# Usage: start_vllm_with_apollo.sh [vllm serve arguments...]
#
# Every argument given to this script is forwarded unchanged to `vllm serve`.
#
# The hook patches vLLM's model runner to export CUDA IPC handles
# after loading, so the Apollo training process can share the same
# GPU memory.

# Absolute path of the directory containing this script; vllm_export_hook is
# imported from there. CDPATH is cleared for the cd so that a relative dirname
# cannot be resolved through CDPATH (in which case cd would also print the
# target directory and corrupt the captured value). Abort if the directory
# cannot be resolved rather than continuing with an empty path, which Python
# would interpret as "current working directory".
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" || {
  printf '%s: cannot determine script directory\n' "$0" >&2
  exit 1
}

# The Python program is single-quoted so the shell expands nothing inside it.
# SCRIPT_DIR is handed over as an argument instead of being pasted into the
# Python source, so a path containing quotes or backslashes cannot break or
# alter the program. Under `python3 -c`, sys.argv is
# ['-c', SCRIPT_DIR, 'serve', ...]; the program consumes SCRIPT_DIR and
# rewrites sys.argv to ['vllm', 'serve', ...] before calling the vLLM CLI.
exec python3 -c '
import sys
sys.path.insert(0, sys.argv[1])
import vllm_export_hook  # patches model runner before vLLM loads
sys.argv = ["vllm"] + sys.argv[2:]
from vllm.entrypoints.cli.main import main
main()
' "$SCRIPT_DIR" serve "$@"