#!/bin/bash
# Start vLLM with Apollo weight export hook.
#
# The hook patches vLLM's model runner to export CUDA IPC handles
# after loading, so the Apollo training process can share the same
# GPU memory.

set -euo pipefail

# Resolve the absolute directory that contains this script; the hook module
# (vllm_export_hook) is imported from there.
#   - CDPATH is cleared so `cd` cannot echo a directory into the captured
#     output.
#   - `--` protects against paths that begin with a dash.
#   - Abort on failure: an empty SCRIPT_DIR would put '' on sys.path, which
#     makes Python look for the hook in the current working directory.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)" || {
  printf 'error: cannot resolve script directory from %s\n' "$0" >&2
  exit 1
}

# Replace this shell with Python running `vllm serve <args...>`.
#
# The hook directory is handed over as an argv element rather than being
# interpolated into the Python source, so paths containing quotes,
# backslashes or newlines cannot break (or inject into) the inline program.
# The program is single-quoted: the shell expands nothing inside it.
exec python3 -c '
import sys

# At this point sys.argv is ["-c", <hook dir>, "serve", <user args>...].
sys.path.insert(0, sys.argv[1])
import vllm_export_hook  # patches model runner before vLLM loads

# Drop the hook dir and present the rest as a regular vllm command line.
sys.argv = ["vllm"] + sys.argv[2:]
from vllm.entrypoints.cli.main import main
main()
' "$SCRIPT_DIR" serve "$@"