18 lines
480 B
Bash
Executable file
18 lines
480 B
Bash
Executable file
#!/bin/bash
# Start vLLM with Apollo weight export hook.
#
# The hook patches vLLM's model runner to export CUDA IPC handles
# after loading, so the Apollo training process can share the same
# GPU memory.
#
# Usage: <this script> [vllm serve arguments...]
# Every argument is forwarded unchanged after the `serve` subcommand.

set -euo pipefail

# Absolute directory of this script; vllm_export_hook is imported from it.
# CDPATH is cleared so `cd` cannot echo a directory into the captured output,
# and `--` keeps a path that begins with '-' from being parsed as an option.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)" || {
  printf 'error: cannot resolve script directory for %s\n' "$0" >&2
  exit 1
}
readonly SCRIPT_DIR

# The Python program is single-quoted so the shell never interpolates into it.
# The script directory travels as argv[1] instead of being pasted into the
# source, so quotes or backslashes in the path cannot break or alter the code.
exec python3 -c '
import sys

# With -c, sys.argv is ["-c", <script dir>, "serve", <forwarded args>...].
hook_dir = sys.argv[1]
sys.path.insert(0, hook_dir)
import vllm_export_hook  # patches model runner before vLLM loads

# Drop the directory argument and present the rest as: vllm serve ...
sys.argv = ["vllm"] + sys.argv[2:]
from vllm.entrypoints.cli.main import main
main()
' "$SCRIPT_DIR" serve "$@"