From 5f41898bb8ce398d0451ddb64680f1019ee76a6b Mon Sep 17 00:00:00 2001
From: ProofOfConcept
Date: Mon, 30 Mar 2026 22:24:02 -0400
Subject: [PATCH] vllm launcher with apollo hook

---
Note: the blob id on the "index" line below was not regenerated after the
launcher was revised to pass its directory via argv; re-run
git format-patch if exact object ids matter.

 training/start_vllm_with_apollo.sh | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100755 training/start_vllm_with_apollo.sh

diff --git a/training/start_vllm_with_apollo.sh b/training/start_vllm_with_apollo.sh
new file mode 100755
index 0000000..98dfedb
--- /dev/null
+++ b/training/start_vllm_with_apollo.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Start vLLM with Apollo weight export hook.
+#
+# The hook patches vLLM's model runner to export CUDA IPC handles
+# after loading, so the Apollo training process can share the same
+# GPU memory.
+#
+# Usage: start_vllm_with_apollo.sh [vllm serve arguments...]
+
+# Abort rather than fall back to an empty path if the directory
+# cannot be resolved.
+SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)" || exit 1
+
+# The Python source is single-quoted so the shell never expands
+# anything inside it; SCRIPT_DIR travels as argv[1] instead of being
+# pasted into the code, so quotes or backslashes in the path are safe.
+exec python3 -c '
+import sys
+sys.path.insert(0, sys.argv[1])
+import vllm_export_hook  # patches model runner before vLLM loads
+
+sys.argv = ["vllm"] + sys.argv[2:]
+from vllm.entrypoints.cli.main import main
+main()
+' "$SCRIPT_DIR" serve "$@"