From 5f41898bb8ce398d0451ddb64680f1019ee76a6b Mon Sep 17 00:00:00 2001
From: ProofOfConcept <poc@bcachefs.org>
Date: Mon, 30 Mar 2026 22:24:02 -0400
Subject: [PATCH] vllm launcher with apollo hook

---
 training/start_vllm_with_apollo.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100755 training/start_vllm_with_apollo.sh

diff --git a/training/start_vllm_with_apollo.sh b/training/start_vllm_with_apollo.sh
new file mode 100755
index 0000000..98dfedb
--- /dev/null
+++ b/training/start_vllm_with_apollo.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Start vLLM with Apollo weight export hook.
+#
+# The hook patches vLLM's model runner to export CUDA IPC handles
+# after loading, so the Apollo training process can share the same
+# GPU memory.  All arguments are forwarded to `vllm serve`.
+set -euo pipefail
+SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
+# Pass the hook dir via argv, not spliced into the code, so odd paths are safe.
+exec python3 -c '
+import sys
+sys.path.insert(0, sys.argv.pop(1))  # argv[1] is SCRIPT_DIR; pop it so only vLLM args remain
+import vllm_export_hook  # patches model runner before vLLM loads
+
+sys.argv = ["vllm"] + sys.argv[1:]
+from vllm.entrypoints.cli.main import main
+main()
+' "$SCRIPT_DIR" serve "$@"