"""Apollo training plugin for vLLM. Enables continuous fine-tuning alongside live inference by: 1. Exporting CUDA IPC handles for weight sharing (export_hook) 2. Adding /train endpoint to vLLM's HTTP server (train_router) 3. Block-level checkpoint sync to safetensors files Install: pip install -e /path/to/training Then vLLM auto-loads via entry point. """ from .export_hook import _patch_model_runner from .train_router import _patch_api_server def register(): """Called by vLLM's plugin loader on startup.""" _patch_model_runner() _patch_api_server()