2026-04-15 23:16:53 -04:00
|
|
|
"""Apollo training plugin for vLLM.
|
|
|
|
|
|
|
|
|
|
Enables continuous fine-tuning alongside live inference by:

1. Exporting CUDA IPC handles for weight sharing (export_hook)
2. Adding a /train endpoint to vLLM's HTTP server (train_router)
3. Syncing checkpoints at block level to safetensors files

Install: pip install -e /path/to/training
vLLM then auto-loads the plugin via its entry point.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from .export_hook import _patch_model_runner
|
2026-04-16 00:48:05 -04:00
|
|
|
from .train_router import _patch_api_server
|
2026-04-15 23:16:53 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def register():
    """Entry point invoked by vLLM's plugin loader on startup.

    Applies the plugin's two patches in a fixed order: the model-runner
    patch imported from ``export_hook`` first, then the API-server patch
    imported from ``train_router``.
    """
    # Patches run in this order: model runner first, then API server.
    for apply_patch in (_patch_model_runner, _patch_api_server):
        apply_patch()
|