18 lines
482 B
Python
18 lines
482 B
Python
|
|
"""Apollo training plugin for vLLM.
|
||
|
|
|
||
|
|
Enables continuous fine-tuning alongside live inference by:
  1. Exporting CUDA IPC handles for weight sharing
  2. Providing a training worker daemon (/train endpoint)
  3. Performing block-level checkpoint sync to safetensors files

Install: pip install -e /path/to/training
vLLM then auto-loads the plugin via its entry point.
|
||
|
|
"""
|
||
|
|
|
||
|
|
from .export_hook import _patch_model_runner
|
||
|
|
|
||
|
|
|
||
|
|
def register():
    """Plugin entry point, called by vLLM's plugin loader on startup.

    Delegates to ``_patch_model_runner`` (imported from ``export_hook``)
    and returns nothing.
    """
    _patch_model_runner()
|