consciousness/training/apollo_plugin/__init__.py

20 lines
571 B
Python
Raw Normal View History

"""Apollo training plugin for vLLM.
Enables continuous fine-tuning alongside live inference by:
1. Exporting CUDA IPC handles for weight sharing (export_hook)
2. Adding /train endpoint to vLLM's HTTP server (train_router)
3. Block-level checkpoint sync to safetensors files
Install with: pip install -e /path/to/training
vLLM then auto-loads the plugin through its entry point.
"""
from .export_hook import _patch_model_runner
from .train_router import _patch_api_server
# Tracks whether the patches have already been applied in this process.
_registered = False


def register():
    """Apply the Apollo training patches to vLLM, at most once per process.

    Called by vLLM's plugin loader on startup. vLLM's plugin guidelines ask
    entry-point functions to be re-entrant because the loader may invoke
    them more than once in the same process, so any call made after a
    successful registration returns immediately instead of patching again.

    The patches are applied in a fixed order: the model-runner patch from
    ``export_hook`` first, then the API-server patch from ``train_router``.
    """
    global _registered
    if _registered:
        return
    _patch_model_runner()
    _patch_api_server()
    # Mark as done only after both patches succeed, so a call that raised
    # part-way through can still be retried by the loader.
    _registered = True