training: restructure as vLLM plugin package
- Convert to installable package with entry points for vLLM auto-discovery
- Add checkpoint_sync.py: Python replacement for Rust checkpoint binary
- Block-level diffing of safetensors files (4KB blocks)
- vLLM→HF weight name conversion built-in
- Scheduled 10min after training jobs (batched)
- API change: /train now takes raw token IDs (context_ids + continuation_ids)
- No tokenizer on training side, client owns tokenization
- Remove superseded code: standalone scripts, Rust binary, tokenizer helpers

Install: pip install -e ./training
Then vLLM auto-loads via entry point.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
b649a11645
commit
a73bcf5ae3
15 changed files with 607 additions and 1068 deletions
28
training/pyproject.toml
Normal file
28
training/pyproject.toml
Normal file
|
|
@@ -0,0 +1,28 @@
|
|||
# Standard PEP 517 build configuration; setuptools >= 61 is required for
# PEP 621 [project] metadata support.
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
name = "apollo-plugin"
version = "0.1.0"
description = "Apollo training plugin for vLLM"
requires-python = ">=3.10"
# PEP 508 specifiers, sorted alphabetically. Pins are intentionally loose;
# the host vLLM environment supplies the concrete torch build.
dependencies = [
    "aiohttp",
    "safetensors",
    "torch",
]
|
||||
|
||||
# Install with `pip install -e ./training[dev]` to get the test tooling.
[project.optional-dependencies]
dev = ["pytest"]
|
||||
|
||||
# vLLM scans the "vllm.general_plugins" entry-point group at startup and
# invokes each registered callable, so installing this package is enough
# for apollo_plugin.register() to be auto-loaded — no code change in vLLM.
[project.entry-points."vllm.general_plugins"]
apollo = "apollo_plugin:register"
|
||||
|
||||
# Console-script wrappers: each `module:function` target is exposed as an
# executable on PATH after installation.
[project.scripts]
apollo-worker = "apollo_plugin.worker:main"
apollo-checkpoint = "apollo_plugin.checkpoint_sync:main"
|
||||
|
||||
# Restrict package discovery to apollo_plugin (and its subpackages) so
# stray top-level directories in ./training are never packaged.
[tool.setuptools.packages.find]
where = ["."]
include = ["apollo_plugin*"]
|
||||
Loading…
Add table
Add a link
Reference in a new issue