# Remove standalone worker.py daemon. Training now runs inside vLLM:
# - train_router.py: FastAPI router patched into vLLM's build_app()
# - /train served on the same port as /completions and /score
# - Lazy-loads the HF model with vLLM weight views on the first request
# - HOGWILD training: no pause; weights are updated in-place
#
# The previous architecture had a separate daemon on port 8080 that
# communicated with vLLM via pause/resume endpoints. This was wrong:
# training should run in-process, sharing GPU memory directly.
#
# Co-Authored-By: Proof of Concept <poc@bcachefs.org>
# Packaging metadata for the apollo-plugin package (PEP 517 / PEP 621).
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "apollo-plugin"
version = "0.1.0"
description = "Apollo training plugin for vLLM"
requires-python = ">=3.10"
# Runtime dependencies: one PEP 508 specifier per line, sorted.
dependencies = [
    "aiohttp",
    "safetensors",
    "torch",
]

[project.optional-dependencies]
dev = ["pytest"]

# Entry point under vLLM's "vllm.general_plugins" group; presumably vLLM
# discovers this and calls apollo_plugin.register() at startup — confirm
# against the vLLM plugin-loading docs for the targeted vLLM version.
[project.entry-points."vllm.general_plugins"]
apollo = "apollo_plugin:register"

# Console script: `apollo-checkpoint` runs apollo_plugin.checkpoint_sync:main.
[project.scripts]
apollo-checkpoint = "apollo_plugin.checkpoint_sync:main"

# Only package directories matching apollo_plugin* are included in the build.
[tool.setuptools.packages.find]
where = ["."]
include = ["apollo_plugin*"]