This commit is contained in:
hailin 2025-07-04 17:45:05 +08:00
parent 3ecc1e66be
commit 023d2a0868
3 changed files with 30 additions and 13 deletions

View File

@ -144,13 +144,14 @@ RUN ls -lh /tmp/wheels && \
python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
rm -rf /tmp/wheels
# # Install dependencies that were missing at runtime
# RUN python3 -m pip install --no-cache-dir pydantic orjson psutil pyzmq pynvml transformers==4.48.3 uvicorn fastapi IPython aiohttp setproctitle uvloop sentencepiece triton
# ✅ Install the Prometheus client
RUN python3 -m pip install --no-cache-dir prometheus_client
# ✅ Offline-install all dependencies (includes every package required at runtime)
# RUN python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \
# python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
# rm -rf /tmp/wheels
# ✅ Set the multi-process metrics collection directory (used by MultiProcessCollector)
ENV PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus
# ✅ Make sure the directory exists
RUN mkdir -p /tmp/prometheus
# ✅ Add Tini (recommended)
ENV TINI_VERSION=v0.19.0
@ -161,6 +162,8 @@ ENTRYPOINT ["/tini", "--"]
# ---- Copy the model (the path can be changed) ----
COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
# Probe /health on port 30000 every 30s (2s timeout, 5 retries) after a 600s start period
HEALTHCHECK --interval=30s --timeout=2s --start-period=600s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1
# ---- Expose the port ----
EXPOSE 30000
@ -170,4 +173,5 @@ CMD ["python3", "-m", "sglang.launch_server", \
"--port", "30000", \
"--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
"--tp", "1", \
"--api-key", "token-abc123"]
"--api-key", "token-abc123", \
"--enable-metrics"]

View File

@ -216,9 +216,12 @@ HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20))
@app.get("/health")
async def health():
    """Check the health of the HTTP server and return version info.

    Returns:
        A dict with a ``status`` key (always ``"ok"``) and a ``version`` key.
        The web framework is expected to serialize it as the JSON body of a
        successful response.
    """
    return {
        "status": "ok",
        # Placeholder: put the version string you want to expose here.
        "version": "v1.0.0",
    }
@app.get("/health_generate")

View File

@ -868,12 +868,22 @@ def set_ulimit(target_soft_limit=65535):
def add_api_key_middleware(app, api_key: str):
    """Register an HTTP middleware on *app* that enforces Bearer-token auth.

    A request is forwarded without authentication when it is a CORS preflight
    (``OPTIONS``) request or when its path starts with one of the whitelisted
    prefixes. Every other request must carry the header
    ``Authorization: Bearer <api_key>``; otherwise a 401 JSON response with
    ``{"error": "Unauthorized"}`` is returned.

    Args:
        app: Application object exposing a ``middleware("http")`` decorator.
        api_key: The token clients must present.
    """
    import hmac

    # Path prefixes that are explicitly exempt from authentication. Matching
    # is by prefix, so any route whose path begins with one of these strings
    # is exempt as well.
    whitelist_prefixes = (
        "/health",
        "/metrics",
        "/ping",
        "/get_model_info",
    )
    # Built once instead of on every request.
    expected_header = ("Bearer " + api_key).encode("utf-8")

    @app.middleware("http")
    async def authentication(request, call_next):
        # Let OPTIONS requests (CORS preflight) straight through.
        if request.method == "OPTIONS":
            return await call_next(request)

        # str.startswith accepts a tuple, so one call covers every prefix.
        if request.url.path.startswith(whitelist_prefixes):
            return await call_next(request)

        # Bearer-token check. Compare as bytes with a constant-time routine so
        # the comparison does not leak how much of the token matched; a
        # missing header is treated as an empty value and therefore rejected.
        supplied_header = (request.headers.get("Authorization") or "").encode("utf-8")
        if not hmac.compare_digest(supplied_header, expected_header):
            return ORJSONResponse(content={"error": "Unauthorized"}, status_code=401)

        return await call_next(request)