This commit is contained in:
parent
3ecc1e66be
commit
023d2a0868
18
Dockerfile
18
Dockerfile
|
|
@ -144,13 +144,14 @@ RUN ls -lh /tmp/wheels && \
|
|||
python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
|
||||
rm -rf /tmp/wheels
|
||||
|
||||
# # 安装运行时漏掉的依赖
|
||||
# RUN python3 -m pip install --no-cache-dir pydantic orjson psutil pyzmq pynvml transformers==4.48.3 uvicorn fastapi IPython aiohttp setproctitle uvloop sentencepiece triton
|
||||
# ✅ 安装 Prometheus client
|
||||
RUN python3 -m pip install --no-cache-dir prometheus_client
|
||||
|
||||
# ✅ 离线安装全部依赖(包含所有运行时必需包)
|
||||
# RUN python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \
|
||||
# python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
|
||||
# rm -rf /tmp/wheels
|
||||
# ✅ 设置多进程 metrics 收集目录(用于 MultiProcessCollector)
|
||||
ENV PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus
|
||||
|
||||
# ✅ 确保目录存在
|
||||
RUN mkdir -p /tmp/prometheus
|
||||
|
||||
# ✅ 添加 Tini(推荐)
|
||||
ENV TINI_VERSION=v0.19.0
|
||||
|
|
@ -161,6 +162,8 @@ ENTRYPOINT ["/tini", "--"]
|
|||
# ---- 拷贝模型(路径可换) ----
|
||||
COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=2s --start-period=600s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1
|
||||
|
||||
# ---- 暴露端口 ----
|
||||
EXPOSE 30000
|
||||
|
||||
|
|
@ -170,4 +173,5 @@ CMD ["python3", "-m", "sglang.launch_server", \
|
|||
"--port", "30000", \
|
||||
"--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
|
||||
"--tp", "1", \
|
||||
"--api-key", "token-abc123"]
|
||||
"--api-key", "token-abc123", \
|
||||
"--enable-metrics"]
|
||||
|
|
@ -216,9 +216,12 @@ HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20))
|
|||
|
||||
|
||||
@app.get("/health")
async def health():
    """Report that the HTTP server is up, together with a version label.

    Returns:
        A dict whose "status" entry is "ok" and whose "version" entry is the
        hard-coded version string below.
    """
    return {
        "status": "ok",
        # Set this to the version string that should be displayed.
        "version": "v1.0.0",
    }
|
||||
|
||||
|
||||
@app.get("/health_generate")
|
||||
|
|
|
|||
|
|
@ -868,12 +868,22 @@ def set_ulimit(target_soft_limit=65535):
|
|||
def add_api_key_middleware(app, api_key: str):
    """Register an HTTP middleware on ``app`` that enforces Bearer-token auth.

    A request is forwarded without any credential check when it is an
    OPTIONS request or when its path starts with one of the whitelisted
    prefixes. Every other request must carry the header
    ``Authorization: Bearer <api_key>``; otherwise a 401 JSON error response
    is returned and the request is not forwarded.

    Args:
        app: Application object exposing a ``middleware("http")`` decorator.
        api_key: Secret expected after the ``Bearer `` scheme prefix.
    """
    # Function-scope import keeps this block independent of the module header.
    import hmac

    # Path prefixes that are served without authentication. Built once here
    # instead of on every request.
    # NOTE(review): matching is by prefix, so "/health" also exempts
    # "/health_generate" -- confirm that this is intended.
    whitelist_prefixes = (
        "/health",
        "/metrics",
        "/ping",
        "/get_model_info",
    )
    expected_header = ("Bearer " + api_key).encode("utf-8")

    @app.middleware("http")
    async def authentication(request, call_next):
        # OPTIONS requests (CORS preflight) pass straight through.
        if request.method == "OPTIONS":
            return await call_next(request)

        # str.startswith accepts a tuple, so one call covers every prefix.
        if request.url.path.startswith(whitelist_prefixes):
            return await call_next(request)

        # Bearer-token check. A missing header is treated as an empty value,
        # and the comparison is constant-time so the response latency does
        # not reveal how much of the token matched.
        supplied_header = (request.headers.get("Authorization") or "").encode("utf-8")
        if not hmac.compare_digest(supplied_header, expected_header):
            return ORJSONResponse(content={"error": "Unauthorized"}, status_code=401)
        return await call_next(request)
|
||||
|
|
|
|||
Loading…
Reference in New Issue