This commit is contained in:
parent
3ecc1e66be
commit
023d2a0868
18
Dockerfile
18
Dockerfile
|
|
@ -144,13 +144,14 @@ RUN ls -lh /tmp/wheels && \
|
||||||
python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
|
python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
|
||||||
rm -rf /tmp/wheels
|
rm -rf /tmp/wheels
|
||||||
|
|
||||||
# # 安装运行时漏掉的依赖
|
# ✅ 安装 Prometheus client
|
||||||
# RUN python3 -m pip install --no-cache-dir pydantic orjson psutil pyzmq pynvml transformers==4.48.3 uvicorn fastapi IPython aiohttp setproctitle uvloop sentencepiece triton
|
RUN python3 -m pip install --no-cache-dir prometheus_client
|
||||||
|
|
||||||
# ✅ 离线安装全部依赖(包含所有运行时必需包)
|
# ✅ 设置多进程 metrics 收集目录(用于 MultiProcessCollector)
|
||||||
# RUN python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \
|
ENV PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus
|
||||||
# python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
|
|
||||||
# rm -rf /tmp/wheels
|
# ✅ 确保目录存在
|
||||||
|
RUN mkdir -p /tmp/prometheus
|
||||||
|
|
||||||
# ✅ 添加 Tini(推荐)
|
# ✅ 添加 Tini(推荐)
|
||||||
ENV TINI_VERSION=v0.19.0
|
ENV TINI_VERSION=v0.19.0
|
||||||
|
|
@ -161,6 +162,8 @@ ENTRYPOINT ["/tini", "--"]
|
||||||
# ---- 拷贝模型(路径可换) ----
|
# ---- 拷贝模型(路径可换) ----
|
||||||
COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
|
COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
|
||||||
|
|
||||||
|
HEALTHCHECK --interval=30s --timeout=2s --start-period=600s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1
|
||||||
|
|
||||||
# ---- 暴露端口 ----
|
# ---- 暴露端口 ----
|
||||||
EXPOSE 30000
|
EXPOSE 30000
|
||||||
|
|
||||||
|
|
@ -170,4 +173,5 @@ CMD ["python3", "-m", "sglang.launch_server", \
|
||||||
"--port", "30000", \
|
"--port", "30000", \
|
||||||
"--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
|
"--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
|
||||||
"--tp", "1", \
|
"--tp", "1", \
|
||||||
"--api-key", "token-abc123"]
|
"--api-key", "token-abc123", \
|
||||||
|
"--enable-metrics"]
|
||||||
|
|
@ -216,9 +216,12 @@ HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20))
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
async def health() -> Response:
|
async def health():
|
||||||
"""Check the health of the http server."""
|
"""Check the health of the http server and return version info."""
|
||||||
return Response(status_code=200)
|
return {
|
||||||
|
"status": "ok",
|
||||||
|
"version": "v1.0.0" # 这里写上你希望显示的版本号
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health_generate")
|
@app.get("/health_generate")
|
||||||
|
|
|
||||||
|
|
@ -868,12 +868,22 @@ def set_ulimit(target_soft_limit=65535):
|
||||||
def add_api_key_middleware(app, api_key: str):
|
def add_api_key_middleware(app, api_key: str):
|
||||||
@app.middleware("http")
|
@app.middleware("http")
|
||||||
async def authentication(request, call_next):
|
async def authentication(request, call_next):
|
||||||
|
# OPTIONS 请求(CORS 预检)直接放行
|
||||||
if request.method == "OPTIONS":
|
if request.method == "OPTIONS":
|
||||||
return await call_next(request)
|
return await call_next(request)
|
||||||
if request.url.path.startswith("/health"):
|
|
||||||
return await call_next(request)
|
# 明确列出无需鉴权的路径前缀
|
||||||
if request.url.path.startswith("/metrics"):
|
whitelist_prefixes = (
|
||||||
|
"/health",
|
||||||
|
"/metrics",
|
||||||
|
"/ping",
|
||||||
|
"/get_model_info",
|
||||||
|
)
|
||||||
|
|
||||||
|
if any(request.url.path.startswith(prefix) for prefix in whitelist_prefixes):
|
||||||
return await call_next(request)
|
return await call_next(request)
|
||||||
|
|
||||||
|
# Bearer Token 鉴权
|
||||||
if request.headers.get("Authorization") != "Bearer " + api_key:
|
if request.headers.get("Authorization") != "Bearer " + api_key:
|
||||||
return ORJSONResponse(content={"error": "Unauthorized"}, status_code=401)
|
return ORJSONResponse(content={"error": "Unauthorized"}, status_code=401)
|
||||||
return await call_next(request)
|
return await call_next(request)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue