From 023d2a0868192e9707d6506aabd8623f427a7f51 Mon Sep 17 00:00:00 2001 From: hailin Date: Fri, 4 Jul 2025 17:45:05 +0800 Subject: [PATCH] . --- Dockerfile | 18 +++++++++++------- .../sglang/srt/entrypoints/http_server.py | 9 ++++++--- sglang/python/sglang/srt/utils.py | 16 +++++++++++++--- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5f3ac78..89957b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -144,13 +144,14 @@ RUN ls -lh /tmp/wheels && \ python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \ rm -rf /tmp/wheels -# # 安装运行时漏掉的依赖 -# RUN python3 -m pip install --no-cache-dir pydantic orjson psutil pyzmq pynvml transformers==4.48.3 uvicorn fastapi IPython aiohttp setproctitle uvloop sentencepiece triton +# ✅ 安装 Prometheus client +RUN python3 -m pip install --no-cache-dir prometheus_client -# ✅ 离线安装全部依赖(包含所有运行时必需包) -# RUN python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \ -# python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \ -# rm -rf /tmp/wheels +# ✅ 设置多进程 metrics 收集目录(用于 MultiProcessCollector) +ENV PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus + +# ✅ 确保目录存在 +RUN mkdir -p /tmp/prometheus # ✅ 添加 Tini(推荐) ENV TINI_VERSION=v0.19.0 @@ -161,6 +162,8 @@ ENTRYPOINT ["/tini", "--"] # ---- 拷贝模型(路径可换) ---- COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B +HEALTHCHECK --interval=30s --timeout=2s --start-period=600s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1 + # ---- 暴露端口 ---- EXPOSE 30000 @@ -170,4 +173,5 @@ CMD ["python3", "-m", "sglang.launch_server", \ "--port", "30000", \ "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \ "--tp", "1", \ - "--api-key", "token-abc123"] \ No newline at end of file + "--api-key", "token-abc123", \ + "--enable-metrics"] \ No newline at end of file diff --git a/sglang/python/sglang/srt/entrypoints/http_server.py b/sglang/python/sglang/srt/entrypoints/http_server.py index daa8999..54418e7 100644 --- a/sglang/python/sglang/srt/entrypoints/http_server.py +++ b/sglang/python/sglang/srt/entrypoints/http_server.py @@ -216,9 +216,12 @@ HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20)) @app.get("/health") -async def health() -> Response: - """Check the health of the http server.""" - return Response(status_code=200) +async def health(): + """Check the health of the http server and return version info.""" + return { + "status": "ok", + "version": "v1.0.0" # 这里写上你希望显示的版本号 + } @app.get("/health_generate") diff --git a/sglang/python/sglang/srt/utils.py b/sglang/python/sglang/srt/utils.py index 2c0c86f..c45ca9d 100644 --- a/sglang/python/sglang/srt/utils.py +++ b/sglang/python/sglang/srt/utils.py @@ -868,12 +868,22 @@ def set_ulimit(target_soft_limit=65535): def add_api_key_middleware(app, api_key: str): @app.middleware("http") async def authentication(request, call_next): + # OPTIONS 请求(CORS 预检)直接放行 if request.method == "OPTIONS": return await call_next(request) - if request.url.path.startswith("/health"): - return await call_next(request) - if request.url.path.startswith("/metrics"): + + # 明确列出无需鉴权的路径前缀 + whitelist_prefixes = ( + "/health", + "/metrics", + "/ping", + "/get_model_info", + ) + + if any(request.url.path.startswith(prefix) for prefix in whitelist_prefixes): return await call_next(request) + + # Bearer Token 鉴权 if request.headers.get("Authorization") != "Bearer " + api_key: return ORJSONResponse(content={"error": "Unauthorized"}, status_code=401) return await call_next(request)