# Image for the app.main:app service; bundles FlagEmbedding and the bge-m3 model weights.
FROM python:3.10-slim

# Install system dependencies. curl is used by the HEALTHCHECK probe;
# gcc is presumably needed to build Python wheels during pip install (TODO confirm).
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        gcc \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Set the working directory for the following build steps and the running container.
WORKDIR /app

# Install Python dependencies. requirements.txt is copied on its own so this
# layer stays cached until the dependency list changes.
# --no-cache-dir is passed to both pip invocations so no pip cache ends up in the layer.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Install the local FlagEmbedding sources shipped in the build context.
# --no-deps: pip installs only the package itself and does not pull its dependencies
# (presumably they are already listed in requirements.txt — TODO confirm).
COPY FlagEmbedding /opt/FlagEmbedding
RUN pip install --no-cache-dir --no-deps /opt/FlagEmbedding

# Copy the model weights and the application code.
# The weights go first: a change to the application source then only invalidates
# the app layer, and the weights layer stays cached.
COPY model/bge-m3 /app/model/bge-m3
COPY app /app/app

# Document the port the service listens on (EXPOSE does not publish it by itself).
EXPOSE 8001

# Alternative (disabled): start the FastAPI service directly with Uvicorn.
# CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001"]

# Runtime environment for the service process.
# - PYTORCH_CUDA_ALLOC_CONF: PyTorch CUDA allocator split-size setting, added to
#   reduce GPU memory fragmentation (optional but recommended).
# - TOKENIZERS_PARALLELISM: turns off parallelism in the tokenizers library.
# - HF_HUB_DISABLE_TELEMETRY / TRANSFORMERS_NO_ADVISORY_WARNINGS: silence Hugging Face
#   telemetry and advisory warnings.
ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:32 \
    TOKENIZERS_PARALLELISM=false \
    HF_HUB_DISABLE_TELEMETRY=1 \
    TRANSFORMERS_NO_ADVISORY_WARNINGS=1

# Health check: --start-period leaves time for startup and warm-up
# (tune it to the size of the model being loaded).
# The probe requests /ready on the local service port; curl -f makes HTTP error
# statuses fail the check, and the response body is discarded.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://127.0.0.1:8001/ready >/dev/null || exit 1

# Start the service: Gunicorn with 1 worker of class UvicornWorker; each worker
# is its own process. Exec (JSON array) form is used, so no shell wraps Gunicorn.
# --max-requests / --max-requests-jitter recycle a worker after a randomized
# number of handled requests.
# NOTE(review): no USER instruction is visible in this file, so the service
# presumably runs as root — consider switching to a non-root user.
CMD ["gunicorn", "app.main:app", \
     "-k", "uvicorn.workers.UvicornWorker", \
     "-w", "1", "-b", "0.0.0.0:8001", \
     "--timeout", "120", "--graceful-timeout", "30", \
     "--max-requests", "200000", "--max-requests-jitter", "20000"]