embed-bge-m3/Dockerfile

44 lines
1.4 KiB
Docker
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

FROM python:3.10-slim

# Install OS-level packages, then remove the apt package lists in the same
# layer so they are not kept in the image.
#   curl         - used by the HEALTHCHECK probe in this file
#   gcc          - presumably needed to compile Python wheels during pip install (TODO confirm)
#   libglib2.0-0 - presumably a shared library required by a Python dependency (TODO confirm)
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        gcc \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*
# Working directory for the remaining build steps and for the running container.
WORKDIR /app

# Install Python dependencies. Only requirements.txt is copied at this point,
# so this layer stays cached until the dependency list itself changes.
# --no-cache-dir is passed to BOTH pip invocations: the pip self-upgrade would
# otherwise leave its download cache behind in the image layer.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Install the local FlagEmbedding source tree shipped in the build context.
# --no-deps: pip does not resolve FlagEmbedding's own dependencies here;
# presumably they are already covered by requirements.txt (TODO confirm).
COPY FlagEmbedding /opt/FlagEmbedding
RUN pip install --no-cache-dir --no-deps /opt/FlagEmbedding
# Copy the model weights and the application code.
# The weights are copied first on purpose: a changed layer invalidates every
# layer after it, and the application code is expected to change far more often
# than the weights. With this order a code-only rebuild reuses the cached
# weights layer instead of re-creating it.
COPY model/bge-m3 /app/model/bge-m3
COPY app /app/app
# Port the service binds to (EXPOSE is documentation only; it does not publish the port).
EXPOSE 8001

# Previous start command (start the FastAPI service with plain uvicorn), kept for reference:
# CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001"]

# Runtime environment, grouped into a single instruction.
#   PYTORCH_CUDA_ALLOC_CONF - PyTorch GPU memory split-size setting, added to
#                             reduce fragmentation (optional but recommended)
#   The remaining variables switch off tokenizer parallelism, Hugging Face Hub
#   telemetry and Transformers advisory warnings, as their names indicate.
ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:32 \
    TOKENIZERS_PARALLELISM=false \
    HF_HUB_DISABLE_TELEMETRY=1 \
    TRANSFORMERS_NO_ADVISORY_WARNINGS=1
# Health check: request /ready on the local service port and discard the body;
# any curl failure (including an HTTP error status, because of -f) marks the
# probe as failed. The start period leaves time for startup and warm-up
# (adjust --start-period to the size of your model).
HEALTHCHECK --start-period=120s --interval=30s --timeout=5s --retries=3 \
    CMD curl -fsS -o /dev/null http://127.0.0.1:8001/ready || exit 1
# Start Gunicorn with the Uvicorn worker class and a single worker; each
# Gunicorn worker runs as its own independent process.
# NOTE(review): with only one worker, the --max-requests recycling presumably
# leaves a gap while the replacement worker starts up - confirm this is acceptable.
CMD ["gunicorn", "app.main:app", \
     "-k", "uvicorn.workers.UvicornWorker", \
     "-w", "1", \
     "-b", "0.0.0.0:8001", \
     "--timeout", "120", \
     "--graceful-timeout", "30", \
     "--max-requests", "1000", \
     "--max-requests-jitter", "200"]