# Image for running EvalScope / OpenCompass evaluations against an
# OpenAI-compatible API endpoint. The workload is selected at run time
# through the MODE environment variable (see the entrypoint section below).
FROM python:3.10-slim

# ---------- System dependencies ----------
# The apt package lists are removed in the same layer so they do not end up
# in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY . /app

# ---------- Python dependencies ----------
# Install every requirements/*.txt file one by one, then evalscope itself
# (the project copied into /app), then the OpenAI client libraries.
# `|| exit 1` is required inside the loop: a `for` loop returns the status of
# the last command it ran, so without it a failed install in any iteration
# but the last would be silently ignored and the build would still succeed.
# --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir --upgrade pip && \
    for f in requirements/*.txt; do \
        pip install --no-cache-dir -r "$f" || exit 1; \
    done && \
    pip install --no-cache-dir . && \
    pip install --no-cache-dir openai tiktoken

# ---------- Environment ----------
# Placeholder defaults only; override both at run time with `docker run -e`.
# Do not bake a real API key into the image.
ENV OPENAI_API_BASE=https://api.your-openai-compatible-server.com/v1 \
    OPENAI_API_KEY=dummy_key

# ---------- Entrypoint ----------
# MODE selects the workload:
#   MODE=eval        -> evalscope eval  (sampled accuracy scoring)
#   MODE=perf        -> evalscope perf  (throughput / latency stress test)
#   MODE=opencompass -> python run.py   (multi-benchmark OpenCompass run)
# Any other value, or an unset MODE, prints an error and exits with status 1.
#
# Arguments given after the image name are appended to the selected command:
#   docker run -e MODE=eval -e OPENAI_API_KEY=... <image> --limit 50
#
# Why the script lives in an exec-form ENTRYPOINT: with
# `ENTRYPOINT ["/bin/sh", "-c"]` plus a shell-form CMD, Docker wraps the CMD
# in a second `/bin/sh -c`, so the outer shell only ever ran a bare `/bin/sh`
# and the script never executed; arguments passed to `docker run` also
# replaced the script entirely. Here the script is a fixed ENTRYPOINT
# argument, the trailing "entrypoint" element becomes $0, and CMD /
# `docker run` arguments arrive as "$@".
# Each branch uses `exec` so the tool replaces the shell and receives the
# signals sent by `docker stop`.
# To run an arbitrary command instead, use `docker run --entrypoint ...`.
#
# NOTE(review): the opencompass branch runs `python run.py` relative to
# WORKDIR /app, so it presumably expects OpenCompass's run.py in the build
# context root - confirm that file is actually present in the image.
#
# Keep the script free of lines that start with `#` and of raw double quotes:
# it is a single JSON string joined by line continuations, and invalid JSON
# makes Docker fall back to shell form silently.
ENTRYPOINT ["/bin/sh", "-c", "\
case \"$MODE\" in \
  eval) \
    exec evalscope eval \
      --eval-type service \
      --model openai \
      --model-id gpt-3.5-turbo \
      --api-url \"$OPENAI_API_BASE\" \
      --api-key \"$OPENAI_API_KEY\" \
      --datasets gsm8k \
      --limit 20 \
      --eval-batch-size 1 \
      \"$@\" ;; \
  perf) \
    exec evalscope perf \
      --parallel 1 8 32 \
      --number 20 200 800 \
      --model-id gpt-3.5-turbo \
      --api-url \"$OPENAI_API_BASE\" \
      --api-key \"$OPENAI_API_KEY\" \
      --dataset random \
      --min-prompt-length 1024 --max-prompt-length 1024 \
      \"$@\" ;; \
  opencompass) \
    exec python run.py \
      --models openai_gpt_3_5_turbo \
      --datasets gsm8k mmlu hellaswag humaneval \
      --mode all -w /app/results \
      \"$@\" ;; \
  *) \
    echo \"Unknown MODE=$MODE\" >&2; \
    exit 1 ;; \
esac", "entrypoint"]

# No default extra arguments; anything passed after the image name on
# `docker run` replaces this and is forwarded to the selected command.
CMD []