diff --git a/Dockerfile b/Dockerfile
index 1ddaf1b..8d0a488 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,5 +16,5 @@ RUN pip install --upgrade pip && \
     pip install -e ./gradio-5.35.0
 
-# Verify the installation: print the Gradio version
-CMD ["python", "-c", "import gradio; print('✅ Gradio version =', gradio.__version__)"]
+# Launch the EvalScope configuration UI
+CMD ["python", "evalscope_ui.py"]
 
diff --git a/build-and-run.sh b/build-and-run.sh
index 7692916..d0688e3 100644
--- a/build-and-run.sh
+++ b/build-and-run.sh
@@ -40,8 +40,8 @@ cd ..
 # ======== Build the Docker image ========
 echo "🐳 Building the Docker image..."
 docker build \
-    --build-arg proxy="${PROXY_URL}" \
-    --network=host \
     -t "${IMAGE_NAME}" \
     -f Dockerfile .  # the Dockerfile is expected in the current directory (or pass a path)
+# To build behind a proxy or on the host network, re-add:
+#     --build-arg proxy="${PROXY_URL}" --network=host
 
@@ -49,7 +49,7 @@ docker build \
 echo "🚀 Starting the Gradio container (detached)..."
 docker run -d \
     --name "${CONTAINER_NAME}" \
-    --network=host \
+    -p "${PORT}:${PORT}" \
     "${IMAGE_NAME}"
 
 echo "✅ Gradio container started successfully, listening on port ${PORT}"
diff --git a/evalscope_ui.py b/evalscope_ui.py
new file mode 100644
index 0000000..995b9c2
--- /dev/null
+++ b/evalscope_ui.py
@@ -0,0 +1,143 @@
+"""
+evalscope_ui.py
+End-to-end Gradio configuration panel for EvalScope (built against Gradio 5.35.0)
+Run with: python evalscope_ui.py
+"""
+
+import json, subprocess, os, datetime
+import yaml
+import gradio as gr
+
+# ---------- Constants ----------
+DEFAULT_LIMIT = 5
+DEFAULT_PARALLEL = 1
+PORT = 7860               # keep in sync with the EXPOSE port in the Dockerfile
+REPORT_DIR = "./reports"  # output directory for evaluation reports
+
+os.makedirs(REPORT_DIR, exist_ok=True)
+
+# ---------- Core callback ----------
+def run_eval(
+    model, api_mode, api_url, api_key, local_device_map,  # model / API
+    datasets, limit, gen_cfg_json, ds_cfg_json,           # datasets & parameters
+    backend, save_wandb, save_swanlab, save_gradio,       # visualization outputs
+    stress_parallel, stress_number, stress_stream,        # stress test
+    extra_yaml_json                                       # advanced overrides
+):
+    """
+    * Assemble the form values into an EvalScope TaskConfig dict
+    * Write it to a temporary YAML file
+    * Run evalscope.run in a subprocess
+    """
+    try:
+        task_cfg = {
+            "model"   : model.strip(),
+            "datasets": [d.strip() for d in datasets.split()] if datasets else [],
+            "limit"   : int(limit) if limit else None,
+            "backend" : backend,
+        }
+
+        # ---------- Model / API details ----------
+        if api_mode != "local":
+            task_cfg["api"] = api_mode
+            if api_url: task_cfg["url"] = api_url
+            if api_key: task_cfg["api_key"] = api_key
+        else:
+            task_cfg["model_args"] = {
+                "device_map": local_device_map or "auto"
+            }
+
+        # ---------- Advanced JSON for generation and datasets ----------
+        if gen_cfg_json and gen_cfg_json.strip():
+            task_cfg["generation_config"] = json.loads(gen_cfg_json)
+        if ds_cfg_json and ds_cfg_json.strip():
+            task_cfg["dataset_args"] = json.loads(ds_cfg_json)
+
+        # ---------- Stress test ----------
+        task_cfg["stress_test"] = {
+            "parallel": int(stress_parallel),
+            "number"  : int(stress_number),
+            "stream"  : stress_stream
+        }
+
+        # ---------- Merge extra YAML/JSON fields directly ----------
+        if extra_yaml_json and extra_yaml_json.strip():
+            extra_dict = json.loads(extra_yaml_json)
+            task_cfg.update(extra_dict)
+
+        # ---------- Write a temporary YAML file and execute ----------
+        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        yaml_path = os.path.join("/tmp", f"task_{ts}.yaml")
+        with open(yaml_path, "w") as f:
+            yaml.safe_dump(task_cfg, f, allow_unicode=True)
+
+        # Report output path
+        report_path = os.path.join(REPORT_DIR, f"report_{ts}.json")
+
+        cmd = [
+            "python", "-m", "evalscope.run",
+            "--task-cfg", yaml_path,
+            "--report-path", report_path
+        ]
+
+        # Optional third-party visualization
+        if save_wandb:   cmd += ["--wandb"]
+        if save_swanlab: cmd += ["--swanlab"]
+        if save_gradio:  cmd += ["--gradio"]
+
+        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        return f"✅ Evaluation finished! Report saved to: {report_path}\n\n{completed.stdout}"
+
+    except subprocess.CalledProcessError as e:
+        return f"❌ EvalScope run failed\nSTDERR:\n{e.stderr}"
+    except Exception as e:
+        return f"❌ Internal script error: {e}"
+
+# ---------- Gradio UI ----------
+with gr.Blocks(title="EvalScope Configuration Panel") as demo:
+    gr.Markdown("## EvalScope Evaluation Configuration UI (Gradio@5.35.0)")
+
+    with gr.Tab("Model & API"):
+        model = gr.Textbox(label="Model ID / local path")
+        api_mode = gr.Radio(["openai", "dashscope", "local", "local_vllm"], value="local", label="API / mode")
+        api_url = gr.Textbox(label="API URL (required for remote modes)", placeholder="http://host:port/chat/completion")
+        api_key = gr.Textbox(label="API Key (optional)", type="password")
+        local_device = gr.Textbox(label="device_map (local mode)", value="auto")
+
+    with gr.Tab("Datasets & Parameters"):
+        datasets = gr.Textbox(label="Datasets (space-separated)", placeholder="gsm8k arc mmlu")
+        limit = gr.Number(label="limit", value=DEFAULT_LIMIT, precision=0)
+        # JSON fields are entered as plain text and parsed in run_eval
+        gen_cfg_json = gr.Textbox(label="generation_config (JSON)", value="", lines=4)
+        ds_cfg_json = gr.Textbox(label="dataset_args (JSON)", value="", lines=4)
+
+    with gr.Tab("Backend & Visualization"):
+        backend = gr.Dropdown(["native", "opencompass", "vlmevalkit", "ragas", "mteb"], value="native", label="Evaluation Backend")
+        save_wandb = gr.Checkbox(label="Push to WandB", value=False)
+        save_swanlab = gr.Checkbox(label="Push to SwanLab", value=False)
+        save_gradio = gr.Checkbox(label="Generate local Gradio report", value=True)
+
+    with gr.Tab("Stress Test (optional)"):
+        stress_parallel = gr.Number(label="parallel (concurrency)", value=DEFAULT_PARALLEL, precision=0)
+        stress_number = gr.Number(label="number (total requests)", value=1000, precision=0)
+        stress_stream = gr.Checkbox(label="Enable stream", value=True)
+
+    with gr.Tab("Advanced TaskConfig merge (YAML/JSON)"):
+        extra_yaml_json = gr.Textbox(label="Extra TaskConfig fields (JSON)", value="", lines=6)
+
+    run_btn = gr.Button("🚀 Run EvalScope")
+    output = gr.Textbox(label="Console output / error messages", lines=15)
+
+    run_btn.click(
+        run_eval,
+        inputs=[model, api_mode, api_url, api_key, local_device,
+                datasets, limit, gen_cfg_json, ds_cfg_json,
+                backend, save_wandb, save_swanlab, save_gradio,
+                stress_parallel, stress_number, stress_stream,
+                extra_yaml_json],
+        outputs=output
+    )
+
+# Launch
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=PORT)