.

2025-07-06 14:44:31 +08:00 · 2025-07-06 14:44:31 +08:00 · 914a75bb15
parent 53dcdcdb12
commit 914a75bb15
3 changed files with 145 additions and 4 deletions
--- a/2
+++ b/2
@ -16,5 +16,5 @@ RUN pip install --upgrade pip && \
    pip install -e ./gradio-5.35.0

-# 验证安装：打印版本号
-CMD ["python", "-c", "import gradio; print('✅ Gradio version =', gradio.__version__)"]
+# Default command: start the EvalScope Gradio UI
+CMD ["python", "evalscope_ui.py"]

--- a/build-and-run.sh
+++ b/build-and-run.sh
@ -40,8 +40,8 @@ cd ..
 # ======== 构建 Docker 镜像 ========
 echo "🐳 开始构建 Docker 镜像..."
 docker build \
-  --build-arg proxy="${PROXY_URL}" \
-  --network=host \
+#  --build-arg proxy="${PROXY_URL}" \
+#  --network=host \
  -t "${IMAGE_NAME}" \
  -f Dockerfile .  # 你应当把 Dockerfile 放在当前目录（或加路径）

@ -49,7 +49,7 @@ docker build \
 echo "🚀 启动 Gradio 容器（后台运行）..."
 docker run -d \
   --name "${CONTAINER_NAME}" \
-  --network=host \
+  -p "${PORT}:${PORT}" \
   "${IMAGE_NAME}"

 echo "✅ Gradio 容器已启动成功，监听端口 ${PORT}"
--- a/evalscope_ui.py
+++ b/evalscope_ui.py
@ -0,0 +1,141 @@
+"""
+evalscope_ui.py
+End-to-end Gradio configuration panel for EvalScope (targets Gradio 5.35.0).
+Run with: python evalscope_ui.py
+"""
+
+import json, subprocess, tempfile, os, datetime
+import gradio as gr
+
+# ---------- Constants ----------
+DEFAULT_LIMIT = 5
+DEFAULT_PARALLEL = 1
+PORT = 7860                      # keep in sync with the EXPOSE in the Dockerfile
+REPORT_DIR = "./reports"         # output directory for evaluation reports
+
+os.makedirs(REPORT_DIR, exist_ok=True)
+
+# ---------- Core callback ----------
+def run_eval(
+    model, api_mode, api_url, api_key, local_device_map,  # model / API
+    datasets, limit, gen_cfg_json, ds_cfg_json,           # datasets & parameters
+    backend, save_wandb, save_swanlab, save_gradio,       # visualization outputs
+    stress_parallel, stress_number, stress_stream,        # stress test
+    extra_yaml_json                                       # advanced overrides
+):
+    """Build an EvalScope task config from the form values and run it.
+
+    The values are collected into a dict, dumped to a temporary YAML file and
+    handed to ``python -m evalscope.run`` in a subprocess. The ``*_json``
+    arguments may be JSON text or already-decoded dict/list objects.
+
+    Returns a message for the output textbox: the report path plus captured
+    stdout on success, or an error description on failure (never raises).
+    """
+    def _as_obj(value):
+        # Decode textual JSON only: gr.JSON inputs arrive already decoded, and
+        # json.loads() rejects dict/list arguments with a TypeError.
+        return json.loads(value) if isinstance(value, (str, bytes, bytearray)) else value
+
+    yaml_path = None
+    try:
+        import yaml  # imported up front so a missing PyYAML leaves no file behind
+        task_cfg = {
+            "model"   : model.strip(),
+            "datasets": datasets.split() if datasets else [],
+            "limit"   : limit or None,
+            "backend" : backend,
+        }
+
+        # Remote modes carry API settings; "local" carries model_args instead.
+        if api_mode != "local":
+            task_cfg["api"] = api_mode
+            if api_url: task_cfg["url"] = api_url
+            if api_key: task_cfg["api_key"] = api_key
+        else:
+            task_cfg["model_args"] = {"device_map": local_device_map or "auto"}
+
+        if gen_cfg_json:
+            task_cfg["generation_config"] = _as_obj(gen_cfg_json)
+        if ds_cfg_json:
+            task_cfg["dataset_args"] = _as_obj(ds_cfg_json)
+        task_cfg["stress_test"] = {
+            "parallel": stress_parallel,
+            "number"  : stress_number,
+            "stream"  : stress_stream,
+        }
+        # Extra fields are merged last, so they override the values set above.
+        if extra_yaml_json:
+            task_cfg.update(_as_obj(extra_yaml_json))
+
+        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        report_path = os.path.join(REPORT_DIR, f"report_{ts}.json")
+        # Unique, owner-only temp file: the config may hold the API key, and
+        # two runs can start within the same second.
+        with tempfile.NamedTemporaryFile(
+            "w", encoding="utf-8", prefix=f"task_{ts}_", suffix=".yaml", delete=False
+        ) as f:
+            yaml_path = f.name
+            yaml.safe_dump(task_cfg, f, allow_unicode=True)
+
+        cmd = ["python", "-m", "evalscope.run",
+               "--task-cfg", yaml_path, "--report-path", report_path]
+        if save_wandb:   cmd += ["--wandb"]
+        if save_swanlab: cmd += ["--swanlab"]
+        if save_gradio:  cmd += ["--gradio"]
+        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        return f"✅ 评测完成！报告位于: {report_path}\n\n{completed.stdout}"
+    except subprocess.CalledProcessError as e:
+        return f"❌ EvalScope 执行失败\nSTDERR:\n{e.stderr}"
+    except Exception as eg:
+        return f"❌ 脚本内部异常: {eg}"
+    finally:  # always remove the config so the API key does not linger on disk
+        if yaml_path and os.path.exists(yaml_path): os.remove(yaml_path)
+
+# ---------- Gradio UI: one tab per option group, all wired to run_eval ----------
+with gr.Blocks(title="EvalScope 全量配置面板") as demo:
+    gr.Markdown("## EvalScope 评测配置界面（Gradio@5.35.0）")
+    # The JSON fields are text boxes because gr.JSON is display-only (not editable).
+    with gr.Tab("模型与 API"):
+        model          = gr.Textbox(label="模型 ID / 本地路径")
+        api_mode       = gr.Radio(["openai", "dashscope", "local", "local_vllm"], value="local", label="API / 模式")
+        api_url        = gr.Textbox(label="API URL（remote 模式必填）", placeholder="http://host:port/chat/completion")
+        api_key        = gr.Textbox(label="API Key（可选）", type="password")
+        local_device   = gr.Textbox(label="device_map（local 模式）", value="auto")
+
+    with gr.Tab("数据集与参数"):
+        datasets       = gr.Textbox(label="Datasets（空格分隔）", placeholder="gsm8k arc mmlu")
+        limit          = gr.Number(label="limit", value=DEFAULT_LIMIT, precision=0)
+        gen_cfg_json   = gr.Textbox(label="generation_config（JSON）", lines=4, placeholder="{}")
+        ds_cfg_json    = gr.Textbox(label="dataset_args（JSON）", lines=4, placeholder="{}")
+
+    with gr.Tab("后端与可视化"):
+        backend        = gr.Dropdown(["native", "opencompass", "vlmevalkit", "ragas", "mteb"], value="native", label="Evaluation Backend")
+        save_wandb     = gr.Checkbox(label="推送 WandB", value=False)
+        save_swanlab   = gr.Checkbox(label="推送 SwanLab", value=False)
+        save_gradio    = gr.Checkbox(label="生成本地 Gradio 报告", value=True)
+
+    with gr.Tab("性能压测（可选）"):
+        stress_parallel = gr.Number(label="并发 parallel", value=DEFAULT_PARALLEL, precision=0)
+        stress_number   = gr.Number(label="请求数 number", value=1000, precision=0)
+        stress_stream   = gr.Checkbox(label="开启 stream", value=True)
+
+    with gr.Tab("高级配置 YAML/JSON 合并"):
+        extra_yaml_json = gr.Textbox(label="额外 TaskConfig 字段", lines=6, placeholder="{}")
+
+    run_btn  = gr.Button("🚀 运行 EvalScope")
+    output   = gr.Textbox(label="控制台输出 / 错误信息", lines=15)
+
+    run_btn.click(
+        run_eval,
+        inputs=[model, api_mode, api_url, api_key, local_device,
+                datasets, limit, gen_cfg_json, ds_cfg_json,
+                backend, save_wandb, save_swanlab, save_gradio,
+                stress_parallel, stress_number, stress_stream,
+                extra_yaml_json],
+        outputs=output
+    )
+
+# Entry point: serve the UI on all interfaces (0.0.0.0) at PORT
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=PORT)