This commit is contained in:
parent
53dcdcdb12
commit
914a75bb15
|
|
@ -16,5 +16,5 @@ RUN pip install --upgrade pip && \
|
|||
pip install -e ./gradio-5.35.0
|
||||
|
||||
# 验证安装:打印版本号
|
||||
CMD ["python", "-c", "import gradio; print('✅ Gradio version =', gradio.__version__)"]
|
||||
CMD ["python", "evalscope_ui.py"]
|
||||
|
||||
|
|
|
|||
|
|
@ -40,8 +40,8 @@ cd ..
|
|||
# ======== 构建 Docker 镜像 ========
|
||||
echo "🐳 开始构建 Docker 镜像..."
|
||||
docker build \
|
||||
--build-arg proxy="${PROXY_URL}" \
|
||||
--network=host \
|
||||
# --build-arg proxy="${PROXY_URL}" \
|
||||
# --network=host \
|
||||
-t "${IMAGE_NAME}" \
|
||||
-f Dockerfile . # 你应当把 Dockerfile 放在当前目录(或加路径)
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ docker build \
|
|||
echo "🚀 启动 Gradio 容器(后台运行)..."
|
||||
docker run -d \
|
||||
--name "${CONTAINER_NAME}" \
|
||||
--network=host \
|
||||
# --network=host \
|
||||
"${IMAGE_NAME}"
|
||||
|
||||
echo "✅ Gradio 容器已启动成功,监听端口 ${PORT}"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,141 @@
|
|||
"""
|
||||
evalscope_ui.py
|
||||
End-to-end Gradio configuration panel for EvalScope (targets Gradio ≥5.35.0)
|
||||
Usage: python evalscope_ui.py
|
||||
"""
|
||||
|
||||
import json, subprocess, tempfile, os, datetime
|
||||
import gradio as gr
|
||||
|
||||
# ---------- Constants ----------
|
||||
DEFAULT_LIMIT = 5 # initial value of the "limit" number field in the UI
|
||||
DEFAULT_PARALLEL = 1 # initial value of the stress-test "parallel" number field
|
||||
PORT = 7860 # keep in sync with the Dockerfile's EXPOSE
|
||||
REPORT_DIR = "./reports" # output directory for evaluation reports
|
||||
|
||||
# Make sure the report directory exists as soon as the module is loaded.
os.makedirs(REPORT_DIR, exist_ok=True)
|
||||
|
||||
# ---------- 核心回调 ----------
|
||||
def _decode_json_field(value):
    """Return *value* decoded from JSON text, or unchanged if already decoded.

    The UI may hand the callback either raw JSON text (Textbox / Code
    components) or an already-decoded object (JSON component).  Blank text
    yields ``None`` so callers can treat it as "not provided".

    Raises:
        json.JSONDecodeError: if *value* is non-blank text that is not valid JSON.
    """
    if isinstance(value, (str, bytes, bytearray)):
        return json.loads(value) if value.strip() else None
    return value


def run_eval(
    model, api_mode, api_url, api_key, local_device_map,   # model / API
    datasets, limit, gen_cfg_json, ds_cfg_json,            # datasets & parameters
    backend, save_wandb, save_swanlab, save_gradio,        # visualisation outputs
    stress_parallel, stress_number, stress_stream,         # stress test
    extra_yaml_json                                        # advanced overrides
):
    """Build an EvalScope task config from the form values and run it.

    Steps:
      * assemble the form parameters into a task-config dictionary;
      * write that dictionary to a private temporary YAML file;
      * invoke ``python -m evalscope.run`` on it in a subprocess.

    Args:
        model: Model ID or local path (surrounding whitespace is stripped).
        api_mode: ``"local"`` selects local ``model_args``; any other value is
            stored under ``"api"`` together with the optional URL / key.
        api_url: Endpoint URL, only used when ``api_mode`` is not ``"local"``.
        api_key: API key, only used when ``api_mode`` is not ``"local"``.
        local_device_map: ``device_map`` for local mode (defaults to ``"auto"``).
        datasets: Whitespace-separated dataset names.
        limit: Sample limit; falsy values are stored as ``None``.
        gen_cfg_json: ``generation_config`` as JSON text or a decoded object.
        ds_cfg_json: ``dataset_args`` as JSON text or a decoded object.
        backend: Evaluation backend name.
        save_wandb / save_swanlab / save_gradio: Append the matching CLI flag.
        stress_parallel / stress_number / stress_stream: Stored under
            ``"stress_test"``.
        extra_yaml_json: Extra top-level config keys (JSON object, as text or
            decoded) merged last, so they override the fields above.

    Returns:
        A human-readable status string.  The function never raises: every
        failure is turned into an error message for display in the UI.
    """
    import sys  # local import: only needed to locate the running interpreter

    yaml_path = None
    try:
        task_cfg = {
            "model": model.strip(),
            # str.split() with no argument already drops surrounding whitespace.
            "datasets": datasets.split() if datasets else [],
            "limit": limit or None,
            "backend": backend,
        }

        # ---------- Model / API details ----------
        if api_mode != "local":
            task_cfg["api"] = api_mode
            if api_url:
                task_cfg["url"] = api_url
            if api_key:
                task_cfg["api_key"] = api_key
        else:
            task_cfg["model_args"] = {
                "device_map": local_device_map or "auto"
            }

        # ---------- Advanced generation / dataset JSON ----------
        # Accept both JSON text and already-decoded objects; calling
        # json.loads() on a dict (what a JSON component delivers) would fail.
        gen_cfg = _decode_json_field(gen_cfg_json)
        if gen_cfg:
            task_cfg["generation_config"] = gen_cfg
        ds_cfg = _decode_json_field(ds_cfg_json)
        if ds_cfg:
            task_cfg["dataset_args"] = ds_cfg

        # ---------- Stress test ----------
        task_cfg["stress_test"] = {
            "parallel": stress_parallel,
            "number": stress_number,
            "stream": stress_stream,
        }

        # ---------- Merge extra YAML/JSON fields ----------
        extra_cfg = _decode_json_field(extra_yaml_json)
        if extra_cfg:
            if not isinstance(extra_cfg, dict):
                raise ValueError(
                    "extra TaskConfig fields must be a JSON object, got "
                    f"{type(extra_cfg).__name__}"
                )
            task_cfg.update(extra_cfg)

        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        report_path = os.path.join(REPORT_DIR, f"report_{ts}.json")

        # ---------- Write the temporary YAML and execute ----------
        import yaml

        # mkstemp gives a unique, owner-only file: the config may contain the
        # API key, and a second-resolution name alone could collide.
        fd, yaml_path = tempfile.mkstemp(prefix=f"task_{ts}_", suffix=".yaml")
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            yaml.safe_dump(task_cfg, f, allow_unicode=True)

        cmd = [
            # Use the interpreter running this UI so the same environment
            # (and therefore the same evalscope install) is used.
            sys.executable or "python", "-m", "evalscope.run",
            "--task-cfg", yaml_path,
            "--report-path", report_path,
        ]

        # Optional third-party visualisation
        if save_wandb:
            cmd.append("--wandb")
        if save_swanlab:
            cmd.append("--swanlab")
        if save_gradio:
            cmd.append("--gradio")

        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return f"✅ 评测完成!报告位于: {report_path}\n\n{completed.stdout}"

    except subprocess.CalledProcessError as e:
        return f"❌ EvalScope 执行失败\nSTDERR:\n{e.stderr}"
    except Exception as eg:
        # UI boundary: surface any failure as text instead of crashing the app.
        return f"❌ 脚本内部异常: {eg}"
    finally:
        # The config is only needed while the subprocess runs; do not leave
        # credentials behind in the temp directory.
        if yaml_path is not None:
            try:
                os.remove(yaml_path)
            except OSError:
                pass
|
||||
|
||||
# ---------- Gradio UI ----------
|
||||
# Build the configuration panel.  Each tab groups one family of EvalScope
# options; the button at the bottom passes every field to run_eval() and shows
# the returned status text.
with gr.Blocks(title="EvalScope 全量配置面板") as demo:
    gr.Markdown("## EvalScope 评测配置界面(Gradio@5.35.0)")

    # --- Model and API settings ---
    with gr.Tab("模型与 API"):
        model = gr.Textbox(label="模型 ID / 本地路径")
        api_mode = gr.Radio(
            ["openai", "dashscope", "local", "local_vllm"],
            value="local",
            label="API / 模式",
        )
        api_url = gr.Textbox(
            label="API URL(remote 模式必填)",
            placeholder="http://host:port/chat/completion",
        )
        api_key = gr.Textbox(label="API Key(可选)", type="password")
        local_device = gr.Textbox(label="device_map(local 模式)", value="auto")

    # --- Datasets and generation parameters ---
    with gr.Tab("数据集与参数"):
        datasets = gr.Textbox(label="Datasets(空格分隔)", placeholder="gsm8k arc mmlu")
        limit = gr.Number(label="limit", value=DEFAULT_LIMIT, precision=0)
        # gr.JSON only displays data and cannot be edited by the user, so these
        # fields use an editable JSON code box; the callback receives the text.
        gen_cfg_json = gr.Code(
            label="generation_config(JSON)", language="json", value="{}", interactive=True
        )
        ds_cfg_json = gr.Code(
            label="dataset_args(JSON)", language="json", value="{}", interactive=True
        )

    # --- Backend and visualisation ---
    with gr.Tab("后端与可视化"):
        backend = gr.Dropdown(
            ["native", "opencompass", "vlmevalkit", "ragas", "mteb"],
            value="native",
            label="Evaluation Backend",
        )
        save_wandb = gr.Checkbox(label="推送 WandB", value=False)
        save_swanlab = gr.Checkbox(label="推送 SwanLab", value=False)
        save_gradio = gr.Checkbox(label="生成本地 Gradio 报告", value=True)

    # --- Optional stress test ---
    with gr.Tab("性能压测(可选)"):
        stress_parallel = gr.Number(label="并发 parallel", value=DEFAULT_PARALLEL, precision=0)
        stress_number = gr.Number(label="请求数 number", value=1000, precision=0)
        stress_stream = gr.Checkbox(label="开启 stream", value=True)

    # --- Free-form extra config merged on top of the fields above ---
    with gr.Tab("高级配置 YAML/JSON 合并"):
        extra_yaml_json = gr.Code(
            label="额外 TaskConfig 字段", language="json", value="{}", interactive=True
        )

    run_btn = gr.Button("🚀 运行 EvalScope")
    output = gr.Textbox(label="控制台输出 / 错误信息", lines=15)

    # The input order must match the positional parameters of run_eval().
    run_btn.click(
        run_eval,
        inputs=[
            model, api_mode, api_url, api_key, local_device,
            datasets, limit, gen_cfg_json, ds_cfg_json,
            backend, save_wandb, save_swanlab, save_gradio,
            stress_parallel, stress_number, stress_stream,
            extra_yaml_json,
        ],
        outputs=output,
    )
|
||||
|
||||
# Launch the Gradio server when this file is run as a script.
|
||||
if __name__ == "__main__":
|
||||
# Listen on 0.0.0.0 (not just localhost) on the port configured in PORT.
demo.launch(server_name="0.0.0.0", server_port=PORT)
|
||||
Loading…
Reference in New Issue