evalscope/examples/viz/20250117_154119/reports/Qwen2.5-0.5B-Instruct/ceval.json

47 lines
1.4 KiB
JSON

{
"name": "Qwen2.5-0.5B-Instruct_ceval",
"dataset_name": "ceval",
"model_name": "Qwen2.5-0.5B-Instruct",
"score": 0.35,
"metrics": [
{
"name": "AverageAccuracy",
"num": 40,
"score": 0.35,
"macro_score": 0.35,
"categories": [
{
"name": [
"STEM"
],
"num": 40,
"score": 0.35,
"macro_score": 0.35,
"subsets": [
{
"name": "computer_network",
"score": 0.1,
"num": 10
},
{
"name": "operating_system",
"score": 0.3,
"num": 10
},
{
"name": "computer_architecture",
"score": 0.6,
"num": 10
},
{
"name": "college_programming",
"score": 0.4,
"num": 10
}
]
}
]
}
]
}