{ "name": "Qwen2.5-0.5B-Instruct_ceval", "dataset_name": "ceval", "model_name": "Qwen2.5-0.5B-Instruct", "score": 0.35, "metrics": [ { "name": "AverageAccuracy", "num": 40, "score": 0.35, "macro_score": 0.35, "categories": [ { "name": [ "STEM" ], "num": 40, "score": 0.35, "macro_score": 0.35, "subsets": [ { "name": "computer_network", "score": 0.1, "num": 10 }, { "name": "operating_system", "score": 0.3, "num": 10 }, { "name": "computer_architecture", "score": 0.6, "num": 10 }, { "name": "college_programming", "score": 0.4, "num": 10 } ] } ] } ] }