# Source: evalscope/benchmarks/mmlu/mmlu.py
# isort: skip_file
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
import datasets
import os
import pandas as pd
"""The MMLU dataset on ModelScope hub. READ ONLY, DO NOT MODIFY."""
_CITATION = """\
@article{hendryckstest2021,
title={Measuring Massive Multitask Language Understanding},
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
journal={Proceedings of the International Conference on Learning Representations (ICLR)},
year={2021}
}
"""
_DESCRIPTION = """\
Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas
Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
"""
_HOMEPAGE = 'https://modelscope.cn/datasets/modelscope/mmlu/summary'
_LICENSE = 'MIT'
# _URL = "https://people.eecs.berkeley.edu/~hendrycks/data.tar"
_URL = 'https://modelscope.cn/api/v1/datasets/modelscope/mmlu/repo?Revision=master&FilePath=data.tar'
# The 57 MMLU subject names. Each entry becomes one BuilderConfig (see
# MMLU.BUILDER_CONFIGS) and maps to '<name>_test.csv' / '<name>_val.csv' /
# '<name>_dev.csv' inside the downloaded archive. Order determines config order.
task_list = [
'high_school_european_history',
'business_ethics',
'clinical_knowledge',
'medical_genetics',
'high_school_us_history',
'high_school_physics',
'high_school_world_history',
'virology',
'high_school_microeconomics',
'econometrics',
'college_computer_science',
'high_school_biology',
'abstract_algebra',
'professional_accounting',
'philosophy',
'professional_medicine',
'nutrition',
'global_facts',
'machine_learning',
'security_studies',
'public_relations',
'professional_psychology',
'prehistory',
'anatomy',
'human_sexuality',
'college_medicine',
'high_school_government_and_politics',
'college_chemistry',
'logical_fallacies',
'high_school_geography',
'elementary_mathematics',
'human_aging',
'college_mathematics',
'high_school_psychology',
'formal_logic',
'high_school_statistics',
'international_law',
'high_school_mathematics',
'high_school_computer_science',
'conceptual_physics',
'miscellaneous',
'high_school_chemistry',
'marketing',
'professional_law',
'management',
'college_physics',
'jurisprudence',
'world_religions',
'sociology',
'us_foreign_policy',
'high_school_macroeconomics',
'computer_security',
'moral_scenarios',
'moral_disputes',
'electrical_engineering',
'astronomy',
'college_biology',
]
class MMLUConfig(datasets.BuilderConfig):
    """BuilderConfig for a single MMLU subject (one instance per task name)."""

    def __init__(self, **kwargs):
        # All subject configs share the same fixed dataset version.
        release = datasets.Version('1.0.0')
        super().__init__(version=release, **kwargs)
class MMLU(datasets.GeneratorBasedBuilder):
    """Builder for the MMLU benchmark hosted on the ModelScope hub.

    One config per subject in ``task_list``; each config reads the subject's
    ``*_test.csv`` / ``*_val.csv`` / ``*_dev.csv`` from the extracted
    ``data.tar`` archive and yields multiple-choice examples.
    """

    BUILDER_CONFIGS = [MMLUConfig(name=task_name, ) for task_name in task_list]

    def _info(self):
        """Declare the example schema: question text, four options, answer letter."""
        features = datasets.Features({
            'input': datasets.Value('string'),
            'A': datasets.Value('string'),
            'B': datasets.Value('string'),
            'C': datasets.Value('string'),
            'D': datasets.Value('string'),
            'target': datasets.Value('string'),
        })
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download/extract the archive and map its test/val/dev CSVs to splits.

        Archive layout: ``data/{test,val,dev}/<task>_{test,val,dev}.csv``.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        # (split name, archive sub-directory / filename suffix) pairs.
        split_dirs = [
            (datasets.Split.TEST, 'test'),
            (datasets.Split.VALIDATION, 'val'),
            (datasets.Split.TRAIN, 'dev'),
        ]
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    'filepath': os.path.join(data_dir, 'data', subdir, f'{task_name}_{subdir}.csv'),
                },
            ) for split, subdir in split_dirs
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs from one subject CSV.

        The Hendrycks MMLU CSVs ship without a header row, so they must be
        read with ``header=None``. The previous ``pd.read_csv(filepath)``
        relied on the default ``header='infer'``, which consumed the first
        question of every file as column names (then overwrote them),
        silently dropping one example per split file.
        """
        df = pd.read_csv(filepath, header=None, names=['input', 'A', 'B', 'C', 'D', 'target'])
        for i, instance in enumerate(df.to_dict(orient='records')):
            yield i, instance