# isort: skip_file
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
"""The MMLU dataset on ModelScope hub. READ ONLY, DO NOT MODIFY."""

import datasets
import os
import pandas as pd

_CITATION = """\
@article{hendryckstest2021,
    title={Measuring Massive Multitask Language Understanding},
    author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
    journal={Proceedings of the International Conference on Learning Representations (ICLR)},
    year={2021}
}
"""

_DESCRIPTION = """\
Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas
Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
"""

_HOMEPAGE = 'https://modelscope.cn/datasets/modelscope/mmlu/summary'

_LICENSE = 'MIT'

# _URL = "https://people.eecs.berkeley.edu/~hendrycks/data.tar"
_URL = 'https://modelscope.cn/api/v1/datasets/modelscope/mmlu/repo?Revision=master&FilePath=data.tar'

task_list = [
    'high_school_european_history',
    'business_ethics',
    'clinical_knowledge',
    'medical_genetics',
    'high_school_us_history',
    'high_school_physics',
    'high_school_world_history',
    'virology',
    'high_school_microeconomics',
    'econometrics',
    'college_computer_science',
    'high_school_biology',
    'abstract_algebra',
    'professional_accounting',
    'philosophy',
    'professional_medicine',
    'nutrition',
    'global_facts',
    'machine_learning',
    'security_studies',
    'public_relations',
    'professional_psychology',
    'prehistory',
    'anatomy',
    'human_sexuality',
    'college_medicine',
    'high_school_government_and_politics',
    'college_chemistry',
    'logical_fallacies',
    'high_school_geography',
    'elementary_mathematics',
    'human_aging',
    'college_mathematics',
    'high_school_psychology',
    'formal_logic',
    'high_school_statistics',
    'international_law',
    'high_school_mathematics',
    'high_school_computer_science',
    'conceptual_physics',
    'miscellaneous',
    'high_school_chemistry',
    'marketing',
    'professional_law',
    'management',
    'college_physics',
    'jurisprudence',
    'world_religions',
    'sociology',
    'us_foreign_policy',
    'high_school_macroeconomics',
    'computer_security',
    'moral_scenarios',
    'moral_disputes',
    'electrical_engineering',
    'astronomy',
    'college_biology',
]


class MMLUConfig(datasets.BuilderConfig):

    def __init__(self, **kwargs):
        super().__init__(version=datasets.Version('1.0.0'), **kwargs)


class MMLU(datasets.GeneratorBasedBuilder):
    # One builder config per MMLU subject, named after the task.
    BUILDER_CONFIGS = [MMLUConfig(name=task_name) for task_name in task_list]

    def _info(self):
        # Each example is a question, four answer options and the target letter.
        features = datasets.Features({
            'input': datasets.Value('string'),
            'A': datasets.Value('string'),
            'B': datasets.Value('string'),
            'C': datasets.Value('string'),
            'D': datasets.Value('string'),
            'target': datasets.Value('string'),
        })
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        # The archive extracts to data/{test,val,dev}/<task>_<split>.csv;
        # the MMLU 'dev' files (the few-shot examples) are exposed as the train split.
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
                    'filepath': os.path.join(data_dir, 'data', 'test', f'{task_name}_test.csv'),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
                    'filepath': os.path.join(data_dir, 'data', 'val', f'{task_name}_val.csv'),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    'filepath': os.path.join(data_dir, 'data', 'dev', f'{task_name}_dev.csv'),
                },
            ),
        ]

    def _generate_examples(self, filepath):
        # The source CSV files ship without a header row, so read them
        # positionally and assign the column names explicitly; otherwise the
        # first question of each file would be consumed as the header.
        df = pd.read_csv(filepath, header=None)
        df.columns = ['input', 'A', 'B', 'C', 'D', 'target']

        for i, instance in enumerate(df.to_dict(orient='records')):
            yield i, instance
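

# A minimal usage sketch, added for illustration and not part of the original
# builder: loading one MMLU subject through this script with
# `datasets.load_dataset`. The local path 'mmlu.py' and the subject 'anatomy'
# are illustrative assumptions; recent `datasets` releases may additionally
# require `trust_remote_code=True` or may no longer support script-based
# builders at all. The `__main__` guard keeps this out of the import path that
# `load_dataset` uses when it executes this module.
if __name__ == '__main__':
    dataset = datasets.load_dataset('mmlu.py', 'anatomy', split='test')
    # Each record carries the question ('input'), the options 'A' to 'D' and
    # the answer letter ('target').
    print(dataset[0])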