evalscope_v0.17.0/evalscope.0.17.0/evalscope/benchmarks/data_collection/data_collection_adapter.py

73 lines
2.5 KiB
Python

import math
import os
import re
from typing import Any, Optional
from evalscope.benchmarks import Benchmark, DataAdapter
from evalscope.constants import DEFAULT_DATASET_CACHE_DIR, EvalType, HubType
from evalscope.utils.io_utils import jsonl_to_list
from evalscope.utils.logger import get_logger
# Module-level logger; used below to report where a dataset is loaded from.
logger = get_logger()
@Benchmark.register(
    name='data_collection',
    dataset_id='',  # placeholder: dataset_id needs to be set before loading
    description='Data collection',
    subset_list=['default'],
    metric_list=['AverageAccuracy'],
    few_shot_num=0,
    train_split=None,
    eval_split='test',
    prompt_template='',
)
class DataCollectionAdapter(DataAdapter):
    """Data adapter registered as the ``data_collection`` benchmark.

    The dataset is a single JSON Lines file, read either from a local path
    or from a snapshot downloaded with ``modelscope``. Answer extraction,
    prediction parsing and matching are delegated unchanged to the base
    ``DataAdapter``.
    """

    def __init__(self, **kwargs):
        """
        Data adapter for collection dataset.
        """
        super().__init__(**kwargs)

    def load(self,
             dataset_name_or_path: Optional[str] = None,
             subset_list: Optional[list] = None,
             work_dir: Optional[str] = DEFAULT_DATASET_CACHE_DIR,
             datasets_hub: str = HubType.MODELSCOPE,
             **kwargs) -> dict:
        """
        Load the dataset. Remote and local datasets are supported.

        Args:
            dataset_name_or_path: Local ``.jsonl`` file path (``~`` is
                expanded) or a remote dataset name. Falls back to
                ``self.dataset_id`` when empty.
            subset_list: Accepted for interface compatibility; not used by
                this adapter.
            work_dir: Cache directory passed to the snapshot download.
            datasets_hub: Only used in the log message; the remote branch
                always downloads through ``modelscope``.
            **kwargs: Ignored.

        Returns:
            The records produced by ``jsonl_to_list``.
            NOTE(review): the ``dict`` annotation is kept for signature
            compatibility, but the value is whatever ``jsonl_to_list``
            returns (presumably a list) - confirm against the base class.

        Raises:
            ValueError: If no dataset name/path is available, if the local
                dataset is empty, or if the downloaded snapshot contains no
                ``.jsonl`` file.
        """
        source = dataset_name_or_path or self.dataset_id
        if not source:
            # The registered default dataset_id is '', so fail early with a
            # clear message instead of trying to download an empty name.
            raise ValueError('dataset_id is not set: provide a local jsonl path or a remote dataset name')
        dataset_name_or_path = os.path.expanduser(source)

        # Try to load dataset from local disk
        if os.path.exists(dataset_name_or_path):
            logger.info(f'Loading dataset from {dataset_name_or_path}')
            dataset = jsonl_to_list(dataset_name_or_path)
            if len(dataset) == 0:
                raise ValueError(f'Local dataset is empty: {dataset_name_or_path}')
            return dataset

        # Imported lazily so modelscope is only required for remote loading.
        from modelscope import dataset_snapshot_download

        # Load dataset from remote
        logger.info(f'Loading dataset from {datasets_hub}: > dataset_name: {dataset_name_or_path}')
        dataset_path = dataset_snapshot_download(
            dataset_name_or_path, cache_dir=work_dir, allow_file_pattern='*.jsonl')

        # Find the jsonl file. os.listdir returns entries in arbitrary order,
        # so sort to make the choice deterministic when several files exist.
        jsonl_names = sorted(f for f in os.listdir(dataset_path) if f.endswith('.jsonl'))
        if not jsonl_names:
            raise ValueError(f'No .jsonl file found in downloaded dataset: {dataset_path}')
        if len(jsonl_names) > 1:
            logger.warning(f'Multiple .jsonl files found in {dataset_path}, using {jsonl_names[0]}')
        return jsonl_to_list(os.path.join(dataset_path, jsonl_names[0]))

    def get_gold_answer(self, input_d: Any) -> Any:
        """Return the gold answer for ``input_d`` using the base implementation."""
        return super().get_gold_answer(input_d)

    def match(self, gold: Any, pred: Any) -> Any:
        """Compare ``gold`` and ``pred`` using the base implementation."""
        return super().match(gold, pred)

    def parse_pred_result(self,
                          result: Any,
                          raw_input_d: Optional[dict] = None,
                          eval_type: str = EvalType.CHECKPOINT) -> Any:
        """Parse a raw prediction ``result`` using the base implementation."""
        return super().parse_pred_result(result, raw_input_d, eval_type)