evalscope_v0.17.0/evalscope.0.17.0/evalscope/perf/plugin/datasets/base.py

67 lines
1.7 KiB
Python

import json
import sys
from abc import abstractmethod
from typing import Any, Dict, Iterator, List, Tuple
from evalscope.perf.arguments import Arguments
class DatasetPluginBase:
    """Base class for dataset plugins that produce request messages.

    Subclasses implement :meth:`build_messages`. Instances are iterable:
    every ``iter(plugin)`` / ``for ... in plugin`` starts a fresh pass over
    ``build_messages()``, while ``next(plugin)`` steps through a single,
    lazily created pass that is stored on the instance.
    """

    def __init__(self, query_parameters: 'Arguments'):
        """Build the dataset plugin.

        Args:
            query_parameters (Arguments): The arguments object, stored
                unchanged on ``self.query_parameters`` for subclasses to use.
        """
        self.query_parameters = query_parameters

    def __next__(self):
        """Return the next item produced by :meth:`build_messages`.

        The underlying generator is created on first use and kept on the
        instance, so consecutive ``next(plugin)`` calls advance through one
        pass. This must be a plain method (not a generator function):
        a ``yield`` here would make ``next(plugin)`` hand back a generator
        object, and an explicit ``raise StopIteration`` inside a generator
        is turned into ``RuntimeError`` by PEP 479.

        Raises:
            StopIteration: When the pass is exhausted.
        """
        # getattr keeps this working for subclasses whose __init__ does not
        # call super().__init__().
        message_iter = getattr(self, '_message_iter', None)
        if message_iter is None:
            message_iter = self.build_messages()
            self._message_iter = message_iter
        # StopIteration from the exhausted generator propagates naturally.
        return next(message_iter)

    def __iter__(self):
        """Return a fresh iterator over :meth:`build_messages`."""
        return self.build_messages()

    @abstractmethod
    def build_messages(self) -> Iterator[List[Dict]]:
        """Build the request messages.

        Raises:
            NotImplementedError: The method is not implemented by the subclass.

        Yields:
            Iterator[List[Dict]]: Yield request messages.
        """
        raise NotImplementedError

    def dataset_line_by_line(self, dataset: str) -> Iterator[str]:
        """Get the content of a dataset file line by line.

        The file is opened as UTF-8 text and read lazily; each yielded line
        keeps its trailing newline, if any.

        Args:
            dataset (str): The dataset path.

        Yields:
            Iterator[str]: Each line of the file.
        """
        with open(dataset, 'r', encoding='utf-8') as f:
            yield from f

    def dataset_json_list(self, dataset: str) -> Iterator[Dict]:
        """Read data from a JSON file which is a list of requests.

        The whole file is parsed at once, then the top-level value is
        iterated (it is expected to be a JSON array).

        Sample: https://huggingface.co/datasets/Yukang/LongAlpaca-12k

        Args:
            dataset (str): The dataset path.

        Yields:
            Iterator[Dict]: Each request object.
        """
        with open(dataset, 'r', encoding='utf-8') as f:
            data = json.load(f)
        yield from data