67 lines
1.7 KiB
Python
67 lines
1.7 KiB
Python
import json
|
|
import sys
|
|
from abc import abstractmethod
|
|
from typing import Any, Dict, Iterator, List, Tuple
|
|
|
|
from evalscope.perf.arguments import Arguments
|
|
|
|
|
|
class DatasetPluginBase:
    """Base class for dataset plugins that produce request messages.

    Subclasses override `build_messages` to yield requests. The helper
    methods `dataset_line_by_line` and `dataset_json_list` read a dataset
    file from disk for them.
    """

    def __init__(self, query_parameters: 'Arguments'):
        """Build the dataset plugin.

        Args:
            query_parameters (Arguments): The query arguments; stored
                unchanged on ``self.query_parameters``.
        """
        self.query_parameters = query_parameters
        # Created on the first `next(plugin)` call, see `__next__`.
        self._message_iterator = None

    def __next__(self):
        """Return the next request from the plugin's own message stream.

        The stream is created from `build_messages` on the first call and
        reused by later calls, so repeated `next(plugin)` calls walk through
        the requests one by one.

        Raises:
            StopIteration: When the stream has no more requests.
        """
        # getattr keeps this working for subclasses whose __init__ does not
        # call the base __init__.
        stream = getattr(self, '_message_iterator', None)
        if stream is None:
            stream = iter(self.build_messages())
            self._message_iterator = stream
        # Must be a plain `return`: a `yield` here would turn __next__ into a
        # generator function and `next(plugin)` would hand back a generator
        # object instead of a request.
        return next(stream)

    def __iter__(self):
        """Return a fresh iterator over `build_messages` for each loop.

        `iter()` also accepts subclasses whose `build_messages` returns a
        plain iterable (e.g. a list) rather than a generator.
        """
        return iter(self.build_messages())

    # NOTE: this class does not use ABCMeta, so the decorator is not enforced
    # at instantiation time; the default body raises instead.
    @abstractmethod
    def build_messages(self) -> Iterator[List[Dict]]:
        """Build the request.

        Raises:
            NotImplementedError: The request is not implemented.

        Yields:
            Iterator[List[Dict]]: Yield request messages.
        """
        raise NotImplementedError

    def dataset_line_by_line(self, dataset: str) -> Iterator[str]:
        """Get content line by line of dataset.

        The file is read lazily as UTF-8 text; each yielded line keeps its
        trailing newline, if it has one.

        Args:
            dataset (str): The dataset path.

        Yields:
            Iterator[str]: Each line of file.
        """
        with open(dataset, 'r', encoding='utf-8') as f:
            yield from f

    def dataset_json_list(self, dataset: str) -> Iterator[Dict]:
        """Read data from file which is list of requests.

        Sample: https://huggingface.co/datasets/Yukang/LongAlpaca-12k

        The whole file is parsed as one UTF-8 JSON document before the first
        item is yielded.

        Args:
            dataset (str): The dataset path.

        Yields:
            Iterator[Dict]: Each request object.
        """
        with open(dataset, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # The file is already closed here; iterate the parsed document.
        yield from data