# Copyright (c) Alibaba, Inc. and its affiliates.
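"""A dummy CustomModel implementation for EvalScope.

It returns a fixed '*PlaceHolder*' response in the OpenAI chat.completion
format for every input, which makes it useful for exercising the custom-model
evaluation pipeline without calling a real model.
"""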
import time
from typing import List

from evalscope.models import CustomModel
from evalscope.utils.logger import get_logger

logger = get_logger()


class DummyCustomModel(CustomModel):

    def __init__(self, config: dict = {}, **kwargs):
        super(DummyCustomModel, self).__init__(config=config, **kwargs)
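        # self.config is populated by the CustomModel base class from the
        # config passed above; predict() reads 'model_id' from it below.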

    def make_request_messages(self, input_item: dict) -> list:
        """
        Make request messages for the OpenAI API.
        """
        if input_item.get('messages', None):
            return input_item['messages']

        data: list = input_item['data']
        if isinstance(data[0], tuple):  # for truthful_qa and hellaswag
            query = '\n'.join(''.join(item) for item in data)
            system_prompt = input_item.get('system_prompt', None)
        else:
            query = data[0]
            system_prompt = input_item.get('system_prompt', None)

        messages = []
        if system_prompt:
            messages.append({'role': 'system', 'content': system_prompt})

        messages.append({'role': 'user', 'content': query})

        return messages

    def predict(self, prompts: List[dict], **kwargs):
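        """
        Return one OpenAI-style 'chat.completion' dict per evaluated item.

        The formatted prompts arrive in ``prompts``; the raw dataset inputs
        and the inference config are passed by the runner via the
        ``origin_inputs`` and ``infer_cfg`` keyword arguments. This dummy
        implementation ignores them and answers with a fixed placeholder.
        """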
        original_inputs = kwargs.get('origin_inputs', None)
        infer_cfg = kwargs.get('infer_cfg', None)

        logger.debug(f'** Prompts: {prompts}')
        if original_inputs is not None:
            logger.debug(f'** Original inputs: {original_inputs}')
        if infer_cfg is not None:
            logger.debug(f'** Inference config: {infer_cfg}')

        # Simulate a response based on the prompts
        # Must return a list of dicts with the same format as the OpenAI API.
        responses = []
        for input_item in original_inputs:
            # message = self.make_request_messages(input_item)
            # response = f'Dummy response for prompt: {message}'
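
            # A real implementation would build the chat messages here (e.g.
            # with make_request_messages above), call the actual model, and
            # put the generated text into 'content' below instead of the
            # '*PlaceHolder*' string.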
            res_d = {
                'choices': [{
                    'index': 0,
                    'message': {
                        'content': '*PlaceHolder*',
                        'role': 'assistant'
                    }
                }],
                'created': time.time(),
                'model': self.config.get('model_id'),
                'object': 'chat.completion',
                'usage': {
                    'completion_tokens': 0,
                    'prompt_tokens': 0,
                    'total_tokens': 0
                }
            }

            responses.append(res_d)

        return responses


if __name__ == '__main__':
    from evalscope import TaskConfig, run_task

    dummy_model = DummyCustomModel()

    task_config = TaskConfig(
        model=dummy_model,
        model_id='evalscope-model-dummy',
        datasets=['gsm8k'],
        eval_type='custom',  # must be custom for custom model evaluation
        generation_config={
            'max_new_tokens': 100,
            'temperature': 0.0,
            'top_p': 1.0,
            'top_k': 50,
            'repetition_penalty': 1.0
        },
        debug=True,
        limit=5,
    )

    eval_results = run_task(task_cfg=task_config)
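
    # Print the results returned by run_task as a quick sanity check
    # (scores are not meaningful for the placeholder responses).
    print(eval_results)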