# Copyright (c) Alibaba, Inc. and its affiliates.
import time
from typing import List, Optional

from evalscope.models import CustomModel
from evalscope.utils.logger import get_logger

logger = get_logger()


class DummyCustomModel(CustomModel):
    """A minimal stand-in model that returns fixed placeholder responses."""

    def __init__(self, config: Optional[dict] = None, **kwargs):
        # Use None instead of a mutable `{}` default, then fall back to an
        # empty dict when initializing the base class.
        super().__init__(config=config or {}, **kwargs)
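
    # Note: `predict` below is the entry point EvalScope invokes for a custom
    # model; `make_request_messages` is a local helper (left unused by this
    # dummy, but handy when wiring in a real backend).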
    def make_request_messages(self, input_item: dict) -> list:
        """
        Build request messages in the OpenAI chat format.
        """
        if input_item.get('messages', None):
            return input_item['messages']

        data: list = input_item['data']
        if isinstance(data[0], tuple):  # for truthful_qa and hellaswag
            query = '\n'.join(''.join(item) for item in data)
        else:
            query = data[0]
        system_prompt = input_item.get('system_prompt', None)

        messages = []
        if system_prompt:
            messages.append({'role': 'system', 'content': system_prompt})

        messages.append({'role': 'user', 'content': query})

        return messages
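
    # For example (derived from the logic above):
    #   make_request_messages({'data': ['1 + 1 = ?']})
    #   -> [{'role': 'user', 'content': '1 + 1 = ?'}]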

    def predict(self, prompts: List[dict], **kwargs):
        original_inputs = kwargs.get('origin_inputs', None)
        infer_cfg = kwargs.get('infer_cfg', None)

        logger.debug(f'** Prompts: {prompts}')
        if original_inputs is not None:
            logger.debug(f'** Original inputs: {original_inputs}')
        if infer_cfg is not None:
            logger.debug(f'** Inference config: {infer_cfg}')
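
        # Assumption about the calling convention: `origin_inputs` appears to
        # carry the raw benchmark records while `prompts` are the formatted
        # inputs; this dummy iterates the raw records below.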

        # Simulate a response based on the prompts.
        # Must return a list of dicts in the same format as the OpenAI
        # Chat Completions API.
        responses = []
        for input_item in original_inputs:
            # message = self.make_request_messages(input_item)
            # response = f'Dummy response for prompt: {message}'
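            # (Hedged sketch) A real implementation would uncomment the two
            # lines above and run actual inference, e.g.:
            #   content = my_backend.chat(message, **(infer_cfg or {}))
            # where `my_backend` is a hypothetical client, not an EvalScope
            # API. The generated text then replaces '*PlaceHolder*' below.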

            res_d = {
                'choices': [{
                    'index': 0,
                    'message': {
                        'content': '*PlaceHolder*',
                        'role': 'assistant'
                    }
                }],
                'created': time.time(),
                'model': self.config.get('model_id'),
                'object': 'chat.completion',
                'usage': {
                    'completion_tokens': 0,
                    'prompt_tokens': 0,
                    'total_tokens': 0
                }
            }

            responses.append(res_d)

        return responses


if __name__ == '__main__':
    from evalscope import TaskConfig, run_task

    dummy_model = DummyCustomModel()
    task_config = TaskConfig(
        model=dummy_model,
        model_id='evalscope-model-dummy',
        datasets=['gsm8k'],
        eval_type='custom',  # must be 'custom' when evaluating a custom model
        generation_config={
            'max_new_tokens': 100,
            'temperature': 0.0,
            'top_p': 1.0,
            'top_k': 50,
            'repetition_penalty': 1.0
        },
        debug=True,
        limit=5,  # evaluate only the first 5 samples as a quick smoke test
    )

    eval_results = run_task(task_cfg=task_config)
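
    # Note: the exact structure of `eval_results` depends on the EvalScope
    # version; with the '*PlaceHolder*' responses above, the gsm8k score
    # should come out as 0.
    logger.info(f'Eval results: {eval_results}')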