import copy
import openai
from collections import defaultdict
from openai.types.chat import ChatCompletion, ChatCompletionChunk
from openai.types.chat.chat_completion import ChatCompletionMessage, Choice
from typing import Iterable, List, Optional

from evalscope.utils.argument_utils import get_supported_params
from evalscope.utils.logger import get_logger
from .base_adapter import BaseModelAdapter

logger = get_logger()


class ServerModelAdapter(BaseModelAdapter):
    """
    Server model adapter that sends requests to a remote OpenAI-compatible API model
    and collects the generated results.
    """

    def __init__(self, api_url: str, model_id: str, api_key: str = 'EMPTY', **kwargs):
        """
        Args:
            api_url: The URL of the remote API model.
            model_id: The ID of the remote API model.
            api_key: The API key of the remote API model.
        """
        # Normalize the URL: the OpenAI client appends '/chat/completions' itself,
        # so strip that suffix if the caller passed the full route.
        self.api_url = api_url.rstrip('/').rsplit('/chat/completions', 1)[0]
        self.model_id = model_id
        self.api_key = api_key

        self.client = openai.OpenAI(
            api_key=self.api_key,
            base_url=self.api_url,
        )
        self.supported_params = get_supported_params(self.client.chat.completions.create)

        self.seed = kwargs.get('seed', None)
        self.timeout = kwargs.get('timeout', 60)
        self.stream = kwargs.get('stream', False)
        self.model_cfg = {'api_url': api_url, 'model_id': model_id, 'api_key': api_key}
        super().__init__(model=None, model_cfg=self.model_cfg, **kwargs)
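
    # Illustrative usage (a minimal sketch; the endpoint, model name and input
    # below are placeholder assumptions, not values shipped with evalscope):
    #
    #   adapter = ServerModelAdapter(
    #       api_url='http://127.0.0.1:8000/v1/chat/completions',
    #       model_id='my-model',
    #       api_key='EMPTY',
    #   )
    #   results = adapter.predict([{'data': ['What is the capital of France?']}])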
    def predict(self, inputs: List[dict], infer_cfg: Optional[dict] = None) -> List[dict]:
        """
        Model prediction function.

        Args:
            inputs (List[dict]): The input data.
            infer_cfg (dict): Inference configuration.

        Returns:
            res (List[dict]): The model prediction results.
        """
        infer_cfg = infer_cfg or {}
        results = []

        for input_item in inputs:
            response = self.process_single_input(input_item, infer_cfg)
            results.append(response)

        return results

    def process_single_input(self, input_item: dict, infer_cfg: dict) -> dict:
        """Process a single input item."""
        request_json = self.make_request(input_item, infer_cfg)
        response = self.send_request(request_json)
        return response

    def make_request_messages(self, input_item: dict) -> list:
        """
        Make request messages for the OpenAI API.
        """
        if input_item.get('messages', None):
            return input_item['messages']

        data: list = input_item['data']
        if isinstance(data[0], tuple):  # for truthful_qa and hellaswag
            query = '\n'.join(''.join(item) for item in data)
        else:
            query = data[0]
        system_prompt = input_item.get('system_prompt', None)

        messages = []
        if system_prompt:
            messages.append({'role': 'system', 'content': system_prompt})

        messages.append({'role': 'user', 'content': query})

        return messages
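
    # For illustration, the two input shapes this method accepts (example
    # values assumed, not taken from evalscope): a prebuilt conversation is
    # passed through unchanged,
    #   {'messages': [{'role': 'user', 'content': 'Hi'}]}
    # while plain data is wrapped into a single user turn,
    #   {'data': ['Hi'], 'system_prompt': 'Be brief.'}
    #   -> [{'role': 'system', 'content': 'Be brief.'},
    #       {'role': 'user', 'content': 'Hi'}]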
    def make_request(self, input_item: dict, infer_cfg: dict) -> dict:
        """Make a request body for the remote API."""
        messages = self.make_request_messages(input_item)
        # Format request JSON according to OpenAI API format
        request_json = {'model': self.model_id, 'messages': messages, **infer_cfg}

        if self.timeout:
            request_json['timeout'] = self.timeout

        request_json['stream'] = self.stream
        if self.stream:
            request_json['stream_options'] = {'include_usage': True}

        if input_item.get('tools', None):
            tools_copy = copy.deepcopy(input_item.get('tools'))
            # Remove the 'response' field from each function definition,
            # as it doesn't need to go to the model
            for tool in tools_copy:
                if 'function' in tool and 'response' in tool['function']:
                    del tool['function']['response']
            request_json['tools'] = tools_copy

        logger.debug(f'Request to remote API: {request_json}')

        return request_json
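
    # A request built this way might look like the following (a sketch with
    # assumed values, not captured output; 'temperature' comes from infer_cfg):
    #
    #   {'model': 'my-model',
    #    'messages': [{'role': 'user', 'content': 'Hi'}],
    #    'temperature': 0.7,
    #    'timeout': 60,
    #    'stream': False}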
    def _parse_extra_params(self, request_json):
        """Split request keys into natively supported API params and `extra_body` params."""
        api_params = {}
        extra_body = {}
        for key, value in request_json.items():
            if key in self.supported_params:
                api_params[key] = value
            else:
                extra_body[key] = value

        if extra_body:
            api_params['extra_body'] = extra_body
        return api_params
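
    # For illustration (assumed keys): a parameter the OpenAI client supports
    # natively, such as 'temperature', stays top-level, while a server-specific
    # one, such as 'top_k', is routed through 'extra_body':
    #
    #   {'temperature': 0.7, 'top_k': 20}
    #   -> {'temperature': 0.7, 'extra_body': {'top_k': 20}}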
    def send_request(self, request_json: dict) -> dict:
        """Send the request to the remote API and return the response as a dict."""
        try:
            parsed_request = self._parse_extra_params(request_json)
            response = self.client.chat.completions.create(**parsed_request)

            if response and self.stream:
                response = self._collect_stream_response(response)

            return response.model_dump(exclude_unset=True)
        except Exception as e:
            logger.error(f'Error when calling remote API: {str(e)}')
            raise

    def _collect_stream_response(self, response_stream: Iterable[ChatCompletionChunk]) -> ChatCompletion:
        """Aggregate a streamed sequence of chunks into a single ChatCompletion."""
        collected_chunks = []
        collected_messages = defaultdict(list)
        collected_reasoning = defaultdict(list)
        collected_tool_calls = defaultdict(dict)

        for chunk in response_stream:
            collected_chunks.append(chunk)
            for choice in chunk.choices:
                # Handle reasoning content
                if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content is not None:
                    collected_reasoning[choice.index].append(choice.delta.reasoning_content)

                # Handle regular content
                if choice.delta.content is not None:
                    collected_messages[choice.index].append(choice.delta.content)

                # Handle tool calls
                if hasattr(choice.delta, 'tool_calls') and choice.delta.tool_calls:
                    for tool_call in choice.delta.tool_calls:
                        tool_id = tool_call.index

                        # Initialize tool call if not present
                        if tool_id not in collected_tool_calls[choice.index]:
                            collected_tool_calls[choice.index][tool_id] = {
                                'id': tool_call.id if hasattr(tool_call, 'id') and tool_call.id else None,
                                'type': tool_call.type if hasattr(tool_call, 'type') and tool_call.type else None,
                                'function': {
                                    'name': '',
                                    'arguments': ''
                                }
                            }

                        # Update tool call with new fragments
                        if hasattr(tool_call, 'function'):
                            if hasattr(tool_call.function, 'name') and tool_call.function.name:
                                collected_tool_calls[choice.index][tool_id]['function']['name'] = tool_call.function.name

                            if hasattr(tool_call.function, 'arguments') and tool_call.function.arguments:
                                collected_tool_calls[choice.index][tool_id]['function']['arguments'] += tool_call.function.arguments

                        # Update the ID if it was received in a later chunk
                        if hasattr(tool_call, 'id') and tool_call.id:
                            collected_tool_calls[choice.index][tool_id]['id'] = tool_call.id

        # Gather all unique choice indices across the collections
        all_indices = set(collected_messages.keys()) | set(collected_reasoning.keys()) | set(collected_tool_calls.keys())

        choices = []
        for index in sorted(all_indices):
            full_reply_content = ''.join(collected_messages.get(index, []))
            reasoning = ''.join(collected_reasoning.get(index, []))

            # Process tool_calls for this choice if any exist
            tool_calls_list = None
            if index in collected_tool_calls and collected_tool_calls[index]:
                tool_calls_list = list(collected_tool_calls[index].values())
                # Filter out any tool calls with a None id (incomplete tool calls)
                tool_calls_list = [tc for tc in tool_calls_list if tc['id'] is not None]

            # Use the finish_reason from the last chunk that carried this choice
            # (a chunk may hold several choices, so scan them all rather than
            # assuming the choice sits at position 0)
            finish_reason = None
            for chunk in reversed(collected_chunks):
                matched = next((c for c in chunk.choices if c.index == index), None)
                if matched is not None:
                    finish_reason = matched.finish_reason
                    break

            message_kwargs = {'role': 'assistant', 'content': full_reply_content}

            if reasoning:
                message_kwargs['reasoning_content'] = reasoning

            if tool_calls_list:
                message_kwargs['tool_calls'] = tool_calls_list

            choice = Choice(
                finish_reason=finish_reason or 'stop', index=index, message=ChatCompletionMessage(**message_kwargs))
            choices.append(choice)

        # Build the final completion object
        return ChatCompletion(
            id=collected_chunks[0].id,
            choices=choices,
            created=collected_chunks[0].created,
            model=collected_chunks[0].model,
            object='chat.completion',
            usage=collected_chunks[-1].usage  # use the usage from the last chunk
        )
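
# Streaming illustration (assumed delta fragments, not real API output):
# content deltas 'The', ' answer', ' is 4.' for choice index 0 are joined into
# a single assistant message with content == 'The answer is 4.'; token usage is
# taken from the final chunk, which is populated because make_request sets
# stream_options={'include_usage': True} when streaming.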