evalscope_v0.17.0/evalscope.0.17.0/evalscope/app/utils/text_utils.py

"""
Text processing utilities for the Evalscope dashboard.
"""
import json
import numpy as np
import os
import pandas as pd
import re
from typing import Any, Dict, List

from evalscope.utils.logger import get_logger
from ..constants import LATEX_DELIMITERS

logger = get_logger()


def convert_markdown_image(text):
    if not os.path.isfile(text):
        return text
    # Convert the image path to a markdown image tag
    if text.endswith('.png') or text.endswith('.jpg') or text.endswith('.jpeg'):
        text = os.path.abspath(text)
        image_tag = f'![image](gradio_api/file={text})'
        logger.debug(f'Converting image path to markdown: {text} -> {image_tag}')
        return image_tag
    return text


def convert_html_tags(text):
    # match begin label
    text = re.sub(r'<(\w+)>', r'[\1]', text)
    # match end label
    text = re.sub(r'</(\w+)>', r'[/\1]', text)
    return text


def process_string(string: str, max_length: int = 2048) -> str:
    string = convert_html_tags(string)  # for display labels e.g.
    if max_length and len(string) > max_length:
        return f'{string[:max_length // 2]}......{string[-max_length // 2:]}'
    return string


def dict_to_markdown(data) -> str:
    markdown_lines = []

    for key, value in data.items():
        bold_key = f'**{key}**'

        if isinstance(value, list):
            value_str = '\n' + '\n'.join([f'- {process_model_prediction(item, max_length=None)}' for item in value])
        elif isinstance(value, dict):
            value_str = dict_to_markdown(value)
        else:
            value_str = str(value)

        value_str = process_string(value_str, max_length=None)  # Convert HTML tags but don't truncate
        markdown_line = f'{bold_key}:\n{value_str}'
        markdown_lines.append(markdown_line)

    return '\n\n'.join(markdown_lines)


def process_model_prediction_old(item: Any, max_length: int = 2048) -> str:
    """
    Process model prediction output into a formatted string.

    Args:
        item: The item to process. Can be a string, list, or dictionary.
        max_length: The maximum length of the output string.

    Returns:
        A formatted string representation of the input.
    """
    if isinstance(item, dict):
        result = dict_to_markdown(item)
    elif isinstance(item, list):
        result = '\n'.join([f'- {process_model_prediction(i, max_length=None)}' for i in item])
    else:
        result = str(item)

    # Apply HTML tag conversion and truncation only at the final output
    if max_length is not None:
        return process_string(result, max_length)
    return result


def process_model_prediction(item: Any, max_length: int = 32000) -> str:
    if isinstance(item, (dict, list)):
        result = json.dumps(item, ensure_ascii=False, indent=2)
        result = f'```json\n{result}\n```'
    else:
        result = str(item)

    # Apply HTML tag conversion and truncation only at the final output
    if max_length is not None:
        return process_string(result, max_length)

    return result


def process_json_content(content: Any) -> str:
    """
    Process JSON content to convert it into a markdown-friendly format.

    Args:
        content (str): The JSON content as a string.

    Returns:
        str: The processed content formatted for markdown display.
    """
    if isinstance(content, (np.bool_, np.int_, np.float_)):
        content = str(content)

    if isinstance(content, str):
        content = {'content': content}

    content_json = json.dumps(content, ensure_ascii=False, indent=2)
    return content_json