evalscope_v0.17.0/evalscope.0.17.0/evalscope/app/utils/text_utils.py

120 lines
3.5 KiB
Python

"""
Text processing utilities for the Evalscope dashboard.
"""
import json
import numpy as np
import os
import pandas as pd
import re
from typing import Any, Dict, List
from evalscope.utils.logger import get_logger
from ..constants import LATEX_DELIMITERS
logger = get_logger()
def convert_markdown_image(text):
    """Turn a path to an existing image file into a markdown image tag.

    Args:
        text: Candidate file-system path.

    Returns:
        ``![image](gradio_api/file=<absolute path>)`` when ``text`` names an
        existing file whose name ends in ``.png``, ``.jpg`` or ``.jpeg``
        (case-sensitive); otherwise ``text`` unchanged.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    # Anything that is not an existing file with an image suffix passes
    # through untouched.
    if not (os.path.isfile(text) and text.endswith(image_suffixes)):
        return text

    # The tag embeds the absolute path of the file.
    text = os.path.abspath(text)
    image_tag = f'![image](gradio_api/file={text})'
    logger.debug(f'Converting image path to markdown: {text} -> {image_tag}')
    return image_tag
def convert_html_tags(text):
    """Rewrite bare HTML-style tags into square-bracket notation.

    ``<tag>`` becomes ``[tag]`` and ``</tag>`` becomes ``[/tag]``. Only tags
    made up solely of word characters are matched, so tags carrying
    attributes or a self-closing slash are left as they are.

    Args:
        text: The string to rewrite.

    Returns:
        The string with matching opening and closing tags replaced.
    """
    substitutions = (
        (r'<(\w+)>', r'[\1]'),  # opening tags
        (r'</(\w+)>', r'[/\1]'),  # closing tags
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
def process_string(string: str, max_length: int = 2048) -> str:
    """Convert HTML-style tags and shorten over-long text for display.

    Args:
        string: The text to process.
        max_length: Maximum number of characters of converted text to keep.
            A falsy value (``None`` or ``0``) disables truncation.

    Returns:
        The tag-converted text. When it is longer than ``max_length`` the
        middle is replaced by ``......`` and only the head and tail are kept.
    """
    # Tags are rewritten to bracket form by convert_html_tags before any
    # length check, so the limit applies to the converted text.
    converted = convert_html_tags(string)
    if not max_length or len(converted) <= max_length:
        return converted

    # ``-max_length // 2`` parses as ``(-max_length) // 2`` and floors, so for
    # an odd limit the tail is one character longer than the head and the two
    # kept pieces together are exactly ``max_length`` characters.
    head = converted[:max_length // 2]
    tail = converted[-max_length // 2:]
    return f'{head}......{tail}'
def dict_to_markdown(data) -> str:
    """Render a mapping as markdown, one bold-keyed section per entry.

    Args:
        data: Mapping to render. List values become bullet lists, dict values
            are rendered recursively, and anything else goes through ``str``.

    Returns:
        The sections joined by blank lines; an empty string for an empty
        mapping.
    """

    def render_value(value) -> str:
        # Lists: a leading newline, then one "- " bullet per element.
        if isinstance(value, list):
            bullets = [f'- {process_model_prediction(item, max_length=None)}' for item in value]
            return '\n' + '\n'.join(bullets)
        # Nested mappings use the same layout recursively.
        if isinstance(value, dict):
            return dict_to_markdown(value)
        return str(value)

    # max_length=None: tags are converted here but nothing is truncated.
    sections = [
        f'**{key}**:\n{process_string(render_value(value), max_length=None)}'
        for key, value in data.items()
    ]
    return '\n\n'.join(sections)
def process_model_prediction_old(item: Any, max_length: int = 2048) -> str:
    """
    Format a model prediction as markdown text.

    Dictionaries are rendered with ``dict_to_markdown``; lists become one
    ``- `` bullet per element, each element formatted by
    ``process_model_prediction``; any other value is converted with ``str``.

    Args:
        item: The value to format. Can be a string, list, or dictionary.
        max_length: Limit forwarded to ``process_string``. Pass ``None`` to
            return the formatted text without tag conversion or truncation.

    Returns:
        The formatted string.
    """
    if isinstance(item, dict):
        rendered = dict_to_markdown(item)
    elif isinstance(item, list):
        bullets = (f'- {process_model_prediction(entry, max_length=None)}' for entry in item)
        rendered = '\n'.join(bullets)
    else:
        rendered = str(item)

    # Tag conversion and truncation happen once, on the final text only.
    return rendered if max_length is None else process_string(rendered, max_length)
def process_model_prediction(item: Any, max_length: int = 32000) -> str:
    """Format a model prediction for markdown display.

    Args:
        item: The value to format. Dictionaries and lists are serialized as
            indented JSON inside a fenced ``json`` code block; any other
            value is converted with ``str``.
        max_length: Limit forwarded to ``process_string``. Pass ``None`` to
            return the formatted text without tag conversion or truncation.

    Returns:
        The formatted string.
    """
    if not isinstance(item, (dict, list)):
        text = str(item)
    else:
        payload = json.dumps(item, ensure_ascii=False, indent=2)
        text = f'```json\n{payload}\n```'

    # Tag conversion and truncation happen once, on the final text only.
    if max_length is None:
        return text
    return process_string(text, max_length)
def process_json_content(content: Any) -> str:
    """
    Serialize content as pretty-printed JSON for markdown display.

    Args:
        content (Any): The value to serialize. NumPy boolean, integer and
            floating scalars are first converted to their string form. A
            string (including one produced that way) is wrapped as
            ``{'content': <string>}``. Any other value is handed to
            ``json.dumps`` as is.

    Returns:
        str: JSON text indented by two spaces, with non-ASCII characters
        kept unescaped.

    Raises:
        TypeError: If ``content`` holds a value ``json.dumps`` cannot
            serialize.
    """
    # Match the abstract scalar families instead of np.int_ / np.float_:
    # np.float_ was removed in NumPy 2.0 (attribute access raises), and
    # np.int_ only matches the default integer width, so other widths such
    # as np.int32 used to fall through and fail in json.dumps.
    if isinstance(content, (np.bool_, np.integer, np.floating)):
        content = str(content)

    # Bare strings are wrapped so the output is always a JSON document with
    # a named field rather than a lone quoted string.
    if isinstance(content, str):
        content = {'content': content}

    return json.dumps(content, ensure_ascii=False, indent=2)