evalscope_v0.17.0/evalscope.0.17.0/evalscope/report/combinator.py

108 lines
4.1 KiB
Python

# Copyright (c) Alibaba, Inc. and its affiliates.
import glob
import os
import pandas as pd
from tabulate import tabulate
from typing import List, Tuple
from evalscope.report.utils import Report
from evalscope.utils.logger import get_logger
logger = get_logger()
"""
Combine evaluation reports of LLMs and render them as a table.
"""
def get_report_list(reports_path_list: List[str]) -> List[Report]:
    """
    Load every JSON report found under the given directories.

    Each path is normalized and searched recursively for ``*.json`` files.
    A file that cannot be loaded is logged as an error and skipped, so a
    single bad file does not abort the whole scan.

    Args:
        reports_path_list (List[str]): Directories to search for report files.

    Returns:
        List[Report]: The loaded reports, ordered by
            ``(model_name, dataset_name)``.
    """
    loaded: List[Report] = []
    for root in reports_path_list:
        # '**' together with recursive=True descends into all sub-directories.
        pattern = os.path.join(os.path.normpath(root), '**', '*.json')
        for json_file in glob.glob(pattern, recursive=True):
            try:
                loaded.append(Report.from_json(json_file))
            except Exception as e:
                # Best effort: report the failure and continue with the next file.
                logger.error(f'Error loading report from {json_file}: {e}')
    loaded.sort(key=lambda item: (item.model_name, item.dataset_name))
    return loaded
def get_data_frame(report_list: List[Report],
                   flatten_metrics: bool = True,
                   flatten_categories: bool = True,
                   add_overall_metric: bool = False) -> pd.DataFrame:
    """
    Convert each report to a DataFrame and stack them into one table.

    Args:
        report_list (List[Report]): Reports to convert; each must provide
            ``to_dataframe``.
        flatten_metrics (bool, optional): Forwarded to ``to_dataframe``.
            Defaults to True.
        flatten_categories (bool, optional): Forwarded to ``to_dataframe``.
            Defaults to True.
        add_overall_metric (bool, optional): Forwarded to ``to_dataframe``.
            Defaults to False.

    Returns:
        pd.DataFrame: The per-report frames concatenated in input order with
            a fresh 0..n-1 index, or an empty DataFrame when ``report_list``
            is empty.
    """
    tables = [
        report.to_dataframe(
            flatten_metrics=flatten_metrics,
            flatten_categories=flatten_categories,
            add_overall_metric=add_overall_metric) for report in report_list
    ]
    if not tables:
        # pd.concat raises ValueError('No objects to concatenate') on an empty
        # list; return an empty frame so "no reports" is not a crash.
        return pd.DataFrame()
    return pd.concat(tables, ignore_index=True)
def gen_table(reports_path_list: list[str] = None,
              report_list: list[Report] = None,
              flatten_metrics: bool = True,
              flatten_categories: bool = True,
              add_overall_metric: bool = False) -> str:
    """
    Generates a formatted table from a list of report paths or Report objects.

    When ``report_list`` is given it is used directly and ``reports_path_list``
    is ignored; otherwise the reports are loaded from ``reports_path_list``
    via ``get_report_list``.

    Args:
        reports_path_list (list[str], optional): List of paths to search for report files.
            Either this or `report_list` must be provided.
        report_list (list[Report], optional): List of Report objects.
            Either this or `reports_path_list` must be provided.
        flatten_metrics (bool, optional): Passed through to `get_data_frame`. Defaults to True.
        flatten_categories (bool, optional): Passed through to `get_data_frame`. Defaults to True.
        add_overall_metric (bool, optional): Passed through to `get_data_frame`. Defaults to False.

    Returns:
        str: A string representation of the table in grid format.

    Raises:
        AssertionError: If neither `reports_path_list` nor `report_list` is provided.
    """
    if reports_path_list is None and report_list is None:
        # Raised explicitly rather than through an `assert` statement so the
        # check still runs under `python -O`; the exception type is kept as
        # AssertionError so existing callers observe the same error.
        raise AssertionError('Either reports_path_list or report_list must be provided.')

    if report_list is None:
        report_list = get_report_list(reports_path_list)

    # Generate a DataFrame from the report list
    table = get_data_frame(
        report_list,
        flatten_metrics=flatten_metrics,
        flatten_categories=flatten_categories,
        add_overall_metric=add_overall_metric)
    return tabulate(table, headers=table.columns, tablefmt='grid', showindex=False)
class ReportsRecorder:
    """
    Placeholder for a reports recorder; it currently has no behavior.
    """

    # Class-level lists, shared by all instances; this class never fills them.
    COMMON_DATASET_PATH = []
    CUSTOM_DATASET_PATH = []

    def __init__(self, oss_url: str = '', endpoint: str = ''):
        """Accept `oss_url` and `endpoint`; both are currently ignored."""
if __name__ == '__main__':
    # Demo: render the reports found under a sample output directory.
    # Another directory that could be added to the list:
    #   './outputs/20250107_204445/reports'
    example_dirs = ['./outputs/20250117_151926']
    print(gen_table(reports_path_list=example_dirs))
# NOTE: all values in the table below are illustrative examples only.
# +--------------------------+-------------------+-------------+
# | Model | CompetitionMath | GSM8K |
# +==========================+===================+=============+
# | ZhipuAI_chatglm2-6b-base | 25.0 (acc) | 30.50 (acc) |
# +--------------------------+-------------------+-------------+
# | ZhipuAI_chatglm2-6b | 30.5 (acc) | 40.50 (acc) |
# +--------------------------+-------------------+-------------+