108 lines
4.1 KiB
Python
108 lines
4.1 KiB
Python
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
|
|
import glob
|
|
import os
|
|
import pandas as pd
|
|
from tabulate import tabulate
|
|
from typing import List, Tuple
|
|
|
|
from evalscope.report.utils import Report
|
|
from evalscope.utils.logger import get_logger
|
|
|
|
logger = get_logger()
|
|
"""
|
|
Combine reports of LLMs and generate a table from them.
|
|
"""
|
|
|
|
|
|
def get_report_list(reports_path_list: List[str]) -> List[Report]:
    """Load every JSON report found under the given directories.

    Each path is normalized and searched recursively for ``*.json`` files.
    Every match is parsed with ``Report.from_json``; a file that fails to
    load is logged as an error and skipped.

    Args:
        reports_path_list: Directories to search for report files.

    Returns:
        The loaded reports, sorted by ``(model_name, dataset_name)``.
    """
    loaded: List[Report] = []

    for raw_path in reports_path_list:
        # '**' together with recursive=True also matches nested sub-directories.
        pattern = os.path.join(os.path.normpath(raw_path), '**', '*.json')

        for json_file in glob.glob(pattern, recursive=True):
            try:
                loaded.append(Report.from_json(json_file))
            except Exception as e:
                logger.error(f'Error loading report from {json_file}: {e}')

    loaded.sort(key=lambda item: (item.model_name, item.dataset_name))
    return loaded
|
|
|
|
|
|
def get_data_frame(report_list: List[Report],
                   flatten_metrics: bool = True,
                   flatten_categories: bool = True,
                   add_overall_metric: bool = False) -> pd.DataFrame:
    """Concatenate the per-report DataFrames into a single DataFrame.

    Args:
        report_list: Reports to convert; each must provide ``to_dataframe``.
        flatten_metrics: Forwarded unchanged to ``Report.to_dataframe``.
        flatten_categories: Forwarded unchanged to ``Report.to_dataframe``.
        add_overall_metric: Forwarded unchanged to ``Report.to_dataframe``.

    Returns:
        One DataFrame with the rows of all reports in input order and a fresh
        0..n-1 index. An empty DataFrame is returned when ``report_list`` is
        empty.
    """
    tables = [
        report.to_dataframe(
            flatten_metrics=flatten_metrics,
            flatten_categories=flatten_categories,
            add_overall_metric=add_overall_metric) for report in report_list
    ]

    # pd.concat raises ValueError("No objects to concatenate") on an empty
    # sequence, so hand back an empty frame instead of crashing.
    if not tables:
        return pd.DataFrame()

    return pd.concat(tables, ignore_index=True)
|
|
|
|
|
|
def gen_table(reports_path_list: list[str] = None,
              report_list: list[Report] = None,
              flatten_metrics: bool = True,
              flatten_categories: bool = True,
              add_overall_metric: bool = False) -> str:
    """
    Generates a formatted table from a list of report paths or Report objects.

    Args:
        reports_path_list (list[str], optional): List of directories that are searched
            recursively for ``*.json`` report files. Only used when `report_list` is None.
            Either this or `report_list` must be provided.
        report_list (list[Report], optional): List of Report objects. Takes precedence
            over `reports_path_list` when both are given.
            Either this or `reports_path_list` must be provided.
        flatten_metrics (bool, optional): Whether to flatten the metrics in the output table. Defaults to True.
        flatten_categories (bool, optional): Whether to flatten the categories in the output table. Defaults to True.
        add_overall_metric (bool, optional): Whether to add an overall metric column to the table. Defaults to False.

    Returns:
        str: A string representation of the table in grid format.

    Raises:
        AssertionError: If neither `reports_path_list` nor `report_list` is provided.
    """
    # Raise explicitly rather than using an `assert` statement: asserts are
    # stripped under `python -O`, which would let the missing-argument case
    # fall through to an unrelated error. The exception type and message are
    # kept for callers that already handle AssertionError.
    if reports_path_list is None and report_list is None:
        raise AssertionError('Either reports_path_list or report_list must be provided.')

    if report_list is None:
        report_list = get_report_list(reports_path_list)

    # Generate a DataFrame from the report list
    table = get_data_frame(
        report_list,
        flatten_metrics=flatten_metrics,
        flatten_categories=flatten_categories,
        add_overall_metric=add_overall_metric)

    return tabulate(table, headers=table.columns, tablefmt='grid', showindex=False)
|
|
|
|
|
|
class ReportsRecorder:
    """Stub recorder for reports; it currently holds no behavior.

    The constructor accepts ``oss_url`` and ``endpoint`` but does not store
    or use them.
    """

    # Class-level lists, shared by all instances; both start out empty.
    COMMON_DATASET_PATH = []
    CUSTOM_DATASET_PATH = []

    def __init__(self, oss_url: str = '', endpoint: str = ''):
        """Accept the two settings without using them (no-op)."""
|
|
|
|
|
|
if __name__ == '__main__':
    # Example usage: render one table from every report found under the
    # listed output directories. More directories can be appended, e.g.
    # './outputs/20250107_204445/reports'.
    print(gen_table(reports_path_list=['./outputs/20250117_151926']))

    # ALL VALUES ONLY FOR EXAMPLE
    # +--------------------------+-------------------+-------------+
    # | Model                    | CompetitionMath   | GSM8K       |
    # +==========================+===================+=============+
    # | ZhipuAI_chatglm2-6b-base | 25.0 (acc)        | 30.50 (acc) |
    # +--------------------------+-------------------+-------------+
    # | ZhipuAI_chatglm2-6b      | 30.5 (acc)        | 40.50 (acc) |
    # +--------------------------+-------------------+-------------+
|