# evalscope_v0.17.0/evalscope.0.17.0/evalscope/report/combinator.py

# Copyright (c) Alibaba, Inc. and its affiliates.

import glob
import os
import pandas as pd
from tabulate import tabulate
from typing import List, Tuple

from evalscope.report.utils import Report
from evalscope.utils.logger import get_logger

# Combine and generate table for reports of LLMs.
# Module-level logger used by the report-loading helpers in this file.
logger = get_logger()


def get_report_list(reports_path_list: List[str]) -> List[Report]:
    """Load every JSON report found beneath the given paths.

    Each path is normalized and searched recursively for ``*.json`` files.
    Every match is parsed with ``Report.from_json``; a file that fails to
    load is logged as an error and skipped, so one bad file does not abort
    the whole scan.

    Args:
        reports_path_list: Paths to scan recursively for JSON files.

    Returns:
        The successfully loaded reports, sorted by ``model_name`` and then
        ``dataset_name``.
    """
    loaded: List[Report] = []
    for root in reports_path_list:
        # Match JSON files at any depth below the normalized path.
        pattern = os.path.join(os.path.normpath(root), '**', '*.json')
        for file_path in glob.glob(pattern, recursive=True):
            try:
                loaded.append(Report.from_json(file_path))
            except Exception as e:
                # Best-effort loading: report the failure and keep going.
                logger.error(f'Error loading report from {file_path}: {e}')
    loaded.sort(key=lambda item: (item.model_name, item.dataset_name))
    return loaded


def get_data_frame(report_list: List[Report],
                   flatten_metrics: bool = True,
                   flatten_categories: bool = True,
                   add_overall_metric: bool = False) -> pd.DataFrame:
    """Convert each report to a DataFrame and stack them into one table.

    Args:
        report_list: Reports to combine; ``to_dataframe`` is called on each.
        flatten_metrics: Forwarded unchanged to ``Report.to_dataframe``.
        flatten_categories: Forwarded unchanged to ``Report.to_dataframe``.
        add_overall_metric: Forwarded unchanged to ``Report.to_dataframe``.

    Returns:
        A single DataFrame holding the rows of every report, concatenated in
        input order with a fresh integer index.

    Raises:
        ValueError: If ``report_list`` contains no reports.
    """
    tables = [
        report.to_dataframe(
            flatten_metrics=flatten_metrics,
            flatten_categories=flatten_categories,
            add_overall_metric=add_overall_metric) for report in report_list
    ]
    if not tables:
        # pd.concat raises an opaque ValueError ('No objects to concatenate')
        # for an empty list; raise the same exception type with a message
        # that names the actual cause.
        raise ValueError('No reports to combine: report_list is empty.')
    return pd.concat(tables, ignore_index=True)


def gen_table(reports_path_list: list[str] = None,
              report_list: list[Report] = None,
              flatten_metrics: bool = True,
              flatten_categories: bool = True,
              add_overall_metric: bool = False) -> str:
    """
    Render reports as a grid-formatted text table.

    Reports are taken from `report_list` when it is given; otherwise they are
    loaded from `reports_path_list` via `get_report_list`. If both are
    supplied, `report_list` wins and the paths are not read.

    Args:
        reports_path_list (list[str], optional): Paths handed to `get_report_list`
            to load reports from. Required when `report_list` is not provided.
        report_list (list[Report], optional): Already-loaded Report objects.
            Required when `reports_path_list` is not provided.
        flatten_metrics (bool, optional): Forwarded to `get_data_frame`. Defaults to True.
        flatten_categories (bool, optional): Forwarded to `get_data_frame`. Defaults to True.
        add_overall_metric (bool, optional): Forwarded to `get_data_frame`. Defaults to False.

    Returns:
        str: The combined table rendered by `tabulate` in 'grid' format, without the index column.

    Raises:
        AssertionError: If both `reports_path_list` and `report_list` are None.
    """
    assert not (reports_path_list is None and report_list is None), \
        'Either reports_path_list or report_list must be provided.'
    # Only touch the filesystem when the caller did not pass reports directly.
    reports = get_report_list(reports_path_list) if report_list is None else report_list
    frame = get_data_frame(
        reports,
        flatten_metrics=flatten_metrics,
        flatten_categories=flatten_categories,
        add_overall_metric=add_overall_metric)
    return tabulate(frame, headers=frame.columns, tablefmt='grid', showindex=False)


class ReportsRecorder:
    """Placeholder recorder for reports; it currently implements no behavior."""

    # Class-level lists, both empty by default and shared by all instances.
    COMMON_DATASET_PATH = []
    CUSTOM_DATASET_PATH = []

    def __init__(self, oss_url: str = '', endpoint: str = ''):
        """Accept ``oss_url`` and ``endpoint`` without storing or using them."""


if __name__ == '__main__':
    # Manual demo: print the combined table for a local output directory.
    example_report_dirs = ['./outputs/20250117_151926']
    # Another directory that could be added to the list:
    # './outputs/20250107_204445/reports'

    print(gen_table(reports_path_list=example_report_dirs))

    # ALL VALUES ONLY FOR EXAMPLE
    # +--------------------------+-------------------+-------------+
    # | Model                    | CompetitionMath   | GSM8K       |
    # +==========================+===================+=============+
    # | ZhipuAI_chatglm2-6b-base | 25.0 (acc)        | 30.50 (acc) |
    # +--------------------------+-------------------+-------------+
    # | ZhipuAI_chatglm2-6b      | 30.5 (acc)        | 40.50 (acc) |
    # +--------------------------+-------------------+-------------+