evalscope_v0.17.0/evalscope.0.17.0/evalscope/backend/rag_eval/backend_manager.py

import os
from typing import Optional, Union
from evalscope.backend.base import BackendManager
from evalscope.utils.import_utils import is_module_installed
from evalscope.utils.io_utils import get_valid_list
from evalscope.utils.logger import get_logger
logger = get_logger()


class Tools:
    """Names of the supported RAG evaluation tools."""
    MTEB = 'mteb'
    RAGAS = 'ragas'
    CLIP_BENCHMARK = 'clip_benchmark'

class RAGEvalBackendManager(BackendManager):

    def __init__(self, config: Union[str, dict], **kwargs):
        """BackendManager for RAG evaluation.

        Args:
            config (Union[str, dict]): path to a configuration YAML file or a configuration dictionary.
        """
        super().__init__(config, **kwargs)

    @staticmethod
    def _check_env(module_name: str):
        if is_module_installed(module_name):
            logger.info(f'`{module_name}` is installed')
        else:
            logger.error(f'Please install `{module_name}` first')

    @staticmethod
    def run_mteb(model_args, eval_args):
        from evalscope.backend.rag_eval.cmteb import EvalArguments, ModelArguments, one_stage_eval, two_stage_eval

        if len(model_args) > 2:
            raise ValueError('At most two models are supported (one-stage or two-stage evaluation)')

        # Convert arguments to dataclasses, then to dictionaries
        model_args_list = [ModelArguments(**args).to_dict() for args in model_args]
        eval_args = EvalArguments(**eval_args).to_dict()

        if len(model_args_list) == 1:
            one_stage_eval(model_args_list[0], eval_args)
        else:  # len(model_args_list) == 2
            two_stage_eval(model_args_list[0], model_args_list[1], eval_args)
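
    # Illustrative call (a sketch only; the concrete fields accepted inside each dict are
    # defined by ModelArguments / EvalArguments in evalscope.backend.rag_eval.cmteb and are
    # not reproduced here):
    #
    #   RAGEvalBackendManager.run_mteb(
    #       model_args=[{...}],          # one entry -> one_stage_eval
    #       eval_args={...},
    #   )
    #   RAGEvalBackendManager.run_mteb(
    #       model_args=[{...}, {...}],   # two entries -> two_stage_eval
    #       eval_args={...},
    #   )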

    @staticmethod
    def run_ragas(testset_args, eval_args):
        from evalscope.backend.rag_eval.ragas import EvaluationArguments, TestsetGenerationArguments, rag_eval
        from evalscope.backend.rag_eval.ragas.tasks import generate_testset

        # Optional test-set generation step
        if testset_args is not None:
            if isinstance(testset_args, dict):
                generate_testset(TestsetGenerationArguments(**testset_args))
            elif isinstance(testset_args, TestsetGenerationArguments):
                generate_testset(testset_args)
            else:
                raise ValueError('Please provide valid testset generation arguments.')

        # Optional evaluation step
        if eval_args is not None:
            if isinstance(eval_args, dict):
                rag_eval(EvaluationArguments(**eval_args))
            elif isinstance(eval_args, EvaluationArguments):
                rag_eval(eval_args)
            else:
                raise ValueError('Please provide valid evaluation arguments.')
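
    # Illustrative call (a sketch; the concrete fields are defined by
    # TestsetGenerationArguments / EvaluationArguments in evalscope.backend.rag_eval.ragas):
    #
    #   RAGEvalBackendManager.run_ragas(
    #       testset_args={...},   # or None to skip test-set generation
    #       eval_args={...},      # or None to skip evaluation
    #   )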

    @staticmethod
    def run_clip_benchmark(args):
        from evalscope.backend.rag_eval.clip_benchmark import Arguments, evaluate

        evaluate(Arguments(**args))

    def run(self, *args, **kwargs):
        tool = self.config_d.pop('tool')

        if tool.lower() == Tools.MTEB:
            self._check_env('mteb')
            model_args = self.config_d['model']
            eval_args = self.config_d['eval']
            self.run_mteb(model_args, eval_args)
        elif tool.lower() == Tools.RAGAS:
            self._check_env('ragas')
            testset_args = self.config_d.get('testset_generation', None)
            eval_args = self.config_d.get('eval', None)
            self.run_ragas(testset_args, eval_args)
        elif tool.lower() == Tools.CLIP_BENCHMARK:
            self._check_env('webdataset')
            self.run_clip_benchmark(self.config_d['eval'])
        else:
            raise ValueError(f'Unknown tool: {tool}')
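

# Minimal usage sketch (illustrative only; the keys inside 'model', 'eval' and
# 'testset_generation' depend on the argument classes of the selected tool and are
# not reproduced here):
#
#   config = {
#       'tool': 'mteb',        # or 'ragas' / 'clip_benchmark'
#       'model': [{...}],      # read by run() for the mteb tool
#       'eval': {...},
#   }
#   RAGEvalBackendManager(config).run()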