# evalscope/examples/example_eval_mteb.py (103 lines, 2.9 KiB, Python)

# Copyright (c) Alibaba, Inc. and its affiliates.
"""
1. Installation
EvalScope: pip install mteb
2. Run eval task
"""
import torch
from evalscope.run import run_task
from evalscope.utils.logger import get_logger
logger = get_logger()
def run_eval():
    """Run MTEB evaluation through the EvalScope ``RAGEval`` backend.

    Two example configurations are prepared:

    * ``single_stage_cfg`` — evaluates one embedding model
      (``AI-ModelScope/bge-large-zh``) across a suite of Chinese
      MTEB/CMTEB tasks.
    * ``two_stage_cfg`` — evaluates a bi-encoder retriever followed by a
      cross-encoder reranker on a single retrieval task.

    Only the two-stage configuration is executed; swap the commented
    ``run_task`` call to run the single-stage evaluation instead.
    """
    # --- Single-model (one-stage) evaluation configuration -------------
    single_stage_cfg = {  # noqa
        'work_dir': 'outputs',
        'eval_backend': 'RAGEval',
        'eval_config': {
            'tool': 'MTEB',
            'model': [
                {
                    'model_name_or_path': 'AI-ModelScope/bge-large-zh',
                    # If not set, pooling is loaded from the model config;
                    # use `cls` for the bge series of models.
                    'pooling_mode': 'cls',
                    'max_seq_length': 512,
                    'prompt': '为这个句子生成表示以用于检索相关文章:',
                    'encode_kwargs': {
                        'batch_size': 512,
                    },
                }
            ],
            'eval': {
                'tasks': [
                    'TNews',
                    'CLSClusteringS2S',
                    'T2Reranking',
                    'ATEC',
                    'T2Retrieval',
                    'MMarcoRetrieval',
                    'DuRetrieval',
                    'CovidRetrieval',
                    'CmedqaRetrieval',
                    'EcomRetrieval',
                    'MedicalRetrieval',
                    'VideoRetrieval',
                ],
                'verbosity': 2,
                'overwrite_results': True,
                'top_k': 10,
                'limits': 1000,  # don't limit for retrieval task
            },
        },
    }

    # --- Retriever + reranker (two-stage) evaluation configuration -----
    two_stage_cfg = {
        'work_dir': 'outputs',
        'eval_backend': 'RAGEval',
        'eval_config': {
            'tool': 'MTEB',
            'model': [
                {
                    # Stage 1: bi-encoder retriever.
                    'model_name_or_path': 'AI-ModelScope/m3e-base',
                    'is_cross_encoder': False,
                    'max_seq_length': 512,
                    'prompt': '',
                    'model_kwargs': {'torch_dtype': 'auto'},
                    'encode_kwargs': {
                        'batch_size': 64,
                    },
                },
                {
                    # Stage 2: cross-encoder reranker.
                    'model_name_or_path': 'OpenBMB/MiniCPM-Reranker',
                    'is_cross_encoder': True,
                    'max_seq_length': 512,
                    'prompt': '为这个问题生成一个检索用的表示',
                    'model_kwargs': {'torch_dtype': 'auto'},
                    'encode_kwargs': {
                        'batch_size': 32,
                    },
                },
            ],
            'eval': {
                'tasks': ['T2Retrieval'],
                'verbosity': 2,
                'overwrite_results': True,
                'limits': 100,
            },
        },
    }

    # Run the two-stage task; uncomment below to run the one-stage example.
    # run_task(task_cfg=single_stage_cfg)
    run_task(task_cfg=two_stage_cfg)
if __name__ == '__main__':
    # Script entry point: run the example evaluation.
    run_eval()