103 lines
2.9 KiB
Python
103 lines
2.9 KiB
Python
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
|
|
"""
|
|
1. Installation
|
|
EvalScope: pip install mteb
|
|
|
|
2. Run eval task
|
|
"""
|
|
import torch
|
|
|
|
from evalscope.run import run_task
|
|
from evalscope.utils.logger import get_logger
|
|
|
|
# Module-level logger obtained from EvalScope's logging helper.
logger = get_logger()
|
|
|
|
|
|
def run_eval():
    """Run an MTEB evaluation through the ``RAGEval`` backend.

    Two task configurations are assembled here:

    * a one-stage configuration with a single model entry
      (``AI-ModelScope/bge-large-zh``) and a list of twelve tasks;
    * a two-stage configuration with two model entries, the first with
      ``is_cross_encoder`` set to False and the second with it set to True,
      evaluated on ``T2Retrieval`` only.

    Only the two-stage configuration is handed to ``run_task``. The
    one-stage configuration is built but intentionally left unused so that
    it can be swapped in by hand.
    """
    # ---- One-stage configuration: a single model entry ----
    bge_model = {
        'model_name_or_path': 'AI-ModelScope/bge-large-zh',
        # If not set, the pooling mode is loaded from the model config;
        # use `cls` for bge series models.
        'pooling_mode': 'cls',
        'max_seq_length': 512,
        'prompt': '为这个句子生成表示以用于检索相关文章:',
        'encode_kwargs': {'batch_size': 512},
    }
    one_stage_tasks = [
        'TNews',
        'CLSClusteringS2S',
        'T2Reranking',
        'ATEC',
        'T2Retrieval',
        'MMarcoRetrieval',
        'DuRetrieval',
        'CovidRetrieval',
        'CmedqaRetrieval',
        'EcomRetrieval',
        'MedicalRetrieval',
        'VideoRetrieval',
    ]
    # Kept as a ready-made alternative; not executed by default.
    single_stage_cfg = {  # noqa
        'work_dir': 'outputs',
        'eval_backend': 'RAGEval',
        'eval_config': {
            'tool': 'MTEB',
            'model': [bge_model],
            'eval': {
                'tasks': one_stage_tasks,
                'verbosity': 2,
                'overwrite_results': True,
                'top_k': 10,
                # Original author's note: don't limit for retrieval task.
                'limits': 1000,
            },
        },
    }

    # ---- Two-stage configuration: two model entries ----
    first_stage_model = {
        'model_name_or_path': 'AI-ModelScope/m3e-base',
        'is_cross_encoder': False,
        'max_seq_length': 512,
        'prompt': '',
        'model_kwargs': {'torch_dtype': 'auto'},
        'encode_kwargs': {'batch_size': 64},
    }
    second_stage_model = {
        'model_name_or_path': 'OpenBMB/MiniCPM-Reranker',
        'is_cross_encoder': True,
        'max_seq_length': 512,
        'prompt': '为这个问题生成一个检索用的表示',
        'model_kwargs': {'torch_dtype': 'auto'},
        'encode_kwargs': {'batch_size': 32},
    }
    pipeline_cfg = {
        'work_dir': 'outputs',
        'eval_backend': 'RAGEval',
        'eval_config': {
            'tool': 'MTEB',
            'model': [first_stage_model, second_stage_model],
            'eval': {
                'tasks': ['T2Retrieval'],
                'verbosity': 2,
                'overwrite_results': True,
                'limits': 100,
            },
        },
    }

    # Run the task. To evaluate the one-stage setup instead, pass
    # `single_stage_cfg` as `task_cfg` here.
    run_task(task_cfg=pipeline_cfg)
|
|
|
|
|
|
if __name__ == '__main__':
    # Executed as a script: start the evaluation.
    run_eval()
|