45 lines
1.6 KiB
Python
45 lines
1.6 KiB
Python
from evalscope.collections import CollectionSchema, DatasetInfo, WeightedSampler
|
|
from evalscope.utils.io_utils import dump_jsonl_data
|
|
|
|
# define the schema
|
|
schema = CollectionSchema(name='DeepSeekDistill', datasets=[
|
|
CollectionSchema(name='Math', datasets=[
|
|
DatasetInfo(name='math_500', weight=1, task_type='math', tags=['en'], args={'few_shot_num': 0}),
|
|
DatasetInfo(name='gpqa', weight=1, task_type='math', tags=['en'], args={'subset_list': ['gpqa_diamond'], 'few_shot_num': 0}),
|
|
DatasetInfo(name='aime24', weight=1, task_type='math', tags=['en'], args={'few_shot_num': 0}),
|
|
])
|
|
])
|
|
|
|
# get the mixed data
|
|
mixed_data = WeightedSampler(schema).sample(100000) # set a large number to ensure all datasets are sampled
|
|
# dump the mixed data to a jsonl file
|
|
dump_jsonl_data(mixed_data, 'outputs/deepseek_distill_test.jsonl')
|
|
|
|
from evalscope import TaskConfig, run_task
|
|
# start the task
|
|
from evalscope.constants import EvalType
|
|
|
|
task_cfg = TaskConfig(
|
|
model='DeepSeek-R1-Distill-Qwen-1.5B',
|
|
api_url='http://127.0.0.1:8801/v1/chat/completions',
|
|
api_key='EMPTY',
|
|
eval_type=EvalType.SERVICE,
|
|
datasets=[
|
|
'data_collection',
|
|
],
|
|
dataset_args={
|
|
'data_collection': {
|
|
'local_path': 'outputs/deepseek_distill_test.jsonl'
|
|
}
|
|
},
|
|
eval_batch_size=32,
|
|
generation_config={
|
|
'max_tokens': 30000, # avoid exceed max length
|
|
'temperature': 0.6,
|
|
'top_p': 0.95,
|
|
'n': 1, # num of responses per sample, note that lmdeploy only supports n=1
|
|
},
|
|
)
|
|
|
|
run_task(task_cfg=task_cfg)
|