# Copyright (c) Alibaba, Inc. and its affiliates.
import os

from dotenv import dotenv_values

env = dotenv_values('.env')
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import unittest

from evalscope.perf.main import run_perf_benchmark
from evalscope.utils import test_level_list


class TestPerf(unittest.TestCase):
    def setUp(self) -> None:
        pass

    def tearDown(self) -> None:
        pass
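
    # Basic run: OpenAI-compatible chat/completions endpoint, non-streaming.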
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf(self):
        task_cfg = {
            'url': 'http://127.0.0.1:8001/v1/chat/completions',
            'parallel': 1,
            'model': 'qwen2.5',
            'number': 15,
            'api': 'openai',
            'dataset': 'openqa',
            # 'stream': True,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
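
    # Same scenario as above, but with streaming responses enabled.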
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_stream(self):
        task_cfg = {
            'url': 'http://127.0.0.1:8000/v1/chat/completions',
            'parallel': 1,
            'model': 'qwen2.5',
            'number': 15,
            'api': 'openai',
            'dataset': 'openqa',
            'stream': True,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
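
    # Speed benchmark against a /v1/completions endpoint; min/max_tokens pin the output length to 2048 tokens.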
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_speed_benchmark(self):
        task_cfg = {
            'url': 'http://127.0.0.1:8001/v1/completions',
            'parallel': 1,
            'model': 'qwen2.5',
            'api': 'openai',
            'dataset': 'speed_benchmark',
            'min_tokens': 2048,
            'max_tokens': 2048,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
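
    # Run against a locally loaded model ('api': 'local') instead of an HTTP endpoint.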
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_local(self):
        task_cfg = {
            'parallel': 1,
            'model': 'Qwen/Qwen2.5-0.5B-Instruct',
            'number': 5,
            'api': 'local',
            'dataset': 'openqa',
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
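
    # Local model again, with streaming enabled.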
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_local_stream(self):
        task_cfg = {
            'parallel': 1,
            'model': 'Qwen/Qwen2.5-0.5B-Instruct',
            'number': 5,
            'api': 'local',
            'dataset': 'openqa',
            'stream': True,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
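
    # Speed benchmark using the local vLLM backend ('api': 'local_vllm').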
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_local_speed_benchmark(self):
        task_cfg = {
            'parallel': 1,
            'model': 'Qwen/Qwen2.5-0.5B-Instruct',
            'api': 'local_vllm',
            'dataset': 'speed_benchmark',
            'min_tokens': 2048,
            'max_tokens': 2048,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
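
    # Random-prompt dataset with fixed 1024-token prompts and outputs, configured via the
    # Arguments class rather than a plain dict; extra_args are forwarded with the request
    # (here 'ignore_eos', presumably so outputs run to the full max_tokens).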
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_local_random(self):
        from evalscope.perf.arguments import Arguments

        task_cfg = Arguments(
            parallel=20,
            model='Qwen3-1.7B',
            url='http://127.0.0.1:8801/v1/completions',
            api='openai',
            dataset='random',
            min_tokens=1024,
            max_tokens=1024,
            prefix_length=0,
            min_prompt_length=1024,
            max_prompt_length=1024,
            number=20,
            tokenizer_path='Qwen/Qwen2.5-0.5B-Instruct',
            seed=None,
            extra_args={'ignore_eos': True}
        )
        metrics_result, percentile_result = run_perf_benchmark(task_cfg)
        print(metrics_result)
        print(percentile_result)
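
    # Exercise list-valued parallel/number settings (multiple load levels) against the
    # DashScope OpenAI-compatible endpoint; the API key is read from the local .env file.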
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_multi_parallel(self):
        from evalscope.perf.arguments import Arguments

        task_cfg = Arguments(
            parallel=[1, 2],
            number=[2, 5],
            model='qwen2.5-7b-instruct',
            url='https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
            api_key=env.get('DASHSCOPE_API_KEY'),
            api='openai',
            dataset='random',
            min_tokens=100,
            max_tokens=100,
            prefix_length=0,
            min_prompt_length=1024,
            max_prompt_length=1024,
            tokenizer_path='Qwen/Qwen2.5-0.5B-Instruct',
            seed=None,
            extra_args={'ignore_eos': True}
        )
        metrics_result, percentile_result = run_perf_benchmark(task_cfg)
        print(metrics_result)
        print(percentile_result)


if __name__ == '__main__':
    unittest.main(buffer=False)