# Copyright (c) Alibaba, Inc. and its affiliates.
import os

from dotenv import dotenv_values

env = dotenv_values('.env')

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import unittest

from evalscope.perf.main import run_perf_benchmark
from evalscope.utils import test_level_list


class TestPerf(unittest.TestCase):

    def setUp(self) -> None:
        pass

    def tearDown(self) -> None:
        pass
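
    # Chat-completions benchmark against a local OpenAI-compatible endpoint, non-streaming.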
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf(self):
        task_cfg = {
            'url': 'http://127.0.0.1:8001/v1/chat/completions',
            'parallel': 1,
            'model': 'qwen2.5',
            'number': 15,
            'api': 'openai',
            'dataset': 'openqa',
            # 'stream': True,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
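
    # Same chat-completions benchmark with streaming responses enabled.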
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_stream(self):
        task_cfg = {
            'url': 'http://127.0.0.1:8000/v1/chat/completions',
            'parallel': 1,
            'model': 'qwen2.5',
            'number': 15,
            'api': 'openai',
            'dataset': 'openqa',
            'stream': True,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
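
    # Speed benchmark via the completions endpoint, with min_tokens/max_tokens pinned to 2048.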
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_speed_benchmark(self):
        task_cfg = {
            'url': 'http://127.0.0.1:8001/v1/completions',
            'parallel': 1,
            'model': 'qwen2.5',
            'api': 'openai',
            'dataset': 'speed_benchmark',
            'min_tokens': 2048,
            'max_tokens': 2048,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
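
    # Benchmarks a locally loaded model ('api': 'local') instead of an HTTP endpoint.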
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_local(self):
        task_cfg = {
            'parallel': 1,
            'model': 'Qwen/Qwen2.5-0.5B-Instruct',
            'number': 5,
            'api': 'local',
            'dataset': 'openqa',
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
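
    # Local-model benchmark with streaming enabled.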
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_local_stream(self):
        task_cfg = {
            'parallel': 1,
            'model': 'Qwen/Qwen2.5-0.5B-Instruct',
            'number': 5,
            'api': 'local',
            'dataset': 'openqa',
            'stream': True,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
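
    # Speed benchmark with the model served by a local vLLM backend ('api': 'local_vllm').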
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_local_speed_benchmark(self):
        task_cfg = {
            'parallel': 1,
            'model': 'Qwen/Qwen2.5-0.5B-Instruct',
            'api': 'local_vllm',
            'dataset': 'speed_benchmark',
            'min_tokens': 2048,
            'max_tokens': 2048,
            'debug': True,
        }
        run_perf_benchmark(task_cfg)
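
    # Random-prompt benchmark configured via Arguments: fixed 1024-token prompts and outputs,
    # with ignore_eos passed through extra_args.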
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_local_random(self):
        from evalscope.perf.arguments import Arguments
        task_cfg = Arguments(
            parallel=20,
            model='Qwen3-1.7B',
            url='http://127.0.0.1:8801/v1/completions',
            api='openai',
            dataset='random',
            min_tokens=1024,
            max_tokens=1024,
            prefix_length=0,
            min_prompt_length=1024,
            max_prompt_length=1024,
            number=20,
            tokenizer_path='Qwen/Qwen2.5-0.5B-Instruct',
            seed=None,
            extra_args={'ignore_eos': True}
        )
        metrics_result, percentile_result = run_perf_benchmark(task_cfg)
        print(metrics_result)
        print(percentile_result)
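
    # Sweeps multiple parallel/number settings against the DashScope OpenAI-compatible
    # endpoint; the API key is read from the .env file loaded at module import.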
    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_perf_multi_parallel(self):
        from evalscope.perf.arguments import Arguments
        task_cfg = Arguments(
            parallel=[1, 2],
            number=[2, 5],
            model='qwen2.5-7b-instruct',
            url='https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
            api_key=env.get('DASHSCOPE_API_KEY'),
            api='openai',
            dataset='random',
            min_tokens=100,
            max_tokens=100,
            prefix_length=0,
            min_prompt_length=1024,
            max_prompt_length=1024,
            tokenizer_path='Qwen/Qwen2.5-0.5B-Instruct',
            seed=None,
            extra_args={'ignore_eos': True}
        )
        metrics_result, percentile_result = run_perf_benchmark(task_cfg)
        print(metrics_result)
        print(percentile_result)


if __name__ == '__main__':
    unittest.main(buffer=False)