[
    {
        "test_name": "llama8B_tp1_genai_perf",
        "qps_list": [4,8,16,32],
        "common_parameters": {
            "model": "meta-llama/Meta-Llama-3-8B-Instruct",
            "tp": 1,
            "port": 8000,
            "num_prompts": 500,
            "reuse_server": false
        },
        "vllm_server_parameters": {
            "disable_log_stats": "",
            "disable_log_requests": "",
            "gpu_memory_utilization": 0.9,
            "num_scheduler_steps": 10,
            "max_num_seqs": 512,
            "dtype": "bfloat16"
        },
        "genai_perf_input_parameters": {
        }
    }
]