vllm/configs at 38d813617c82722a00822b33ce62556f0e9a06df - vllm

hailin 38d813617c first commit	2025-08-03 20:28:19 +08:00
..
DeepSeek-V2-Lite-Chat.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-8B-Instruct-FP8.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-8B-Instruct.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-8B-QQQ.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3-70B-Instruct.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3.2-1B-Instruct-FP8-compressed-tensors.yaml	first commit	2025-08-03 20:28:19 +08:00
Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml	first commit	2025-08-03 20:28:19 +08:00
Minitron-4B-Base-FP8.yaml	first commit	2025-08-03 20:28:19 +08:00
Mixtral-8x7B-Instruct-v0.1-FP8.yaml	first commit	2025-08-03 20:28:19 +08:00
Mixtral-8x7B-Instruct-v0.1.yaml	first commit	2025-08-03 20:28:19 +08:00
Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml	first commit	2025-08-03 20:28:19 +08:00
Qwen1.5-MoE-W4A16-compressed-tensors.yaml	first commit	2025-08-03 20:28:19 +08:00
Qwen2-1.5B-Instruct-FP8W8.yaml	first commit	2025-08-03 20:28:19 +08:00
Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml	first commit	2025-08-03 20:28:19 +08:00
Qwen2-1.5B-Instruct-W8A16-compressed-tensors.yaml	first commit	2025-08-03 20:28:19 +08:00
Qwen2-57B-A14-Instruct.yaml	first commit	2025-08-03 20:28:19 +08:00
Qwen2.5-1.5B-Instruct.yaml	first commit	2025-08-03 20:28:19 +08:00
Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml	first commit	2025-08-03 20:28:19 +08:00
SparseLlama3.1_2of4_fp8_compressed.yaml	first commit	2025-08-03 20:28:19 +08:00
models-large.txt	first commit	2025-08-03 20:28:19 +08:00
models-small.txt	first commit	2025-08-03 20:28:19 +08:00

DeepSeek-V2-Lite-Chat.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-8B-Instruct-FP8.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-8B-Instruct.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-8B-QQQ.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3-70B-Instruct.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3.2-1B-Instruct-FP8-compressed-tensors.yaml

first commit

2025-08-03 20:28:19 +08:00

Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml

first commit

2025-08-03 20:28:19 +08:00

Minitron-4B-Base-FP8.yaml

first commit

2025-08-03 20:28:19 +08:00

Mixtral-8x7B-Instruct-v0.1-FP8.yaml

first commit

2025-08-03 20:28:19 +08:00

Mixtral-8x7B-Instruct-v0.1.yaml

first commit

2025-08-03 20:28:19 +08:00

Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml

first commit

2025-08-03 20:28:19 +08:00

Qwen1.5-MoE-W4A16-compressed-tensors.yaml

first commit

2025-08-03 20:28:19 +08:00

Qwen2-1.5B-Instruct-FP8W8.yaml

first commit

2025-08-03 20:28:19 +08:00

Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml

first commit

2025-08-03 20:28:19 +08:00

Qwen2-1.5B-Instruct-W8A16-compressed-tensors.yaml

first commit

2025-08-03 20:28:19 +08:00

Qwen2-57B-A14-Instruct.yaml

first commit

2025-08-03 20:28:19 +08:00

Qwen2.5-1.5B-Instruct.yaml

first commit

2025-08-03 20:28:19 +08:00

Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml

first commit

2025-08-03 20:28:19 +08:00

SparseLlama3.1_2of4_fp8_compressed.yaml

first commit

2025-08-03 20:28:19 +08:00

models-large.txt

first commit

2025-08-03 20:28:19 +08:00

models-small.txt

first commit

2025-08-03 20:28:19 +08:00