vllm/vllm_v0.10.0/examples/offline_inference
hailin 38d813617c first commit 2025-08-03 20:28:19 +08:00
..
basic first commit 2025-08-03 20:28:19 +08:00
disaggregated-prefill-v1 first commit 2025-08-03 20:28:19 +08:00
openai_batch first commit 2025-08-03 20:28:19 +08:00
profiling_tpu first commit 2025-08-03 20:28:19 +08:00
qwen2_5_omni first commit 2025-08-03 20:28:19 +08:00
audio_language.py first commit 2025-08-03 20:28:19 +08:00
automatic_prefix_caching.py first commit 2025-08-03 20:28:19 +08:00
batch_llm_inference.py first commit 2025-08-03 20:28:19 +08:00
chat_with_tools.py first commit 2025-08-03 20:28:19 +08:00
context_extension.py first commit 2025-08-03 20:28:19 +08:00
convert_model_to_seq_cls.py first commit 2025-08-03 20:28:19 +08:00
data_parallel.py first commit 2025-08-03 20:28:19 +08:00
disaggregated_prefill.py first commit 2025-08-03 20:28:19 +08:00
embed_jina_embeddings_v3.py first commit 2025-08-03 20:28:19 +08:00
embed_matryoshka_fy.py first commit 2025-08-03 20:28:19 +08:00
encoder_decoder.py first commit 2025-08-03 20:28:19 +08:00
encoder_decoder_multimodal.py first commit 2025-08-03 20:28:19 +08:00
llm_engine_example.py first commit 2025-08-03 20:28:19 +08:00
load_sharded_state.py first commit 2025-08-03 20:28:19 +08:00
lora_with_quantization_inference.py first commit 2025-08-03 20:28:19 +08:00
metrics.py first commit 2025-08-03 20:28:19 +08:00
mistral-small.py first commit 2025-08-03 20:28:19 +08:00
mlpspeculator.py first commit 2025-08-03 20:28:19 +08:00
multilora_inference.py first commit 2025-08-03 20:28:19 +08:00
neuron.py first commit 2025-08-03 20:28:19 +08:00
neuron_eagle.py first commit 2025-08-03 20:28:19 +08:00
neuron_int8_quantization.py first commit 2025-08-03 20:28:19 +08:00
neuron_multimodal.py first commit 2025-08-03 20:28:19 +08:00
neuron_speculation.py first commit 2025-08-03 20:28:19 +08:00
prefix_caching.py first commit 2025-08-03 20:28:19 +08:00
prithvi_geospatial_mae.py first commit 2025-08-03 20:28:19 +08:00
profiling.py first commit 2025-08-03 20:28:19 +08:00
prompt_embed_inference.py first commit 2025-08-03 20:28:19 +08:00
qwen3_reranker.py first commit 2025-08-03 20:28:19 +08:00
qwen_1m.py first commit 2025-08-03 20:28:19 +08:00
reproducibility.py first commit 2025-08-03 20:28:19 +08:00
rlhf.py first commit 2025-08-03 20:28:19 +08:00
rlhf_colocate.py first commit 2025-08-03 20:28:19 +08:00
rlhf_utils.py first commit 2025-08-03 20:28:19 +08:00
save_sharded_state.py first commit 2025-08-03 20:28:19 +08:00
simple_profiling.py first commit 2025-08-03 20:28:19 +08:00
skip_loading_weights_in_engine_init.py first commit 2025-08-03 20:28:19 +08:00
spec_decode.py first commit 2025-08-03 20:28:19 +08:00
structured_outputs.py first commit 2025-08-03 20:28:19 +08:00
torchrun_example.py first commit 2025-08-03 20:28:19 +08:00
tpu.py first commit 2025-08-03 20:28:19 +08:00
vision_language.py first commit 2025-08-03 20:28:19 +08:00
vision_language_multi_image.py first commit 2025-08-03 20:28:19 +08:00
vision_language_pooling.py first commit 2025-08-03 20:28:19 +08:00