# Buildkite pipeline for the performance benchmarks.
#
# Flow:
#   1. "Wait for container to be ready" runs wait-for-image.sh; every benchmark
#      step depends on it through the `wait-for-container-image` key.
#   2. "Cleanup H100" prunes Docker data on the H100 queue, independent of
#      every other step.
#   3. One benchmark step per GPU queue (A100, H200, H100), defined twice:
#        - `build.branch == "main"` -> image from vllm-ci-postmerge-repo
#        - `build.branch != "main"` -> image from vllm-ci-test-repo
#      Only one variant of each label is eligible to run in a given build.
steps:
  - label: "Wait for container to be ready"
    key: wait-for-container-image
    agents:
      queue: A100
    plugins:
      - kubernetes:
          podSpec:
            containers:
              - image: badouralix/curl-jq
                command:
                  # Kept as a single list entry (interpreter + script path),
                  # exactly as in the original definition.
                  - sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh

  - label: "Cleanup H100"
    agents:
      queue: H100
    # Explicit null: this step does not wait on any other step.
    depends_on: null
    command: docker system prune -a --volumes --force

  # --- Post-merge benchmarks (main branch only) ------------------------------

  - label: "A100"
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: A100
    depends_on: wait-for-container-image
    if: build.branch == "main"
    plugins:
      - kubernetes:
          podSpec:
            priorityClassName: perf-benchmark
            containers:
              - image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
                command:
                  - bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
                resources:
                  limits:
                    nvidia.com/gpu: 8
                volumeMounts:
                  # Memory-backed volume (see `volumes` below) mounted as /dev/shm.
                  - name: devshm
                    mountPath: /dev/shm
                env:
                  - name: VLLM_USAGE_SOURCE
                    value: ci-test
                  # Token is read from a Kubernetes secret, not stored here.
                  - name: HF_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token-secret
                        key: token
            nodeSelector:
              nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
            volumes:
              - name: devshm
                emptyDir:
                  medium: Memory

  - label: "H200"
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: H200
    depends_on: wait-for-container-image
    if: build.branch == "main"
    plugins:
      - docker#v5.12.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
          command:
            - bash
            - .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
          mount-buildkite-agent: true
          propagate-environment: true
          ipc: host
          # Quoted so the comma-separated list is always read as one string.
          # NOTE(review): Docker documents device selection as
          # `device=4,5,6,7`; confirm the plugin passes this bare list through
          # as device indices rather than as a GPU count.
          gpus: "4,5,6,7"
          volumes:
            - /data/benchmark-hf-cache:/root/.cache/huggingface
          environment:
            # Names only, no values given here.
            - VLLM_USAGE_SOURCE
            - HF_TOKEN

  # - block: "Run H100 Benchmark"
  #   key: block-h100
  #   depends_on: ~

  - label: "H100"
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: H100
    depends_on: wait-for-container-image
    if: build.branch == "main"
    plugins:
      - docker#v5.12.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
          command:
            - bash
            - .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
          mount-buildkite-agent: true
          propagate-environment: true
          ipc: host
          gpus: all  # see CUDA_VISIBLE_DEVICES for actual GPUs used
          volumes:
            - /data/benchmark-hf-cache:/root/.cache/huggingface
          environment:
            # Names only, no values given here.
            - VLLM_USAGE_SOURCE
            - HF_TOKEN

  # --- Premerge benchmarks (any branch other than main) ----------------------

  - label: "A100"
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: A100
    depends_on: wait-for-container-image
    if: build.branch != "main"
    plugins:
      - kubernetes:
          podSpec:
            priorityClassName: perf-benchmark
            containers:
              - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
                command:
                  - bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
                resources:
                  limits:
                    nvidia.com/gpu: 8
                volumeMounts:
                  # Memory-backed volume (see `volumes` below) mounted as /dev/shm.
                  - name: devshm
                    mountPath: /dev/shm
                env:
                  - name: VLLM_USAGE_SOURCE
                    value: ci-test
                  # Token is read from a Kubernetes secret, not stored here.
                  - name: HF_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token-secret
                        key: token
            nodeSelector:
              nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
            volumes:
              - name: devshm
                emptyDir:
                  medium: Memory

  - label: "H200"
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: H200
    depends_on: wait-for-container-image
    if: build.branch != "main"
    plugins:
      - docker#v5.12.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          command:
            - bash
            - .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
          mount-buildkite-agent: true
          propagate-environment: true
          ipc: host
          # Quoted so the comma-separated list is always read as one string.
          # NOTE(review): Docker documents device selection as
          # `device=4,5,6,7`; confirm the plugin passes this bare list through
          # as device indices rather than as a GPU count.
          gpus: "4,5,6,7"
          volumes:
            - /data/benchmark-hf-cache:/root/.cache/huggingface
          environment:
            # Names only, no values given here.
            - VLLM_USAGE_SOURCE
            - HF_TOKEN

  # - block: "Run H100 Benchmark"
  #   key: block-h100
  #   depends_on: ~

  - label: "H100"
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: H100
    depends_on: wait-for-container-image
    if: build.branch != "main"
    plugins:
      - docker#v5.12.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          command:
            - bash
            - .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
          mount-buildkite-agent: true
          propagate-environment: true
          ipc: host
          gpus: all  # see CUDA_VISIBLE_DEVICES for actual GPUs used
          volumes:
            - /data/benchmark-hf-cache:/root/.cache/huggingface
          environment:
            # Names only, no values given here.
            - VLLM_USAGE_SOURCE
            - HF_TOKEN