sglang_v0.5.2/flashinfer_0.3.1/scripts/task_jit_run_tests_part4.sh

15 lines
425 B
Bash
Executable File

#!/bin/bash
# Test runner (part 4 of the JIT test tasks): installs the package found in the
# current directory in editable mode, then runs a fixed list of pytest files
# one after another, stopping at the first failure.
#
# Environment (each may be overridden by the caller):
#   MAX_JOBS              defaults to the CPU count reported by `nproc`
#   CUDA_VISIBLE_DEVICES  defaults to 0
set -eo pipefail # abort on the first failing command or pipeline stage
set -x           # trace every command as it runs

# Fill in defaults only when the variable is unset or empty. The expansions are
# quoted so the resulting value is not word-split or glob-expanded by the `:`
# no-op, and both variables are exported: without the export a default assigned
# here would remain a plain shell variable and never reach pip or pytest.
: "${MAX_JOBS:=$(nproc)}"
: "${CUDA_VISIBLE_DEVICES:=0}"
export MAX_JOBS CUDA_VISIBLE_DEVICES

pip install -e . -v

export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True # avoid memory fragmentation
pytest -s tests/test_deepseek_mla.py
pytest -s tests/test_group_gemm.py
# NOTE(Zihao): the tile size on the KV dimension still needs to be fixed for
# head_dim=256 on architectures with small shared memory (sm89), so the test
# below stays disabled.
# pytest -s tests/test_batch_attention.py