# Patch for mm-zero3.sh (one hunk, header "@@ -4,7 +4,9 @@").
#
# Removes: the export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:64". It came
#   right after another export of the same variable, so the later assignment
#   replaced the earlier "max_split_size_mb:128,expandable_segments:True,
#   garbage_collection_threshold:0.9" value. With the line gone, the earlier
#   value is the one left exported.
# Adds: OMP_NUM_THREADS, MKL_NUM_THREADS and OPENBLAS_NUM_THREADS, each
#   exported as 8, ahead of the deepspeed invocation.
#
# NOTE(review): the patch text below is flattened onto a single line. The hunk
#   header expects 7 old / 9 new lines, which is more than the non-blank lines
#   visible here, so blank context lines appear to have been lost. Restore the
#   original line breaks before feeding this to `git apply` / `patch`.
# NOTE(review): the thread count 8 is hard-coded and overwrites any value the
#   caller already exported; presumably sized for the CPU cores available per
#   process on these nodes -- confirm against the cluster layout.
diff --git a/mm-zero3.sh b/mm-zero3.sh index 82c008e..0d80e67 100755 --- a/mm-zero3.sh +++ b/mm-zero3.sh @@ -4,7 +4,9 @@ export TORCH_EXTENSIONS_DIR=/tmp/$USER/torch_ext export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:128,expandable_segments:True,garbage_collection_threshold:0.9" -export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:64" +export OMP_NUM_THREADS=8 +export MKL_NUM_THREADS=8 +export OPENBLAS_NUM_THREADS=8 deepspeed --hostfile hostfile \ --num_nodes 6 --num_gpus 4 \