/*
 * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto. Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 *
 * See COPYRIGHT.txt for license information
 */

#include <stdio.h>
#include <stdlib.h>

#include "mpi.h"
#include "nvshmem.h"
#include "nvshmemx.h"

/*
 * Evaluate a CUDA runtime call once; on any status other than cudaSuccess,
 * print the file/line and the CUDA error string to stderr and exit(-1).
 * Any earlier definition of CUDA_CHECK is discarded first.
 */
#undef CUDA_CHECK
#define CUDA_CHECK(stmt)                                                          \
    do {                                                                          \
        cudaError_t result = (stmt);                                              \
        if (cudaSuccess != result) {                                              \
            fprintf(stderr, "[%s:%d] cuda failed with %s \n", __FILE__, __LINE__, \
                    cudaGetErrorString(result));                                  \
            exit(-1);                                                             \
        }                                                                         \
    } while (0)

/*
 * Evaluate an MPI call once; on any status other than MPI_SUCCESS, print the
 * file/line and the numeric error code to stderr and exit(-1).
 */
#define MPI_CHECK(stmt)                                                                    \
    do {                                                                                   \
        int result = (stmt);                                                               \
        if (MPI_SUCCESS != result) {                                                       \
            fprintf(stderr, "[%s:%d] MPI failed with error %d \n", __FILE__, __LINE__,     \
                    result);                                                               \
            exit(-1);                                                                      \
        }                                                                                  \
    } while (0)

/*
 * Ring shift: each PE writes its own PE index into `target` on the next PE,
 * (mype + 1) % npes, using a single-element NVSHMEM put.
 *
 * target : address passed as the destination of the put; main() supplies a
 *          buffer obtained from nvshmem_malloc.
 * mype   : index of the calling PE (also the value that is written).
 * npes   : total number of PEs; must be > 0 because it is used as a modulus.
 *
 * There is no thread-index guard: every launched thread issues the same put.
 * main() launches this kernel with <<<1, 1>>>.
 */
__global__ void simple_shift(int *target, int mype, int npes) {
    int peer = (mype + 1) % npes;

    nvshmem_int_p(target, mype, peer);
}

/*
 * Bootstraps NVSHMEM on top of MPI_COMM_WORLD, runs the ring-shift kernel once
 * on a one-int symmetric buffer, then tears everything down.
 *
 * Returns 0 on success. Any failed MPI/CUDA call or a failed symmetric
 * allocation terminates the process with exit(-1).
 */
int main(int c, char *v[]) {
    int *target;
    int rank, nranks;
    MPI_Comm mpi_comm;
    nvshmemx_init_attr_t attr = NVSHMEMX_INIT_ATTR_INITIALIZER;
    int mype, npes, mype_node;

    MPI_CHECK(MPI_Init(&c, &v));
    MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
    MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &nranks));

    // Hand the MPI communicator to NVSHMEM; attr stores a pointer to mpi_comm,
    // so mpi_comm must remain alive across the init call.
    mpi_comm = MPI_COMM_WORLD;
    attr.mpi_comm = &mpi_comm;
    nvshmemx_init_attr(NVSHMEMX_INIT_WITH_MPI_COMM, &attr);

    mype = nvshmem_my_pe();
    npes = nvshmem_n_pes();
    mype_node = nvshmem_team_my_pe(NVSHMEMX_TEAM_NODE);

    // application picks the device each PE will use
    CUDA_CHECK(cudaSetDevice(mype_node));

    target = (int *)nvshmem_malloc(sizeof(int));
    if (NULL == target) {
        // Do not launch the kernel with a null destination pointer.
        fprintf(stderr, "[%s:%d] nvshmem_malloc failed \n", __FILE__, __LINE__);
        exit(-1);
    }

    simple_shift<<<1, 1>>>(target, mype, npes);
    // A kernel launch returns no status; launch errors are picked up here,
    // execution errors at the synchronize below.
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    printf("[%d of %d] run complete \n", mype, npes);

    nvshmem_free(target);
    nvshmem_finalize();
    MPI_CHECK(MPI_Finalize());
    return 0;
}