48 lines
1.3 KiB
Plaintext
48 lines
1.3 KiB
Plaintext
/*
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 *
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto. Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 *
 * See COPYRIGHT.txt for license information
 */
|
|
|
|
#include <stdio.h>
|
|
#include <cuda.h>
|
|
#include <nvshmem.h>
|
|
#include <nvshmemx.h>
|
|
|
|
/*
 * simple_shift: each calling thread stores the rank of its own PE into
 * `destination` on the next PE, treating the PEs as a ring.
 *
 * destination - address handed to nvshmem_int_p as the remote target; the
 *               caller in this file obtains it from nvshmem_malloc.
 *
 * The kernel does no thread indexing, so every launched thread issues the
 * same put; the caller in this file launches it as <<<1, 1>>>.
 */
__global__ void simple_shift(int *destination) {
    const int self_pe = nvshmem_my_pe();
    const int pe_count = nvshmem_n_pes();

    /* Neighbour one step to the right; the last PE wraps around to PE 0. */
    const int right_neighbor = (self_pe + 1) % pe_count;

    /* Single-element put: write our rank into the neighbour's copy. */
    nvshmem_int_p(destination, self_pe, right_neighbor);
}
|
|
|
|
/*
 * CUDA_CHECK: evaluate a CUDA runtime call and, if it did not return
 * cudaSuccess, report the failure on stderr and terminate the whole NVSHMEM
 * job. nvshmem_global_exit is used rather than a plain return so that the
 * other PEs are not left waiting in a barrier for a PE that has gone away.
 * Only valid between nvshmem_init() and nvshmem_finalize().
 */
#ifndef CUDA_CHECK
#define CUDA_CHECK(stmt)                                                     \
    do {                                                                     \
        cudaError_t cuda_check_result_ = (stmt);                             \
        if (cuda_check_result_ != cudaSuccess) {                             \
            fprintf(stderr, "[%s:%d] CUDA failed with %s\n", __FILE__,       \
                    __LINE__, cudaGetErrorString(cuda_check_result_));       \
            nvshmem_global_exit(1);                                          \
        }                                                                    \
    } while (0)
#endif

/*
 * Entry point: every PE launches simple_shift to write its rank to the next
 * PE, waits for all PEs at a stream-ordered barrier, copies the received
 * value back to the host and prints it.
 *
 * Returns 0 on success. Any CUDA or allocation failure aborts the job with
 * exit status 1 via nvshmem_global_exit.
 */
int main(void) {
    int mype_node;
    int msg = -1; /* defined value in case the copy never lands */
    cudaStream_t stream;

    nvshmem_init();

    /* Use this PE's index within the node team as its CUDA device ordinal.
     * NOTE(review): presumably assumes at most one PE per GPU on a node;
     * cudaSetDevice fails (and is now reported) otherwise. */
    mype_node = nvshmem_team_my_pe(NVSHMEMX_TEAM_NODE);
    CUDA_CHECK(cudaSetDevice(mype_node));
    CUDA_CHECK(cudaStreamCreate(&stream));

    int *destination = (int *)nvshmem_malloc(sizeof(int));
    if (destination == NULL) {
        fprintf(stderr, "[%s:%d] nvshmem_malloc of %zu bytes failed\n",
                __FILE__, __LINE__, sizeof(int));
        nvshmem_global_exit(1);
    }

    simple_shift<<<1, 1, 0, stream>>>(destination);
    /* A kernel launch returns nothing; launch errors surface here. */
    CUDA_CHECK(cudaGetLastError());

    /* Barrier is enqueued on the same stream, after the kernel and before
     * the copy, so the copy below is ordered behind it. */
    nvshmemx_barrier_all_on_stream(stream);
    CUDA_CHECK(cudaMemcpyAsync(&msg, destination, sizeof(int),
                               cudaMemcpyDeviceToHost, stream));

    /* Wait for the copy to finish before reading msg on the host. */
    CUDA_CHECK(cudaStreamSynchronize(stream));
    printf("%d: received message %d\n", nvshmem_my_pe(), msg);

    nvshmem_free(destination);
    /* Release the stream while the NVSHMEM runtime is still up, so a
     * failure here can still be reported through CUDA_CHECK. */
    CUDA_CHECK(cudaStreamDestroy(stream));
    nvshmem_finalize();
    return 0;
}
|