sglang.0.4.8.post1/nvshmem_src/perftest/host/coll/sync_on_stream.cpp

64 lines
1.9 KiB
C++

/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*
* See COPYRIGHT.txt for license information
*/
#include "coll_test.h"
int main(int argc, char *argv[]) {
int status = 0;
int mype;
size_t size = 1;
struct timeval t_start, t_stop;
float ms = 0;
double latency_value;
cudaEvent_t start_event, stop_event;
cudaStream_t stream;
read_args(argc, argv);
init_wrapper(&argc, &argv);
mype = nvshmem_my_pe();
#ifdef _NVSHMEM_DEBUG
int npes = nvshmem_n_pes();
#endif
CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
CUDA_CHECK(cudaEventCreate(&start_event));
CUDA_CHECK(cudaEventCreate(&stop_event));
DEBUG_PRINT("SHMEM: [%d of %d] hello shmem world! \n", mype, npes);
for (size_t iter = 0; iter < iters + warmup_iters; iter++) {
if (iter == warmup_iters) CUDA_CHECK(cudaEventRecord(start_event, stream));
nvshmemx_team_sync_on_stream(NVSHMEM_TEAM_WORLD, stream);
}
CUDA_CHECK(cudaEventRecord(stop_event, stream));
CUDA_CHECK(cudaStreamSynchronize(stream));
CUDA_CHECK(cudaEventElapsedTime(&ms, start_event, stop_event));
if (!mype) {
latency_value = (ms / iters) * 1000;
print_table_basic("sync_on_stream", "None", "size (Bytes)", "latency", "us", '-', &size,
&latency_value, 1);
}
nvshmem_barrier_all();
CUDA_CHECK(cudaStreamDestroy(stream));
CUDA_CHECK(cudaEventDestroy(start_event));
CUDA_CHECK(cudaEventDestroy(stop_event));
finalize_wrapper();
return status;
}