/* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #pragma once #include #include #include #include #include #include #include #include #include #ifndef ACCESS_ONCE #define ACCESS_ONCE(x) (*(volatile typeof((x)) *)&(x)) #endif #ifndef READ_ONCE #define READ_ONCE(x) ACCESS_ONCE(x) #endif #ifndef WRITE_ONCE #define WRITE_ONCE(x, v) (ACCESS_ONCE(x) = (v)) #endif /** * Memory barrier */ #if defined(GDRAPI_X86) #define MB() asm volatile("mfence":::"memory") #define SB() asm volatile("sfence":::"memory") #define LB() asm volatile("lfence":::"memory") #elif defined(GDRAPI_POWER) #define MB() asm volatile("sync":::"memory") #define SB() MB() #define LB() MB() #elif defined(GDRAPI_ARM64) #define MB() asm volatile("dmb sy":::"memory") #define SB() asm volatile("dmb st":::"memory") #define LB() MB() #else #error "Compiling on an unsupported architecture." #endif /** * Clock used for timing */ //#define MYCLOCK CLOCK_REALTIME //#define MYCLOCK CLOCK_RAW_MONOTONIC #define MYCLOCK CLOCK_MONOTONIC #define EXIT_WAIVED 2 #define ASSERT(x) \ do \ { \ if (!(x)) \ { \ fprintf(stderr, "Assertion \"%s\" failed at %s:%d\n", #x, __FILE__, __LINE__); \ exit(EXIT_FAILURE); \ } \ } while (0) #define ASSERTDRV(stmt) \ do \ { \ CUresult result = (stmt); \ if (result != CUDA_SUCCESS) { \ const char *_err_name; \ cuGetErrorName(result, &_err_name); \ fprintf(stderr, "CUDA error: %s\n", _err_name); \ } \ ASSERT(CUDA_SUCCESS == result); \ } while (0) #define ASSERT_EQ(P, V) ASSERT((P) == (V)) #define CHECK_EQ(P, V) ASSERT((P) == (V)) #define ASSERT_NEQ(P, V) ASSERT(!((P) == (V))) #define BREAK_IF_NEQ(P, V) if((P) != (V)) break #define BEGIN_CHECK do #define END_CHECK while(0) #define PAGE_ROUND_UP(x, n) (((x) + ((n) - 1)) & ~((n) - 1)) namespace gdrcopy { namespace test { typedef struct gpuMemHandle { CUdeviceptr ptr; // aligned ptr if requested; otherwise, the same as unaligned_ptr. union { CUdeviceptr unaligned_ptr; // for tracking original ptr; may be unaligned. #if CUDA_VERSION >= 11000 // VMM with GDR support is available from CUDA 11.0 CUmemGenericAllocationHandle handle; #endif }; size_t size; size_t allocated_size; } gpu_mem_handle_t; typedef CUresult (*gpu_memalloc_fn_t)(gpu_mem_handle_t *handle, const size_t size, bool aligned_mapping, bool set_sync_memops); typedef CUresult (*gpu_memfree_fn_t)(gpu_mem_handle_t *handle); static inline gdr_t gdr_open_safe() { gdr_t g = gdr_open(); if (!g) { fprintf(stderr, "gdr_open error: Is gdrdrv driver installed and loaded?\n"); exit(EXIT_FAILURE); } return g; } extern bool print_dbg_msg; extern const char *testname; void print_dbg(const char* fmt, ...); CUresult gpu_mem_alloc(gpu_mem_handle_t *handle, const size_t size, bool aligned_mapping, bool set_sync_memops); CUresult gpu_mem_free(gpu_mem_handle_t *handle); CUresult gpu_vmm_alloc(gpu_mem_handle_t *handle, const size_t size, bool aligned_mapping, bool set_sync_memops); CUresult gpu_vmm_free(gpu_mem_handle_t *handle); static inline bool operator==(const gdr_mh_t &a, const gdr_mh_t &b) { return a.h == b.h; } static const gdr_mh_t null_mh = {0}; int compare_buf(uint32_t *ref_buf, uint32_t *buf, size_t size); void init_hbuf_walking_bit(uint32_t *h_buf, size_t size); void init_hbuf_linear_ramp(uint32_t *h_buf, size_t size); bool check_gdr_support(CUdevice dev); void print_histogram(double *lat_arr, int count, int *bin_arr, int num_bins, double min, double max); } }