// sglang.0.4.8.post1/gdrcopy/tests/common.hpp
//
// 163 lines
// 5.7 KiB
// C++

/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <stdarg.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <cuda.h>
#include <cstring>
#include <map>
#include <gdrapi.h>
#include <gdrconfig.h>
/**
 * Single-access helpers.
 *
 * ACCESS_ONCE(x) yields an lvalue that refers to x through a pointer to a
 * volatile-qualified copy of x's type, so the access is performed as a
 * volatile access. READ_ONCE(x) and WRITE_ONCE(x, v) are thin wrappers that
 * read through, respectively assign through, that lvalue.
 *
 * x must be an lvalue because its address is taken. Each macro is only
 * defined when no earlier header has already provided it.
 *
 * __typeof__ is used instead of the bare typeof keyword: typeof is only
 * recognised in GNU dialect modes, while __typeof__ is also accepted when the
 * translation unit is built with a strict -std=c++NN setting.
 *
 * NOTE(review): volatile only constrains the compiler; any required hardware
 * ordering still has to come from MB()/SB()/LB().
 */
#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile __typeof__((x)) *)&(x))
#endif
#ifndef READ_ONCE
#define READ_ONCE(x) ACCESS_ONCE(x)
#endif
#ifndef WRITE_ONCE
#define WRITE_ONCE(x, v) (ACCESS_ONCE(x) = (v))
#endif
/**
* Memory barrier
*
* MB(), SB() and LB() each expand to one inline-asm statement selected by the
* GDRAPI_X86 / GDRAPI_POWER / GDRAPI_ARM64 macro:
*   - x86:   mfence / sfence / lfence
*   - POWER: sync for MB(); SB() and LB() both reuse MB()
*   - ARM64: "dmb sy" for MB(), "dmb st" for SB(); LB() reuses MB()
*
* Every asm statement lists "memory" as a clobber, so it also keeps the
* compiler from moving memory accesses across the barrier.
*
* Building for any other architecture stops with the #error below.
*
* NOTE(review): the GDRAPI_* selectors are not defined in this file;
* presumably they come from <gdrconfig.h> — confirm.
*/
#if defined(GDRAPI_X86)
#define MB() asm volatile("mfence":::"memory")
#define SB() asm volatile("sfence":::"memory")
#define LB() asm volatile("lfence":::"memory")
#elif defined(GDRAPI_POWER)
#define MB() asm volatile("sync":::"memory")
#define SB() MB()
#define LB() MB()
#elif defined(GDRAPI_ARM64)
#define MB() asm volatile("dmb sy":::"memory")
#define SB() asm volatile("dmb st":::"memory")
#define LB() MB()
#else
#error "Compiling on an unsupported architecture."
#endif
/**
* Clock used for timing
*
* MYCLOCK names the clock id used by the tests; the alternatives that were
* tried are kept below, commented out.
*
* NOTE(review): the users of MYCLOCK are outside this header; presumably it
* is passed to clock_gettime() — confirm.
*/
//#define MYCLOCK CLOCK_REALTIME
//#define MYCLOCK CLOCK_MONOTONIC_RAW
#define MYCLOCK CLOCK_MONOTONIC
/*
* Exit status value 2.
* NOTE(review): presumably returned by a test that is skipped ("waived")
* rather than passed or failed — confirm against the test programs.
*/
#define EXIT_WAIVED 2
/**
 * Runtime check with no NDEBUG (or other) compile-time switch.
 *
 * Evaluates x exactly once. When it is false, the stringified expression and
 * the source location are written to stderr and the process terminates with
 * EXIT_FAILURE. The do/while(0) wrapper makes the macro behave as a single
 * statement, so it is safe in an unbraced if/else.
 */
#define ASSERT(x)                                                           \
    do {                                                                    \
        if (!(x)) {                                                         \
            fprintf(stderr, "Assertion \"%s\" failed at %s:%d\n",           \
                    #x, __FILE__, __LINE__);                                \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)
/**
 * Check the CUresult of a CUDA driver API call.
 *
 * stmt is evaluated exactly once. On any value other than CUDA_SUCCESS the
 * symbolic error name is printed to stderr, then ASSERT() reports the
 * location and terminates the process.
 *
 * cuGetErrorName() can fail for an unrecognised code and leave the name
 * pointer NULL. The pointer is therefore initialised, the return value is
 * checked, and a placeholder string is printed instead of handing an
 * indeterminate or NULL pointer to "%s".
 *
 * Caveat: the macro declares a local named `result`, so stmt must not itself
 * refer to a caller variable with that name.
 */
#define ASSERTDRV(stmt)                                                     \
    do                                                                      \
    {                                                                       \
        CUresult result = (stmt);                                           \
        if (result != CUDA_SUCCESS) {                                       \
            const char *_err_name = NULL;                                   \
            if (cuGetErrorName(result, &_err_name) != CUDA_SUCCESS ||       \
                _err_name == NULL) {                                        \
                _err_name = "unknown error";                                \
            }                                                               \
            fprintf(stderr, "CUDA error: %s\n", _err_name);                 \
        }                                                                   \
        ASSERT(CUDA_SUCCESS == result);                                     \
    } while (0)
/*
 * Comparison shorthands built on ASSERT().
 *
 * ASSERT_EQ and CHECK_EQ are identical: both fail unless (P) == (V).
 * ASSERT_NEQ fails when (P) == (V); it is written as !(==) so that it only
 * needs operator== on the operand types.
 */
#define ASSERT_EQ(P, V) ASSERT((P) == (V))
#define CHECK_EQ(P, V) ASSERT((P) == (V))
#define ASSERT_NEQ(P, V) ASSERT(!((P) == (V)))
/*
 * "Check block" helpers.
 *
 * BEGIN_CHECK { ... } END_CHECK; expands to do { ... } while(0); so the body
 * runs once and a `break` inside it skips the rest of the body.
 *
 * BREAK_IF_NEQ(P, V) breaks out of the innermost enclosing loop (normally a
 * BEGIN_CHECK block) unless (P) == (V).
 *
 * It cannot be wrapped in do/while(0) because the break has to reach the
 * caller's loop. The if/else form is used instead of a bare `if (...) break`
 * so that an `else` written after the macro by the caller binds to the
 * caller's own `if`, not to the one hidden in the macro.
 *
 * The comparison uses == only, matching ASSERT_NEQ, so it also works for
 * types that provide operator== but no operator!= (such as gdr_mh_t with the
 * operator== declared in this header).
 */
#define BREAK_IF_NEQ(P, V) if ((P) == (V)) {} else break
#define BEGIN_CHECK do
#define END_CHECK while(0)
/*
 * Round x up to the next multiple of n.
 *
 * n must be a power of two, because the result is formed by masking off the
 * low bits. The mask is built in the type of (x) + (n), not in the type of n
 * alone. Otherwise a 32-bit unsigned n combined with a 64-bit x would produce
 * a zero-extended mask that clears the upper 32 bits of the result.
 *
 * x is evaluated once; n is evaluated twice (the use inside __typeof__ is
 * not evaluated).
 */
#define PAGE_ROUND_UP(x, n) (((x) + ((n) - 1)) & ~((__typeof__((x) + (n)))(n) - 1))
namespace gdrcopy {
namespace test {
// Bookkeeping record for one GPU allocation; filled in and consumed through
// the gpu_memalloc_fn_t / gpu_memfree_fn_t style functions declared below.
typedef struct gpuMemHandle
{
CUdeviceptr ptr; // aligned ptr if requested; otherwise, the same as unaligned_ptr.
// The two members below share storage, so only one of them is meaningful
// for a given handle.
// NOTE(review): presumably unaligned_ptr is used by the gpu_mem_* path and
// handle by the gpu_vmm_* path — confirm in the definitions.
union {
CUdeviceptr unaligned_ptr; // for tracking original ptr; may be unaligned.
#if CUDA_VERSION >= 11000
// VMM with GDR support is available from CUDA 11.0
CUmemGenericAllocationHandle handle;
#endif
};
// NOTE(review): presumably `size` is the size the caller asked for and
// `allocated_size` the (possibly larger) size actually allocated — confirm.
size_t size;
size_t allocated_size;
} gpu_mem_handle_t;
// Function-pointer types with the same signatures as the gpu_mem_alloc /
// gpu_vmm_alloc and gpu_mem_free / gpu_vmm_free declarations in this header,
// so either allocator pair can be passed around through them.
typedef CUresult (*gpu_memalloc_fn_t)(gpu_mem_handle_t *handle, const size_t size, bool aligned_mapping, bool set_sync_memops);
typedef CUresult (*gpu_memfree_fn_t)(gpu_mem_handle_t *handle);
/**
 * Open a gdrcopy handle or terminate the process.
 *
 * Calls gdr_open(). A handle that tests true is returned to the caller;
 * otherwise a hint about the gdrdrv driver is written to stderr and the
 * process exits with EXIT_FAILURE.
 */
static inline gdr_t gdr_open_safe()
{
    gdr_t handle = gdr_open();
    if (handle)
        return handle;

    fputs("gdr_open error: Is gdrdrv driver installed and loaded?\n", stderr);
    exit(EXIT_FAILURE);
}
// Globals and functions defined outside this header.
// NOTE(review): presumably print_dbg_msg switches print_dbg() output on or
// off — confirm in the defining source file.
extern bool print_dbg_msg;
// NOTE(review): presumably the name of the currently running test — confirm.
extern const char *testname;
// printf-style variadic debug print (format string followed by arguments).
void print_dbg(const char* fmt, ...);
// Allocate / free pairs operating on a gpu_mem_handle_t. Both pairs share the
// gpu_memalloc_fn_t / gpu_memfree_fn_t signatures and return a CUresult.
// NOTE(review): presumably gpu_mem_* uses the regular device allocator and
// gpu_vmm_* the CUDA virtual memory management API (see the CUDA >= 11.0
// `handle` member of gpu_mem_handle_t) — confirm in the definitions.
// NOTE(review): the exact meaning of aligned_mapping and set_sync_memops is
// not visible here; verify against the implementation.
CUresult gpu_mem_alloc(gpu_mem_handle_t *handle, const size_t size, bool aligned_mapping, bool set_sync_memops);
CUresult gpu_mem_free(gpu_mem_handle_t *handle);
CUresult gpu_vmm_alloc(gpu_mem_handle_t *handle, const size_t size, bool aligned_mapping, bool set_sync_memops);
CUresult gpu_vmm_free(gpu_mem_handle_t *handle);
// Equality for gdr_mh_t: two handles compare equal when their `h` members
// are equal. Only operator== is provided here; no operator!= is declared in
// this header.
static inline bool operator==(const gdr_mh_t &a, const gdr_mh_t &b) {
return a.h == b.h;
}
// Handle initialised with {0}.
// NOTE(review): presumably used as the "no mapping" sentinel to compare
// against — confirm at the call sites.
static const gdr_mh_t null_mh = {0};
// Buffer and reporting utilities; declared here, defined elsewhere.
// NOTE(review): the notes below are inferred from names and signatures only —
// confirm against the definitions.
//   compare_buf            presumably compares buf with ref_buf and returns
//                          non-zero on mismatch; unit of `size` (bytes vs
//                          elements) is not visible here.
//   init_hbuf_walking_bit  presumably fills h_buf with a walking-bit pattern.
//   init_hbuf_linear_ramp  presumably fills h_buf with increasing values.
//   check_gdr_support      presumably reports whether `dev` can be used with
//                          GPUDirect RDMA.
//   print_histogram        presumably bins `count` latencies from lat_arr
//                          into `num_bins` bins over [min, max] and prints.
int compare_buf(uint32_t *ref_buf, uint32_t *buf, size_t size);
void init_hbuf_walking_bit(uint32_t *h_buf, size_t size);
void init_hbuf_linear_ramp(uint32_t *h_buf, size_t size);
bool check_gdr_support(CUdevice dev);
void print_histogram(double *lat_arr, int count, int *bin_arr, int num_bins, double min, double max);
}
}