sglang.0.4.8.post1/gdrcopy/include/gdrapi.h

155 lines
5.5 KiB
C

/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __GDRAPI_H__
#define __GDRAPI_H__
#include <stdint.h> // for standard [u]intX_t types
#include <stddef.h>
// Version words keep the major number in the bits at and above
// MAJOR_VERSION_SHIFT and the minor number in the bits below it;
// MINOR_VERSION_MASK selects that lower (minor) part.
#define MAJOR_VERSION_SHIFT             16
#define MINOR_VERSION_MASK              (((uint32_t)1 << MAJOR_VERSION_SHIFT) - 1)

// Version of the API declared by this header, as separate components and
// as a single packed word.
#define GDR_API_MAJOR_VERSION           2
#define GDR_API_MINOR_VERSION           4
#define GDR_API_VERSION                 ((GDR_API_MAJOR_VERSION << MAJOR_VERSION_SHIFT) | GDR_API_MINOR_VERSION)

// Minimum gdrdrv version, packed the same way.
// NOTE(review): presumably the oldest kernel-mode driver this API accepts --
// confirm against the library sources.
#define MINIMUM_GDRDRV_MAJOR_VERSION    2
#define MINIMUM_GDRDRV_MINOR_VERSION    0
#define MINIMUM_GDRDRV_VERSION          ((MINIMUM_GDRDRV_MAJOR_VERSION << MAJOR_VERSION_SHIFT) | MINIMUM_GDRDRV_MINOR_VERSION)
// GPU page geometry. GPU_PAGE_SHIFT is log2 of GPU_PAGE_SIZE, so a GPU page
// is 1UL << 16 = 65536 bytes.
#define GPU_PAGE_SHIFT      16
#define GPU_PAGE_SIZE       (1UL << GPU_PAGE_SHIFT)
// Low bits of an address: the offset inside a GPU page.
#define GPU_PAGE_OFFSET     (GPU_PAGE_SIZE - 1)
// Complement of GPU_PAGE_OFFSET: and-ing an address with it rounds the
// address down to a GPU page boundary.
#define GPU_PAGE_MASK       (~GPU_PAGE_OFFSET)
/*
* GDRCopy, a low-latency GPU memory copy library (and a kernel-mode
* driver) based on NVIDIA GPUDirect RDMA technology.
*
* Supported environment variables:
*
* - GDRCOPY_ENABLE_LOGGING: if defined, logging is enabled; by default it
* is disabled.
*
* - GDRCOPY_LOG_LEVEL: overrides the log threshold; by default only errors
* are printed.
*/
#ifdef __cplusplus
extern "C" {
#endif
// Library state object. struct gdr is only forward-declared in this header,
// so callers never see its layout and refer to it exclusively through the
// gdr_t pointer type.
struct gdr;
typedef struct gdr *gdr_t;
// Initialize the library, e.g. by opening a connection to the kernel-mode
// driver.
//
// Returns a handle to the library state object.
// NOTE(review): the value returned on failure is not stated in this header
// (presumably NULL) -- confirm against the implementation.
gdr_t gdr_open(void);
// Destroy the library state object `g`, e.g. by closing the connection to
// the kernel-mode driver.
//
// NOTE(review): the meaning of the int return value is not documented in
// this header (presumably 0 on success) -- confirm against the implementation.
int gdr_close(gdr_t g);
// The handle to a user-space GPU memory mapping.
//
// It is passed by value to the calls that operate on a mapping. This header
// does not describe the content of `h`; treat it as opaque.
typedef struct gdr_mh_s {
unsigned long h;
} gdr_mh_t;
// Create a peer-to-peer mapping of the device memory buffer, returning an
// opaque handle through `handle`.
// Note that at this point the mapping is still not accessible to user-space.
//
// Parameters:
//   g         - library state object
//   addr      - address of the device memory buffer
//   size      - size of the buffer (NOTE(review): presumably in bytes -- confirm)
//   p2p_token, va_space
//             - NOTE(review): not described in this header; presumably the
//               token pair of the NVIDIA peer-to-peer API -- confirm
//   handle    - output; receives the opaque mapping handle
//
// NOTE(review): the meaning of the int return value is not documented in
// this header (presumably 0 on success) -- confirm against the implementation.
int gdr_pin_buffer(gdr_t g, unsigned long addr, size_t size, uint64_t p2p_token, uint32_t va_space, gdr_mh_t *handle);
// Destroy the peer-to-peer mapping identified by `handle` and free the
// handle.
//
// If there exists a corresponding user-space mapping, gdr_unmap should be
// called before this one.
//
// NOTE(review): the meaning of the int return value is not documented in
// this header (presumably 0 on success) -- confirm against the implementation.
int gdr_unpin_buffer(gdr_t g, gdr_mh_t handle);
// Retrieve the callback flag of `handle` through `flag`.
//
// The flag is set when the kernel callback (relative to the
// nvidia_p2p_get_pages) gets invoked, e.g. when cuMemFree() is called before
// gdr_unpin_buffer.
//
// NOTE(review): the exact value stored in *flag and the meaning of the int
// return value are not documented in this header -- confirm against the
// implementation.
int gdr_get_callback_flag(gdr_t g, gdr_mh_t handle, int *flag);
// Kind of mapping, as carried by the mapping_type field of the info struct.
// NOTE(review): this header does not describe the individual values; the
// per-value notes below are inferred from the enumerator names -- confirm.
typedef enum gdr_mapping_type {
// presumably: no mapping / type not set
GDR_MAPPING_TYPE_NONE = 0,
// presumably: write-combining mapping
GDR_MAPPING_TYPE_WC = 1,
// presumably: cached mapping
GDR_MAPPING_TYPE_CACHING = 2,
// presumably: device-memory-type mapping
GDR_MAPPING_TYPE_DEVICE = 3
} gdr_mapping_type_t;
// After pinning, info struct contains details of the mapped area.
//
// Note that both info->va and info->mapped_size might be different from
// the original values passed to gdr_pin_buffer due to aligning happening
// in the kernel-mode driver.
typedef struct gdr_info_v2 {
// Address of the mapped area (may differ from the pinned address, see above).
uint64_t va;
// Size of the mapped area (may differ from the pinned size, see above).
uint64_t mapped_size;
// NOTE(review): presumably the page size used for the mapping, in bytes -- confirm.
uint32_t page_size;
// tm_cycles and cycles_per_ms are deprecated and will be removed in future.
uint64_t tm_cycles;
uint32_t cycles_per_ms;
// One-bit flag.
// NOTE(review): presumably set while a user-space mapping of the handle exists -- confirm.
unsigned mapped:1;
// One-bit flag.
// NOTE(review): presumably set when the mapping is write-combining -- confirm.
unsigned wc_mapping:1;
// Kind of mapping, one of the gdr_mapping_type_t values.
gdr_mapping_type_t mapping_type;
} gdr_info_v2_t;
// Unversioned alias: gdr_info_t is the v2 info struct.
typedef gdr_info_v2_t gdr_info_t;
// Fill `info` with the details of the mapped area associated with `handle`.
//
// NOTE(review): the meaning of the int return value is not documented in
// this header (presumably 0 on success) -- confirm against the implementation.
int gdr_get_info_v2(gdr_t g, gdr_mh_t handle, gdr_info_v2_t *info);
// Unversioned name: gdr_get_info expands to gdr_get_info_v2.
#define gdr_get_info gdr_get_info_v2
// Create a user-space mapping of the memory handle; the resulting pointer is
// returned through `va`.
//
// WARNING: the address could be potentially aligned to the boundary of the page size
// before being mapped in user-space, so the pointer returned might be
// affected by an offset. gdr_get_info can be used to calculate that
// offset.
//
// NOTE(review): the unit of `size` (presumably bytes) and the meaning of the
// int return value (presumably 0 on success) are not documented in this
// header -- confirm against the implementation.
int gdr_map(gdr_t g, gdr_mh_t handle, void **va, size_t size);
// Get rid of a user-space mapping.
// First invoke gdr_unmap() then gdr_unpin_buffer().
//
// NOTE(review): `va` and `size` presumably have to match the values used
// with gdr_map(), and the int return value is presumably 0 on success --
// neither is stated in this header; confirm against the implementation.
int gdr_unmap(gdr_t g, gdr_mh_t handle, void *va, size_t size);
// Copy data between a mapping of a device memory buffer and another buffer.
//
// map_d_ptr is the user-space virtual address belonging to a mapping of a device memory buffer,
// i.e. one returned by gdr_map()
//
// gdr_copy_to_mapping writes to map_d_ptr and reads from h_ptr;
// gdr_copy_from_mapping writes to h_ptr and reads from map_d_ptr (the source
// side is the const-qualified pointer in each prototype).
// NOTE(review): h_ptr is presumably a host memory pointer and `size` a byte
// count; the meaning of the int return value is not documented in this
// header -- confirm against the implementation.
//
// WARNING: Both integrity and ordering of data as observed by pre-launched GPU
// work is not guaranteed by this API. For more information, see
// https://docs.nvidia.com/cuda/gpudirect-rdma/index.html#sync-behavior
int gdr_copy_to_mapping(gdr_mh_t handle, void *map_d_ptr, const void *h_ptr, size_t size);
int gdr_copy_from_mapping(gdr_mh_t handle, void *h_ptr, const void *map_d_ptr, size_t size);
// Query the version of libgdrapi.
// The major and minor components are returned through `major` and `minor`.
void gdr_runtime_get_version(int *major, int *minor);
// Query the version of gdrdrv driver.
// The major and minor components are returned through `major` and `minor`.
//
// NOTE(review): the meaning of the int return value is not documented in
// this header (presumably 0 on success) -- confirm against the implementation.
int gdr_driver_get_version(gdr_t g, int *major, int *minor);
#ifdef __cplusplus
}
#endif
#endif // __GDRAPI_H__