/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/compiler.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/timer.h>
#include <linux/pci.h>

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,11,0)
#include <linux/sched/signal.h>
#endif

/**
 * This is needed for round_up(), which is declared in <linux/math.h>
 * since kernel 5.11.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
#include <linux/math.h>
#endif

/**
 * HAVE_UNLOCKED_IOCTL was dropped in kernel version 5.9.
 * Because that removal might be backported to earlier 5.x releases, we define
 * the macro ourselves whenever it is missing on kernel v5. This also keeps
 * backward compatibility with kernels older than 2.6.11.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) && !defined(HAVE_UNLOCKED_IOCTL)
#define HAVE_UNLOCKED_IOCTL 1
#endif

//-----------------------------------------------------------------------------
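
// PCI IDs used by gdrdrv_init() to detect a BlueField-3 host: 0x15b3 is the
// Mellanox (NVIDIA networking) vendor ID, and, going by the constant names,
// 0xa2da/0xa2db are the BF3 PCIe root-port device IDs. On such hosts CPU
// mappings must use device memory attributes
// (see gdrdrv_cpu_must_use_device_mapping below).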
static const unsigned int GDRDRV_BF3_PCI_ROOT_DEV_VENDOR_ID = 0x15b3;
static const unsigned int GDRDRV_BF3_PCI_ROOT_DEV_DEVICE_ID[2] = {0xa2da, 0xa2db};

//-----------------------------------------------------------------------------

static int gdrdrv_major = 0;
static int gdrdrv_cpu_can_cache_gpu_mappings = 0;
static int gdrdrv_cpu_must_use_device_mapping = 0;

//-----------------------------------------------------------------------------

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,32)
/**
 * This API is only available in kernels newer than 2.6.32, so we provide an
 * equivalent implementation here.
 */
void address_space_init_once(struct address_space *mapping)
{
    memset(mapping, 0, sizeof(*mapping));
    INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,26)
    //
    // The .tree_lock member variable was changed from type rwlock_t to
    // spinlock_t on 25 July 2008, by mainline commit
    // 19fd6231279be3c3bdd02ed99f9b0eb195978064.
    //
    rwlock_init(&mapping->tree_lock);
#else
    spin_lock_init(&mapping->tree_lock);
#endif

    spin_lock_init(&mapping->i_mmap_lock);
    INIT_LIST_HEAD(&mapping->private_list);
    spin_lock_init(&mapping->private_lock);
    INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
    INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
#endif

#ifndef GDRDRV_HAVE_VM_FLAGS_SET
/**
 * vm_flags_set() was introduced in Linux kernel 6.3.
 * See https://github.com/torvalds/linux/commit/bc292ab00f6c7a661a8a605c714e8a148f629ef6
 * Provide the trivial equivalent for older kernels.
 */
static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
{
    vma->vm_flags |= flags;
}
#endif

#if defined(CONFIG_X86_64) || defined(CONFIG_X86_32)
static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
{
    pgprot_t new_prot = old_prot;
    pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_PCD | _PAGE_PWT);
    new_prot = __pgprot(pgprot_val(new_prot) | _PAGE_PWT);
    return new_prot;
}
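// Explanation of the bit twiddling above (intent, not new behavior): the x86
// memory type of a PTE is selected by the (PAT, PCD, PWT) bit triple, which
// indexes the PAT MSR; in a 4KiB PTE the PAT bit shares the _PAGE_PSE bit
// position. Clearing all three and then setting PWT alone selects PAT entry 1,
// which Linux normally programs as write-combining (boot-time PAT layout
// WB, WC, UC-, UC).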
static inline pgprot_t pgprot_modify_device(pgprot_t old_prot)
{
    // Device mappings should never be requested on x86
    BUG_ON(1);
    return old_prot;
}
#define get_tsc_khz() cpu_khz // tsc_khz
static inline int gdr_pfn_is_ram(unsigned long pfn)
{
    // page_is_ram is GPL-only. In any case, there are no x86
    // platforms supporting coherent GPU mappings, so we would not use
    // this function anyway.
    return 0;
}

#elif defined(CONFIG_PPC64)
#include <asm/reg.h>
static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
{
    return pgprot_writecombine(old_prot);
}
static inline pgprot_t pgprot_modify_device(pgprot_t old_prot)
{
    // Device mappings should never be requested on PPC64
    BUG_ON(1);
    return old_prot;
}
#define get_tsc_khz() (get_cycles()/1000) // dirty hack
static inline int gdr_pfn_is_ram(unsigned long pfn)
{
    // catch platforms, e.g. POWER8, or POWER9 with GPUs not attached via
    // NVLink, where GPU memory is non-coherent
#ifdef GDRDRV_OPENSOURCE_NVIDIA
    // page_is_ram is a GPL symbol. We can use it with the open flavor of the NVIDIA driver.
    return page_is_ram(pfn);
#else
    // For the proprietary flavor, we approximate it with the following heuristic.
    unsigned long start = pfn << PAGE_SHIFT;
    unsigned long mask_47bits = (1UL<<47)-1;
    return gdrdrv_cpu_can_cache_gpu_mappings && (0 == (start & ~mask_47bits));
#endif
}
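
// Why the 47-bit test above can stand in for page_is_ram() — an assumption
// based on the POWER9 address map, not a documented guarantee: on
// NVLink-coherent systems GPU memory is onlined as system RAM in the lower
// 47-bit physical range, while MMIO apertures live above it, so "below 2^47
// and coherence enabled" tracks "backed by RAM" closely enough for the
// mapping-type decision.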

#elif defined(CONFIG_ARM64)
static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
{
    return pgprot_writecombine(old_prot);
}
static inline pgprot_t pgprot_modify_device(pgprot_t old_prot)
{
    return pgprot_device(old_prot);
}
static inline int gdr_pfn_is_ram(unsigned long pfn)
{
#ifdef GDRDRV_OPENSOURCE_NVIDIA
    // page_is_ram is a GPL symbol. We can use it with the open flavor.
    return page_is_ram(pfn);
#else
    // For the proprietary flavor of the NVIDIA driver, we fall back to WC mappings.
    return 0;
#endif
}

#else
#error "X86_64/32 or PPC64 or ARM64 is required"
#endif

#include "gdrdrv.h"
#include "nv-p2p.h"

//-----------------------------------------------------------------------------

#ifndef NVIDIA_P2P_MAJOR_VERSION_MASK
#define NVIDIA_P2P_MAJOR_VERSION_MASK 0xffff0000
#endif
#ifndef NVIDIA_P2P_MINOR_VERSION_MASK
#define NVIDIA_P2P_MINOR_VERSION_MASK 0x0000ffff
#endif

#ifndef NVIDIA_P2P_MAJOR_VERSION
#define NVIDIA_P2P_MAJOR_VERSION(v) \
    (((v) & NVIDIA_P2P_MAJOR_VERSION_MASK) >> 16)
#endif

#ifndef NVIDIA_P2P_MINOR_VERSION
#define NVIDIA_P2P_MINOR_VERSION(v) \
    (((v) & NVIDIA_P2P_MINOR_VERSION_MASK))
#endif

#ifndef NVIDIA_P2P_MAJOR_VERSION_MATCHES
#define NVIDIA_P2P_MAJOR_VERSION_MATCHES(p, v) \
    (NVIDIA_P2P_MAJOR_VERSION((p)->version) == NVIDIA_P2P_MAJOR_VERSION(v))
#endif

#ifndef NVIDIA_P2P_VERSION_COMPATIBLE
#define NVIDIA_P2P_VERSION_COMPATIBLE(p, v) \
    (NVIDIA_P2P_MAJOR_VERSION_MATCHES(p, v) && \
    (NVIDIA_P2P_MINOR_VERSION((p)->version) >= NVIDIA_P2P_MINOR_VERSION(v)))
#endif

#ifndef NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE
#define NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE(p) \
    NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_PAGE_TABLE_VERSION)
#endif
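
// Worked example of the version scheme above: a page table carrying version
// 0x00010002 has major 1, minor 2. It is compatible with a driver compiled
// against NVIDIA_P2P_PAGE_TABLE_VERSION 0x00010001 (same major, minor 2 >= 1)
// but not with 0x00020000 (major mismatch).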

#ifdef GDRDRV_OPENSOURCE_NVIDIA
#define GDRDRV_BUILT_FOR_NVIDIA_FLAVOR_STRING "opensource"
#else
#define GDRDRV_BUILT_FOR_NVIDIA_FLAVOR_STRING "proprietary"
#endif

//-----------------------------------------------------------------------------

#define DEVNAME "gdrdrv"

#define gdr_msg(KRNLVL, FMT, ARGS...) printk(KRNLVL DEVNAME ":%s:" FMT, __func__, ## ARGS)
//#define gdr_msg(KRNLVL, FMT, ARGS...) printk_ratelimited(KRNLVL DEVNAME ":" FMT, ## ARGS)

static int dbg_enabled = 0;
#define gdr_dbg(FMT, ARGS...) \
    do { \
        if (dbg_enabled) \
            gdr_msg(KERN_DEBUG, FMT, ## ARGS); \
    } while(0)

static int info_enabled = 0;
#define gdr_info(FMT, ARGS...) \
    do { \
        if (info_enabled) \
            gdr_msg(KERN_INFO, FMT, ## ARGS); \
    } while(0)

#define gdr_err(FMT, ARGS...) \
    gdr_msg(KERN_DEBUG, FMT, ## ARGS)

static int use_persistent_mapping = 0;

//-----------------------------------------------------------------------------

MODULE_AUTHOR("drossetti@nvidia.com");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_DESCRIPTION("GDRCopy kernel-mode driver built for " GDRDRV_BUILT_FOR_NVIDIA_FLAVOR_STRING " NVIDIA driver");
MODULE_VERSION(GDRDRV_VERSION_STRING);
module_param(dbg_enabled, int, 0000);
MODULE_PARM_DESC(dbg_enabled, "enable debug tracing");
module_param(info_enabled, int, 0000);
MODULE_PARM_DESC(info_enabled, "enable info tracing");
module_param(use_persistent_mapping, int, 0000);
MODULE_PARM_DESC(use_persistent_mapping, "use persistent mapping instead of traditional (non-persistent) mapping");

//-----------------------------------------------------------------------------

#define GPU_PAGE_SHIFT  16
#define GPU_PAGE_SIZE   ((u64)1 << GPU_PAGE_SHIFT)
#define GPU_PAGE_OFFSET (GPU_PAGE_SIZE-1)
#define GPU_PAGE_MASK   (~GPU_PAGE_OFFSET)

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

// compatibility with old Linux kernels

#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#endif

#ifndef READ_ONCE
#define READ_ONCE(x) ACCESS_ONCE(x)
#endif

//-----------------------------------------------------------------------------

struct gdr_mr {
    struct list_head node;
    gdr_hnd_t handle;
    u64 offset;
    u64 p2p_token;
    u32 va_space;
    u32 page_size;
    u64 va;
    u64 mapped_size;
    gdr_mr_type_t cpu_mapping_type;
    nvidia_p2p_page_table_t *page_table;
    int cb_flag;
    cycles_t tm_cycles;
    unsigned int tsc_khz;
    struct vm_area_struct *vma;
    struct address_space *mapping;
    struct rw_semaphore sem;
};
typedef struct gdr_mr gdr_mr_t;
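
// Lifetime and locking of a gdr_mr, as implemented below: the mr sits on
// info->mr_list (guarded by info->lock) from pinning until unpin or file
// release; mr->sem serializes the mr state (cb_flag, page_table, vma,
// cpu_mapping_type) against the NVIDIA free callback. mr->offset is the
// sub-page offset of the original user va, while mr->va/mapped_size describe
// the GPU-page-aligned pin range.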

/**
 * Prerequisite:
 * - mr must be protected by down_read(mr->sem) or stronger.
 */
static int gdr_mr_is_mapped(gdr_mr_t *mr)
{
    return mr->cpu_mapping_type != GDR_MR_NONE;
}

static inline void gdrdrv_zap_vma(struct address_space *mapping, struct vm_area_struct *vma)
{
    // unmap_mapping_range is mainly used for files, so the address is
    // relative to the file offset. We pass vma->vm_pgoff here to unmap this
    // entire range but not the other mapped ranges.
    unmap_mapping_range(mapping, vma->vm_pgoff << PAGE_SHIFT, vma->vm_end - vma->vm_start, 0);
}

/**
 * Prerequisite:
 * - mr must be protected by down_write(mr->sem).
 */
static void gdr_mr_destroy_all_mappings(gdr_mr_t *mr)
{
    // there is a single mapping at the moment
    if (mr->vma)
        gdrdrv_zap_vma(mr->mapping, mr->vma);

    mr->cpu_mapping_type = GDR_MR_NONE;
}

//-----------------------------------------------------------------------------

struct gdr_info {
    // simple low-performance linked-list implementation
    struct list_head mr_list;
    struct mutex lock;

    // Pointer to the pid struct of the creator task group.
    // We do not use a numerical pid here to avoid issues from pid reuse.
    struct pid *tgid;

    // Address space unique to this opened file. We need to create a new one
    // because filp->f_mapping usually points to inode->i_mapping.
    struct address_space mapping;

    // The handle number and the mmap offset are equivalent. However, the mmap
    // offset is used by the Linux kernel when doing m(un)map, so the ranges
    // cannot overlap. We place consecutive ranges next to each other to avoid
    // this issue.
    gdr_hnd_t next_handle;
    int next_handle_overflow;
};
typedef struct gdr_info gdr_info_t;

//-----------------------------------------------------------------------------

static int gdrdrv_check_same_process(gdr_info_t *info, struct task_struct *tsk)
{
    int same_proc;
    BUG_ON(0 == info);
    BUG_ON(0 == tsk);
    same_proc = (info->tgid == task_tgid(tsk)); // these tasks belong to the same task group
    if (!same_proc) {
        gdr_dbg("check failed, info:{tgid=%p} this tsk={tgid=%p}\n",
                info->tgid, task_tgid(tsk));
    }
    return same_proc;
}

//-----------------------------------------------------------------------------

static inline int gdr_support_persistent_mapping(void)
{
#if defined(NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API)
    return 1;
#elif defined(NVIDIA_P2P_CAP_PERSISTENT_PAGES)
    return !!(nvidia_p2p_cap_persistent_pages);
#else
    return 0;
#endif
}

static inline int gdr_use_persistent_mapping(void)
{
    return use_persistent_mapping && gdr_support_persistent_mapping();
}
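
// The trade-off, as reflected in the pinning code below: persistent pins
// (nvidia_p2p_get_pages_persistent) register no free callback and ignore
// p2p_token/va_space, so the pin survives until we put it explicitly;
// traditional pins register gdrdrv_get_pages_free_callback, which the GPU
// driver may fire at any time to revoke the pages underneath us.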

//-----------------------------------------------------------------------------

static int gdrdrv_open(struct inode *inode, struct file *filp)
{
    unsigned int minor = MINOR(inode->i_rdev);
    int ret = 0;
    gdr_info_t *info = NULL;

    gdr_dbg("minor=%d filp=0x%px\n", minor, filp);
    if (minor >= 1) {
        gdr_err("device minor number too big!\n");
        ret = -ENXIO;
        goto out;
    }

    info = kzalloc(sizeof(gdr_info_t), GFP_KERNEL);
    if (!info) {
        gdr_err("can't alloc kernel memory\n");
        ret = -ENOMEM;
        goto out;
    }

    INIT_LIST_HEAD(&info->mr_list);
    mutex_init(&info->lock);

    // The GPU driver does not support sharing GPU allocations at fork time.
    // Hence we track the task group owning the driver fd and prevent other
    // processes from using it.
    info->tgid = task_tgid(current);

    address_space_init_once(&info->mapping);
    info->mapping.host = inode;
    info->mapping.a_ops = inode->i_mapping->a_ops;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0)
    info->mapping.backing_dev_info = inode->i_mapping->backing_dev_info;
#endif
    filp->f_mapping = &info->mapping;

    filp->private_data = info;

out:
    return ret;
}

//-----------------------------------------------------------------------------

/**
 * Clean up and free all resources (e.g., page_table) associated with this mr.
 *
 * Prerequisites:
 * - mr->sem must be held in down_write mode before calling this function.
 * - There is no mapping associated with this mr.
 *
 * After this function returns, mr is freed and cannot be accessed anymore.
 */
static void gdr_free_mr_unlocked(gdr_mr_t *mr)
{
    int status = 0;
    nvidia_p2p_page_table_t *page_table = NULL;

    BUG_ON(!mr);
    BUG_ON(gdr_mr_is_mapped(mr));

    page_table = mr->page_table;
    if (page_table) {
        gdr_info("invoking nvidia_p2p_put_pages(va=0x%llx p2p_tok=%llx va_tok=%x)\n",
                 mr->va, mr->p2p_token, mr->va_space);

        // We reach here before gdrdrv_get_pages_free_callback.
        // However, it might be waiting on the semaphore.
        // Release the semaphore to let it make progress.
        up_write(&mr->sem);

        // In case gdrdrv_get_pages_free_callback is in flight, nvidia_p2p_put_pages will block.
#ifdef NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
        if (gdr_use_persistent_mapping()) {
            status = nvidia_p2p_put_pages_persistent(mr->va, page_table, 0);
            if (status) {
                gdr_err("nvidia_p2p_put_pages_persistent error %d\n", status);
            }
        } else {
            status = nvidia_p2p_put_pages(mr->p2p_token, mr->va_space, mr->va, page_table);
            if (status) {
                gdr_err("nvidia_p2p_put_pages error %d, async callback may have been fired\n", status);
            }
        }
#else
        status = nvidia_p2p_put_pages(mr->p2p_token, mr->va_space, mr->va, page_table);
        if (status) {
            gdr_err("nvidia_p2p_put_pages error %d, async callback may have been fired\n", status);
        }
#endif

    } else {
        gdr_dbg("invoking unpin_buffer while callback has already been fired\n");

        // From this point, no other code path will access this mr.
        // We release the semaphore and clear the mr.
        up_write(&mr->sem);
    }

    memset(mr, 0, sizeof(*mr));
    kfree(mr);
}

//-----------------------------------------------------------------------------

static int gdrdrv_release(struct inode *inode, struct file *filp)
{
    gdr_info_t *info = filp->private_data;
    gdr_mr_t *mr = NULL;
    struct list_head *p, *n;

    gdr_dbg("closing\n");

    if (!info) {
        gdr_err("filp contains no info\n");
        return -EIO;
    }

    mutex_lock(&info->lock);
    list_for_each_safe(p, n, &info->mr_list) {
        mr = list_entry(p, gdr_mr_t, node);

        down_write(&mr->sem);
        gdr_info("freeing MR=0x%px\n", mr);

        if (gdr_mr_is_mapped(mr)) {
            gdr_mr_destroy_all_mappings(mr);
        }

        list_del(&mr->node);

        gdr_free_mr_unlocked(mr);
    }
    mutex_unlock(&info->lock);

    filp->f_mapping = NULL;

    kfree(info);
    filp->private_data = NULL;

    return 0;
}

//-----------------------------------------------------------------------------

static gdr_mr_t *gdr_mr_from_handle_unlocked(gdr_info_t *info, gdr_hnd_t handle)
{
    gdr_mr_t *mr;
    struct list_head *p;

    list_for_each(p, &info->mr_list) {
        mr = list_entry(p, gdr_mr_t, node);
        gdr_dbg("mr->handle=0x%llx handle=0x%llx\n", mr->handle, handle);
        if (handle == mr->handle)
            return mr;
    }

    // Not found: return NULL; callers use it to detect an invalid handle.
    return NULL;
}

/**
 * Convert a handle to its mr and acquire mr->sem in read or write mode.
 * On success, return the mr, which is guaranteed to stay valid until
 * gdr_put_mr is called. Otherwise, return NULL.
 */
static inline gdr_mr_t *gdr_get_mr_from_handle(gdr_info_t *info, gdr_hnd_t handle, int write)
{
    gdr_mr_t *mr;
    mutex_lock(&info->lock);
    mr = gdr_mr_from_handle_unlocked(info, handle);
    if (mr) {
        if (write)
            down_write(&mr->sem);
        else
            down_read(&mr->sem);
    }
    mutex_unlock(&info->lock);
    return mr;
}

#define gdr_get_mr_from_handle_read(info, handle)  (gdr_get_mr_from_handle((info), (handle), 0))
#define gdr_get_mr_from_handle_write(info, handle) (gdr_get_mr_from_handle((info), (handle), 1))

//-----------------------------------------------------------------------------

/**
 * Put the mr object. The `write` parameter must match the one passed to the
 * previous gdr_get_mr_from_handle call.
 * After this function returns, mr may cease to exist (freed). It must not be
 * accessed again.
 */
static inline void gdr_put_mr(gdr_mr_t *mr, int write)
{
    if (write)
        up_write(&mr->sem);
    else
        up_read(&mr->sem);
}

#define gdr_put_mr_read(mr)  (gdr_put_mr((mr), 0))
#define gdr_put_mr_write(mr) (gdr_put_mr((mr), 1))

//-----------------------------------------------------------------------------

// off is host page-aligned because of the kernel mmap interface;
// the extra available bits could be abused for other purposes

static gdr_hnd_t gdrdrv_handle_from_off(unsigned long off)
{
    return (gdr_hnd_t)(off);
}
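
// Worked example: user space mmaps with byte offset (handle << PAGE_SHIFT);
// the kernel hands us vma->vm_pgoff = offset >> PAGE_SHIFT, which is exactly
// the handle again, so the conversion above is an identity cast.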

//-----------------------------------------------------------------------------

typedef void (*gdr_free_callback_fn_t)(void *);

static void gdrdrv_get_pages_free_callback(void *data)
{
    gdr_mr_t *mr = data;
    nvidia_p2p_page_table_t *page_table = NULL;
    gdr_info("free callback\n");
    // can't take the info->lock here due to potential AB-BA
    // deadlock with internal NV driver lock(s)
    down_write(&mr->sem);
    mr->cb_flag = 1;
    page_table = mr->page_table;
    if (page_table) {
        nvidia_p2p_free_page_table(page_table);
        if (gdr_mr_is_mapped(mr))
            gdr_mr_destroy_all_mappings(mr);
    } else {
        gdr_dbg("free callback, page_table is NULL\n");
    }
    mr->page_table = NULL;
    up_write(&mr->sem);
}
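
// Invalidation flow implemented above: the GPU driver fires this callback
// when it revokes the pinned pages. We mark the mr via cb_flag (user space
// polls it through GDRDRV_IOC_GET_CB_FLAG), free the page table, and zap any
// live CPU mapping so later accesses fault instead of touching stale GPU
// memory. The mr itself is freed later by unpin or release.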

//-----------------------------------------------------------------------------

/**
 * Generate mr->handle. This function must be called under info->lock.
 *
 * Prerequisites:
 * - mr->mapped_size is set and rounded up to a multiple of MAX(PAGE_SIZE, GPU_PAGE_SIZE).
 * - mr->sem must be held in down_write mode before calling this function.
 *
 * Return 0 on success, -1 on failure.
 */
static inline int gdr_generate_mr_handle(gdr_info_t *info, gdr_mr_t *mr)
{
    // The user-space library passes the memory (handle << PAGE_SHIFT) as the
    // mmap offset, and offsets are used to determine the VMAs to delete during
    // invalidation.
    // Hence, we need [(handle << PAGE_SHIFT), (handle << PAGE_SHIFT) + size - 1]
    // to correspond to a unique VMA. Note that size here must match the
    // original mmap size.

    gdr_hnd_t next_handle;

    WARN_ON(!mutex_is_locked(&info->lock));

    // We have run out of handles, so fail.
    if (unlikely(info->next_handle_overflow))
        return -1;

    next_handle = info->next_handle + MAX(1, mr->mapped_size >> PAGE_SHIFT);

    // The next allocation would overflow the handle space (handles must still
    // fit after being shifted left by PAGE_SHIFT), so we mark it.
    if (unlikely((next_handle & ((gdr_hnd_t)(-1) >> PAGE_SHIFT)) < info->next_handle))
        info->next_handle_overflow = 1;

    mr->handle = info->next_handle;
    info->next_handle = next_handle;

    return 0;
}
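
// Worked example with 4KiB host pages (PAGE_SHIFT = 12): pinning a buffer
// with mapped_size = 0x30000 consumes 0x30 handle units. The first pin gets
// handle 0 and advances next_handle to 0x30; the second pin gets handle 0x30.
// Their mmap offset ranges, [0x0, 0x30000) and [0x30000, ...), are therefore
// adjacent but non-overlapping, as required for per-mr VMA invalidation.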

//-----------------------------------------------------------------------------

static int __gdrdrv_pin_buffer(gdr_info_t *info, u64 addr, u64 size, u64 p2p_token, u32 va_space, gdr_hnd_t *p_handle)
{
    int ret = 0;
    struct nvidia_p2p_page_table *page_table = NULL;
    u64 page_virt_start;
    u64 page_virt_end;
    size_t rounded_size;
    gdr_mr_t *mr = NULL;
    gdr_free_callback_fn_t free_callback_fn;
#ifndef CONFIG_ARM64
    cycles_t ta, tb;
#endif

    mr = kmalloc(sizeof(gdr_mr_t), GFP_KERNEL);
    if (!mr) {
        gdr_err("can't alloc kernel memory\n");
        ret = -ENOMEM;
        goto out;
    }
    memset(mr, 0, sizeof(*mr));

    // Do proper alignment, as required by the NVIDIA driver.
    // Both size and addr must be aligned per the nvidia_p2p_get_pages* API.
    page_virt_start = addr & GPU_PAGE_MASK;
    page_virt_end = round_up((addr + size), GPU_PAGE_SIZE);
    rounded_size = page_virt_end - page_virt_start;
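
    /*
     * Worked example (GPU_PAGE_SIZE = 64KiB): addr = 0x7f0000012345 and
     * size = 0x20000 give page_virt_start = 0x7f0000010000,
     * page_virt_end = 0x7f0000040000, and rounded_size = 0x30000;
     * mr->offset = 0x2345 records the sub-page offset that user space
     * adds back after mmap.
     */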

    init_rwsem(&mr->sem);

    free_callback_fn = gdr_use_persistent_mapping() ? NULL : gdrdrv_get_pages_free_callback;

    mr->offset = addr & GPU_PAGE_OFFSET;
    if (free_callback_fn) {
        mr->p2p_token = p2p_token;
        mr->va_space = va_space;
    } else {
        // Tokens cannot be used with persistent mapping.
        mr->p2p_token = 0;
        mr->va_space = 0;
    }
    mr->va = page_virt_start;
    mr->mapped_size = rounded_size;
    mr->cpu_mapping_type = GDR_MR_NONE;
    mr->page_table = NULL;
    mr->cb_flag = 0;

#ifndef CONFIG_ARM64
    ta = get_cycles();
#endif

    // After nvidia_p2p_get_pages returns (successfully), gdrdrv_get_pages_free_callback may be invoked at any time.
    // mr setup must be done before calling that API. The memory barrier is included in down_write.

    // We take this semaphore to prevent a race with gdrdrv_get_pages_free_callback.
    down_write(&mr->sem);

#ifdef NVIDIA_P2P_CAP_GET_PAGES_PERSISTENT_API
    if (free_callback_fn) {
        ret = nvidia_p2p_get_pages(mr->p2p_token, mr->va_space, mr->va, mr->mapped_size, &page_table,
                                   free_callback_fn, mr);
        gdr_info("invoking nvidia_p2p_get_pages(va=0x%llx len=%lld p2p_tok=%llx va_tok=%x callback=%px)\n",
                 mr->va, mr->mapped_size, mr->p2p_token, mr->va_space, free_callback_fn);
    } else {
        ret = nvidia_p2p_get_pages_persistent(mr->va, mr->mapped_size, &page_table, 0);
        gdr_info("invoking nvidia_p2p_get_pages_persistent(va=0x%llx len=%lld)\n",
                 mr->va, mr->mapped_size);
    }
#else
    ret = nvidia_p2p_get_pages(mr->p2p_token, mr->va_space, mr->va, mr->mapped_size, &page_table,
                               free_callback_fn, mr);
    gdr_info("invoking nvidia_p2p_get_pages(va=0x%llx len=%lld p2p_tok=%llx va_tok=%x callback=%px)\n",
             mr->va, mr->mapped_size, mr->p2p_token, mr->va_space, free_callback_fn);
#endif

#ifndef CONFIG_ARM64
    tb = get_cycles();
#endif
    if (ret < 0) {
        gdr_err("nvidia_p2p_get_pages(va=%llx len=%lld p2p_token=%llx va_space=%x callback=%px) failed [ret = %d]\n",
                mr->va, mr->mapped_size, mr->p2p_token, mr->va_space, free_callback_fn, ret);
        goto out;
    }
    mr->page_table = page_table;
#ifndef CONFIG_ARM64
    mr->tm_cycles = tb - ta;
    mr->tsc_khz = get_tsc_khz();
#endif

    // check the version before accessing the page table
    if (!NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE(page_table)) {
        gdr_err("incompatible page table version 0x%08x\n", page_table->version);
        ret = -EFAULT;
        goto out;
    }

    switch (page_table->page_size) {
    case NVIDIA_P2P_PAGE_SIZE_4KB:
        mr->page_size = 4*1024;
        break;
    case NVIDIA_P2P_PAGE_SIZE_64KB:
        mr->page_size = 64*1024;
        break;
    case NVIDIA_P2P_PAGE_SIZE_128KB:
        mr->page_size = 128*1024;
        break;
    default:
        gdr_err("unexpected page_size\n");
        ret = -EINVAL;
        goto out;
    }

    // we are not really ready for a different page size
    if (page_table->page_size != NVIDIA_P2P_PAGE_SIZE_64KB) {
        gdr_err("nvidia_p2p_get_pages assumption of 64KB pages failed size_id=%d\n", page_table->page_size);
        ret = -EINVAL;
        goto out;
    }
    {
        int i;
        gdr_dbg("page table entries: %d\n", page_table->entries);
        for (i = 0; i < MIN(20, page_table->entries); ++i) {
            gdr_dbg("page[%d]=0x%016llx%s\n", i, page_table->pages[i]->physical_address,
                    (i == 19 && page_table->entries > 20) ? " and counting" : "");
        }
    }

    // here a typical driver would use the page_table to fill in some HW
    // DMA data structure

    mutex_lock(&info->lock);
    if (gdr_generate_mr_handle(info, mr) != 0) {
        gdr_err("No address space left for BAR1 mapping.\n");
        ret = -ENOMEM;
    }

    if (!ret) {
        list_add(&mr->node, &info->mr_list);
        *p_handle = mr->handle;
        up_write(&mr->sem);
    }
    mutex_unlock(&info->lock);

out:
    if (ret && mr) {
        gdr_free_mr_unlocked(mr);
        mr = NULL;
    }

    return ret;
}

//-----------------------------------------------------------------------------

static int __gdrdrv_unpin_buffer(gdr_info_t *info, gdr_hnd_t handle)
{
    int ret = 0;

    gdr_mr_t *mr = NULL;

    // someone might try to traverse the list and/or to do something
    // to the mr at the same time, so let's lock here
    mutex_lock(&info->lock);
    mr = gdr_mr_from_handle_unlocked(info, handle);
    if (NULL == mr) {
        gdr_err("unexpected handle %llx while unmapping buffer\n", handle);
        ret = -EINVAL;
    } else {
        // Found the mr. Let's lock it.
        down_write(&mr->sem);
        if (gdr_mr_is_mapped(mr)) {
            gdr_mr_destroy_all_mappings(mr);
        }

        // Remove this handle from the list under info->lock.
        // After this, the race with gdrdrv_get_pages_free_callback is the
        // only thing we need to care about.
        list_del(&mr->node);
    }
    mutex_unlock(&info->lock);

    if (ret)
        goto out;

    gdr_free_mr_unlocked(mr);

out:
    return ret;
}

//-----------------------------------------------------------------------------

static int gdrdrv_pin_buffer(gdr_info_t *info, void __user *_params)
{
    int ret = 0;

    struct GDRDRV_IOC_PIN_BUFFER_PARAMS params = {0};

    int has_handle = 0;
    gdr_hnd_t handle;

    if (copy_from_user(&params, _params, sizeof(params))) {
        gdr_err("copy_from_user failed on user pointer 0x%px\n", _params);
        ret = -EFAULT;
        goto out;
    }

    if (!params.addr) {
        gdr_err("NULL device pointer\n");
        ret = -EINVAL;
        goto out;
    }

    ret = __gdrdrv_pin_buffer(info, params.addr, params.size, params.p2p_token, params.va_space, &handle);
    if (ret)
        goto out;

    has_handle = 1;
    params.handle = handle;

    if (copy_to_user(_params, &params, sizeof(params))) {
        gdr_err("copy_to_user failed on user pointer 0x%px\n", _params);
        ret = -EFAULT;
    }

out:
    if (ret) {
        if (has_handle)
            __gdrdrv_unpin_buffer(info, handle);
    }

    return ret;
}

//-----------------------------------------------------------------------------

static int gdrdrv_unpin_buffer(gdr_info_t *info, void __user *_params)
{
    struct GDRDRV_IOC_UNPIN_BUFFER_PARAMS params = {0};
    int ret = 0;

    if (copy_from_user(&params, _params, sizeof(params))) {
        gdr_err("copy_from_user failed on user pointer 0x%px\n", _params);
        return -EFAULT;
    }

    ret = __gdrdrv_unpin_buffer(info, params.handle);

    return ret;
}

//-----------------------------------------------------------------------------

static int gdrdrv_get_cb_flag(gdr_info_t *info, void __user *_params)
{
    struct GDRDRV_IOC_GET_CB_FLAG_PARAMS params = {0};
    int ret = 0;
    gdr_mr_t *mr = NULL;

    if (copy_from_user(&params, _params, sizeof(params))) {
        gdr_err("copy_from_user failed on user pointer 0x%px\n", _params);
        return -EFAULT;
    }

    mr = gdr_get_mr_from_handle_read(info, params.handle);
    if (NULL == mr) {
        gdr_err("unexpected handle %llx in get_cb_flag\n", params.handle);
        ret = -EINVAL;
        goto out;
    }

    params.flag = !!(mr->cb_flag);

    gdr_put_mr_read(mr);

    if (copy_to_user(_params, &params, sizeof(params))) {
        gdr_err("copy_to_user failed on user pointer 0x%px\n", _params);
        ret = -EFAULT;
    }

out:
    return ret;
}

//-----------------------------------------------------------------------------

static int gdrdrv_get_info(gdr_info_t *info, void __user *_params)
{
    struct GDRDRV_IOC_GET_INFO_PARAMS params = {0};
    int ret = 0;
    gdr_mr_t *mr = NULL;

    if (copy_from_user(&params, _params, sizeof(params))) {
        gdr_err("copy_from_user failed on user pointer 0x%px\n", _params);
        ret = -EFAULT;
        goto out;
    }

    mr = gdr_get_mr_from_handle_read(info, params.handle);
    if (NULL == mr) {
        gdr_err("unexpected handle %llx in get_info\n", params.handle);
        ret = -EINVAL;
        goto out;
    }

    params.va = mr->va;
    params.mapped_size = mr->mapped_size;
    params.page_size = mr->page_size;
    params.tm_cycles = mr->tm_cycles;
    params.tsc_khz = mr->tsc_khz;
    params.mapped = gdr_mr_is_mapped(mr);
    params.wc_mapping = (mr->cpu_mapping_type == GDR_MR_WC);

    gdr_put_mr_read(mr);

    if (copy_to_user(_params, &params, sizeof(params))) {
        gdr_err("copy_to_user failed on user pointer 0x%px\n", _params);
        ret = -EFAULT;
    }
out:
    return ret;
}

//-----------------------------------------------------------------------------

static int gdrdrv_get_info_v2(gdr_info_t *info, void __user *_params)
{
    struct GDRDRV_IOC_GET_INFO_V2_PARAMS params = {0};
    int ret = 0;
    gdr_mr_t *mr = NULL;

    if (copy_from_user(&params, _params, sizeof(params))) {
        gdr_err("copy_from_user failed on user pointer 0x%px\n", _params);
        ret = -EFAULT;
        goto out;
    }

    mr = gdr_get_mr_from_handle_read(info, params.handle);
    if (NULL == mr) {
        gdr_err("unexpected handle %llx in get_info_v2\n", params.handle);
        ret = -EINVAL;
        goto out;
    }

    params.va = mr->va;
    params.mapped_size = mr->mapped_size;
    params.page_size = mr->page_size;
    params.tm_cycles = mr->tm_cycles;
    params.tsc_khz = mr->tsc_khz;
    params.mapping_type = mr->cpu_mapping_type;

    gdr_put_mr_read(mr);

    if (copy_to_user(_params, &params, sizeof(params))) {
        gdr_err("copy_to_user failed on user pointer 0x%px\n", _params);
        ret = -EFAULT;
    }
out:
    return ret;
}

//-----------------------------------------------------------------------------

static int gdrdrv_get_version(gdr_info_t *info, void __user *_params)
{
    struct GDRDRV_IOC_GET_VERSION_PARAMS params = {0};
    int ret = 0;

    params.gdrdrv_version = GDRDRV_VERSION;
    params.minimum_gdr_api_version = MINIMUM_GDR_API_VERSION;

    if (copy_to_user(_params, &params, sizeof(params))) {
        gdr_err("copy_to_user failed on user pointer %p\n", _params);
        ret = -EFAULT;
    }

    return ret;
}

//-----------------------------------------------------------------------------
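
/*
 * User-space sketch of the ioctl ABI dispatched below. This is illustrative
 * only and is normally hidden behind the gdrcopy library; the struct field
 * names follow the params usage above, while d_ptr, len, and aligned_len are
 * hypothetical placeholders.
 *
 *   int fd = open("/dev/gdrdrv", O_RDWR);
 *   struct GDRDRV_IOC_PIN_BUFFER_PARAMS pin = { .addr = d_ptr, .size = len };
 *   if (ioctl(fd, GDRDRV_IOC_PIN_BUFFER, &pin))      // pin GPU memory
 *       err(1, "pin");
 *   void *bar = mmap(NULL, aligned_len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                    fd, (off_t)pin.handle << PAGE_SHIFT); // map BAR1 window
 *   struct GDRDRV_IOC_UNPIN_BUFFER_PARAMS unpin = { .handle = pin.handle };
 *   munmap(bar, aligned_len);
 *   ioctl(fd, GDRDRV_IOC_UNPIN_BUFFER, &unpin);      // release the pin
 */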

static int gdrdrv_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
{
    int ret = 0;
    gdr_info_t *info = filp->private_data;
    void __user *argp = (void __user *)arg;

    gdr_dbg("ioctl called (cmd 0x%x)\n", cmd);

    if (_IOC_TYPE(cmd) != GDRDRV_IOCTL) {
        gdr_err("malformed IOCTL code type=%08x\n", _IOC_TYPE(cmd));
        return -EINVAL;
    }

    if (!info) {
        gdr_err("filp contains no info\n");
        return -EIO;
    }
    // Check that the caller is the same process that did gdrdrv_open
    if (!gdrdrv_check_same_process(info, current)) {
        gdr_dbg("filp is not opened by the current process\n");
        return -EACCES;
    }

    switch (cmd) {
    case GDRDRV_IOC_PIN_BUFFER:
        ret = gdrdrv_pin_buffer(info, argp);
        break;

    case GDRDRV_IOC_UNPIN_BUFFER:
        ret = gdrdrv_unpin_buffer(info, argp);
        break;

    case GDRDRV_IOC_GET_CB_FLAG:
        ret = gdrdrv_get_cb_flag(info, argp);
        break;

    case GDRDRV_IOC_GET_INFO:
        ret = gdrdrv_get_info(info, argp);
        break;

    case GDRDRV_IOC_GET_INFO_V2:
        ret = gdrdrv_get_info_v2(info, argp);
        break;

    case GDRDRV_IOC_GET_VERSION:
        ret = gdrdrv_get_version(info, argp);
        break;

    default:
        gdr_err("unsupported IOCTL code\n");
        ret = -ENOTTY;
    }
    return ret;
}

//-----------------------------------------------------------------------------

#ifdef HAVE_UNLOCKED_IOCTL
static long gdrdrv_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    return gdrdrv_ioctl(0, filp, cmd, arg);
}
#endif

/*----------------------------------------------------------------------------*/

void gdrdrv_vma_close(struct vm_area_struct *vma)
{
    gdr_hnd_t handle;
    gdr_mr_t *mr = NULL;
    gdr_info_t *info = NULL;

    if (!vma->vm_file)
        return;

    info = vma->vm_file->private_data;
    if (!info)
        return;

    handle = gdrdrv_handle_from_off(vma->vm_pgoff);
    mr = gdr_get_mr_from_handle_write(info, handle);
    if (!mr)
        return;

    gdr_dbg("closing vma=0x%px vm_file=0x%px mr=0x%px mr->vma=0x%px\n", vma, vma->vm_file, mr, mr->vma);
    // TODO: handle multiple vma's
    mr->vma = NULL;
    mr->cpu_mapping_type = GDR_MR_NONE;
    gdr_put_mr_write(mr);
}

/*----------------------------------------------------------------------------*/

static const struct vm_operations_struct gdrdrv_vm_ops = {
    .close = gdrdrv_vma_close,
};

/*----------------------------------------------------------------------------*/

/**
 * Starting from kernel version 5.18-rc1, io_remap_pfn_range may call a
 * GPL-only function. This happens on x86 platforms that have
 * CONFIG_ARCH_HAS_CC_PLATFORM defined. The root cause is the pgprot_decrypted
 * implementation, which was changed to use cc_mkdec. To avoid the
 * GPL-incompatibility issue with the proprietary flavor of the NVIDIA driver,
 * we reimplement io_remap_pfn_range following Linux kernel 5.17.15, which
 * predates support for Intel CC.
 */
static inline int gdrdrv_io_remap_pfn_range(struct vm_area_struct *vma, unsigned long vaddr, unsigned long pfn, size_t size, pgprot_t prot)
{
#if defined(GDRDRV_OPENSOURCE_NVIDIA) || !((defined(CONFIG_X86_64) || defined(CONFIG_X86_32)) && IS_ENABLED(CONFIG_ARCH_HAS_CC_PLATFORM))
    return io_remap_pfn_range(vma, vaddr, pfn, size, prot);
#else

#ifndef CONFIG_AMD_MEM_ENCRYPT
#warning "CC is not fully functional in gdrdrv with the proprietary flavor of NVIDIA driver on Intel CPU. Use the open-source flavor if you want full support."
#endif

    return remap_pfn_range(vma, vaddr, pfn, size, __pgprot(__sme_clr(pgprot_val(prot))));
#endif
}

/*----------------------------------------------------------------------------*/

static int gdrdrv_remap_gpu_mem(struct vm_area_struct *vma, unsigned long vaddr, unsigned long paddr, size_t size, gdr_mr_type_t mapping_type)
{
    int ret = 0;
    unsigned long pfn;

    gdr_dbg("mapping phys mem addr=0x%lx size=%zu at user virt addr=0x%lx\n",
            paddr, size, vaddr);

    if (!size) {
        gdr_dbg("size == 0\n");
        goto out;
    }
    // in case the original user address was not properly host page-aligned
    if (0 != (paddr & (PAGE_SIZE-1))) {
        gdr_err("paddr=%lx, original mr address was not host page-aligned\n", paddr);
        ret = -EINVAL;
        goto out;
    }
    if (0 != (vaddr & (PAGE_SIZE-1))) {
        gdr_err("vaddr=%lx, trying to map to a non page-aligned vaddr\n", vaddr);
        ret = -EINVAL;
        goto out;
    }
    pfn = paddr >> PAGE_SHIFT;

    // Prevent the mmapped VMA from propagating to child processes
    vm_flags_set(vma, VM_DONTCOPY);

    if (mapping_type == GDR_MR_WC) {
        // override prot to create non-coherent WC mappings
        vma->vm_page_prot = pgprot_modify_writecombine(vma->vm_page_prot);
    } else if (mapping_type == GDR_MR_DEVICE) {
        // override prot to create non-coherent device mappings
        vma->vm_page_prot = pgprot_modify_device(vma->vm_page_prot);
    } else {
        // by default, vm_page_prot is already set to create cached mappings
    }
    if (gdrdrv_io_remap_pfn_range(vma, vaddr, pfn, size, vma->vm_page_prot)) {
        gdr_err("error in gdrdrv_io_remap_pfn_range()\n");
        ret = -EAGAIN;
        goto out;
    }

out:
    return ret;
}

//-----------------------------------------------------------------------------
// BUG: should obtain GPU_PAGE_SIZE from page_table!!!

static int gdrdrv_mmap(struct file *filp, struct vm_area_struct *vma)
{
    int ret = 0;
    size_t size = vma->vm_end - vma->vm_start;
    gdr_info_t *info = filp->private_data;
    gdr_hnd_t handle;
    gdr_mr_t *mr = NULL;
    u64 offset;
    int p = 0;
    unsigned long vaddr;
    gdr_mr_type_t cpu_mapping_type = GDR_MR_NONE;

    gdr_info("mmap filp=0x%px vma=0x%px vm_file=0x%px start=0x%lx size=%zu off=0x%lx\n", filp, vma, vma->vm_file, vma->vm_start, size, vma->vm_pgoff);

    if (!info) {
        gdr_err("filp contains no info\n");
        return -EIO;
    }
    // Check that the caller is the same process that did gdrdrv_open
    if (!gdrdrv_check_same_process(info, current)) {
        gdr_dbg("filp is not opened by the current process\n");
        return -EACCES;
    }

    handle = gdrdrv_handle_from_off(vma->vm_pgoff);
    mr = gdr_get_mr_from_handle_write(info, handle);
    if (!mr) {
        gdr_dbg("cannot find handle in mr_list\n");
        ret = -EINVAL;
        goto out;
    }
    offset = mr->offset;
    if (gdr_mr_is_mapped(mr)) {
        gdr_dbg("mr has been mapped already\n");
        ret = -EINVAL;
        goto out;
    }
    if (mr->cb_flag) {
        gdr_dbg("mr has been invalidated\n");
        ret = -EINVAL;
        goto out;
    }
    if (!mr->page_table) {
        gdr_dbg("invalid mr state\n");
        ret = -EINVAL;
        goto out;
    }
    if (mr->page_table->entries <= 0) {
        gdr_dbg("invalid entries in page table\n");
        ret = -EINVAL;
        goto out;
    }
    if (offset) {
        gdr_dbg("offset != 0 is not supported\n");
        ret = -EINVAL;
        goto out;
    }
    if (offset > GPU_PAGE_SIZE * mr->page_table->entries) {
        gdr_dbg("offset %llu too big\n", offset);
        ret = -EINVAL;
        goto out;
    }
    if (size + offset > GPU_PAGE_SIZE * mr->page_table->entries) {
        gdr_dbg("size %zu too big\n", size);
        ret = -EINVAL;
        goto out;
    }
    if (size % PAGE_SIZE != 0) {
        gdr_dbg("size is not a multiple of PAGE_SIZE\n");
    }

    // Set to None first
    mr->cpu_mapping_type = GDR_MR_NONE;
    vma->vm_ops = &gdrdrv_vm_ops;

    // check for physically contiguous IO ranges
    p = 0;
    vaddr = vma->vm_start;
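
    /*
     * The loop below coalesces runs of physically contiguous 64KiB GPU pages
     * into a single remap call. For example (hypothetical addresses), entries
     * at 0x3800000000, 0x3800010000, 0x3800020000 form one 192KiB chunk,
     * while a jump to 0x3900000000 starts a new chunk.
     */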
    do {
        // map individual physically contiguous IO ranges
        unsigned long paddr = mr->page_table->pages[p]->physical_address;
        unsigned nentries = 1;
        size_t len;
        gdr_mr_type_t chunk_mapping_type = GDR_MR_NONE;

        gdr_dbg("range starts with p=%d vaddr=%lx page_paddr=%lx\n", p, vaddr, paddr);

        ++p;
        // check p-1 and p for contiguity
        {
            unsigned long prev_page_paddr = mr->page_table->pages[p-1]->physical_address;
            for (; p < mr->page_table->entries; ++p) {
                struct nvidia_p2p_page *page = mr->page_table->pages[p];
                unsigned long cur_page_paddr = page->physical_address;
                //gdr_dbg("p=%d prev_page_paddr=%lx cur_page_paddr=%lx\n",
                //        p, prev_page_paddr, cur_page_paddr);
                if (prev_page_paddr + GPU_PAGE_SIZE != cur_page_paddr) {
                    gdr_dbg("non-contig p=%d prev_page_paddr=%lx cur_page_paddr=%lx\n",
                            p, prev_page_paddr, cur_page_paddr);
                    break;
                }
                prev_page_paddr = cur_page_paddr;
                ++nentries;
            }
        }
        // offset not supported, see the check above
        len = MIN(size, GPU_PAGE_SIZE * nentries);
        // phys range is [paddr, paddr+len-1]
        gdr_dbg("mapping p=%u entries=%d offset=%llx len=%zu vaddr=%lx paddr=%lx\n",
                p, nentries, offset, len, vaddr, paddr);
        if (gdr_pfn_is_ram(paddr >> PAGE_SHIFT)) {
            WARN_ON_ONCE(!gdrdrv_cpu_can_cache_gpu_mappings);
            chunk_mapping_type = GDR_MR_CACHING;
        } else if (gdrdrv_cpu_must_use_device_mapping) {
            chunk_mapping_type = GDR_MR_DEVICE;
        } else {
            // flag the whole mr as a WC mapping if at least one chunk is WC
            chunk_mapping_type = GDR_MR_WC;
        }

        if (cpu_mapping_type == GDR_MR_NONE)
            cpu_mapping_type = chunk_mapping_type;

        // We don't handle different chunks having different mapping types.
        // This scenario should never happen.
        BUG_ON(cpu_mapping_type != chunk_mapping_type);

        ret = gdrdrv_remap_gpu_mem(vma, vaddr, paddr, len, cpu_mapping_type);
        if (ret) {
            gdr_err("error %d in gdrdrv_remap_gpu_mem\n", ret);
            goto out;
        }
        vaddr += len;
        size -= len;
        offset = 0;
    } while (size && p < mr->page_table->entries);

    if (vaddr != vma->vm_end) {
        gdr_err("vaddr=%lx != vm_end=%lx\n", vaddr, vma->vm_end);
        ret = -EINVAL;
    }

out:
    if (ret) {
        if (mr) {
            mr->vma = NULL;
            mr->mapping = NULL;
            mr->cpu_mapping_type = GDR_MR_NONE;
        }
    } else {
        mr->vma = vma;
        mr->mapping = filp->f_mapping;

        BUG_ON(cpu_mapping_type == GDR_MR_NONE);
        mr->cpu_mapping_type = cpu_mapping_type;

        gdr_dbg("mr vma=0x%px mapping=0x%px\n", mr->vma, mr->mapping);
    }

    if (mr)
        gdr_put_mr_write(mr);

    return ret;
}

//-----------------------------------------------------------------------------

struct file_operations gdrdrv_fops = {
    .owner = THIS_MODULE,

#ifdef HAVE_UNLOCKED_IOCTL
    .unlocked_ioctl = gdrdrv_unlocked_ioctl,
#else
    .ioctl = gdrdrv_ioctl,
#endif
    .open = gdrdrv_open,
    .release = gdrdrv_release,
    .mmap = gdrdrv_mmap
};

//-----------------------------------------------------------------------------

static int __init gdrdrv_init(void)
{
    int result;

    result = register_chrdev(gdrdrv_major, DEVNAME, &gdrdrv_fops);
    if (result < 0) {
        gdr_err("can't get major %d\n", gdrdrv_major);
        return result;
    }
    if (gdrdrv_major == 0) gdrdrv_major = result; /* dynamic */

    gdr_msg(KERN_INFO, "loading gdrdrv version %s built for %s NVIDIA driver\n", GDRDRV_VERSION_STRING, GDRDRV_BUILT_FOR_NVIDIA_FLAVOR_STRING);
    gdr_msg(KERN_INFO, "device registered with major number %d\n", gdrdrv_major);
    gdr_msg(KERN_INFO, "dbg traces %s, info traces %s\n", dbg_enabled ? "enabled" : "disabled", info_enabled ? "enabled" : "disabled");

#if defined(CONFIG_PPC64) && defined(PVR_POWER9)
    if (pvr_version_is(PVR_POWER9)) {
        // Approximating CPU-GPU coherence with the CPU model.
        // This might break in the future.
        // A better way would be to detect the presence of the IBM-NPU bridges
        // and verify that all GPUs are connected through them.
        gdrdrv_cpu_can_cache_gpu_mappings = 1;
    }
#elif defined(CONFIG_ARM64)
    // Grace-Hopper supports CPU cached mappings, but this feature might be disabled at runtime.
    // gdrdrv_pin_buffer will do the right thing.
    gdrdrv_cpu_can_cache_gpu_mappings = 1;
#endif

    if (gdrdrv_cpu_can_cache_gpu_mappings)
        gdr_msg(KERN_INFO, "The platform may support CPU cached mappings. The decision to use cached mappings is left to the pinning function.\n");

#if defined(CONFIG_ARM64)
    {
        // Some compilers are strict and do not allow us to declare a variable
        // in the for statement.
        int i;
        for (i = 0; i < ARRAY_SIZE(GDRDRV_BF3_PCI_ROOT_DEV_DEVICE_ID); ++i)
        {
            struct pci_dev *pdev = pci_get_device(GDRDRV_BF3_PCI_ROOT_DEV_VENDOR_ID, GDRDRV_BF3_PCI_ROOT_DEV_DEVICE_ID[i], NULL);
            if (pdev) {
                pci_dev_put(pdev);
                gdrdrv_cpu_must_use_device_mapping = 1;
                break;
            }
        }
    }
#endif

    if (gdrdrv_cpu_must_use_device_mapping)
        gdr_msg(KERN_INFO, "enabling use of CPU device mappings\n");

    if (gdr_use_persistent_mapping())
        gdr_msg(KERN_INFO, "Persistent mapping will be used\n");

    return 0;
}

//-----------------------------------------------------------------------------

static void __exit gdrdrv_cleanup(void)
{
    gdr_msg(KERN_INFO, "unregistering major number %d\n", gdrdrv_major);

    /* cleanup_module is never called if registering failed */
    unregister_chrdev(gdrdrv_major, DEVNAME);
}

//-----------------------------------------------------------------------------

module_init(gdrdrv_init);
module_exit(gdrdrv_cleanup);

/*
 * Local variables:
 * c-indent-level: 4
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */