/* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include using namespace std; #include "gdrapi.h" #include "common.hpp" using namespace gdrcopy::test; // manually tuned... int num_write_iters = 10000; int num_read_iters = 100; size_t _size = 128*1024; size_t copy_size = 0; size_t copy_offset = 0; int dev_id = 0; void print_usage(const char *path) { cout << "Usage: " << path << " [-h][-s ][-c ][-o ][-d ][-w ][-r ][-a ]" << endl; cout << endl; cout << "Options:" << endl; cout << " -h Print this help text" << endl; cout << " -s Buffer allocation size (default: " << _size << ")" << endl; cout << " -c Copy size (default: " << copy_size << ")" << endl; cout << " -o Copy offset (default: " << copy_offset << ")" << endl; cout << " -d GPU ID (default: " << dev_id << ")" << endl; cout << " -w Number of write iterations (default: " << num_write_iters << ")" << endl; cout << " -r Number of read iterations (default: " << num_read_iters << ")" << endl; cout << " -a GPU buffer allocation function (default: cuMemAlloc)" << endl; cout << " Choices: cuMemAlloc, cuMemCreate" << endl; } void run_test(CUdeviceptr d_A, size_t size) { uint32_t *init_buf = NULL; ASSERTDRV(cuMemAllocHost((void **)&init_buf, size)); ASSERT_NEQ(init_buf, (void*)0); init_hbuf_walking_bit(init_buf, size); gdr_t g = gdr_open_safe(); gdr_mh_t mh; BEGIN_CHECK { // tokens are optional in CUDA 6.0 // wave out the test if GPUDirectRDMA is not enabled BREAK_IF_NEQ(gdr_pin_buffer(g, d_A, size, 0, 0, &mh), 0); ASSERT_NEQ(mh, null_mh); void *map_d_ptr = NULL; ASSERT_EQ(gdr_map(g, mh, &map_d_ptr, size), 0); cout << "map_d_ptr: " << map_d_ptr << endl; gdr_info_t info; ASSERT_EQ(gdr_get_info(g, mh, &info), 0); cout << "info.va: " << hex << info.va << dec << endl; cout << "info.mapped_size: " << info.mapped_size << endl; cout << "info.page_size: " << info.page_size << endl; cout << "info.mapped: " << info.mapped << endl; cout << "info.wc_mapping: " << info.wc_mapping << endl; // remember that mappings start on a 64KB boundary, so let's // calculate the offset from the head of the mapping to the // beginning of the buffer int off = info.va - d_A; cout << "page offset: " << off << endl; uint32_t *buf_ptr = (uint32_t *)((char *)map_d_ptr + off); cout << "user-space pointer:" << buf_ptr << endl; // copy to GPU benchmark cout << "writing test, size=" << copy_size << " offset=" << copy_offset << " num_iters=" << num_write_iters << endl; struct timespec beg, end; clock_gettime(MYCLOCK, &beg); for (int iter=0; iter _size) { fprintf(stderr, "ERROR: offset + copy size run past the end of the buffer\n"); exit(EXIT_FAILURE); } size_t size = PAGE_ROUND_UP(_size, GPU_PAGE_SIZE); ASSERTDRV(cuInit(0)); int n_devices = 0; ASSERTDRV(cuDeviceGetCount(&n_devices)); CUdevice dev; for (int n=0; n