/*
 * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 *
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto. Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 *
 * See COPYRIGHT.txt for license information
 */

/* Defined before the common header is included, so the header can see it. */
#define CUMODULE_NAME "shmem_atomic_latency.cubin"

#include "atomic_one_sided_common.h"

/*
 * Per-type instantiations of the atomic-latency tests.
 *
 * Every DEFINE_LAT_* macro comes from atomic_one_sided_common.h and is invoked with
 * (C type, type-name token, atomic op, expressions...). The expressions refer to `value`, `cmp`,
 * `i` and `iter`, none of which are declared in this file, so they must be resolved inside the
 * macro expansion. Presumably the first expression is the value the flag is compared against and
 * the second is the operand handed to the atomic -- confirm against the header.
 *
 * The instantiations are given C linkage when compiled as C++ or as an NVSHMEM bitcode
 * application; the matching closing brace follows the last instantiation.
 */
#if defined __cplusplus || defined NVSHMEM_BITCODE_APPLICATION
extern "C" {
#endif

/* add */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, add, (value * (1 + i)),
                                          (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int, int, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(long, long, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(size_t, size, add, (value * (1 + i)), (value));

/* fetch_add */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, fetch_add, (value * (1 + i)),
                                      (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, fetch_add,
                                      (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int, int, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(long, long, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(size_t, size, fetch_add, (value * (1 + i)), (value));

/* and */
/*
 * Original note: "should get flag set to 0b1, 0b11, 0b111, etc."
 * NOTE(review): both expressions below are value << (i + 1), and main() passes an all-ones value
 * for these tests, which would clear one more low-order bit per step rather than set one. The
 * note looks copied from the `or` tests -- confirm.
 * NOTE(review): main() uses iter equal to the type width (64 / 32) for these tests. If i reaches
 * iter - 1, the shift count equals the width, which is undefined in C and C++ -- confirm the loop
 * bounds inside the macro.
 * Signed variants are disabled; see the TODO on the matching cases in main().
 */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, and, (value << (i + 1)),
                                          (value << (i + 1)));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, and, (value << (i + 1)),
                                          (value << (i + 1)));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, and, (value << (i + 1)),
                                          (value << (i + 1)));
/* DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, and, (value << (i + 1)),
 *                                           (value << (i + 1))); */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, and, (value << (i + 1)),
                                          (value << (i + 1)));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, and, (value << (i + 1)),
                                          (value << (i + 1)));
/* DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, and, (value << (i + 1)),
 *                                           (value << (i + 1))); */

/* fetch_and */
/*
 * Original note: "should get flag set to 0b1, 0b11, 0b111, etc."
 * NOTE(review): same expressions as the `and` group above, so the same two review notes apply
 * (low bits are cleared rather than set; shift count may reach the type width) -- confirm.
 */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, fetch_and, (value << (i + 1)),
                                      (value << (i + 1)));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, fetch_and, (value << (i + 1)),
                                      (value << (i + 1)));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, fetch_and,
                                      (value << (i + 1)), (value << (i + 1)));
/* DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, fetch_and, (value << (i + 1)),
 *                                       (value << (i + 1))); */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, fetch_and, (value << (i + 1)),
                                      (value << (i + 1)));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, fetch_and, (value << (i + 1)),
                                      (value << (i + 1)));
/* DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, fetch_and, (value << (i + 1)),
 *                                       (value << (i + 1))); */

/* inc */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(unsigned int, uint, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(unsigned long, ulong, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(unsigned long long, ulonglong, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(int32_t, int32, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(uint32_t, uint32, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(uint64_t, uint64, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(int, int, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(long, long, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(size_t, size, inc);

/* fetch_inc */
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(unsigned int, uint, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(unsigned long, ulong, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(unsigned long long, ulonglong, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(int32_t, int32, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(uint32_t, uint32, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(uint64_t, uint64, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(int, int, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(long, long, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(size_t, size, fetch_inc);

/* or */
/* should get flag set to 0b1, 0b11, 0b111, etc. */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, or, (cmp >> (iter - (i + 1))),
                                          (value << i));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, or, (cmp >> (iter - (i + 1))),
                                          (value << i));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, or,
                                          (cmp >> (iter - (i + 1))), (value << i));
/* DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, or, (cmp >> (iter - (i + 1))),
 *                                           (value << i)); */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, or, (cmp >> (iter - (i + 1))),
                                          (value << i));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, or, (cmp >> (iter - (i + 1))),
                                          (value << i));
/* DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, or, (cmp >> (iter - (i + 1))),
 *                                           (value << i)); */

/* fetch_or */
/* should get flag set to 0b1, 0b11, 0b111, etc. */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, fetch_or, (cmp >> (iter - (i + 1))),
                                      (value << i));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, fetch_or, (cmp >> (iter - (i + 1))),
                                      (value << i));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, fetch_or,
                                      (cmp >> (iter - (i + 1))), (value << i));
/* DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, fetch_or, (cmp >> (iter - (i + 1))),
 *                                       (value << i)); */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, fetch_or, (cmp >> (iter - (i + 1))),
                                      (value << i));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, fetch_or, (cmp >> (iter - (i + 1))),
                                      (value << i));
/* DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, fetch_or, (cmp >> (iter - (i + 1))),
 *                                       (value << i)); */

/* xor */
/* The expressions are i % 2 and the constant 1; signed types are instantiated here too. */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, xor, i % 2, 1);

/* fetch_xor */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, fetch_xor, i % 2, 1);

/* set */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int, int, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(long, long, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(size_t, size, set, i, i);

/* swap */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int, int, swap, i, i); DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(long, long, swap, i, i); DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(size_t, size, swap, i, i); /* compare_swap */ DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(unsigned int, uint, compare_swap, i, i + 1); DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(unsigned long, ulong, compare_swap, i, i + 1); DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(unsigned long long, ulonglong, compare_swap, i, i + 1); DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(int32_t, int32, compare_swap, i, i + 1); DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(uint32_t, uint32, compare_swap, i, i + 1); DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(uint64_t, uint64, compare_swap, i, i + 1); DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(int, int, compare_swap, i, i + 1); DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(long, long, compare_swap, i, i + 1); DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(size_t, size, compare_swap, i, i + 1); #if defined __cplusplus || defined NVSHMEM_BITCODE_APPLICATION } #endif int main(int argc, char *argv[]) { cudaStream_t stream; int rc = 0; double *h_lat; uint64_t *h_size_arr; void *flag_d = NULL; void **h_tables; read_args(argc, argv); int iter = iters; int skip = warmup_iters; int mype, npes; MAIN_SETUP(argc, argv, mype, npes, flag_d, stream, h_size_arr, h_tables, h_lat); switch (test_amo.type) { case AMO_INC: { iter = 500; skip = 50; RUN_TEST_WITHOUT_ARG(unsigned int, uint, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(unsigned long, ulong, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(unsigned long long, ulonglong, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(int32_t, int32, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(uint32_t, uint32, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(uint64_t, uint64, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(int, int, inc, flag_d, 
mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(long, long, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(size_t, size, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); break; } case AMO_SET: { iter = 500; skip = 50; RUN_TEST_WITH_ARG(unsigned int, uint, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 551); RUN_TEST_WITH_ARG(unsigned long, ulong, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 551); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 551); RUN_TEST_WITH_ARG(int32_t, int32, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 551); RUN_TEST_WITH_ARG(uint32_t, uint32, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 551); RUN_TEST_WITH_ARG(uint64_t, uint64, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 551); RUN_TEST_WITH_ARG(int, int, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 551); RUN_TEST_WITH_ARG(long, long, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 551); RUN_TEST_WITH_ARG(size_t, size, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 551); break; } case AMO_ADD: { iter = 500; skip = 50; RUN_TEST_WITH_ARG(unsigned int, uint, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(unsigned long, ulong, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(int32_t, int32, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(uint32_t, uint32, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(uint64_t, uint64, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(int, int, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(long, long, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); 
RUN_TEST_WITH_ARG(size_t, size, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); break; } case AMO_AND: { iter = 64; skip = 0; /* TODO: Figure out a good way to do this with signed types. The bit shifts we do don't * mix with signed types. */ RUN_TEST_WITH_ARG(unsigned long, ulong, and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); RUN_TEST_WITH_ARG(uint64_t, uint64, and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); /* RUN_TEST_WITH_ARG(int64_t, int64, and, flag_d, mype, iter, skip, h_lat, h_size_arr, * 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); */ iter = 32; /* RUN_TEST_WITH_ARG(int64_t, int64, and, flag_d, mype, iter, skip, h_lat, h_size_arr, * 0xFFFFFFFF, 0, 0xFFFFFFFF); */ RUN_TEST_WITH_ARG(uint32_t, uint32, and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFF, 0, 0xFFFFFFFF); RUN_TEST_WITH_ARG(unsigned int, uint, and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFF, 0, 0xFFFFFFFF); break; } case AMO_OR: { iter = 64; skip = 0; /* TODO: Figure out a good way to do this with signed types. The bit shifts we do don't * mix with signed types. 
*/ RUN_TEST_WITH_ARG(unsigned long, ulong, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); RUN_TEST_WITH_ARG(uint64_t, uint64, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); /* RUN_TEST_WITH_ARG(int64_t, int64, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, * 0xFFFFFFFFFFFFFFFF, 0); */ iter = 32; /* RUN_TEST_WITH_ARG(int64_t, int64, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, * 0xFFFFFFFFFFFFFFFF, 0); */ RUN_TEST_WITH_ARG(uint32_t, uint32, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFF, 0); RUN_TEST_WITH_ARG(unsigned int, uint, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFF, 0); break; } case AMO_XOR: { iter = 500; skip = 50; RUN_TEST_WITH_ARG(unsigned long, ulong, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(uint64_t, uint64, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(int64_t, int64, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(int64_t, int64, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(uint32_t, uint32, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(unsigned int, uint, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); break; } case AMO_FETCH_INC: { iter = 500; skip = 50; RUN_TEST_WITHOUT_ARG(unsigned int, uint, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(unsigned long, ulong, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(unsigned long long, ulonglong, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(int32_t, int32, fetch_inc, flag_d, mype, iter, skip, h_lat, 
h_size_arr, 0); RUN_TEST_WITHOUT_ARG(uint32_t, uint32, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(uint64_t, uint64, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(int, int, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(long, long, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); RUN_TEST_WITHOUT_ARG(size_t, size, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0); break; } case AMO_FETCH_ADD: { iter = 500; skip = 50; RUN_TEST_WITH_ARG(unsigned int, uint, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(unsigned long, ulong, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(int32_t, int32, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(uint32_t, uint32, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(uint64_t, uint64, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(int, int, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(long, long, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(size_t, size, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); break; } case AMO_FETCH_AND: { iter = 64; skip = 0; /* TODO: Figure out a good way to do this with signed types. The bit shifts we do don't * mix with signed types. 
*/ RUN_TEST_WITH_ARG(unsigned long, ulong, fetch_and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, fetch_and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); RUN_TEST_WITH_ARG(uint64_t, uint64, fetch_and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); /* RUN_TEST_WITH_ARG(int64_t, int64, fetch_and, flag_d, mype, iter, skip, h_lat, * h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); */ iter = 32; /* RUN_TEST_WITH_ARG(int64_t, int64, fetch_and, flag_d, mype, iter, skip, h_lat, * h_size_arr, 0xFFFFFFFF, 0, 0xFFFFFFFF); */ RUN_TEST_WITH_ARG(uint32_t, uint32, fetch_and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFF, 0, 0xFFFFFFFF); RUN_TEST_WITH_ARG(unsigned int, uint, fetch_and, flag_d, mype, iter, skip, h_lat, h_size_arr, 0xFFFFFFFF, 0, 0xFFFFFFFF); break; } case AMO_FETCH_OR: { iter = 64; skip = 0; /* TODO: Figure out a good way to do this with signed types. The bit shifts we do don't * mix with signed types. 
*/ RUN_TEST_WITH_ARG(unsigned long, ulong, fetch_or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, fetch_or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); RUN_TEST_WITH_ARG(uint64_t, uint64, fetch_or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); /* RUN_TEST_WITH_ARG(int64_t, int64, fetch_or, flag_d, mype, iter, skip, h_lat, * h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); */ iter = 32; /* RUN_TEST_WITH_ARG(int64_t, int64, fetch_or, flag_d, mype, iter, skip, h_lat, * h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); */ RUN_TEST_WITH_ARG(uint32_t, uint32, fetch_or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFF, 0); RUN_TEST_WITH_ARG(unsigned int, uint, fetch_or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1, 0xFFFFFFFF, 0); break; } case AMO_FETCH_XOR: { iter = 500; skip = 50; RUN_TEST_WITH_ARG(unsigned long, ulong, fetch_xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, fetch_xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(uint64_t, uint64, fetch_xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(int64_t, int64, fetch_xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(int64_t, int64, fetch_xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(uint32_t, uint32, fetch_xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(unsigned int, uint, fetch_xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); break; } case AMO_SWAP: { iter = 500; skip = 50; RUN_TEST_WITH_ARG(unsigned int, uint, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(unsigned long, ulong, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); 
RUN_TEST_WITH_ARG(int32_t, int32, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(uint32_t, uint32, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(uint64_t, uint64, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(int, int, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(long, long, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); RUN_TEST_WITH_ARG(size_t, size, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1); break; } case AMO_COMPARE_SWAP: { iter = 500; skip = 50; RUN_TEST_WITH_ARG(unsigned int, uint, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(unsigned long, ulong, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(unsigned long long, ulonglong, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(int32_t, int32, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(uint32_t, uint32, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(uint64_t, uint64, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(int, int, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(long, long, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); RUN_TEST_WITH_ARG(size_t, size, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0, 0); break; } default: { fprintf(stderr, "Error, unsupported Atomic op %s.\n", test_amo.name.c_str()); rc = -1; break; } } MAIN_CLEANUP(flag_d, stream, h_tables); return rc; }