/*
 * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 *
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto. Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 *
 * See COPYRIGHT.txt for license information
 */

/* Name of the device module for this test; defined before the common header is included. */
#define CUMODULE_NAME "shmem_atomic_latency.cubin"

#include "atomic_one_sided_common.h"

/*
 * Latency-test instantiations, one per (atomic operation, element type) pair.
 *
 * The DEFINE_LAT_* macros are provided by atomic_one_sided_common.h, which is
 * not visible here. From the call sites, the arguments appear to be:
 *   (C type, type-name suffix, operation, compare expression, operand expression)
 * where the expressions may refer to `value`, `cmp`, `iter` and the iteration
 * index `i` -- TODO confirm against the header.
 *
 * The whole group is given C linkage when compiled as C++ or as a bitcode
 * application.
 */
#if defined __cplusplus || defined NVSHMEM_BITCODE_APPLICATION
extern "C" {
#endif

/* add */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, add, (value * (1 + i)),
                                          (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int, int, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(long, long, add, (value * (1 + i)), (value));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(size_t, size, add, (value * (1 + i)), (value));

/* fetch_add */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, fetch_add, (value * (1 + i)),
                                      (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int, int, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(long, long, fetch_add, (value * (1 + i)), (value));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(size_t, size, fetch_add, (value * (1 + i)), (value));

/* and */
/* should get flag set to 0b1, 0b11, 0b111, etc. */
/* NOTE(review): main() runs these with iter equal to the type's bit width; if
 * `i` reaches iter - 1 the shift count (i + 1) equals the width, which is
 * undefined behaviour -- confirm the loop bounds in the header. Only unsigned
 * types are instantiated; the signed variants stay commented out. */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, and, (value << (i + 1)),
                                          (value << (i + 1)));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, and, (value << (i + 1)),
                                          (value << (i + 1)));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, and, (value << (i + 1)),
                                          (value << (i + 1)));
/* DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, and, (value << (i + 1)), (value << (i +
 * 1))); */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, and, (value << (i + 1)),
                                          (value << (i + 1)));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, and, (value << (i + 1)),
                                          (value << (i + 1)));
/* DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, and, (value << (i + 1)), (value << (i +
 * 1))); */

/* fetch_and */
/* should get flag set to 0b1, 0b11, 0b111, etc. */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, fetch_and, (value << (i + 1)),
                                      (value << (i + 1)));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, fetch_and, (value << (i + 1)),
                                      (value << (i + 1)));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, fetch_and, (value << (i + 1)),
                                      (value << (i + 1)));
/* DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, fetch_and, (value << (i + 1)), (value << (i
 * + 1))); */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, fetch_and, (value << (i + 1)),
                                      (value << (i + 1)));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, fetch_and, (value << (i + 1)),
                                      (value << (i + 1)));
/* DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, fetch_and, (value << (i + 1)), (value << (i
 * + 1))); */

/* inc */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(unsigned int, uint, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(unsigned long, ulong, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(unsigned long long, ulonglong, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(int32_t, int32, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(uint32_t, uint32, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(uint64_t, uint64, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(int, int, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(long, long, inc);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_NO_ARG(size_t, size, inc);

/* fetch_inc */
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(unsigned int, uint, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(unsigned long, ulong, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(unsigned long long, ulonglong, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(int32_t, int32, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(uint32_t, uint32, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(uint64_t, uint64, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(int, int, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(long, long, fetch_inc);
DEFINE_LAT_FETCH_TEST_FOR_AMO_NO_ARG(size_t, size, fetch_inc);

/* or */
/* should get flag set to 0b1, 0b11, 0b111, etc. */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, or, (cmp >> (iter - (i + 1))),
                                          (value << i));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, or, (cmp >> (iter - (i + 1))),
                                          (value << i));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, or,
                                          (cmp >> (iter - (i + 1))), (value << i));
/* DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, or, (cmp >> (iter - (i + 1))), (value
 * << i)); */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, or, (cmp >> (iter - (i + 1))),
                                          (value << i));
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, or, (cmp >> (iter - (i + 1))),
                                          (value << i));
/* DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, or, (cmp >> (iter - (i + 1))), (value
 * << i)); */

/* fetch_or */
/* should get flag set to 0b1, 0b11, 0b111, etc. */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, fetch_or, (cmp >> (iter - (i + 1))),
                                      (value << i));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, fetch_or, (cmp >> (iter - (i + 1))),
                                      (value << i));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, fetch_or,
                                      (cmp >> (iter - (i + 1))), (value << i));
/* DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, fetch_or, (cmp >> (iter - (i + 1))), (value
 * << i)); */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, fetch_or, (cmp >> (iter - (i + 1))),
                                      (value << i));
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, fetch_or, (cmp >> (iter - (i + 1))),
                                      (value << i));
/* DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, fetch_or, (cmp >> (iter - (i + 1))), (value
 * << i)); */

/* xor */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, xor, i % 2, 1);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, xor, i % 2, 1);

/* fetch_xor */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, fetch_xor, i % 2, 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int64_t, int64, fetch_xor, i % 2, 1);

/* set */
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(int, int, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(long, long, set, i, i);
DEFINE_LAT_NON_FETCH_TEST_FOR_AMO_ONE_ARG(size_t, size, set, i, i);

/* swap */
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned int, uint, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long, ulong, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(unsigned long long, ulonglong, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int32_t, int32, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint32_t, uint32, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(uint64_t, uint64, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(int, int, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(long, long, swap, i, i);
DEFINE_LAT_FETCH_TEST_FOR_AMO_ONE_ARG(size_t, size, swap, i, i);

/* compare_swap */
DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(unsigned int, uint, compare_swap, i, i + 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(unsigned long, ulong, compare_swap, i, i + 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(unsigned long long, ulonglong, compare_swap, i, i + 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(int32_t, int32, compare_swap, i, i + 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(uint32_t, uint32, compare_swap, i, i + 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(uint64_t, uint64, compare_swap, i, i + 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(int, int, compare_swap, i, i + 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(long, long, compare_swap, i, i + 1);
DEFINE_LAT_FETCH_TEST_FOR_AMO_TWO_ARG(size_t, size, compare_swap, i, i + 1);

#if defined __cplusplus || defined NVSHMEM_BITCODE_APPLICATION
}
#endif

int main(int argc, char *argv[]) {
|
|
cudaStream_t stream;
|
|
int rc = 0;
|
|
|
|
double *h_lat;
|
|
uint64_t *h_size_arr;
|
|
void *flag_d = NULL;
|
|
void **h_tables;
|
|
|
|
read_args(argc, argv);
|
|
|
|
int iter = iters;
|
|
int skip = warmup_iters;
|
|
int mype, npes;
|
|
|
|
MAIN_SETUP(argc, argv, mype, npes, flag_d, stream, h_size_arr, h_tables, h_lat);
|
|
|
|
switch (test_amo.type) {
|
|
case AMO_INC: {
|
|
iter = 500;
|
|
skip = 50;
|
|
RUN_TEST_WITHOUT_ARG(unsigned int, uint, inc, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(unsigned long, ulong, inc, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(unsigned long long, ulonglong, inc, flag_d, mype, iter, skip,
|
|
h_lat, h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(int32_t, int32, inc, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0);
|
|
RUN_TEST_WITHOUT_ARG(uint32_t, uint32, inc, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0);
|
|
RUN_TEST_WITHOUT_ARG(uint64_t, uint64, inc, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0);
|
|
RUN_TEST_WITHOUT_ARG(int, int, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(long, long, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(size_t, size, inc, flag_d, mype, iter, skip, h_lat, h_size_arr, 0);
|
|
break;
|
|
}
|
|
case AMO_SET: {
|
|
iter = 500;
|
|
skip = 50;
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, set, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 551);
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, set, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 551);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, set, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 551);
|
|
RUN_TEST_WITH_ARG(int32_t, int32, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415,
|
|
0, 551);
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, set, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 551);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, set, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 551);
|
|
RUN_TEST_WITH_ARG(int, int, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0,
|
|
551);
|
|
RUN_TEST_WITH_ARG(long, long, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0,
|
|
551);
|
|
RUN_TEST_WITH_ARG(size_t, size, set, flag_d, mype, iter, skip, h_lat, h_size_arr, 415,
|
|
0, 551);
|
|
break;
|
|
}
|
|
case AMO_ADD: {
|
|
iter = 500;
|
|
skip = 50;
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, add, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 0);
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, add, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, add, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(int32_t, int32, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415,
|
|
0, 0);
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, add, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 0);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, add, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 0);
|
|
RUN_TEST_WITH_ARG(int, int, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0,
|
|
0);
|
|
RUN_TEST_WITH_ARG(long, long, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415, 0,
|
|
0);
|
|
RUN_TEST_WITH_ARG(size_t, size, add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415,
|
|
0, 0);
|
|
break;
|
|
}
|
|
case AMO_AND: {
|
|
iter = 64;
|
|
skip = 0;
|
|
/* TODO: Figure out a good way to do this with signed types. The bit shifts we do don't
|
|
* mix with signed types. */
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, and, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, and, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, and, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF);
|
|
/* RUN_TEST_WITH_ARG(int64_t, int64, and, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
* 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); */
|
|
iter = 32;
|
|
/* RUN_TEST_WITH_ARG(int64_t, int64, and, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
* 0xFFFFFFFF, 0, 0xFFFFFFFF); */
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, and, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0xFFFFFFFF, 0, 0xFFFFFFFF);
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, and, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0xFFFFFFFF, 0, 0xFFFFFFFF);
|
|
break;
|
|
}
|
|
case AMO_OR: {
|
|
iter = 64;
|
|
skip = 0;
|
|
/* TODO: Figure out a good way to do this with signed types. The bit shifts we do don't
|
|
* mix with signed types. */
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, or, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
1, 0xFFFFFFFFFFFFFFFF, 0);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, or, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1,
|
|
0xFFFFFFFFFFFFFFFF, 0);
|
|
/* RUN_TEST_WITH_ARG(int64_t, int64, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1,
|
|
* 0xFFFFFFFFFFFFFFFF, 0); */
|
|
iter = 32;
|
|
/* RUN_TEST_WITH_ARG(int64_t, int64, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1,
|
|
* 0xFFFFFFFFFFFFFFFF, 0); */
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, or, flag_d, mype, iter, skip, h_lat, h_size_arr, 1,
|
|
0xFFFFFFFF, 0);
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, or, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
1, 0xFFFFFFFF, 0);
|
|
break;
|
|
}
|
|
case AMO_XOR: {
|
|
iter = 500;
|
|
skip = 50;
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, xor, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, xor, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0,
|
|
0, 1);
|
|
RUN_TEST_WITH_ARG(int64_t, int64, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0,
|
|
0, 1);
|
|
RUN_TEST_WITH_ARG(int64_t, int64, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0,
|
|
0, 1);
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, xor, flag_d, mype, iter, skip, h_lat, h_size_arr, 0,
|
|
0, 1);
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, xor, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0, 0, 1);
|
|
break;
|
|
}
|
|
case AMO_FETCH_INC: {
|
|
iter = 500;
|
|
skip = 50;
|
|
RUN_TEST_WITHOUT_ARG(unsigned int, uint, fetch_inc, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(unsigned long, ulong, fetch_inc, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(unsigned long long, ulonglong, fetch_inc, flag_d, mype, iter, skip,
|
|
h_lat, h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(int32_t, int32, fetch_inc, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(uint32_t, uint32, fetch_inc, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(uint64_t, uint64, fetch_inc, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0);
|
|
RUN_TEST_WITHOUT_ARG(int, int, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0);
|
|
RUN_TEST_WITHOUT_ARG(long, long, fetch_inc, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0);
|
|
RUN_TEST_WITHOUT_ARG(size_t, size, fetch_inc, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0);
|
|
break;
|
|
}
|
|
case AMO_FETCH_ADD: {
|
|
iter = 500;
|
|
skip = 50;
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, fetch_add, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, fetch_add, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, fetch_add, flag_d, mype, iter, skip,
|
|
h_lat, h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(int32_t, int32, fetch_add, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, fetch_add, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, fetch_add, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(int, int, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr, 415,
|
|
0, 0);
|
|
RUN_TEST_WITH_ARG(long, long, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 0);
|
|
RUN_TEST_WITH_ARG(size_t, size, fetch_add, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 0);
|
|
break;
|
|
}
|
|
case AMO_FETCH_AND: {
|
|
iter = 64;
|
|
skip = 0;
|
|
/* TODO: Figure out a good way to do this with signed types. The bit shifts we do don't
|
|
* mix with signed types. */
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, fetch_and, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, fetch_and, flag_d, mype, iter, skip,
|
|
h_lat, h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, fetch_and, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF);
|
|
/* RUN_TEST_WITH_ARG(int64_t, int64, fetch_and, flag_d, mype, iter, skip, h_lat,
|
|
* h_size_arr, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF); */
|
|
iter = 32;
|
|
/* RUN_TEST_WITH_ARG(int64_t, int64, fetch_and, flag_d, mype, iter, skip, h_lat,
|
|
* h_size_arr, 0xFFFFFFFF, 0, 0xFFFFFFFF); */
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, fetch_and, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0xFFFFFFFF, 0, 0xFFFFFFFF);
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, fetch_and, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0xFFFFFFFF, 0, 0xFFFFFFFF);
|
|
break;
|
|
}
|
|
case AMO_FETCH_OR: {
|
|
iter = 64;
|
|
skip = 0;
|
|
/* TODO: Figure out a good way to do this with signed types. The bit shifts we do don't
|
|
* mix with signed types. */
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, fetch_or, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, fetch_or, flag_d, mype, iter, skip,
|
|
h_lat, h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, fetch_or, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0);
|
|
/* RUN_TEST_WITH_ARG(int64_t, int64, fetch_or, flag_d, mype, iter, skip, h_lat,
|
|
* h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); */
|
|
iter = 32;
|
|
/* RUN_TEST_WITH_ARG(int64_t, int64, fetch_or, flag_d, mype, iter, skip, h_lat,
|
|
* h_size_arr, 1, 0xFFFFFFFFFFFFFFFF, 0); */
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, fetch_or, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 1, 0xFFFFFFFF, 0);
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, fetch_or, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 1, 0xFFFFFFFF, 0);
|
|
break;
|
|
}
|
|
case AMO_FETCH_XOR: {
|
|
iter = 500;
|
|
skip = 50;
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, fetch_xor, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, fetch_xor, flag_d, mype, iter, skip,
|
|
h_lat, h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, fetch_xor, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(int64_t, int64, fetch_xor, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(int64_t, int64, fetch_xor, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, fetch_xor, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, fetch_xor, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
break;
|
|
}
|
|
case AMO_SWAP: {
|
|
iter = 500;
|
|
skip = 50;
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, swap, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0, 0, 1);
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, swap, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, swap, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(int32_t, int32, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0,
|
|
0, 1);
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, swap, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0, 0, 1);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, swap, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
0, 0, 1);
|
|
RUN_TEST_WITH_ARG(int, int, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0, 1);
|
|
RUN_TEST_WITH_ARG(long, long, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0,
|
|
1);
|
|
RUN_TEST_WITH_ARG(size_t, size, swap, flag_d, mype, iter, skip, h_lat, h_size_arr, 0, 0,
|
|
1);
|
|
break;
|
|
}
|
|
case AMO_COMPARE_SWAP: {
|
|
iter = 500;
|
|
skip = 50;
|
|
RUN_TEST_WITH_ARG(unsigned int, uint, compare_swap, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(unsigned long, ulong, compare_swap, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(unsigned long long, ulonglong, compare_swap, flag_d, mype, iter, skip,
|
|
h_lat, h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(int32_t, int32, compare_swap, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(uint32_t, uint32, compare_swap, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(uint64_t, uint64, compare_swap, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
RUN_TEST_WITH_ARG(int, int, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 0);
|
|
RUN_TEST_WITH_ARG(long, long, compare_swap, flag_d, mype, iter, skip, h_lat, h_size_arr,
|
|
415, 0, 0);
|
|
RUN_TEST_WITH_ARG(size_t, size, compare_swap, flag_d, mype, iter, skip, h_lat,
|
|
h_size_arr, 415, 0, 0);
|
|
break;
|
|
}
|
|
default: {
|
|
fprintf(stderr, "Error, unsupported Atomic op %s.\n", test_amo.name.c_str());
|
|
rc = -1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
MAIN_CLEANUP(flag_d, stream, h_tables);
|
|
return rc;
|
|
}
|