// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$TESTNAME = f"F32Simd{ARCH.upper()}Test"

$if ARCH_MACRO:
  // This header needs to go first for the arch test macros.
  #include "xnnpack/common.h"

  #if ${ARCH_MACRO}

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <random>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include "xnnpack/isa-checks.h"
#include "xnnpack/simd/f32-${ARCH}.h"

#include "replicable_random_device.h"

namespace xnnpack {

class ${TESTNAME} : public ::testing::Test {
 protected:
  void SetUp() override {
    $if TEST_REQUIRES:
      ${TEST_REQUIRES};
    inputs_.resize(3 * xnn_simd_size_f32);
    output_.resize(xnn_simd_size_f32);
    std::uniform_real_distribution<float> f32dist(-10.0f, 10.0f);
    std::generate(inputs_.begin(), inputs_.end(),
                  [&]() { return f32dist(rng_); });
  }

  xnnpack::ReplicableRandomDevice rng_;
  std::vector<float> inputs_;
  std::vector<float> output_;
};

TEST_F(${TESTNAME}, SetZero) {
  xnn_storeu_f32(output_.data(), xnn_zero_f32());
  EXPECT_THAT(output_, testing::Each(testing::Eq(0.0f)));
}

TEST_F(${TESTNAME}, Add) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_add_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(output_[k], inputs_[k] + inputs_[k + xnn_simd_size_f32]);
  }
}

TEST_F(${TESTNAME}, Mul) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_mul_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(output_[k], inputs_[k] * inputs_[k + xnn_simd_size_f32]);
  }
}

TEST_F(${TESTNAME}, Fmadd) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t c =
      xnn_loadu_f32(inputs_.data() + 2 * xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_fmadd_f32(a, b, c);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
#if XNN_SIMD_HAS_NATIVE_FMA
    // If an arch claims to support FMA, it better also round things correctly.
    ASSERT_EQ(output_[k],
              static_cast<float>(
                  static_cast<double>(inputs_[k]) *
                      static_cast<double>(inputs_[k + xnn_simd_size_f32]) +
                  static_cast<double>(inputs_[k + 2 * xnn_simd_size_f32])));
#else
    ASSERT_EQ(output_[k], inputs_[k] * inputs_[k + xnn_simd_size_f32] +
                              inputs_[k + 2 * xnn_simd_size_f32]);
#endif  // XNN_SIMD_HAS_NATIVE_FMA
  }
}
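// A note on the double-precision references in the FMA tests: a `float` has
// a 24-bit significand, so the product of two `float`s is always exactly
// representable as a `double` (53-bit significand). Promoting the operands to
// `double` therefore makes the multiply exact, leaving the add/subtract and
// the final conversion back to `float` as the only rounding steps, which
// closely mirrors the single rounding performed by a hardware FMA.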
TEST_F(${TESTNAME}, Fmsub) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t c =
      xnn_loadu_f32(inputs_.data() + 2 * xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_fmsub_f32(a, b, c);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
#if XNN_SIMD_HAS_NATIVE_FMA
    // If an arch claims to support FMA, it better also round things correctly.
    ASSERT_EQ(output_[k],
              static_cast<float>(
                  static_cast<double>(inputs_[k]) *
                      static_cast<double>(inputs_[k + xnn_simd_size_f32]) -
                  static_cast<double>(inputs_[k + 2 * xnn_simd_size_f32])));
#else
    ASSERT_EQ(output_[k], inputs_[k] * inputs_[k + xnn_simd_size_f32] -
                              inputs_[k + 2 * xnn_simd_size_f32]);
#endif  // XNN_SIMD_HAS_NATIVE_FMA
  }
}

TEST_F(${TESTNAME}, Fnmadd) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t c =
      xnn_loadu_f32(inputs_.data() + 2 * xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_fnmadd_f32(a, b, c);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
#if XNN_SIMD_HAS_NATIVE_FMA
    // If an arch claims to support FMA, it better also round things correctly.
    ASSERT_EQ(output_[k],
              static_cast<float>(
                  static_cast<double>(-inputs_[k]) *
                      static_cast<double>(inputs_[k + xnn_simd_size_f32]) +
                  static_cast<double>(inputs_[k + 2 * xnn_simd_size_f32])));
#else
    ASSERT_EQ(output_[k], -inputs_[k] * inputs_[k + xnn_simd_size_f32] +
                              inputs_[k + 2 * xnn_simd_size_f32]);
#endif  // XNN_SIMD_HAS_NATIVE_FMA
  }
}

TEST_F(${TESTNAME}, Sub) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_sub_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(output_[k], inputs_[k] - inputs_[k + xnn_simd_size_f32]);
  }
}

TEST_F(${TESTNAME}, Div) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_div_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_NEAR(output_[k], inputs_[k] / inputs_[k + xnn_simd_size_f32],
                2 * std::numeric_limits<float>::epsilon() *
                    std::abs(output_[k]));
  }
}

TEST_F(${TESTNAME}, Max) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_max_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(output_[k], std::max(inputs_[k], inputs_[k + xnn_simd_size_f32]));
  }
}

TEST_F(${TESTNAME}, Min) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_min_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(output_[k], std::min(inputs_[k], inputs_[k + xnn_simd_size_f32]));
  }
}

TEST_F(${TESTNAME}, Abs) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t res = xnn_abs_f32(a);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(output_[k], std::abs(inputs_[k]));
  }
}

TEST_F(${TESTNAME}, Neg) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t res = xnn_neg_f32(a);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(output_[k], -inputs_[k]);
  }
}

TEST_F(${TESTNAME}, And) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_and_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(*(uint32_t *)&output_[k],
              *(uint32_t *)&inputs_[k] &
                  *(uint32_t *)&inputs_[k + xnn_simd_size_f32]);
  }
}
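// The bitwise tests in this file (And/Or/Xor and the shifts) read lane bits
// through `uint32_t` pointer casts, which relies on the compiler tolerating
// type punning through pointers. A strict-aliasing-safe alternative, shown
// only as a sketch and not used by these tests (it would also need
// <cstring>), is to round-trip the bits through `std::memcpy`:
//
//   uint32_t actual, lhs, rhs;
//   std::memcpy(&actual, &output_[k], sizeof(actual));
//   std::memcpy(&lhs, &inputs_[k], sizeof(lhs));
//   std::memcpy(&rhs, &inputs_[k + xnn_simd_size_f32], sizeof(rhs));
//   ASSERT_EQ(actual, lhs & rhs);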
TEST_F(${TESTNAME}, Or) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_or_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(*(uint32_t *)&output_[k],
              *(uint32_t *)&inputs_[k] |
                  *(uint32_t *)&inputs_[k + xnn_simd_size_f32]);
  }
}

TEST_F(${TESTNAME}, Xor) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_xor_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(*(uint32_t *)&output_[k],
              *(uint32_t *)&inputs_[k] ^
                  *(uint32_t *)&inputs_[k + xnn_simd_size_f32]);
  }
}

TEST_F(${TESTNAME}, ShiftLeft) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  // Not using a loop since the `bits` parameter needs to be a compile-time
  // constant, e.g. for `neon`.
  $for BITS in range(1, 32):
    {
      const xnn_simd_f32_t res = xnn_sll_f32(a, ${BITS});
      xnn_storeu_f32(output_.data(), res);
      for (size_t k = 0; k < xnn_simd_size_f32; k++) {
        ASSERT_EQ(*(uint32_t *)&output_[k],
                  *(uint32_t *)&inputs_[k] << ${BITS});
      }
    }
}

TEST_F(${TESTNAME}, ShiftRight) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  // Not using a loop since the `bits` parameter needs to be a compile-time
  // constant, e.g. for `neon`.
  $for BITS in range(1, 32):
    {
      const xnn_simd_f32_t res = xnn_srl_f32(a, ${BITS});
      xnn_storeu_f32(output_.data(), res);
      for (size_t k = 0; k < xnn_simd_size_f32; k++) {
        ASSERT_EQ(*(uint32_t *)&output_[k],
                  *(uint32_t *)&inputs_[k] >> ${BITS});
      }
    }
}

TEST_F(${TESTNAME}, ShiftRightSigned) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  // Not using a loop since the `bits` parameter needs to be a compile-time
  // constant, e.g. for `neon`.
  $for BITS in range(1, 32):
    {
      const xnn_simd_f32_t res = xnn_sra_f32(a, ${BITS});
      xnn_storeu_f32(output_.data(), res);
      for (size_t k = 0; k < xnn_simd_size_f32; k++) {
        ASSERT_EQ(*(int32_t *)&output_[k],
                  *(int32_t *)&inputs_[k] >> ${BITS});
      }
    }
}

TEST_F(${TESTNAME}, CmpEq) {
  // Randomly force some lanes to compare equal so that both the all-ones and
  // all-zeros outcomes are exercised.
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    if (rng_() & 1) {
      inputs_[k + xnn_simd_size_f32] = inputs_[k];
    }
  }
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t b = xnn_loadu_f32(inputs_.data() + xnn_simd_size_f32);
  const xnn_simd_f32_t res = xnn_cmpeq_f32(a, b);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(*(uint32_t *)&output_[k],
              inputs_[k] == inputs_[k + xnn_simd_size_f32] ? 0xFFFFFFFF : 0);
  }
}

TEST_F(${TESTNAME}, GetExp) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  const xnn_simd_f32_t res = xnn_getexp_f32(a);
  xnn_storeu_f32(output_.data(), res);
  for (size_t k = 0; k < xnn_simd_size_f32; k++) {
    ASSERT_EQ(output_[k], std::logb(inputs_[k]));
  }
}

TEST_F(${TESTNAME}, StoreTail) {
  const xnn_simd_f32_t a = xnn_loadu_f32(inputs_.data());
  for (size_t num_elements = 1; num_elements < xnn_simd_size_f32;
       num_elements++) {
    xnn_store_tail_f32(output_.data(), a, num_elements);
    for (size_t k = 0; k < num_elements; k++) {
      ASSERT_EQ(output_[k], inputs_[k]);
    }
    for (size_t k = num_elements; k < xnn_simd_size_f32; k++) {
      ASSERT_EQ(output_[k], 0.0f);
    }
  }
}

}  // namespace xnnpack

$if ARCH_MACRO:
  #endif  // ${ARCH_MACRO}
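// A hypothetical example of how this template might be instantiated with
// XNNPACK's code generator; the real invocations and parameter values live in
// the build files, so treat the paths and flags below as illustrative only:
//
//   tools/xngen test/f32-simd.cc.in -D ARCH=neon \
//       -D TEST_REQUIRES=TEST_REQUIRES_ARM_NEON -o test/f32-simd-neon.cc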