// Copyright 2019 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include "xnnpack.h" #include "xnnpack/datatype.h" #include "xnnpack/microfnptr.h" #include "xnnpack/buffer.h" #include "replicable_random_device.h" // Copyright 2019 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #include #include #include #include #include #include #include #include #include #include #include #include "xnnpack.h" #include "xnnpack/buffer.h" #include "xnnpack/common.h" #include "xnnpack/isa-checks.h" #include "xnnpack/math.h" #include "xnnpack/microfnptr.h" #include "xnnpack/microparams.h" #include "replicable_random_device.h" #include "unary-ops.h" class VUnaryMicrokernelTester { public: VUnaryMicrokernelTester& batch_size(size_t batch_size) { assert(batch_size != 0); this->batch_size_ = batch_size; return *this; } size_t batch_size() const { return this->batch_size_; } VUnaryMicrokernelTester& inplace(bool inplace) { this->inplace_ = inplace; return *this; } bool inplace() const { return this->inplace_; } VUnaryMicrokernelTester& input_quantization( const xnn_quantization_params& quantization) { this->input_quantization_ = quantization; return *this; } const xnn_quantization_params& input_quantization() const { return this->input_quantization_; } VUnaryMicrokernelTester& output_quantization( const xnn_quantization_params& quantization) { this->output_quantization_ = quantization; return *this; } const xnn_quantization_params& output_quantization() const { return this->output_quantization_; } VUnaryMicrokernelTester& iterations(size_t iterations) { this->iterations_ = iterations; return *this; } size_t iterations() const { return this->iterations_; } // Generic test function for `vunary` kernels. // // The function is templated on the type of the kernel parameters and takes // the following arguments: // // * `T`: The datatype to test. Should be implicitly convertible to and from // `float`. // * `init_params`: A function that populates a given parameters data // structure or returns `nullptr` if there is no default initialization. template void Test(void (*ukernel)(size_t, const typename xnnpack::unwrap_quantized::type*, typename xnnpack::unwrap_quantized::type*, const UKernelParamsType*), xnn_init_unary_uparams_fn init_params, const xnn_unary_params& params) const { using InKernel = typename xnnpack::unwrap_quantized::type; using OutKernel = typename xnnpack::unwrap_quantized::type; TestInfo test_info; auto domain = test_info.Domain(xnn_datatype_of()); xnnpack::ReplicableRandomDevice rng; xnnpack::Buffer x(batch_size() + XNN_EXTRA_BYTES / sizeof(In)); xnnpack::Buffer y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(Out) : 0)); xnnpack::Buffer y_ref(batch_size()); for (size_t iteration = 0; iteration < iterations(); iteration++) { // This should only fill batch_size() elements, but some kernels trigger // msan errors if we don't initialize the XNN_EXTRA_BYTES. FillRandom(rng, x.data(), x.size(), domain, input_quantization_); if (inplace()) { std::copy((InKernel*)x.begin(), (InKernel*)x.end(), (OutKernel*)y.begin()); } const In* x_data = inplace() ? (const In*)y.data() : x.data(); // Compute reference results. UnaryReferenceImpl(x_data, batch_size(), y_ref.data(), test_info, input_quantization_, output_quantization_, params); // Initialize the params. xnn_unary_uparams uparams; if (init_params) { init_params(&uparams, ¶ms, &input_quantization_, &output_quantization_); } // Call optimized micro-kernel. ukernel(batch_size() * sizeof(In), (const InKernel*)x_data, (OutKernel*)y.data(), (UKernelParamsType*)&uparams); // Verify results. for (size_t i = 0; i < batch_size(); i++) { ASSERT_NEAR(y[i], y_ref[i], test_info.Tolerance(y_ref[i], xnn_datatype_of())) << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << std::scientific << (float)x[i]; } } } template void Test(void (*ukernel)(size_t, const typename xnnpack::unwrap_quantized::type*, typename xnnpack::unwrap_quantized::type*, const UKernelParamsType*), xnn_init_unary_uparams_fn init_params) const { Test(ukernel, init_params, TestInfo().DefaultParams()); } template void Test(void (*ukernel)(size_t, const In*, Out*, const UKernelParamsType*), xnn_init_unary_uparams_fn init_params, const xnn_unary_params& params, std::vector inputs, const std::vector& expected, int tolerance_ulp) const { std::vector outputs(inputs.size()); inputs.resize(inputs.size() + XNN_EXTRA_BYTES / sizeof(In)); xnn_unary_uparams uparams; if (init_params) { init_params(&uparams, ¶ms, nullptr, nullptr); } ukernel(outputs.size() * sizeof(In), inputs.data(), outputs.data(), (UKernelParamsType*)&uparams); for (size_t i = 0; i < outputs.size(); i++) { if (std::isfinite(expected[i])) { EXPECT_NEAR(expected[i], outputs[i], tolerance_ulp * std::abs(expected[i]) * std::numeric_limits::epsilon()) << "for input " << inputs[i]; } else { EXPECT_EQ(std::fpclassify(expected[i]), std::fpclassify(outputs[i])) << "for input " << inputs[i] << " and output " << outputs[i] << " (FP_INFINITE=" << FP_INFINITE << ", FP_NAN=" << FP_NAN << ", FP_NORMAL=" << FP_NORMAL << ", FP_SUBNORMAL=" << FP_SUBNORMAL << ", FP_ZERO=" << FP_ZERO << ")"; } } } template void Test(void (*ukernel)(size_t, const In*, Out*, const UKernelParamsType*), xnn_init_unary_uparams_fn init_params, std::vector inputs, const std::vector& expected, int tolerance_ulp) const { Test(ukernel, init_params, TestInfo().DefaultParams(), inputs, expected, tolerance_ulp); } private: size_t batch_size_ = 1; bool inplace_ = false; xnn_quantization_params input_quantization_ = {0, 1.0f}; xnn_quantization_params output_quantization_ = {0, 1.0f}; size_t iterations_ = 15; }; template void TestBatchEq(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { TEST_REQUIRES_ARCH_FLAGS(arch_flags); const size_t batch_scale = get_batch_scale(); VUnaryMicrokernelTester() .batch_size(batch_tile * batch_scale) .Test(ukernel, init_params, args...); } template void TestBatchDiv(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { if (batch_tile == 1) return; TEST_REQUIRES_ARCH_FLAGS(arch_flags); const size_t batch_scale = get_batch_scale(); const size_t batch_step = batch_tile * batch_scale; for (size_t batch_size = 2 * batch_step; batch_size < 10 * batch_step; batch_size += batch_step) { VUnaryMicrokernelTester() .batch_size(batch_size) .Test(ukernel, init_params, args...); } } template void TestBatchLT(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { if (batch_tile == 1) return; TEST_REQUIRES_ARCH_FLAGS(arch_flags); const size_t batch_scale = get_batch_scale(); const size_t batch_end = batch_tile * batch_scale; for (size_t batch_size = 1; batch_size < batch_end; batch_size++) { VUnaryMicrokernelTester() .batch_size(batch_size) .Test(ukernel, init_params, args...); } } template void TestBatchGT(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { TEST_REQUIRES_ARCH_FLAGS(arch_flags); const size_t batch_scale = get_batch_scale(); const size_t batch_step = batch_tile * batch_scale; const size_t batch_end = batch_tile == 1 ? 10 : 2 * batch_step; for (size_t batch_size = batch_step + 1; batch_size < batch_end; batch_size++) { VUnaryMicrokernelTester() .batch_size(batch_size) .Test(ukernel, init_params, args...); } } template void TestInPlace(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { TEST_REQUIRES_ARCH_FLAGS(arch_flags); const size_t batch_scale = get_batch_scale(); const size_t batch_end = batch_tile * batch_scale; const size_t batch_step = std::max(1, batch_tile - 1); for (size_t batch_size = 1; batch_size <= batch_end; batch_size += batch_step) { VUnaryMicrokernelTester() .batch_size(batch_size) .inplace(true) .Test(ukernel, init_params, args...); } } template void TestInputScale(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { TEST_REQUIRES_ARCH_FLAGS(arch_flags); for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) { for (float input_scale : {4.0f, 16.0f, 64.0f}) { xnn_quantization_params input_quantization = TestInfo().InputQuantizationParams(xnn_datatype_of()); xnn_quantization_params output_quantization = TestInfo().InputQuantizationParams(xnn_datatype_of()); input_quantization.scale = input_scale; VUnaryMicrokernelTester() .batch_size(batch_size) .input_quantization(input_quantization) .output_quantization(output_quantization) .Test(ukernel, init_params, args...); } } } template void TestOutputScale(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { TEST_REQUIRES_ARCH_FLAGS(arch_flags); for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) { for (float output_scale : {4.0f, 16.0f, 64.0f}) { xnn_quantization_params input_quantization = TestInfo().InputQuantizationParams(xnn_datatype_of()); xnn_quantization_params output_quantization = TestInfo().InputQuantizationParams(xnn_datatype_of()); output_quantization.scale = output_scale; VUnaryMicrokernelTester() .batch_size(batch_size) .input_quantization(input_quantization) .output_quantization(output_quantization) .Test(ukernel, init_params, args...); } } } template void TestInputZeroPoint(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { TEST_REQUIRES_ARCH_FLAGS(arch_flags); for (int16_t input_zero_point = 2; input_zero_point < 10; input_zero_point += 3) { for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) { xnn_quantization_params input_quantization = TestInfo().InputQuantizationParams(xnn_datatype_of()); xnn_quantization_params output_quantization = TestInfo().InputQuantizationParams(xnn_datatype_of()); input_quantization.zero_point = input_zero_point; VUnaryMicrokernelTester() .batch_size(batch_size) .input_quantization(input_quantization) .output_quantization(output_quantization) .Test(ukernel, init_params, args...); } } } template void TestOutputZeroPoint(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { TEST_REQUIRES_ARCH_FLAGS(arch_flags); for (int16_t output_zero_point = 2; output_zero_point < 10; output_zero_point += 3) { for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) { xnn_quantization_params input_quantization = TestInfo().InputQuantizationParams(xnn_datatype_of()); xnn_quantization_params output_quantization = TestInfo().InputQuantizationParams(xnn_datatype_of()); output_quantization.zero_point = output_zero_point; VUnaryMicrokernelTester() .batch_size(batch_size) .input_quantization(input_quantization) .output_quantization(output_quantization) .Test(ukernel, init_params, args...); } } } template void TestOutputSaturation(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { TEST_REQUIRES_ARCH_FLAGS(arch_flags); const size_t batch_scale = get_batch_scale(); const size_t batch_end = batch_tile * batch_scale * 5; const size_t batch_step = std::max(2, batch_end / 8) - 1; for (size_t batch_size = 1; batch_size <= batch_end; batch_size += batch_step) { VUnaryMicrokernelTester() .batch_size(batch_size) .output_quantization({0, 500.0f}) .Test(ukernel, init_params, args...); } } template void TestOutputOverflow(uint64_t arch_flags, size_t batch_tile, UKernelFn ukernel, xnn_init_unary_uparams_fn init_params, Args... args) { TEST_REQUIRES_ARCH_FLAGS(arch_flags); const size_t batch_scale = get_batch_scale(); const size_t batch_end = batch_tile * batch_scale * 5; const size_t batch_step = std::max(2, batch_end / 8) - 1; for (size_t batch_size = 1; batch_size <= batch_end; batch_size += batch_step) { VUnaryMicrokernelTester() .batch_size(batch_size) .output_quantization({0, 4294967296.0f}) .Test(ukernel, init_params, args...); } }