// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <memory>
#include <random>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "xnnpack.h"
#include "xnnpack/allocation-type.h"
#include "xnnpack/math.h"
#include "xnnpack/node-type.h"
#include "xnnpack/subgraph.h"
#include "mock-allocator.h"
#include "replicable_random_device.h"
#include "runtime-tester.h"
#include "subgraph-tester.h"
namespace xnnpack {
using ::testing::_;
using ::testing::AnyNumber;
using ::testing::Return;
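// A minimal sketch (not used by the tests below) of the convert-node pattern
// they all rely on: after RewriteForFp16(), every external f32 input is fed
// through a new convert node and every external f32 output is produced by
// one, so a graph with I external inputs, O external outputs, and N original
// nodes ends up with N + I + O nodes. `IsConvertNode` is a hypothetical
// helper introduced here for illustration only.
static inline bool IsConvertNode(const xnn_node* node) {
return node->type == xnn_node_type_convert;
}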
TEST(SUBGRAPH_FP16, fully_connected_f16_weights_and_biases) {
SubgraphTester tester(4);
//   external input[0]    static f16 [1 + 2]
//               \           /
//                \         /
//            [fully_connected]
//                    |
//                 external
//                 output[3]
std::vector<xnn_float16> weights(3 * 4);
std::vector<xnn_float16> biases(4);
tester.AddInputTensorF32({1, 2, 2, 3}, 0)
.AddStaticTensorF16({3, 4}, 1, weights.data())
.AddStaticTensorF16({4}, 2, biases.data())
.AddOutputTensorF32({1, 2, 2, 4}, 3)
.AddFullyConnected(0, 1, 2, 3, 0)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created:
//
//   external input[0]
//          |
//      [convert]*
//          |
//       input[4]      static f16 [1 + 2]
//              \         /
//               \       /
//           [fully_connected]
//                   |
//             fp16 value[5]*
//                   |
//               [convert]*
//                   |
//                external
//                output[3]
// We should have 2 convert nodes, one for the external input and one for the
// external output, so 3 nodes in total including the fully-connected node
// from the original graph.
ASSERT_EQ(tester.NumNodes(), 3);
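// After optimization, nodes are stored in topological order: Node(0) is the
// input convert, Node(1) the fully-connected node, and Node(2) the output
// convert; the checks below rely on this ordering.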
const xnn_node* input_convert_node = tester.Node(0);
ASSERT_EQ(input_convert_node->type, xnn_node_type_convert);
ASSERT_EQ(tester.Value(4)->datatype, xnn_datatype_fp16);
const xnn_node* output_convert_node = tester.Node(2);
ASSERT_EQ(output_convert_node->type, xnn_node_type_convert);
ASSERT_EQ(tester.Value(5)->datatype, xnn_datatype_fp16);
// Check that the weights and biases are still `f16`.
ASSERT_EQ(tester.Value(1)->datatype, xnn_datatype_fp16);
ASSERT_EQ(tester.Value(2)->datatype, xnn_datatype_fp16);
// Check that the fully-connected node reads the converted FP16 input value.
const xnn_node* fully_connected_node = tester.Node(1);
ASSERT_EQ(fully_connected_node->type, xnn_node_type_fully_connected);
ASSERT_EQ(fully_connected_node->inputs[0], 4);
ASSERT_EQ(fully_connected_node->inputs[1], 1);
ASSERT_EQ(fully_connected_node->inputs[2], 2);
// Check that the output type is `fp32`.
ASSERT_EQ(tester.Value(3)->datatype, xnn_datatype_fp32);
}
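// A minimal sketch of how the rewrite exercised above is reached through the
// public API, assuming fp16 inference is supported on the target: creating a
// runtime with XNN_FLAG_FORCE_FP16_INFERENCE applies the FP16 rewrite
// internally. `CreateFp16RuntimeSketch` is a hypothetical helper, not part
// of the original tests; error handling is elided.
static inline void CreateFp16RuntimeSketch(xnn_subgraph_t subgraph) {
xnn_runtime_t runtime = nullptr;
const xnn_status status = xnn_create_runtime_v3(
subgraph, /*weights_cache=*/nullptr, /*threadpool=*/nullptr,
XNN_FLAG_FORCE_FP16_INFERENCE, &runtime);
if (status == xnn_status_success) {
xnn_delete_runtime(runtime);
}
}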
TEST(SUBGRAPH_FP16, fully_connected_f16_weights_f32_biases) {
SubgraphTester tester(4);
//   external input[0]    static f16 [1] + f32 [2]
//               \           /
//                \         /
//            [fully_connected]
//                    |
//                 external
//                 output[3]
std::vector<xnn_float16> weights(3 * 4);
std::vector<float> biases(4);
tester.AddInputTensorF32({1, 2, 2, 3}, 0)
.AddStaticTensorF16({3, 4}, 1, weights.data())
.AddStaticTensorF32({4}, 2, biases.data())
.AddOutputTensorF32({1, 2, 2, 4}, 3)
.AddFullyConnected(0, 1, 2, 3, 0)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created:
//
//   external input[0]
//          |
//      [convert]*
//          |
//       input[4]      static f16 [1] + f16 [2] (converted)
//              \         /
//               \       /
//           [fully_connected]
//                   |
//             fp16 value[5]*
//                   |
//               [convert]*
//                   |
//                external
//                output[3]
// We should have 2 convert nodes, one for the external input and one for the
// external output (the static f32 bias is converted in place rather than via
// a convert node), so 3 nodes in total including the fully-connected node
// from the original graph.
ASSERT_EQ(tester.NumNodes(), 3);
const xnn_node* input_convert_node = tester.Node(0);
ASSERT_EQ(input_convert_node->type, xnn_node_type_convert);
ASSERT_EQ(tester.Value(4)->datatype, xnn_datatype_fp16);
const xnn_node* output_convert_node = tester.Node(2);
ASSERT_EQ(output_convert_node->type, xnn_node_type_convert);
ASSERT_EQ(tester.Value(5)->datatype, xnn_datatype_fp16);
// Check that the weights and biases are now both `f16`, as the bias was
// converted statically.
ASSERT_EQ(tester.Value(1)->datatype, xnn_datatype_fp16);
ASSERT_EQ(tester.Value(2)->datatype, xnn_datatype_fp16);
// Check that the fully-connected node reads the converted FP16 input value.
const xnn_node* fully_connected_node = tester.Node(1);
ASSERT_EQ(fully_connected_node->type, xnn_node_type_fully_connected);
ASSERT_EQ(fully_connected_node->inputs[0], 4);
ASSERT_EQ(fully_connected_node->inputs[1], 1);
ASSERT_EQ(fully_connected_node->inputs[2], 2);
// Check that the output type is `fp32`.
ASSERT_EQ(tester.Value(3)->datatype, xnn_datatype_fp32);
}
TEST(SUBGRAPH_FP16, fully_connected_f16_weights_no_biases) {
SubgraphTester tester(4);
//   external input[0]    static f16 [1]
//               \           /
//                \         /
//            [fully_connected]
//                    |
//                 external
//                 output[2]
std::vector<xnn_float16> weights(3 * 4);
tester.AddInputTensorF32({1, 2, 2, 3}, 0)
.AddStaticTensorF16({3, 4}, 1, weights.data())
.AddOutputTensorF32({1, 2, 2, 4}, 2)
.AddFullyConnected(0, 1, XNN_INVALID_VALUE_ID, 2, 0)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created:
//
//   external input[0]
//          |
//      [convert]*
//          |
//       input[4]      static f16 [1]
//              \         /
//               \       /
//           [fully_connected]
//                   |
//             fp16 value[5]*
//                   |
//               [convert]*
//                   |
//                external
//                output[2]
// We should have 2 convert nodes, one for the external input and one for the
// external output, so 3 nodes in total including the fully-connected node
// from the original graph.
ASSERT_EQ(tester.NumNodes(), 3);
const xnn_node* input_convert_node = tester.Node(0);
ASSERT_EQ(input_convert_node->type, xnn_node_type_convert);
ASSERT_EQ(tester.Value(4)->datatype, xnn_datatype_fp16);
const xnn_node* output_convert_node = tester.Node(2);
ASSERT_EQ(output_convert_node->type, xnn_node_type_convert);
ASSERT_EQ(tester.Value(5)->datatype, xnn_datatype_fp16);
// Check that the fully-connected node reads the converted FP16 input value.
const xnn_node* fully_connected_node = tester.Node(1);
ASSERT_EQ(fully_connected_node->type, xnn_node_type_fully_connected);
ASSERT_EQ(fully_connected_node->inputs[0], 4);
ASSERT_EQ(fully_connected_node->inputs[1], 1);
// Check that the output type is `fp32`.
ASSERT_EQ(tester.Value(2)->datatype, xnn_datatype_fp32);
}
TEST(SUBGRAPH_FP16, value_both_external_output_and_input) {
SubgraphTester tester(4);
std::array<size_t, 4> pre_paddings = {0, 1, 0, 0};
std::array<size_t, 4> post_paddings = {0, 1, 0, 0};
//   external input[0]
//            /
//   [constant pad]
//            /
//   external        dynamic[1]
//   output[2]      /
//          \      /
//           [add]
//             |
//          external
//          output[3]
tester.AddInputTensorF32({1, 2, 2, 3}, 0)
.AddDynamicTensorF32({1, 1, 1, 3}, 1)
.AddOutputTensorF32({1, 4, 2, 3}, 2)
.AddOutputTensorF32({1, 4, 2, 3}, 3)
.AddConstantPad(pre_paddings.data(), post_paddings.data(), 0.0f, 0, 2)
.AddAddition(2, 1, 3)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created:
//
//   external input[0]
//          |
//      [convert]*
//          |
//       input[4]*
//          /
//   [constant pad]
//          /
//    fp16 value[5]*
//       |      \
//   [convert]*  \
//       |        \
//    external     \    dynamic[1] converted in-place
//    output[2]     \      /
//                   \    /
//                   [add]
//                     |
//               fp16 value[6]*
//                     |
//                 [convert]*
//                     |
//                  external
//                  output[3]
// We should have 3 convert nodes, one for the external input and two for the
// external outputs, so 5 nodes in total including the pad and add nodes from
// the original graph.
ASSERT_EQ(tester.NumNodes(), 5);
const xnn_node* output_convert_node = tester.Node(4);
ASSERT_EQ(output_convert_node->type, xnn_node_type_convert);
// Check that the addition node consumes the FP16 values before the output
// conversion.
const xnn_node* addition_node = tester.Node(3);
ASSERT_EQ(addition_node->type, xnn_node_type_binary_elementwise);
ASSERT_EQ(addition_node->binary_operator, xnn_binary_add);
ASSERT_EQ(addition_node->inputs[0], 5);
ASSERT_EQ(addition_node->inputs[1], 1);
ASSERT_EQ(tester.Value(5)->datatype, xnn_datatype_fp16);
ASSERT_EQ(tester.Value(1)->datatype, xnn_datatype_fp16);
ASSERT_EQ(tester.Node(2)->type, xnn_node_type_convert);
ASSERT_EQ(tester.Node(1)->type, xnn_node_type_static_constant_pad);
ASSERT_EQ(tester.Node(0)->type, xnn_node_type_convert);
}
TEST(SUBGRAPH_FP16, with_static_value) {
SubgraphTester tester(3);
float static_tensor_data[3 + XNN_EXTRA_BYTES / sizeof(float)] = {1.0f, 2.0f,
3.0f};
//   external input[0]    static[1]
//               \         /
//                \       /
//                 [add]
//                   |
//                external
//                output[2]
tester
.AddInputTensorF32({1, 2, 2, 3}, 0)
// Tensor #1 is both static and external
.AddStaticTensorF32({1, 1, 1, 3}, TensorType::kDense, 1,
/*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT,
static_tensor_data)
.AddOutputTensorF32({1, 4, 2, 3}, 2)
.AddAddition(0, 1, 2)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created. The static tensor data has
// been converted into a new buffer.
//
//   external input[0]
//          |
//      [convert]*
//          |
//       input[3]*     static[1]* (converted into a new buffer)
//              \         /
//               \       /
//                [add]
//                  |
//            fp16 value[4]*
//                  |
//              [convert]*
//                  |
//               external
//               output[2]
// We should have 3 nodes: the original add node, plus one convert node each
// for the external input and the external output.
ASSERT_EQ(tester.NumNodes(), 3);
// The static value should be converted to FP16
const xnn_value* static_value = tester.Value(1);
ASSERT_EQ(static_value->datatype, xnn_datatype_fp16);
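// xnn_float16 converts implicitly to float, so these comparisons are exact:
// 1.0f, 2.0f, and 3.0f are all representable without rounding in fp16.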
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[0], 1.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[1], 2.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[2], 3.0f);
// Check that the output of convert is allocated in workspace.
const xnn_value* convert_out = tester.Value(3);
ASSERT_EQ(convert_out->allocation_type, xnn_allocation_type_workspace);
// Check that the external input remains external (a bug in the runtime used
// to change its allocation type):
// const xnn_value* input = tester.Value(0);
// ASSERT_EQ(input->allocation_type, xnn_allocation_type_external);
}
TEST(SUBGRAPH_FP16, external_inputs_allocation_type_remains_external) {
//   external input[0]    external input[1]
//               \         /
//                \       /
//                 [add]
//                   |
//                external
//                output[2]
RuntimeTester tester(3);
tester.AddInputTensorF32({1, 2, 2, 3}, 0)
.AddInputTensorF32({1, 2, 2, 3}, 1)
.AddOutputTensorF32({1, 2, 2, 3}, 2)
.AddAddition(0, 1, 2)
.Optimize()
.RewriteForFp16();
xnn_runtime_t runtime = tester.Runtime();
xnn_status status = xnn_create_runtime_v3(tester.Subgraph(), nullptr, nullptr,
/*flags=*/0, &runtime);
std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(
runtime, xnn_delete_runtime);
if (status == xnn_status_unsupported_hardware) {
GTEST_SKIP();
}
// Check that both external inputs remain external inputs after rewriting
// for FP16.
ASSERT_EQ(tester.Value(0)->allocation_type, xnn_allocation_type_external);
ASSERT_EQ(tester.Value(1)->allocation_type, xnn_allocation_type_external);
}
TEST(SUBGRAPH_FP16, static_buffer_allocation_failure) {
SubgraphTester tester(3);
tester.AddInputTensorF32({1, 2, 2, 3}, 0)
.AddStaticTensorF32({1, 1, 1, 3}, TensorType::kDense, 1,
/*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT)
.AddOutputTensorF32({1, 4, 2, 3}, 2)
.AddAddition(0, 1, 2)
.Optimize();
MockAllocator mock_allocator;
std::unique_ptr<MockAllocator, decltype(&RestoreDefaultAllocator)>
auto_mock_allocator(&mock_allocator, &RestoreDefaultAllocator);
SetUpMockAllocator(&mock_allocator);
// Make the allocation of the static fp16 tensor buffer fail. Its size is
// 22 bytes = 3 elements * 2 bytes per fp16 element + XNN_EXTRA_BYTES.
EXPECT_CALL(mock_allocator, allocate(_, _)).Times(AnyNumber());
EXPECT_CALL(mock_allocator, allocate(_, 22)).WillOnce(Return(nullptr));
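// gMock gives precedence to the most recently declared matching expectation,
// so the catch-all expectation above lets unrelated allocations succeed
// while the size-specific one fails exactly the static fp16 buffer
// allocation.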
tester.RewriteForFp16WithFailure();
}
TEST(SUBGRAPH_FP16, external_value_allocation_failure) {
SubgraphTester tester(3);
tester.AddInputTensorF32({1, 2, 2, 3}, 0)
.AddStaticTensorF32({1, 1, 1, 3}, TensorType::kDense, 1,
/*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT)
.AddOutputTensorF32({1, 4, 2, 3}, 2)
.AddAddition(0, 1, 2)
.Optimize();
MockAllocator mock_allocator;
std::unique_ptr<MockAllocator, decltype(&RestoreDefaultAllocator)>
auto_mock_allocator(&mock_allocator, &RestoreDefaultAllocator);
SetUpMockAllocator(&mock_allocator);
// Make the allocation of the external values fail.
EXPECT_CALL(mock_allocator, reallocate(_, tester.Subgraph()->values, _))
.WillOnce(Return(nullptr));
tester.RewriteForFp16WithFailure();
}
TEST(SUBGRAPH_FP16, convolution_weights_used_by_another_node) {
SubgraphTester tester(7);
float static_filter_data[6 + XNN_EXTRA_BYTES / sizeof(float)] = {
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f,
};
//   external input[0]   bias[2]   static filter[1]   external input[6]
//           \             /          /      \           /
//            \           /          /        \         /
//             [convolution]                [subtract]
//                   |                          |
//          convolution out[3]           subtract out[5]
const uint32_t input_id = 0;
const uint32_t filter_id = 1;
const uint32_t bias_id = 2;
const uint32_t convolution_out_id = 3;
const uint32_t out_id2 = 5;
const uint32_t subtract_input_id = 6;
tester.AddInputTensorF32({1, 5, 5, 3}, input_id)
.AddStaticTensorF32({2, 1, 1, 3}, TensorType::kDense, filter_id,
/*flags=*/0, static_filter_data)
.AddStaticTensorF32({2}, TensorType::kDense, bias_id)
.AddOutputTensorF32({1, 5, 5, 2}, convolution_out_id)
.AddInputTensorF32({1, 4, 2, 3}, subtract_input_id)
.AddOutputTensorF32({2, 1, 1, 3}, out_id2)
.AddConvolution2D(ConvolutionParams{Padding{0, 0, 0, 0}, Kernel{3, 3},
Subsampling{1, 1}, Dilation{1, 1},
/*groups=*/1,
/*group_input_channels=*/3,
/*group_output_channels=*/2},
input_id, filter_id, bias_id, convolution_out_id)
.AddSubtract(filter_id, subtract_input_id, out_id2)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created. The static filter data has
// been converted into a new buffer.
//
//   external input[0]   bias[2]   filter[1]*   external input[6]
//        |                /          /    \           |
//    [convert]*          /          /      \      [convert]*
//         \             /          /        \        /
//          \           /          /          \      /
//           [convolution]                 [subtract]
//                 |                           |
//             [convert]*                 [convert]*
//                 |                           |
//        convolution out[3]           subtract out[5]
//
// We should have 6 nodes: the original convolution and subtract nodes, one
// convert for each of the two external inputs, and one convert for each of
// the two external outputs.
ASSERT_EQ(tester.NumNodes(), 6);
// The static value should be converted to FP16
const xnn_value* static_value = tester.Value(filter_id);
ASSERT_EQ(static_value->datatype, xnn_datatype_fp16);
ASSERT_EQ(static_value->fp32_data, static_filter_data);
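// The rewrite keeps both precisions for this shared static value: `data`
// now points at the converted fp16 copy, while `fp32_data` still points at
// the caller's original fp32 buffer, as verified below.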
// Weights are converted to fp16.
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[0], 1.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[1], 2.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[2], 3.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[3], 4.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[4], 5.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[5], 6.0f);
// But the original fp32 weights are kept around.
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[0], 1.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[1], 2.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[2], 3.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[3], 4.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[4], 5.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[5], 6.0f);
}
TEST(SUBGRAPH_FP16, convolution_bias_used_by_another_node) {
SubgraphTester tester(7);
float static_bias_data[2 + XNN_EXTRA_BYTES / sizeof(float)] = {
1.0f,
2.0f,
};
//   external input[0]   filter[1]   static bias[2]   external input[6]
//           \             /           /      \           /
//            \           /           /        \         /
//             [convolution]                 [subtract]
//                   |                           |
//          convolution out[3]            subtract out[5]
const uint32_t input_id = 0;
const uint32_t filter_id = 1;
const uint32_t bias_id = 2;
const uint32_t convolution_out_id = 3;
const uint32_t out_id2 = 5;
const uint32_t subtract_input_id = 6;
tester.AddInputTensorF32({1, 5, 5, 3}, input_id)
.AddStaticTensorF32({2, 1, 1, 3}, TensorType::kDense, filter_id)
.AddStaticTensorF32({2}, TensorType::kDense, bias_id, /*flags=*/0,
static_bias_data)
.AddOutputTensorF32({1, 5, 5, 2}, convolution_out_id)
.AddInputTensorF32({2}, subtract_input_id)
.AddOutputTensorF32({2}, out_id2)
.AddConvolution2D(ConvolutionParams{Padding{0, 0, 0, 0}, Kernel{3, 3},
Subsampling{1, 1}, Dilation{1, 1},
/*groups=*/1,
/*group_input_channels=*/3,
/*group_output_channels=*/2},
input_id, filter_id, bias_id, convolution_out_id)
.AddSubtract(bias_id, subtract_input_id, out_id2)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created. The static bias data has
// been converted into a new buffer.
//
//   external input[0]   filter[1]   bias[2]*   external input[6]
//        |                /           /   \           |
//    [convert]*          /           /     \      [convert]*
//         \             /           /       \        /
//          \           /           /         \      /
//           [convolution]                 [subtract]
//                 |                           |
//             [convert]*                 [convert]*
//                 |                           |
//        convolution out[3]           subtract out[5]
//
// We should have 6 nodes: the original convolution and subtract nodes, one
// convert for each of the two external inputs, and one convert for each of
// the two external outputs.
ASSERT_EQ(tester.NumNodes(), 6);
// The static value should be converted to FP16
const xnn_value* static_value = tester.Value(bias_id);
ASSERT_EQ(static_value->datatype, xnn_datatype_fp16);
ASSERT_EQ(static_value->fp32_data, static_bias_data);
// The bias values are converted to fp16.
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[0], 1.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[1], 2.0f);
// But the original fp32 bias data is kept around.
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[0], 1.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[1], 2.0f);
}
TEST(SUBGRAPH_FP16, fully_connected_qd8_f16_qc8w) {
SubgraphTester tester(5);
SubgraphTester reference_tester(5);
int8_t static_filter_data[6 + XNN_EXTRA_BYTES / sizeof(int8_t)] = {
1, 2, 3, -3, -2, -1,
};
float bias[2] = {1, 2};
float kernel_scale[2] = {0.5f, 1.5f};
//   external input[0]    bias[2]   static filter[1]
//        |                  /         /
//   [convert f32->qd8]     /         /
//            \            /         /
//             \          /         /
//            [fully connected]
//                    |
//          fully connected out[4]
const uint32_t input_id = 0;
const uint32_t filter_id = 1;
const uint32_t bias_id = 2;
const uint32_t converted_input_id = 3;
const uint32_t fully_connected_out_id = 4;
tester.AddInputTensorF32({5, 3}, input_id)
.AddDynamicallyQuantizedTensor({5, 3}, converted_input_id, /*flags=*/0)
.AddStaticTensorQS8({2, 3}, TensorType::kDense, &kernel_scale[0],
filter_id, /*flags=*/0, static_filter_data)
.AddStaticTensorF32({2}, TensorType::kDense, bias_id, /*flags=*/0,
&bias[0])
.AddOutputTensorF32({5, 2}, fully_connected_out_id)
.AddConvert(input_id, converted_input_id)
.AddFullyConnected(converted_input_id, filter_id, bias_id,
fully_connected_out_id)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created:
//
//   external input[0]    bias[2]   filter[1]
//        |                  |         /
//   [convert f32->f16]*     |        /
//        |                  |       /
//   [convert f16->qd8]      |      /
//            \              |     /
//             \             |    /
//            [fully connected]
//                    |
//           [convert f16->f32]*
//                    |
//          fully connected out[4]
reference_tester.AddInputTensorF32({5, 3}, input_id)
.AddDynamicallyQuantizedTensor({5, 3}, converted_input_id, /*flags=*/0)
.AddStaticTensorQS8({2, 3}, TensorType::kDense, &kernel_scale[0],
filter_id, /*flags=*/0, static_filter_data)
.AddStaticTensorF32({2}, TensorType::kDense, bias_id, /*flags=*/0,
&bias[0])
.AddOutputTensorF32({5, 2}, fully_connected_out_id)
.AddConvert(input_id, converted_input_id)
.AddFullyConnected(converted_input_id, filter_id, bias_id,
fully_connected_out_id);
// We should have 4 nodes: the original fully-connected and convert nodes,
// plus a new convert for the external input and a new convert for the
// external output.
xnnpack::ReplicableRandomDevice rng;
auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.f, 1.f),
std::ref(rng));
xnnpack::Buffer<float> input(15 + XNN_EXTRA_BYTES / sizeof(float));
std::generate(input.begin(), input.end(), std::ref(f32rng));
xnnpack::Buffer<float> reference_output(10), output(10);
ASSERT_EQ(tester.NumNodes(), 4);
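// The four nodes, in topological order: the new f32->f16 input convert, the
// rewritten f16->qd8 convert, the fully-connected node, and the new f16->f32
// output convert.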
xnn_runtime_t fp16_runtime_ptr = nullptr;
xnn_status status = xnn_create_runtime(tester.Subgraph(), &fp16_runtime_ptr);
if (status == xnn_status_unsupported_hardware) {
GTEST_SKIP();
}
std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_fp16_runtime(
fp16_runtime_ptr, xnn_delete_runtime);
ASSERT_EQ(xnn_status_success, status);
xnn_runtime_t fp32_runtime_ptr = nullptr;
status = xnn_create_runtime(reference_tester.Subgraph(), &fp32_runtime_ptr);
ASSERT_EQ(xnn_status_success, status);
std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_fp32_runtime(
fp32_runtime_ptr, xnn_delete_runtime);
std::array<xnn_external_value, 2> external_values = {
xnn_external_value{input_id, input.data()},
xnn_external_value{fully_connected_out_id, output.data()}};
ASSERT_EQ(xnn_status_success,
xnn_setup_runtime(fp16_runtime_ptr, 2, external_values.data()));
ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(fp16_runtime_ptr));
std::array<xnn_external_value, 2> reference_external_values = {
xnn_external_value{input_id, input.data()},
xnn_external_value{fully_connected_out_id, reference_output.data()}};
ASSERT_EQ(
xnn_status_success,
xnn_setup_runtime(fp32_runtime_ptr, 2, reference_external_values.data()));
ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(fp32_runtime_ptr));
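// fp16 carries ~11 significand bits (a relative step of about 2^-11), and
// the qd8 path adds 8-bit dynamic-quantization error, so a combined 5%
// relative/absolute tolerance is used below.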
for (size_t i = 0; i < output.size(); ++i) {
const float tolerance =
std::max(std::abs(reference_output[i]) * 5e-2, 5e-2);
ASSERT_NEAR(output[i], reference_output[i], tolerance);
}
}
TEST(SUBGRAPH_FP16, fully_connected_weights_used_by_another_node) {
SubgraphTester tester(7);
float static_filter_data[6 + XNN_EXTRA_BYTES / sizeof(float)] = {
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f,
};
//   external input[0]   bias[2]   static filter[1]   external input[6]
//           \             /          /      \           /
//            \           /          /        \         /
//         [fully connected]                [subtract]
//                 |                            |
//      fully connected out[3]           subtract out[5]
const uint32_t input_id = 0;
const uint32_t filter_id = 1;
const uint32_t bias_id = 2;
const uint32_t fully_connected_out_id = 3;
const uint32_t out_id2 = 5;
const uint32_t subtract_input_id = 6;
tester.AddInputTensorF32({5, 3}, input_id)
.AddStaticTensorF32({2, 3}, TensorType::kDense, filter_id, /*flags=*/0,
static_filter_data)
.AddStaticTensorF32({2}, TensorType::kDense, bias_id)
.AddOutputTensorF32({5, 2}, fully_connected_out_id)
.AddInputTensorF32({2, 3}, subtract_input_id)
.AddOutputTensorF32({2, 3}, out_id2)
.AddFullyConnected(input_id, filter_id, bias_id, fully_connected_out_id)
.AddSubtract(filter_id, subtract_input_id, out_id2)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created. The static filter data has
// been converted into a new buffer.
//
//   external input[0]   bias[2]   filter[1]*   external input[6]
//        |                /          /    \           |
//    [convert]*          /          /      \      [convert]*
//         \             /          /        \        /
//          \           /          /          \      /
//        [fully connected]                [subtract]
//                |                            |
//            [convert]*                  [convert]*
//                |                            |
//     fully connected out[3]          subtract out[5]
//
// We should have 6 nodes: the original fully-connected and subtract nodes,
// one convert for each of the two external inputs, and one convert for each
// of the two external outputs.
ASSERT_EQ(tester.NumNodes(), 6);
// The static value should be converted to FP16
const xnn_value* static_value = tester.Value(filter_id);
ASSERT_EQ(static_value->datatype, xnn_datatype_fp16);
ASSERT_EQ(static_value->fp32_data, static_filter_data);
// Weights are converted to fp16.
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[0], 1.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[1], 2.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[2], 3.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[3], 4.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[4], 5.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[5], 6.0f);
// But the original fp32 weights are kept around.
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[0], 1.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[1], 2.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[2], 3.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[3], 4.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[4], 5.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[5], 6.0f);
}
TEST(SUBGRAPH_FP16, fully_connected_bias_used_by_another_node) {
SubgraphTester tester(7);
float static_bias_data[2 + XNN_EXTRA_BYTES / sizeof(float)] = {
1.0f,
2.0f,
};
//   external input[0]   filter[1]   static bias[2]   external input[6]
//           \             /           /      \           /
//            \           /           /        \         /
//         [fully connected]                 [subtract]
//                 |                             |
//      fully connected out[3]            subtract out[5]
const uint32_t input_id = 0;
const uint32_t filter_id = 1;
const uint32_t bias_id = 2;
const uint32_t fully_connected_out_id = 3;
const uint32_t out_id2 = 5;
const uint32_t subtract_input_id = 6;
tester.AddInputTensorF32({5, 3}, input_id)
.AddStaticTensorF32({2, 3}, TensorType::kDense, filter_id)
.AddStaticTensorF32({2}, TensorType::kDense, bias_id, /*flags=*/0,
static_bias_data)
.AddOutputTensorF32({5, 2}, fully_connected_out_id)
.AddInputTensorF32({2}, subtract_input_id)
.AddOutputTensorF32({2}, out_id2)
.AddFullyConnected(input_id, filter_id, bias_id, fully_connected_out_id)
.AddSubtract(bias_id, subtract_input_id, out_id2)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created. The static bias data has
// been converted into a new buffer.
//
//   external input[0]   filter[1]   bias[2]*   external input[6]
//        |                /           /   \           |
//    [convert]*          /           /     \      [convert]*
//         \             /           /       \        /
//          \           /           /         \      /
//        [fully connected]                [subtract]
//                |                            |
//            [convert]*                  [convert]*
//                |                            |
//     fully connected out[3]          subtract out[5]
//
// We should have 6 nodes: the original fully-connected and subtract nodes,
// one convert for each of the two external inputs, and one convert for each
// of the two external outputs.
ASSERT_EQ(tester.NumNodes(), 6);
// The static value should be converted to FP16
const xnn_value* static_value = tester.Value(bias_id);
ASSERT_EQ(static_value->datatype, xnn_datatype_fp16);
ASSERT_EQ(static_value->fp32_data, static_bias_data);
// The bias values are converted to fp16.
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[0], 1.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[1], 2.0f);
// But the original fp32 bias data is kept around.
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[0], 1.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[1], 2.0f);
}
TEST(SUBGRAPH_FP16, prelu_slope_used_by_another_node) {
SubgraphTester tester(5);
float static_slope_data[2 + XNN_EXTRA_BYTES / sizeof(float)] = {
1.0f,
2.0f,
};
//   external input[0]   static slope[1]   external input[4]
//           \             /      \           /
//            \           /        \         /
//            [prelu]            [subtract]
//               |                    |
//          prelu out[2]       subtract out[3]
const uint32_t input_id = 0;
const uint32_t slope_id = 1;
const uint32_t prelu_out_id = 2;
const uint32_t out_id2 = 3;
const uint32_t subtract_input_id = 4;
tester.AddInputTensorF32({5, 3, 3, 2}, input_id)
.AddStaticTensorF32({2}, TensorType::kDense, slope_id, /*flags=*/0,
static_slope_data)
.AddOutputTensorF32({5, 3, 3, 2}, prelu_out_id)
.AddInputTensorF32({2}, subtract_input_id)
.AddOutputTensorF32({2}, out_id2)
.AddPrelu(input_id, slope_id, prelu_out_id)
.AddSubtract(slope_id, subtract_input_id, out_id2)
.Optimize()
.RewriteForFp16();
// After rewriting for FP16, the graph should look like this, with *
// indicating new operators and values created. The static slope data has
// been converted into a new buffer.
//
//   external input[0]   slope[1]*   external input[4]
//        |                /    \           |
//    [convert]*          /      \      [convert]*
//         \             /        \        /
//          \           /          \      /
//           [prelu]             [subtract]
//              |                     |
//          [convert]*            [convert]*
//              |                     |
//         prelu out[2]        subtract out[3]
//
// We should have 6 nodes: the original prelu and subtract nodes, one convert
// for each of the two external inputs, and one convert for each of the two
// external outputs.
ASSERT_EQ(tester.NumNodes(), 6);
// The static value should be converted to FP16
const xnn_value* static_value = tester.Value(slope_id);
ASSERT_EQ(static_value->datatype, xnn_datatype_fp16);
ASSERT_EQ(static_value->fp32_data, static_slope_data);
// The slope values are converted to fp16.
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[0], 1.0f);
ASSERT_EQ(static_cast<const xnn_float16*>(static_value->data)[1], 2.0f);
// But the original fp32 slope data is kept around.
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[0], 1.0f);
ASSERT_EQ(static_cast<const float*>(static_value->fp32_data)[1], 2.0f);
}
TEST(SUBGRAPH_FP16_DYNAMIC_FULLY_CONNECTED,
dynamic_weights_no_bias_weights_converted_to_fp16) {
SubgraphTester tester(5);
//   external input[0]    external input[1]
//         \                   /
//          \           [constant pad]
//           \               /
//          [fully connected]
//                  |
//        fully connected out[2]
const uint32_t input_id = 0;
const uint32_t input2_id = 1;
const uint32_t weights_id = 3;
const uint32_t fully_connected_out_id = 2;
std::array<size_t, 4> pre_paddings = {1, 0, 0, 0};
std::array<size_t, 4> post_paddings = {0, 0, 0, 0};
tester.AddInputTensorF32({1, 5, 5, 3}, input_id)
.AddInputTensorF32({1, 1, 1, 3}, input2_id)
.AddOutputTensorF32({1, 5, 5, 2}, fully_connected_out_id)
.AddDynamicTensorF32({2, 1, 1, 3}, weights_id)
.AddConstantPad(pre_paddings.data(), post_paddings.data(), 0.0f,
input2_id, weights_id)
.AddFullyConnected(input_id, weights_id,
/*bias_id=*/XNN_INVALID_VALUE_ID,
fully_connected_out_id)
.Optimize()
.RewriteForFp16();
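// The weights are produced at runtime by the constant-pad node, so there is
// no static buffer to convert up front; the rewrite only needs to change the
// dynamic value's datatype, which is what we check here.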
const xnn_value* weights_value = tester.Value(weights_id);
ASSERT_EQ(weights_value->datatype, xnn_datatype_fp16);
}
TEST(SUBGRAPH_FP16_DYNAMIC_FULLY_CONNECTED,
dynamic_weights_static_bias_weights_converted_to_fp16) {
SubgraphTester tester(5);
//   external input[0]    external input[1]
//         \                   /         static bias[4]
//          \           [constant pad]      /
//           \               /             /
//            [fully connected]
//                    |
//          fully connected out[2]
const uint32_t input_id = 0;
const uint32_t input2_id = 1;
const uint32_t weights_id = 3;
const uint32_t bias_id = 4;
const uint32_t fully_connected_out_id = 2;
std::array<size_t, 4> pre_paddings = {1, 0, 0, 0};
std::array<size_t, 4> post_paddings = {0, 0, 0, 0};
tester.AddInputTensorF32({1, 5, 5, 3}, input_id)
.AddInputTensorF32({1, 1, 1, 3}, input2_id)
.AddOutputTensorF32({1, 5, 5, 2}, fully_connected_out_id)
.AddDynamicTensorF32({2, 1, 1, 3}, weights_id)
.AddStaticTensorF32({2}, TensorType::kDense, bias_id)
.AddConstantPad(pre_paddings.data(), post_paddings.data(), 0.0f,
input2_id, weights_id)
.AddFullyConnected(input_id, weights_id, bias_id, fully_connected_out_id)
.Optimize()
.RewriteForFp16();
const xnn_value* weights_value = tester.Value(weights_id);
ASSERT_EQ(weights_value->datatype, xnn_datatype_fp16);
}
TEST(SUBGRAPH_FP16_DYNAMIC_FULLY_CONNECTED,
static_weights_dynamic_bias_bias_converted_to_fp16) {
SubgraphTester tester(5);
//   external input[0]   static weights[3]   external input[1]
//          \                 |                   /
//           \                |           [constant pad]
//            \               |                /
//             [fully connected]
//                     |
//           fully connected out[2]
const uint32_t input_id = 0;
const uint32_t input2_id = 1;
const uint32_t weights_id = 3;
const uint32_t bias_id = 4;
const uint32_t fully_connected_out_id = 2;
std::array<size_t, 4> pre_paddings = {1};
std::array<size_t, 4> post_paddings = {0};
tester.AddInputTensorF32({1, 5, 5, 3}, input_id)
.AddInputTensorF32({1}, input2_id)
.AddOutputTensorF32({1, 5, 5, 2}, fully_connected_out_id)
.AddStaticTensorF32({2, 1, 1, 3}, TensorType::kDense, weights_id)
.AddDynamicTensorF32({2}, bias_id)
.AddConstantPad(pre_paddings.data(), post_paddings.data(), 0.0f,
input2_id, bias_id)
.AddFullyConnected(input_id, weights_id, bias_id, fully_connected_out_id)
.Optimize()
.RewriteForFp16();
const xnn_value* bias_value = tester.Value(bias_id);
ASSERT_EQ(bias_value->datatype, xnn_datatype_fp16);
}
TEST(SUBGRAPH_FP16_DYNAMIC_FULLY_CONNECTED,
dynamic_weights_dynamic_bias_weights_and_bias_converted_to_fp16) {
SubgraphTester tester(6);
//   external input[0]   external input[1]   external input[2]
//         \                   /                   /
//          \           [constant pad]     [constant pad]
//           \               /                   /
//            [fully connected]
//                    |
//          fully connected out[5]
const uint32_t input_id = 0;
const uint32_t input2_id = 1;
const uint32_t input3_id = 2;
const uint32_t weights_id = 3;
const uint32_t bias_id = 4;
const uint32_t fully_connected_out_id = 5;
std::array<size_t, 4> weights_pre_paddings = {1, 0, 0, 0};
std::array<size_t, 4> weights_post_paddings = {0, 0, 0, 0};
std::array<size_t, 4> bias_pre_paddings = {1};
std::array<size_t, 4> bias_post_paddings = {0};
tester.AddInputTensorF32({1, 5, 5, 3}, input_id)
.AddInputTensorF32({1, 1, 1, 3}, input2_id)
.AddInputTensorF32({1}, input3_id)
.AddOutputTensorF32({1, 5, 5, 2}, fully_connected_out_id)
.AddDynamicTensorF32({2, 1, 1, 3}, weights_id)
.AddDynamicTensorF32({2}, bias_id)
.AddConstantPad(weights_pre_paddings.data(), weights_post_paddings.data(),
0.0f, input2_id, weights_id)
.AddConstantPad(bias_pre_paddings.data(), bias_post_paddings.data(), 0.0f,
input3_id, bias_id)
.AddFullyConnected(input_id, weights_id, bias_id, fully_connected_out_id)
.Optimize()
.RewriteForFp16();
const xnn_value* weights_value = tester.Value(weights_id);
ASSERT_EQ(weights_value->datatype, xnn_datatype_fp16);
const xnn_value* bias_value = tester.Value(bias_id);
ASSERT_EQ(bias_value->datatype, xnn_datatype_fp16);
}
} // namespace xnnpack