sglang_v0.5.2/pytorch_2.8.0/third_party/XNNPACK/test/indirection.cc

434 lines
13 KiB
C++

// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <utility>
#include <vector>

#include <gtest/gtest.h>

#include "xnnpack.h"
#include "xnnpack/buffer.h"
#include "xnnpack/indirection.h"
#include "xnnpack/math.h"
#include "xnnpack/operator-utils.h"
#include "xnnpack/operator.h"
namespace xnnpack {
namespace {
// Sentinel used in expected-index lists in place of an input offset: an entry
// equal to kZero is expected to point at the zero buffer rather than at an
// input element.
static constexpr uintptr_t kZero{SIZE_MAX};
class IndirectionTester {
public:
IndirectionTester& input_height(size_t input_height) {
input_height_ = input_height;
return *this;
}
IndirectionTester& input_width(size_t input_width) {
input_width_ = input_width;
return *this;
}
IndirectionTester& kernel_height(size_t kernel_height) {
kernel_height_ = kernel_height;
return *this;
}
IndirectionTester& kernel_width(size_t kernel_width) {
kernel_width_ = kernel_width;
return *this;
}
IndirectionTester& padding_height(size_t padding_height) {
padding_height_ = padding_height;
return *this;
}
IndirectionTester& padding_width(size_t padding_width) {
padding_width_ = padding_width;
return *this;
}
IndirectionTester& subsampling(size_t subsampling) {
subsampling_ = subsampling;
return *this;
}
IndirectionTester& dilation(size_t dilation) {
dilation_ = dilation;
return *this;
}
IndirectionTester& channels(size_t channels) {
channels_ = channels;
return *this;
}
IndirectionTester& primary_tile(size_t primary_tile) {
primary_tile_ = primary_tile;
return *this;
}
IndirectionTester& channel_tile(size_t channel_tile) {
channel_tile_ = channel_tile;
return *this;
}
IndirectionTester& expected_indices(std::vector<size_t> expected_indices) {
expected_indices_ = expected_indices;
return *this;
}
void Test() {
IndirectionInit();
EXPECT_EQ(indirection_buffer_.size(), expected_indices_.size());
for (size_t i = 0; i < expected_indices_.size(); i++) {
EXPECT_EQ(indirection_buffer_[i], &input_[expected_indices_[i]])
<< "i: " << i << ", input_index:" << expected_indices_[i];
}
}
void TestCompressed() {
IndirectionCompressedInit();
EXPECT_EQ(indirection_buffer_.size(), expected_indices_.size());
for (size_t i = 0; i < expected_indices_.size(); i++) {
if (expected_indices_[i] == kZero) {
EXPECT_EQ(indirection_buffer_[i], zero_buffer_.data())
<< "i: " << i << ", zero_buffer: " << zero_buffer_.data();
} else {
EXPECT_EQ(indirection_buffer_[i], &input_[expected_indices_[i]])
<< "i: " << i << ", input_index:" << expected_indices_[i];
}
}
}
private:
void IndirectionInit() {
const size_t kernel_size = kernel_height_ * kernel_width_;
const size_t output_height = xnn_compute_convolution_output_dimension(
input_height_ + padding_height_, kernel_height_, dilation_, subsampling_);
const size_t output_width = xnn_compute_convolution_output_dimension(
input_width_ + padding_width_, kernel_width_, dilation_, subsampling_);
const size_t step_width = dilation_ == 1 ? min(subsampling_, kernel_width_) : kernel_width_;
const size_t step_height =
kernel_size + (output_width - 1) * step_width * kernel_height_;
input_ = xnnpack::Buffer<float>(channels_ * input_height_ * input_width_);
std::iota(input_.begin(), input_.end(), 0.0f);
zero_buffer_ = xnnpack::Buffer<float>(channels_, 0.0f);
const size_t num_indirection_elements = (primary_tile_ - kernel_size) + output_height * step_height;
indirection_buffer_ = xnnpack::Buffer<const float*>(num_indirection_elements);
xnn_operator op = {};
op.indirection_buffer = reinterpret_cast<const void**>(indirection_buffer_.data());
op.input = input_.data();
op.input_pixel_stride = channels_;
op.zero_buffer = zero_buffer_.data();
op.input_height = input_height_;
op.input_width = input_width_;
op.output_height = output_height;
op.output_width = output_width;
op.kernel_height = kernel_height_;
op.kernel_width = kernel_width_;
op.stride_height = subsampling_;
op.stride_width = subsampling_;
op.dilation_height = dilation_;
op.dilation_width = dilation_;
op.padding_top = padding_height_ / 2;
op.padding_left = padding_width_ / 2;
xnn_indirection_init_dwconv2d(
/*output_y_start=*/0, /*output_y_end=*/output_height,
op.indirection_buffer,
op.input,
op.input_pixel_stride << /*log2_input_element_size=*/2,
op.zero_buffer,
op.input_height, op.input_width,
op.output_height, op.output_width,
op.kernel_height, op.kernel_width,
op.stride_height, op.stride_width,
op.dilation_height, op.dilation_width,
op.padding_top, op.padding_left,
step_height, step_width, primary_tile_);
}
void IndirectionCompressedInit() {
const size_t kernel_size = kernel_height_ * kernel_width_;
const size_t output_height = xnn_compute_convolution_output_dimension(
input_height_ + padding_height_, kernel_height_, dilation_, subsampling_);
const size_t output_width = xnn_compute_convolution_output_dimension(
input_width_ + padding_width_, kernel_width_, dilation_, subsampling_);
const size_t step_width = dilation_ == 1 ? min(subsampling_, kernel_width_) : kernel_width_;
const size_t step_height =
kernel_size + (output_width - 1) * step_width * kernel_height_;
input_ = xnnpack::Buffer<float>(channels_ * input_height_ * input_width_);
std::iota(input_.begin(), input_.end(), 0);
zero_buffer_ = xnnpack::Buffer<float>(channels_, 0.0f);
const size_t indirect_top_height = divide_round_up(padding_height_ / 2, subsampling_);
const size_t indirect_bot_height = divide_round_up(padding_height_ / 2, subsampling_);
const size_t indirection_buffer_output_height = (indirect_top_height + indirect_bot_height + 1);
const size_t num_indirection_elements = (primary_tile_ - kernel_size) + indirection_buffer_output_height * step_height;
indirection_buffer_ = xnnpack::Buffer<const float*>(num_indirection_elements);
xnn_operator op = {};
op.indirection_buffer = reinterpret_cast<const void**>(indirection_buffer_.data());
op.input = input_.data();
op.input_pixel_stride = channels_;
op.zero_buffer = zero_buffer_.data();
op.input_height = input_height_;
op.input_width = input_width_;
op.output_height = output_height;
op.output_width = output_width;
op.kernel_height = kernel_height_;
op.kernel_width = kernel_width_;
op.stride_height = subsampling_;
op.stride_width = subsampling_;
op.dilation_height = dilation_;
op.dilation_width = dilation_;
op.padding_top = padding_height_ / 2;
op.padding_left = padding_width_ / 2;
xnn_indirection_init_dwconv2d_compressed(
/*output_y_start=*/0, /*output_y_end=*/output_height,
op.indirection_buffer,
op.input,
op.input_pixel_stride << /*log2_input_element_size=*/2,
op.zero_buffer,
op.input_height, op.input_width,
op.output_height, op.output_width,
op.kernel_height, op.kernel_width,
op.stride_height, op.stride_width,
op.dilation_height, op.dilation_width,
op.padding_top, op.padding_left,
step_height, step_width,
indirect_top_height,
indirect_bot_height,
primary_tile_);
}
// Set by tests using setter functions.
size_t input_height_;
size_t input_width_;
size_t kernel_height_;
size_t kernel_width_;
size_t padding_height_ = 0;
size_t padding_width_ = 0;
size_t subsampling_ = 1;
size_t dilation_ = 1;
size_t channels_ = 1;
size_t primary_tile_;
size_t channel_tile_ = 1;
std::vector<size_t> expected_indices_;
// Initialized by IndirectionInit.
xnnpack::Buffer<const float*> indirection_buffer_;
xnnpack::Buffer<float> input_;
xnnpack::Buffer<float> zero_buffer_;
};
// A 1x1 kernel over a 3x3 input: each input pixel is expected exactly once, in
// row-major order.
TEST(INDIRECTION, input3x3_kernel1x1) {
  const std::vector<size_t> expected = {0, 1, 2, 3, 4, 5, 6, 7, 8};

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(1).kernel_width(1);
  tester.primary_tile(1);
  tester.expected_indices(expected);
  tester.Test();
}
// A 2x2 kernel over a 3x3 input, no padding, stride 1.
TEST(INDIRECTION, input3x3_kernel2x2) {
  // Input indices:   Kernel taps:
  //   0 1 2            a b
  //   3 4 5            c d
  //   6 7 8
  //
  // One line per output row. Within a row the kernel window is walked in
  // column-major order, and pointers shared by horizontally adjacent outputs
  // are stored only once.
  const std::vector<size_t> expected = {
      0, 3, 1, 4, 2, 5,
      3, 6, 4, 7, 5, 8,
  };

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(2).kernel_width(2);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.Test();
}
// A 1x1 kernel with stride 2 over a 3x3 input: only the four corner pixels are
// expected.
TEST(INDIRECTION, input3x3_kernel1x1_subsampling2) {
  // Input indices:   Kernel taps:
  //   0 1 2            a
  //   3 4 5
  //   6 7 8
  const std::vector<size_t> expected = {
      0, 2,
      6, 8,
  };

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(1).kernel_width(1);
  tester.subsampling(2);
  tester.primary_tile(1);
  tester.expected_indices(expected);
  tester.Test();
}
// A 2x2 kernel with stride 2 over a 4x4 input: the windows do not overlap, so
// every input pixel is expected exactly once.
TEST(INDIRECTION, input4x4_kernel2x2_subsampling2) {
  // Input indices:      Kernel taps:
  //    0  1  2  3         a b
  //    4  5  6  7         c d
  //    8  9 10 11
  //   12 13 14 15
  //
  // One line per output row; each window is listed in column-major order.
  const std::vector<size_t> expected = {
      0, 4, 1, 5, 2, 6, 3, 7,
      8, 12, 9, 13, 10, 14, 11, 15,
  };

  IndirectionTester tester{};
  tester.input_height(4).input_width(4);
  tester.kernel_height(2).kernel_width(2);
  tester.subsampling(2);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.Test();
}
// A 2x1 kernel over a 4x4 input with a primary tile larger than the kernel.
TEST(INDIRECTION, input4x4_kernel2x1_primarytile4) {
  // Input indices:      Kernel taps:
  //    0  1  2  3         a
  //    4  5  6  7         b
  //    8  9 10 11
  //   12 13 14 15
  const std::vector<size_t> expected = {
      // One line per output row.
      0, 4, 1, 5, 2, 6, 3, 7,
      4, 8, 5, 9, 6, 10, 7, 11,
      8, 12, 9, 13, 10, 14, 11, 15,
      // primary_tile - kernel_size = 4 - (2 x 1) = 2 extra entries, which
      // repeat the last pointer written.
      15, 15,
  };

  IndirectionTester tester{};
  tester.input_height(4).input_width(4);
  tester.kernel_height(2).kernel_width(1);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.Test();
}
// A 1x2 kernel with stride 2 over a 4x4 input with a primary tile larger than
// the kernel.
TEST(INDIRECTION, input4x4_kernel1x2_primarytile4_subsampling2) {
  // Input indices:      Kernel taps:
  //    0  1  2  3         a b
  //    4  5  6  7
  //    8  9 10 11
  //   12 13 14 15
  const std::vector<size_t> expected = {
      // Output rows 0 and 1 read input rows 0 and 2.
      0, 1, 2, 3, 8, 9, 10, 11,
      // primary_tile - kernel_size = 4 - 2 = 2 extra entries, which repeat the
      // last pointer written.
      11, 11,
  };

  IndirectionTester tester{};
  tester.input_height(4).input_width(4);
  tester.kernel_height(1).kernel_width(2);
  tester.subsampling(2);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.Test();
}
// A 2x1 kernel with stride 2 over a 4x4 input with a primary tile larger than
// the kernel.
TEST(INDIRECTION, input4x4_kernel2x1_primarytile4_subsampling2) {
  // Input indices:      Kernel taps:   Output:
  //    0  1  2  3         a              A B
  //    4  5  6  7         b              C D
  //    8  9 10 11
  //   12 13 14 15
  const std::vector<size_t> expected = {
      // One line per output row.
      0, 4, 2, 6,
      8, 12, 10, 14,
      // primary_tile - kernel_size = 4 - 2 = 2 extra entries, which repeat the
      // last pointer written.
      14, 14,
  };

  IndirectionTester tester{};
  tester.input_height(4).input_width(4);
  tester.kernel_height(2).kernel_width(1);
  tester.subsampling(2);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.Test();
}
// Compressed buffer for a 1x1 kernel over an unpadded 3x3 input: with no
// padding there is no top or bottom section, so a single row of pointers is
// expected.
TEST(INDIRECTION_COMPRESSED, input3x3_kernel1x1) {
  const std::vector<size_t> expected = {0, 1, 2};

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(1).kernel_width(1);
  tester.primary_tile(1);
  tester.expected_indices(expected);
  tester.TestCompressed();
}
// Compressed buffer for a 2x2 kernel over a 3x3 input with one pixel of
// padding on every side (total padding 2 in each dimension).
TEST(INDIRECTION_COMPRESSED, input3x3_kernel2x2_padding2x2) {
  // Padded input (Z = zero padding, digits = input indices):   Kernel taps:
  //   Z Z Z Z Z                                                  a b
  //   Z 0 1 2 Z                                                  c d
  //   Z 3 4 5 Z
  //   Z 6 7 8 Z
  //   Z Z Z Z Z
  const std::vector<size_t> expected = {
      // Top section: the window straddles the top padding row.
      kZero, kZero, kZero, 0, kZero, 1, kZero, 2, kZero, kZero,
      // Compressed row: the window lies entirely on input rows.
      kZero, kZero, 0, 3, 1, 4, 2, 5, kZero, kZero,
      // Bottom section: the window straddles the bottom padding row.
      kZero, kZero, 6, kZero, 7, kZero, 8, kZero, kZero, kZero,
  };

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(2).kernel_width(2);
  tester.padding_height(2).padding_width(2);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.TestCompressed();
}
// Compressed buffer for a 2x2 kernel with stride 2 over a 2x2 input with one
// pixel of padding on every side (total padding 2 in each dimension).
TEST(INDIRECTION_COMPRESSED, input2x2_kernel2x2_padding2x2_subsampling2) {
  // Padded input (Z = zero padding, digits = input indices):   Kernel taps:
  //   Z Z Z Z                                                    a b
  //   Z 0 1 Z                                                    c d
  //   Z 2 3 Z
  //   Z Z Z Z
  const std::vector<size_t> expected = {
      // Top section.
      kZero, kZero, kZero, 0, kZero, 1, kZero, kZero,
      // A compressed row is not actually needed here, but it is easier to
      // assume that there is always one; it ends up identical to the bottom
      // section.
      kZero, kZero, 2, kZero, 3, kZero, kZero, kZero,
      // Bottom section: same pointers as the row above.
      kZero, kZero, 2, kZero, 3, kZero, kZero, kZero,
  };

  IndirectionTester tester{};
  tester.input_height(2).input_width(2);
  tester.kernel_height(2).kernel_width(2);
  tester.padding_height(2).padding_width(2);
  tester.subsampling(2);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.TestCompressed();
}
} // namespace
} // namespace xnnpack