sglang_v0.5.2/pytorch_2.8.0/third_party/XNNPACK/test/indirection.cc

// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <cstddef>
#include <cstdint>
#include <numeric>
#include <utility>
#include <vector>

#include <gtest/gtest.h>
#include "xnnpack.h"
#include "xnnpack/indirection.h"
#include "xnnpack/math.h"
#include "xnnpack/operator-utils.h"
#include "xnnpack/operator.h"
#include "xnnpack/buffer.h"

namespace xnnpack {
namespace {


// Sentinel "index" used in expected-index lists to mean "this entry must point
// at the zero buffer" rather than at an element of the input buffer.
constexpr uintptr_t kZero = SIZE_MAX;

class IndirectionTester {
 public:
  IndirectionTester& input_height(size_t input_height) {
    input_height_ = input_height;
    return *this;
  }

  IndirectionTester& input_width(size_t input_width) {
    input_width_ = input_width;
    return *this;
  }

  IndirectionTester& kernel_height(size_t kernel_height) {
    kernel_height_ = kernel_height;
    return *this;
  }

  IndirectionTester& kernel_width(size_t kernel_width) {
    kernel_width_ = kernel_width;
    return *this;
  }

  IndirectionTester& padding_height(size_t padding_height) {
    padding_height_ = padding_height;
    return *this;
  }

  IndirectionTester& padding_width(size_t padding_width) {
    padding_width_ = padding_width;
    return *this;
  }

  IndirectionTester& subsampling(size_t subsampling) {
    subsampling_ = subsampling;
    return *this;
  }

  IndirectionTester& dilation(size_t dilation) {
    dilation_ = dilation;
    return *this;
  }

  IndirectionTester& channels(size_t channels) {
    channels_ = channels;
    return *this;
  }

  IndirectionTester& primary_tile(size_t primary_tile) {
    primary_tile_ = primary_tile;
    return *this;
  }

  IndirectionTester& channel_tile(size_t channel_tile) {
    channel_tile_ = channel_tile;
    return *this;
  }

  IndirectionTester& expected_indices(std::vector<size_t> expected_indices) {
    expected_indices_ = expected_indices;
    return *this;
  }

  void Test() {
    IndirectionInit();
    EXPECT_EQ(indirection_buffer_.size(), expected_indices_.size());
    for (size_t i = 0; i < expected_indices_.size(); i++) {
      EXPECT_EQ(indirection_buffer_[i], &input_[expected_indices_[i]])
          << "i: " << i << ", input_index:" << expected_indices_[i];
    }
  }

  void TestCompressed() {
    IndirectionCompressedInit();
    EXPECT_EQ(indirection_buffer_.size(), expected_indices_.size());
    for (size_t i = 0; i < expected_indices_.size(); i++) {
      if (expected_indices_[i] == kZero) {
        EXPECT_EQ(indirection_buffer_[i], zero_buffer_.data())
            << "i: " << i << ", zero_buffer: " << zero_buffer_.data();
      } else {
        EXPECT_EQ(indirection_buffer_[i], &input_[expected_indices_[i]])
            << "i: " << i << ", input_index:" << expected_indices_[i];
      }
    }
  }

 private:
  void IndirectionInit() {
    const size_t kernel_size = kernel_height_ * kernel_width_;
    const size_t output_height = xnn_compute_convolution_output_dimension(
        input_height_ + padding_height_, kernel_height_, dilation_, subsampling_);
    const size_t output_width = xnn_compute_convolution_output_dimension(
        input_width_ + padding_width_, kernel_width_, dilation_, subsampling_);
    const size_t step_width = dilation_ == 1 ? min(subsampling_, kernel_width_) : kernel_width_;
    const size_t step_height =
        kernel_size + (output_width - 1) * step_width * kernel_height_;

    input_ = xnnpack::Buffer<float>(channels_ * input_height_ * input_width_);
    std::iota(input_.begin(), input_.end(), 0.0f);
    zero_buffer_ = xnnpack::Buffer<float>(channels_, 0.0f);

    const size_t num_indirection_elements = (primary_tile_ - kernel_size) + output_height * step_height;
    indirection_buffer_ = xnnpack::Buffer<const float*>(num_indirection_elements);
    xnn_operator op = {};
    op.indirection_buffer = reinterpret_cast<const void**>(indirection_buffer_.data());
    op.input = input_.data();
    op.input_pixel_stride = channels_;
    op.zero_buffer = zero_buffer_.data();
    op.input_height = input_height_;
    op.input_width = input_width_;
    op.output_height = output_height;
    op.output_width = output_width;
    op.kernel_height = kernel_height_;
    op.kernel_width = kernel_width_;
    op.stride_height = subsampling_;
    op.stride_width = subsampling_;
    op.dilation_height = dilation_;
    op.dilation_width = dilation_;
    op.padding_top = padding_height_ / 2;
    op.padding_left = padding_width_ / 2;
    xnn_indirection_init_dwconv2d(
      /*output_y_start=*/0, /*output_y_end=*/output_height,
      op.indirection_buffer,
      op.input,
      op.input_pixel_stride << /*log2_input_element_size=*/2,
      op.zero_buffer,
      op.input_height, op.input_width,
      op.output_height, op.output_width,
      op.kernel_height, op.kernel_width,
      op.stride_height, op.stride_width,
      op.dilation_height, op.dilation_width,
      op.padding_top, op.padding_left,
      step_height, step_width, primary_tile_);
  }

  void IndirectionCompressedInit() {
    const size_t kernel_size = kernel_height_ * kernel_width_;
    const size_t output_height = xnn_compute_convolution_output_dimension(
        input_height_ + padding_height_, kernel_height_, dilation_, subsampling_);
    const size_t output_width = xnn_compute_convolution_output_dimension(
        input_width_ + padding_width_, kernel_width_, dilation_, subsampling_);
    const size_t step_width = dilation_ == 1 ? min(subsampling_, kernel_width_) : kernel_width_;
    const size_t step_height =
        kernel_size + (output_width - 1) * step_width * kernel_height_;

    input_ = xnnpack::Buffer<float>(channels_ * input_height_ * input_width_);
    std::iota(input_.begin(), input_.end(), 0);
    zero_buffer_ = xnnpack::Buffer<float>(channels_, 0.0f);

    const size_t indirect_top_height = divide_round_up(padding_height_ / 2, subsampling_);
    const size_t indirect_bot_height = divide_round_up(padding_height_ / 2, subsampling_);
    const size_t indirection_buffer_output_height = (indirect_top_height + indirect_bot_height + 1);

    const size_t num_indirection_elements = (primary_tile_ - kernel_size) + indirection_buffer_output_height * step_height;
    indirection_buffer_ = xnnpack::Buffer<const float*>(num_indirection_elements);
    xnn_operator op = {};
    op.indirection_buffer = reinterpret_cast<const void**>(indirection_buffer_.data());
    op.input = input_.data();
    op.input_pixel_stride = channels_;
    op.zero_buffer = zero_buffer_.data();
    op.input_height = input_height_;
    op.input_width = input_width_;
    op.output_height = output_height;
    op.output_width = output_width;
    op.kernel_height = kernel_height_;
    op.kernel_width = kernel_width_;
    op.stride_height = subsampling_;
    op.stride_width = subsampling_;
    op.dilation_height = dilation_;
    op.dilation_width = dilation_;
    op.padding_top = padding_height_ / 2;
    op.padding_left = padding_width_ / 2;
    xnn_indirection_init_dwconv2d_compressed(
      /*output_y_start=*/0, /*output_y_end=*/output_height,
      op.indirection_buffer,
      op.input,
      op.input_pixel_stride << /*log2_input_element_size=*/2,
      op.zero_buffer,
      op.input_height, op.input_width,
      op.output_height, op.output_width,
      op.kernel_height, op.kernel_width,
      op.stride_height, op.stride_width,
      op.dilation_height, op.dilation_width,
      op.padding_top, op.padding_left,
      step_height, step_width,
      indirect_top_height,
      indirect_bot_height,
      primary_tile_);
  }

  // Set by tests using setter functions.
  size_t input_height_;
  size_t input_width_;
  size_t kernel_height_;
  size_t kernel_width_;
  size_t padding_height_ = 0;
  size_t padding_width_ = 0;
  size_t subsampling_ = 1;
  size_t dilation_ = 1;
  size_t channels_ = 1;
  size_t primary_tile_;
  size_t channel_tile_ = 1;
  std::vector<size_t> expected_indices_;

  // Initialized by IndirectionInit.
  xnnpack::Buffer<const float*> indirection_buffer_;
  xnnpack::Buffer<float> input_;
  xnnpack::Buffer<float> zero_buffer_;
};

// 1x1 kernel over a 3x3 input: the expected buffer lists the nine input
// pixels in order.
TEST(INDIRECTION, input3x3_kernel1x1) {
  const std::vector<size_t> expected = {0, 1, 2, 3, 4, 5, 6, 7, 8};

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(1).kernel_width(1);
  tester.primary_tile(1);
  tester.expected_indices(expected);
  tester.Test();
}

// 2x2 kernel over a 3x3 input.
//
//   input:  kernel:
//   0 1 2   a b
//   3 4 5   c d
//   6 7 8
TEST(INDIRECTION, input3x3_kernel2x2) {
  // One line per output row. Within a row the pointers are laid out
  // column-major and are shared between neighbouring output pixels.
  const std::vector<size_t> expected = {
    0, 3, 1, 4, 2, 5,
    3, 6, 4, 7, 5, 8,
  };

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(2).kernel_width(2);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.Test();
}

// 1x1 kernel with stride 2 over a 3x3 input: only the corner pixels are
// expected.
//
//   input:  kernel:
//   0 1 2   a
//   3 4 5
//   6 7 8
TEST(INDIRECTION, input3x3_kernel1x1_subsampling2) {
  const std::vector<size_t> expected = {
    0, 2,
    6, 8,
  };

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(1).kernel_width(1);
  tester.subsampling(2);
  tester.primary_tile(1);
  tester.expected_indices(expected);
  tester.Test();
}

// 2x2 kernel with stride 2 over a 4x4 input.
//
//   input:       kernel:
//   0  1  2  3   a b
//   4  5  6  7   c d
//   8  9  10 11
//   12 13 14 15
TEST(INDIRECTION, input4x4_kernel2x2_subsampling2) {
  // One line per output row, column-major within the row.
  const std::vector<size_t> expected = {
    0, 4, 1, 5, 2, 6, 3, 7,
    8, 12, 9, 13, 10, 14, 11, 15,
  };

  IndirectionTester tester{};
  tester.input_height(4).input_width(4);
  tester.kernel_height(2).kernel_width(2);
  tester.subsampling(2);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.Test();
}

// 2x1 kernel over a 4x4 input with a primary tile larger than the kernel.
//
//   input:       kernel:
//   0  1  2  3   a
//   4  5  6  7   b
//   8  9  10 11
//   12 13 14 15
TEST(INDIRECTION, input4x4_kernel2x1_primarytile4) {
  const std::vector<size_t> expected = {
    // One line per output row.
    0, 4, 1, 5, 2, 6, 3, 7,
    4, 8, 5, 9, 6, 10, 7, 11,
    8, 12, 9, 13, 10, 14, 11, 15,
    // primary_tile - kernel_size (4 - 2 x 1 = 2) extra elements, set to the
    // last input pixel.
    15, 15,
  };

  IndirectionTester tester{};
  tester.input_height(4).input_width(4);
  tester.kernel_height(2).kernel_width(1);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.Test();
}

// 1x2 kernel with stride 2 over a 4x4 input and a primary tile larger than
// the kernel.
//
//   input:       kernel:
//   0  1  2  3   a b
//   4  5  6  7
//   8  9  10 11
//   12 13 14 15
TEST(INDIRECTION, input4x4_kernel1x2_primarytile4_subsampling2) {
  const std::vector<size_t> expected = {
    // Two output rows of four pointers each.
    0, 1, 2, 3, 8, 9, 10, 11,
    // primary_tile - kernel_size (4 - 2) extra elements, set to last input
    // pixel.
    11, 11,
  };

  IndirectionTester tester{};
  tester.input_height(4).input_width(4);
  tester.kernel_height(1).kernel_width(2);
  tester.primary_tile(4);
  tester.subsampling(2);
  tester.expected_indices(expected);
  tester.Test();
}

// 2x1 kernel with stride 2 over a 4x4 input and a primary tile larger than
// the kernel.
//
//   input:       kernel:  output:
//   0  1  2  3   a        A B
//   4  5  6  7   b        C D
//   8  9  10 11
//   12 13 14 15
TEST(INDIRECTION, input4x4_kernel2x1_primarytile4_subsampling2) {
  const std::vector<size_t> expected = {
    // One line per output row.
    0, 4, 2, 6,
    8, 12, 10, 14,
    // primary_tile - kernel_size (4 - 2) extra elements, set to last input
    // pixel.
    14, 14
  };

  IndirectionTester tester{};
  tester.input_height(4).input_width(4);
  tester.kernel_height(2).kernel_width(1);
  tester.primary_tile(4);
  tester.subsampling(2);
  tester.expected_indices(expected);
  tester.Test();
}

// Compressed buffer for a 1x1 kernel over a 3x3 input without padding: only
// the three pointers of a single row are expected.
TEST(INDIRECTION_COMPRESSED, input3x3_kernel1x1) {
  const std::vector<size_t> expected = {0, 1, 2};

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(1).kernel_width(1);
  tester.primary_tile(1);
  tester.expected_indices(expected);
  tester.TestCompressed();
}

// Compressed buffer for a 2x2 kernel over a 3x3 input with a total padding of
// 2 in each dimension. The outer ring of zeros in the diagram is padding.
//
//   input:     kernel:
//   0 0 0 0 0  a b
//   0 0 1 2 0  c d
//   0 3 4 5 0
//   0 6 7 8 0
//   0 0 0 0 0
TEST(INDIRECTION_COMPRESSED, input3x3_kernel2x2_padding2x2) {
  const std::vector<size_t> expected = {
    // Top section: the upper kernel row falls into the padding.
    kZero, kZero, kZero, 0, kZero, 1, kZero, 2, kZero, kZero,
    // Compressed row.
    kZero, kZero, 0, 3, 1, 4, 2, 5, kZero, kZero,
    // Bottom section: the lower kernel row falls into the padding.
    kZero, kZero, 6, kZero, 7, kZero, 8, kZero, kZero, kZero,
  };

  IndirectionTester tester{};
  tester.input_height(3).input_width(3);
  tester.kernel_height(2).kernel_width(2);
  tester.padding_height(2).padding_width(2);
  tester.primary_tile(4);
  tester.expected_indices(expected);
  tester.TestCompressed();
}

// Compressed buffer for a 2x2 kernel with stride 2 over a 2x2 input with a
// total padding of 2 in each dimension. The outer ring of zeros in the
// diagram is padding.
//
//   input:   kernel:
//   0 0 0 0  a b
//   0 0 1 0  c d
//   0 2 3 0
//   0 0 0 0
TEST(INDIRECTION_COMPRESSED, input2x2_kernel2x2_padding2x2_subsampling2) {
  const std::vector<size_t> expected = {
    // Top section: the upper kernel row falls into the padding.
    kZero, kZero, kZero, 0, kZero, 1, kZero, kZero,
    // A compressed row is not actually needed here, but it is easier to
    // assume that there is always exactly one; it has the same contents as
    // the bottom section.
    kZero, kZero, 2, kZero, 3, kZero, kZero, kZero,
    // Bottom section: the lower kernel row falls into the padding.
    kZero, kZero, 2, kZero, 3, kZero, kZero, kZero,
  };

  IndirectionTester tester{};
  tester.input_height(2).input_width(2);
  tester.kernel_height(2).kernel_width(2);
  tester.padding_height(2).padding_width(2);
  tester.primary_tile(4);
  tester.subsampling(2);
  tester.expected_indices(expected);
  tester.TestCompressed();
}
}  // namespace
}  // namespace xnnpack