sglang_v0.5.2/pytorch_2.8.0/third_party/XNNPACK/test/subgraph.cc

// Copyright 2023 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "xnnpack/subgraph.h"

#include <cstddef>
#include <vector>

#include <gtest/gtest.h>
#include "runtime-tester.h"
#include "subgraph-tester.h"

namespace xnnpack {

TEST(SUBGRAPH, hanging_nodes) {
  SubgraphTester tester(6);
  tester
    .AddDynamicTensorF32({1, 256, 256, 3}, 0)
    .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, 1)
    .AddStaticTensorF32({32}, TensorType::kDense, 2)
    .AddDynamicTensorF32({1, 128, 128, 32}, 3)
    .AddOutputTensorF32({32}, 4)
    .AddDynamicTensorF32({32}, 5)
    .AddConvolution2D(
        ConvolutionParams{
          Padding{1, 1, 1, 1},
          Kernel{3, 3},
          Subsampling{2, 2},
          Dilation{1, 1},
          /*groups=*/ 1,
          /*group_input_channels=*/ 3,
          /*group_output_channels=*/ 32,
        }, 0, 1, 2, 3)
    .AddGlobalAveragePooling(3, 4)
    // Add hanging node
    .AddGlobalAveragePooling(3, 5)
    .Optimize();

  // The hanging node is still there.
  ASSERT_EQ(tester.NumNodes(), 3);
}

TEST(SUBGRAPH, multiple_outputs_with_hanging_nodes) {
  SubgraphTester tester(4);
  tester
    .AddDynamicTensorF32({96}, 0)
    .AddDynamicTensorF32({32}, 1)
    .AddDynamicTensorF32({32}, 2)
    .AddOutputTensorF32({32}, 3)
    // Add split3 with 1 consumed output and two unconsumed outputs.
    .AddEvenSplit3(0, 1, 2, 3)
    .Optimize();

  // The node is still there.
  ASSERT_EQ(tester.NumNodes(), 1);
  // And all four values also.
  ASSERT_EQ(tester.NumValues(), 4);
  // The first two outputs are optimized away.
  ASSERT_EQ(tester.Value(1)->type, xnn_value_type_invalid);
  ASSERT_EQ(tester.Value(2)->type, xnn_value_type_invalid);
  // The last output is consumed.
  ASSERT_EQ(tester.Value(3)->type, xnn_value_type_dense_tensor);
}

TEST(SUBGRAPH, even_split3_first_two_outputs_optimized_away) {
  RuntimeTester tester(5);
  constexpr size_t size = 9;
  float inputs[size] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
  tester
    .AddStaticTensorF32({size}, TensorType::kDense, 0, 0, inputs)
    .AddDynamicTensorF32({3}, 1)
    .AddDynamicTensorF32({3}, 2)
    .AddOutputTensorF32({3}, 3)
    // Add split3 with 1 consumed output and two unconsumed outputs.
    .AddEvenSplit3(0, 1, 2, 3);
  // Regression test for a crash where we could not deal with a split where the
  // 0th output is not used (and optimized away).
  auto output = tester.RunWithFusion<float>();
  xnnpack::Buffer<float> expected = {6, 7, 8};
  ASSERT_EQ(expected, output);
}

}  // namespace xnnpack