sglang_v0.5.2/pytorch_2.8.0/third_party/XNNPACK/test/subgraph.cc

86 lines
2.6 KiB
C++

// Copyright 2023 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
#include "xnnpack/subgraph.h"
#include <cstddef>
#include <vector>
#include <gtest/gtest.h>
#include "runtime-tester.h"
#include "subgraph-tester.h"
namespace xnnpack {
// Builds a convolution followed by two global-average-pooling nodes and runs
// Optimize(). One pooling node targets the graph output (value 4); the other
// targets value 5, a dynamic tensor that no other node references, which makes
// it a "hanging" node. The test checks that all three nodes are still present
// after optimization.
TEST(SUBGRAPH, hanging_nodes) {
  SubgraphTester tester(6);

  // Phase 1: declare the six values (ids 0 through 5) the nodes refer to.
  auto& with_values = tester
      .AddDynamicTensorF32({1, 256, 256, 3}, 0)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, 1)
      .AddStaticTensorF32({32}, TensorType::kDense, 2)
      .AddDynamicTensorF32({1, 128, 128, 32}, 3)
      .AddOutputTensorF32({32}, 4)
      .AddDynamicTensorF32({32}, 5);

  // Phase 2: add the nodes.
  // NOTE(review): the trailing ids are presumably (input, filter, bias, output)
  // for the convolution and (input, output) for pooling -- confirm against
  // subgraph-tester.h.
  auto& with_nodes = with_values
      .AddConvolution2D(
          ConvolutionParams{
              Padding{1, 1, 1, 1},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          },
          0, 1, 2, 3)
      .AddGlobalAveragePooling(3, 4)
      // The hanging node: it targets value 5, which nothing else uses.
      .AddGlobalAveragePooling(3, 5);

  with_nodes.Optimize();

  // Optimization must not drop the hanging node.
  ASSERT_EQ(tester.NumNodes(), 3);
}
// Adds a single even-split-3 node where only one of the three outputs
// (value 3, the graph output) is consumed, then runs Optimize(). The test
// checks that the node and all four value slots remain, that the two
// unconsumed outputs (values 1 and 2) have been marked invalid, and that the
// consumed output is still a dense tensor.
TEST(SUBGRAPH, multiple_outputs_with_hanging_nodes) {
  SubgraphTester tester(4);

  // Declare the split input (id 0) and its three outputs (ids 1 through 3).
  auto& with_values = tester
      .AddDynamicTensorF32({96}, 0)
      .AddDynamicTensorF32({32}, 1)
      .AddDynamicTensorF32({32}, 2)
      .AddOutputTensorF32({32}, 3);

  // Split with one consumed output and two unconsumed outputs, then optimize.
  with_values
      .AddEvenSplit3(0, 1, 2, 3)
      .Optimize();

  // The split node survives optimization.
  ASSERT_EQ(tester.NumNodes(), 1);

  // So do all four value slots.
  ASSERT_EQ(tester.NumValues(), 4);

  // The two unconsumed outputs were optimized away (marked invalid).
  ASSERT_EQ(tester.Value(1)->type, xnn_value_type_invalid);
  ASSERT_EQ(tester.Value(2)->type, xnn_value_type_invalid);

  // The consumed output is untouched.
  ASSERT_EQ(tester.Value(3)->type, xnn_value_type_dense_tensor);
}
// Regression test: running a fused runtime used to crash when a split node's
// 0th output was unused and therefore optimized away. Splits a 9-element
// static tensor into three parts, keeps only the last part as the graph
// output, and checks that it holds the final three input elements.
TEST(SUBGRAPH, even_split3_first_two_outputs_optimized_away) {
  RuntimeTester tester(5);

  constexpr size_t kNumInputs = 9;
  float input_data[kNumInputs] = {0, 1, 2, 3, 4, 5, 6, 7, 8};

  // Declare the static split input (id 0) and the three outputs (ids 1
  // through 3); only id 3 is a graph output.
  auto& with_values = tester
      .AddStaticTensorF32({kNumInputs}, TensorType::kDense, 0, 0, input_data)
      .AddDynamicTensorF32({3}, 1)
      .AddDynamicTensorF32({3}, 2)
      .AddOutputTensorF32({3}, 3);

  // Split with one consumed output and two unconsumed outputs.
  with_values.AddEvenSplit3(0, 1, 2, 3);

  // Must run without crashing even though outputs 1 and 2 are never used.
  auto output = tester.RunWithFusion<float>();

  xnnpack::Buffer<float> expected = {6, 7, 8};
  ASSERT_EQ(expected, output);
}
} // namespace xnnpack