{ "EndToEndLSTM (__main__.RNNTest)": 184.65333048502603, "MultiheadAttention (__main__.ModulesTest)": 134.43099975585938, "test_AllenaiLongformerBase_repro_cpu_halide (__main__.HalideCpuTests)": 199.10467020670572, "test__adaptive_avg_pool2d (__main__.CPUReproTests)": 83.39333131578233, "test_adaptive_max_pool2d1_cpu_halide (__main__.HalideCpuTests)": 113.98933410644531, "test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 61.397444831000435, "test_alexnet_prefix_cpu_halide (__main__.HalideCpuTests)": 176.93266805013022, "test_aot_autograd_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 64.99899800618489, "test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 66.08271371750604, "test_aot_autograd_symbolic_exhaustive_masked_norm_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 61.71266555786133, "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 151.31399536132812, "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 267.58533732096356, "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 120.89933013916016, "test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 73.94028554643903, "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 112.47666422526042, "test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 609.4812072753906, "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 158.25587558746338, "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 502.05988226996527, "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 494.381110297309, "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 124.20333480834961, "test_avg_pool3d_backward_cpu_halide (__main__.HalideCpuTests)": 61.64700063069662, "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 71.78066380818684, "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 78.40683364868164, "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 250.50655958387586, "test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 145.54050064086914, "test_checkpointing_without_reentrant_input_requires_grad_False (__main__.TestAutogradWithCompiledAutograd)": 327.4082217746311, "test_checkpointing_without_reentrant_input_requires_grad_True (__main__.TestAutogradWithCompiledAutograd)": 409.865227593316, "test_collect_callgrind (__main__.TestBenchmarkUtils)": 310.50811258951825, "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 90.77466710408528, "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 88.94400024414062, "test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 61.99116643269857, "test_comprehensive_grid_sampler_2d_cpu_bfloat16 (__main__.TestDecompCPU)": 89.07300059000652, "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestDecompCPU)": 98.6163330078125, "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 65.7913335164388, "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 400.17799886067706, "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 65.32166544596355, "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 433.8283386230469, "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 65.70300038655598, "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 246.12633005777994, "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 237.4903361002604, "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1256.5741882324219, "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 68.78149922688802, "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 1055.0651448567708, "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 69.93966611226399, "test_comprehensive_linalg_lu_solve_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 65.20016670227051, "test_comprehensive_linalg_lu_solve_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 61.16316668192545, "test_comprehensive_linalg_solve_triangular_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 62.08466657002767, "test_comprehensive_linalg_solve_triangular_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 62.160666147867836, "test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 65.54600079854329, "test_comprehensive_linalg_vector_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 85.31400044759114, "test_comprehensive_linalg_vector_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 86.7923355102539, "test_comprehensive_linalg_vector_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 83.80366770426433, "test_comprehensive_linalg_vector_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 65.01507412945783, "test_comprehensive_linalg_vector_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 66.07433333220305, "test_comprehensive_logspace_cpu_float32 (__main__.TestInductorOpInfoCPU)": 169.19166564941406, "test_comprehensive_logspace_cpu_float64 (__main__.TestInductorOpInfoCPU)": 164.14199829101562, "test_comprehensive_logspace_cpu_int32 (__main__.TestInductorOpInfoCPU)": 167.1233367919922, "test_comprehensive_logspace_cpu_int64 (__main__.TestInductorOpInfoCPU)": 161.9933319091797, "test_comprehensive_masked_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 204.7566680908203, "test_comprehensive_masked_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 202.51532999674478, "test_comprehensive_masked_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 205.77066548665366, "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 114.11033376057942, "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 105.25066757202148, "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 113.67999903361003, "test_comprehensive_nn_functional_fractional_max_pool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 101.1036114162869, "test_comprehensive_nn_functional_fractional_max_pool3d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 94.08183288574219, "test_comprehensive_nn_functional_fractional_max_pool3d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 94.20638847351074, "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 93.08233388264973, "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 94.11516571044922, "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 107.86000061035156, "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 94.72633361816406, "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 284.54283142089844, "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 228.18283081054688, "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 77.24066543579102, "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 77.22533416748047, "test_comprehensive_nn_functional_max_pool1d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 153.27567036946616, "test_comprehensive_nn_functional_max_pool1d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 151.73899841308594, "test_comprehensive_nn_functional_max_pool1d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 137.59866841634116, "test_comprehensive_nn_functional_max_pool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 1176.6233723958333, "test_comprehensive_nn_functional_max_pool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 1034.320332845052, "test_comprehensive_nn_functional_max_pool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 1053.9040120442708, "test_comprehensive_nn_functional_max_pool2d_cpu_int32 (__main__.TestInductorOpInfoCPU)": 901.5313517252604, "test_comprehensive_nn_functional_max_pool2d_cpu_int64 (__main__.TestInductorOpInfoCPU)": 914.4829915364584, "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 1132.8611653645833, "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 1129.974344889323, "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 1135.6740112304688, "test_comprehensive_nn_functional_max_pool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 891.2769978841146, "test_comprehensive_nn_functional_max_pool3d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 687.6756591796875, "test_comprehensive_nn_functional_max_pool3d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 683.6936645507812, "test_comprehensive_nn_functional_max_pool3d_cpu_int32 (__main__.TestInductorOpInfoCPU)": 678.6616617838541, "test_comprehensive_nn_functional_max_pool3d_cpu_int64 (__main__.TestInductorOpInfoCPU)": 701.6133422851562, "test_comprehensive_nn_functional_max_pool3d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 495.5906626383464, "test_comprehensive_nn_functional_max_pool3d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 487.7074940999349, "test_comprehensive_nn_functional_max_unpool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 115.73200225830078, "test_comprehensive_nn_functional_max_unpool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 118.66033426920573, "test_comprehensive_nn_functional_max_unpool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 115.82266743977864, "test_comprehensive_nn_functional_max_unpool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 67.43566640218098, "test_comprehensive_nn_functional_unfold_cpu_bool (__main__.TestInductorOpInfoCPU)": 68.42166900634766, "test_comprehensive_nn_functional_unfold_cpu_float16 (__main__.TestInductorOpInfoCPU)": 118.02966817220052, "test_comprehensive_nn_functional_unfold_cpu_float32 (__main__.TestInductorOpInfoCPU)": 105.94366709391277, "test_comprehensive_nn_functional_unfold_cpu_float64 (__main__.TestInductorOpInfoCPU)": 118.99266815185547, "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 115.5125020345052, "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 103.90849939982097, "test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 66.59218077226119, "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 64.84800084431966, "test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 60.27900060017904, "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 68.57966613769531, "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 66.81166776021321, "test_compute_global_tensor_shape_1D_invalid_shape (__main__.UtilTest)": 209.35732873280844, "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 154.30916849772134, "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 142.58683141072592, "test_constructor_autograd_SparseCSC_cuda (__main__.TestSparseAnyCUDA)": 94.73116620381673, "test_constructor_autograd_SparseCSR_cuda (__main__.TestSparseAnyCUDA)": 110.29800033569336, "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 244.17077806260852, "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 627.981665717231, "test_conv2d_unary_cpu_cpp_wrapper (__main__.TestCppWrapper)": 68.8806660970052, "test_conv3d_binary_broadcast_shapes_cpu_cpu (__main__.TestPatternMatcherGenericCPU)": 75.51066589355469, "test_correctness_AdamW_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 78.39416631062825, "test_correctness_Adam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 74.26416778564453, "test_count_nonzero_all (__main__.TestBool)": 630.1393364800347, "test_custom_module_lstm (__main__.TestQuantizedOps)": 666.0326605902778, "test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 84.40749867757161, "test_dtensor_op_db_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDTensorOpsCPU)": 88.80566660563152, "test_eig_check_magma_cuda_float32 (__main__.TestLinalgCUDA)": 153.85249682267508, "test_error_detection_and_propagation (__main__.NcclErrorHandlingTest)": 67.68433125813802, "test_fail_arithmetic_ops.py (__main__.TestTyping)": 64.70655483669705, "test_fail_creation_ops.py (__main__.TestTyping)": 70.33796894550323, "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 73.33583068847656, "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 95.88233311971028, "test_fn_gradgrad_map_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 84.52066802978516, "test_fn_gradgrad_map_triple_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 518.5540161132812, "test_fn_gradgrad_map_triple_nested_cuda_float64 (__main__.TestBwdGradientsCUDA)": 352.0611623128255, "test_fuse_large_params_cpu (__main__.CpuTests)": 98.19175052642822, "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 172.9732191297743, "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 178.04811265733508, "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 96.32300059000652, "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 94.25100072224934, "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 110.52466583251953, "test_gradgrad_nn_LSTM_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 147.46899922688803, "test_gradgrad_nn_LSTM_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 137.17833455403647, "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 223.40133412679037, "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 130.75699996948242, "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 159.8721669514974, "test_gradgrad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 661.1241658528646, "test_grid_sampler_2d_cpu_halide (__main__.HalideCpuTests)": 196.1066640218099, "test_group_norm (__main__.TestQuantizedOps)": 143.82022105322943, "test_indirect_device_assert (__main__.TritonCodeGenTests)": 252.9750010172526, "test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 68.59622192382812, "test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 132.5279998779297, "test_inputs_overlapping_with_mutation_stress_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 151.57311164008246, "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 117.37533315022786, "test_linalg_solve_triangular_large_cuda_complex128 (__main__.TestLinalgCUDA)": 577.0678304036459, "test_linalg_solve_triangular_large_cuda_complex64 (__main__.TestLinalgCUDA)": 72.07283401489258, "test_linear (__main__.TestStaticQuantizedModule)": 178.05622397528754, "test_linear_relu (__main__.TestStaticQuantizedModule)": 64.9945551554362, "test_lobpcg_ortho_cuda_float64 (__main__.TestLinalgCUDA)": 83.73499965667725, "test_lstm_cpu (__main__.TestMkldnnCPU)": 66.0846659342448, "test_many_overlapping_inputs_does_not_explode_guards_dynamic_shapes (__main__.DynamicShapesReproTests)": 125.42355600992839, "test_max_pool2d2_cpu_halide (__main__.HalideCpuTests)": 445.62599690755206, "test_max_pool2d3_cpu_halide (__main__.HalideCpuTests)": 134.19500223795572, "test_max_pool2d5_cpu_halide (__main__.HalideCpuTests)": 363.20066324869794, "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 63.19877794053819, "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 61.39377763536241, "test_proper_exit (__main__.TestDataLoader)": 240.04466501871744, "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 271.00699615478516, "test_python_ref_executor__refs_special_zeta_executor_aten_cuda_float64 (__main__.TestCommonCUDA)": 64.18233426411946, "test_qat_conv2d_unary (__main__.TestQuantizePT2EX86Inductor)": 151.71777767605252, "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn1d)": 61.14148919847276, "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn2d)": 60.4263552347819, "test_qat_mobilenet_v2 (__main__.TestQuantizePT2EQATModels)": 88.72544479370117, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 69.56600189208984, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 82.00166829427083, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 78.14999898274739, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 68.93766784667969, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 75.8633321126302, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 78.89766947428386, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 67.93033345540364, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 76.1066665649414, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 77.59533437093098, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 70.57233174641927, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 86.69966634114583, "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 82.32333374023438, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 69.6453348795573, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 79.38400014241536, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 80.18400065104167, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 71.49599965413411, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 78.35600026448567, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 82.9933344523112, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 71.89866892496745, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 75.72566731770833, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 80.28999837239583, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 80.68799845377605, "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 85.98066711425781, "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 418.50034586588544, "test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 842.5636698404948, "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 658.1936645507812, "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1321.1958414713542, "test_quick_core_backward_expand_copy_cuda_float64 (__main__.TestDecompCUDA)": 72.79183260599773, "test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 68.16699981689453, "test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 222.59966786702475, "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 89.49299875895183, "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 208.05382792154947, "test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 61.09833272298177, "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 119.15299987792969, "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 72.5490010579427, "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 137.61000188191733, "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 83.77516682942708, "test_register_spills_cuda (__main__.BenchmarkFusionCudaTest)": 112.9426663716634, "test_replicatepad_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 68.61433410644531, "test_rosenbrock_sparse_with_lrsched_False_SGD_cuda_float64 (__main__.TestOptimRenewedCUDA)": 71.73550089200337, "test_rosenbrock_sparse_with_lrsched_True_SGD_cuda_float64 (__main__.TestOptimRenewedCUDA)": 66.45991698900859, "test_runtime_checks_large_cpu (__main__.AOTInductorTestABICompatibleCpu)": 60.68633270263672, "test_runtime_checks_large_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 74.52111011081271, "test_runtime_checks_large_cuda (__main__.AOTInductorTestABICompatibleGpu)": 156.46233622233072, "test_save_load_large_string_attribute (__main__.TestSaveLoad)": 128.3509979248047, "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 148.15933481852213, "test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 145.64644877115884, "test_sort_stable_cpu (__main__.CpuTritonTests)": 76.39066569010417, "test_split_cumsum_cpu (__main__.CpuTritonTests)": 89.5290018717448, "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 150.72099796930948, "test_tensor_split (__main__.TestVmapOperators)": 72.26428134347766, "test_terminate_handler_on_crash (__main__.TestTorch)": 100.98866719669766, "test_terminate_signal (__main__.ForkTest)": 134.33088995267948, "test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 133.97255667547384, "test_terminate_signal (__main__.SpawnTest)": 137.73455943001642, "test_torch_distributions_functions_dynamic_shapes (__main__.DynamicShapesFunctionTests)": 193.52591840426126, "test_torchvision_smoke (__main__.TestTensorBoardPytorchGraph)": 144.84678077697754, "test_train_parity_multi_group_unshard_async_op (__main__.TestFullyShard1DTrainingCore)": 62.523999532063804, "test_transformer_backend_inductor_fullgraph_True (__main__.TestFullyShardCompile)": 82.06791687011719, "test_transformer_backend_inductor_fullgraph_True_graph_partition (__main__.TestFullyShardCompile)": 82.57758394877116, "test_triton_bsr_scatter_mm_blocksize_64_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 93.72849909464519, "test_triton_bsr_scatter_mm_blocksize_64_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 86.33483123779297, "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 84.0580005645752, "test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 128.47150166829428, "test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 125.92099952697754, "test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 105.98566563924153, "test_unary_ops (__main__.TestTEFuserDynamic)": 173.52266354031033, "test_unary_ops (__main__.TestTEFuserStatic)": 154.03555562761096, "test_upsample_bicubic2d_cpu_halide (__main__.HalideCpuTests)": 95.91699727376302, "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 91.32800038655598, "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 72.65949885050456, "test_vmapjvpvjp_diff_cuda_float32 (__main__.TestOperatorsCUDA)": 64.64249992370605, "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cpu_float32 (__main__.TestOperatorsCPU)": 114.75466410319011, "test_vmapjvpvjp_linalg_lu_solve_cpu_float32 (__main__.TestOperatorsCPU)": 61.643143063499814, "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 76.99316660563152, "test_vmapjvpvjp_linalg_multi_dot_cuda_float32 (__main__.TestOperatorsCUDA)": 67.82800102233887, "test_vmapjvpvjp_linalg_pinv_singular_cpu_float32 (__main__.TestOperatorsCPU)": 60.267666498819985, "test_vmapjvpvjp_linalg_solve_triangular_cuda_float32 (__main__.TestOperatorsCUDA)": 68.94433307647705, "test_vmapjvpvjp_linalg_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 73.93966547648112, "test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 88.03500111897786, "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 90.39650090535481, "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 79.07066853841145, "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 95.49366696675618, "test_vmapjvpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 81.16833623250325, "test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 61.30799865722656, "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 79.50816663106282, "test_vmapvjpvjp_linalg_lstsq_cuda_float32 (__main__.TestOperatorsCUDA)": 100.31945332613859, "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 106.99416732788086, "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 103.08566665649414, "test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 149.96750259399414 }