{ "torch/_inductor/async_compile.py": { "class AsyncCompile": 281 }, "torch/_inductor/autoheuristic/artifacts/_MMRankingA100.py": { "class MMRankingA100": 278, "def MMRankingA100.fill_choices()": 199 }, "torch/_inductor/autoheuristic/artifacts/_MMRankingH100.py": { "class MMRankingH100": 303, "def MMRankingH100.fill_choices()": 203 }, "torch/_inductor/autoheuristic/artifacts/_MixedMMA100.py": { "class MixedMMA100": 132, "def MixedMMA100.get_best_choices()": 85 }, "torch/_inductor/autoheuristic/artifacts/_MixedMMH100.py": { "class MixedMMH100": 131, "def MixedMMH100.get_best_choices()": 85 }, "torch/_inductor/autotune_process.py": { "class CUDABenchmarkRequest": 115, "class TritonBenchmarkRequest": 121, "def TritonBenchmarkRequest.make_run_fn()": 81 }, "torch/_inductor/bounds.py": { "class ValueRangeAnalysis": 107 }, "torch/_inductor/codecache.py": { "class AotCodeCompiler": 516, "class CUDACodeCache": 107, "class CppCodeCache": 125, "class CppPythonBindingsCodeCache": 168, "class HalideCodeCache": 350 }, "torch/_inductor/codegen/common.py": { "class CSE": 167, "class CSEProxy": 310, "class Kernel": 286, "class KernelArgs": 325, "class OpOverrides": 227 }, "torch/_inductor/codegen/cpp.py": { "class CppKernel": 572, "class CppKernelProxy": 601, "class CppOverrides": 429, "class CppScheduling": 777, "class CppVecKernel": 857, "class OuterLoopFusedSchedulerNode": 159, "def CppKernel.codegen_loops_impl()": 144, "def CppKernelProxy.codegen_functions()": 183, "def CppKernelProxy.legalize_lowp_fp_dtype_loopbody()": 224, "def CppScheduling.fuse()": 81, "def CppVecKernel.reduction()": 193, "def CppVecKernel.reduction_combine_vec()": 87, "def TilingSelect.select_tiling()": 165 }, "torch/_inductor/codegen/cpp_flex_attention_template.py": { "class CppFlexAttentionTemplate": 374, "def CppFlexAttentionTemplate.modification()": 94 }, "torch/_inductor/codegen/cpp_gemm_template.py": { "class CppGemmTemplate": 998, "def CppGemmTemplate.add_choices()": 163, "def CppGemmTemplate.get_options()": 243 }, "torch/_inductor/codegen/cpp_grouped_gemm_template.py": { "def CppGroupedGemmTemplate.add_choices()": 141, "def CppGroupedGemmTemplate.render()": 146 }, "torch/_inductor/codegen/cpp_micro_gemm.py": { "def create_micro_gemm()": 94 }, "torch/_inductor/codegen/cpp_template.py": { "class CppTemplate": 114 }, "torch/_inductor/codegen/cpp_template_kernel.py": { "class CppTemplateKernel": 469, "def CppTemplateKernel.store_outputs()": 102 }, "torch/_inductor/codegen/cpp_utils.py": { "def create_epilogue_with_attr()": 165 }, "torch/_inductor/codegen/cpp_wrapper_cpu.py": { "def CppWrapperCpu.generate_extern_kernel_args_decl_if_needed()": 152, "def CppWrapperCpu.generate_input_output_runtime_checks()": 115, "def CppWrapperCpu.generate_py_arg()": 96, "def CppWrapperCpu.val_to_arg_str()": 88, "def CppWrapperCpu.write_wrapper_decl()": 140 }, "torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py": { "def CppWrapperCpuArrayRef.generate_return()": 127, "def CppWrapperCpuArrayRef.write_wrapper_decl()": 208 }, "torch/_inductor/codegen/cuda/cutlass_lib_extensions/gemm_operation_extensions.py": { "def EmitGemmUniversal3xInstanceWithEVT.emit()": 98 }, "torch/_inductor/codegen/cuda/device_op_overrides.py": { "class CUDADeviceOpOverrides": 222, "def CUDADeviceOpOverrides.tma_descriptor_helpers()": 102 }, "torch/_inductor/codegen/cuda/gemm_template.py": { "class CUTLASS2xGemmTemplate": 265, "class CUTLASS3xGemmTemplate": 326 }, "torch/_inductor/codegen/debug_utils.py": { "class DebugPrinterManager": 228 }, "torch/_inductor/codegen/halide.py": { "class HalideKernel": 982, "class HalideOverrides": 329, "class HalidePrinter": 129, "def HalideKernel.halide_kernel_meta()": 82 }, "torch/_inductor/codegen/mps.py": { "class MetalKernel": 354, "class MetalOverrides": 335, "def MetalKernel.reduction()": 109 }, "torch/_inductor/codegen/rocm/ck_conv_template.py": { "class CKGroupedConvFwdTemplate": 531, "def CKGroupedConvFwdTemplate.globals()": 143 }, "torch/_inductor/codegen/rocm/ck_universal_gemm_template.py": { "class CKGemmTemplate": 947 }, "torch/_inductor/codegen/rocm/rocm_benchmark_request.py": { "class ROCmBenchmarkRequest": 117 }, "torch/_inductor/codegen/simd.py": { "class IterationRangesRoot": 122, "class SIMDScheduling": 1054, "def SIMDScheduling.candidate_tilings()": 126, "def SIMDScheduling.generate_node_schedule()": 95 }, "torch/_inductor/codegen/triton.py": { "class BlockPtrOptions": 272, "class TritonKernel": 2455, "class TritonOverrides": 505, "class TritonPrinter": 172, "class TritonScheduling": 396, "def TritonKernel.codegen_kernel()": 222, "def TritonKernel.codegen_kernel_benchmark()": 89, "def TritonKernel.load()": 134, "def TritonKernel.reduction()": 383, "def TritonKernel.scan()": 103, "def TritonScheduling.benchmark_codegened_module()": 83, "def TritonScheduling.benchmark_combo_kernel()": 91 }, "torch/_inductor/codegen/triton_combo_kernel.py": { "class ComboKernel": 808, "def ComboKernel.codegen_kernel_benchmark()": 89 }, "torch/_inductor/codegen/triton_split_scan.py": { "def TritonSplitScanKernel.scan()": 114 }, "torch/_inductor/codegen/wrapper.py": { "def PythonWrapperCodegen.benchmark_compiled_module()": 92, "def PythonWrapperCodegen.define_user_defined_triton_kernel()": 249, "def PythonWrapperCodegen.generate_example_arg_value()": 83, "def user_defined_kernel_grid_fn_code()": 96 }, "torch/_inductor/comm_lowering.py": { "def register_comm_lowerings()": 189 }, "torch/_inductor/comms.py": { "def enforce_comm_ordering_for_fsdp()": 170, "def reinplace_fsdp_all_gather()": 110 }, "torch/_inductor/compile_fx.py": { "def _InProcessFxCompile.codegen_and_compile()": 379, "def fw_compiler_freezing()": 93 }, "torch/_inductor/config.py": { "class cpp": 107, "class triton": 182 }, "torch/_inductor/constant_folding.py": { "class ConstantFolder": 223, "def ConstantFolder.run_node()": 94 }, "torch/_inductor/cpu_vec_isa.py": { "class VecISA": 120 }, "torch/_inductor/debug.py": { "class DebugContext": 158, "class DebugFormatter": 189, "def DebugFormatter.log_autotuning_results()": 81 }, "torch/_inductor/dependencies.py": { "class MemoryDep": 225 }, "torch/_inductor/fx_passes/b2b_gemm.py": { "def b2b_gemm_handler()": 180 }, "torch/_inductor/fx_passes/binary_folding.py": { "def binary_folding_init()": 416 }, "torch/_inductor/fx_passes/freezing_patterns.py": { "def addmm_patterns_init()": 94 }, "torch/_inductor/fx_passes/group_batch_fusion.py": { "def BatchLayernormFusion.fuse()": 131, "def PostGradBatchLinearFusion.fuse()": 83, "def PreGradBatchLinearFusion.fuse()": 87 }, "torch/_inductor/fx_passes/joint_graph.py": { "def constant_fold_uniform_value()": 109, "def remove_no_ops()": 93 }, "torch/_inductor/fx_passes/micro_pipeline_tp.py": { "def find_all_gather_patterns()": 116, "def find_reduce_scatter_patterns()": 125 }, "torch/_inductor/fx_passes/post_grad.py": { "def lower_scan_to_while_loop()": 154 }, "torch/_inductor/fx_passes/split_cat.py": { "def SplitCatSimplifier.replace_cat()": 145, "def merge_getitem_cat()": 97, "def merge_split_cat_aten()": 87, "def move_reshape_out_of_split_stack()": 110 }, "torch/_inductor/fx_utils.py": { "def FakeTensorUpdater.incremental_update()": 100 }, "torch/_inductor/graph.py": { "class GraphLowering": 2032, "def GraphLowering.call_function()": 116, "def GraphLowering.extract_autotune_inputs()": 90, "def GraphLowering.output()": 87, "def GraphLowering.placeholder()": 92, "def GraphLowering.run_node()": 380 }, "torch/_inductor/ir.py": { "class Buffer": 122, "class ComputedBuffer": 329, "class Conditional": 138, "class ExternKernel": 793, "class FallbackKernel": 439, "class FlexibleLayout": 139, "class IRNode": 244, "class Layout": 202, "class Loops": 128, "class Reduction": 737, "class Scan": 199, "class Sort": 150, "class UserDefinedTritonKernel": 183, "class View": 174, "class WelfordReduction": 221, "class WhileLoop": 203, "def ConcatKernel.create()": 95, "def ExternKernel.process_kernel()": 110, "def ExternKernel.require_strides()": 149, "def FallbackKernel.create()": 81, "def FallbackKernel.export_extern_kernel_node()": 82, "def Reduction.create()": 136, "def Reduction.num_splits()": 152, "def Scan.create()": 83, "def WelfordReduction.create()": 110, "def WhileLoop.create()": 161 }, "torch/_inductor/jagged_lowerings.py": { "def register_jagged_ops()": 156 }, "torch/_inductor/kernel/bmm.py": { "def tuned_bmm()": 91 }, "torch/_inductor/kernel/conv.py": { "def convolution()": 231 }, "torch/_inductor/kernel/flex_attention.py": { "def flex_attention()": 303, "def flex_attention_backward()": 323, "def lower_cpu()": 273 }, "torch/_inductor/kernel/flex_decoding.py": { "def create_flex_decoding_kernel()": 288 }, "torch/_inductor/kernel/mm.py": { "def tuned_addmm()": 169, "def tuned_mm()": 127, "def tuned_scaled_mm()": 130 }, "torch/_inductor/loop_body.py": { "class CaptureIndexing": 174 }, "torch/_inductor/lowering.py": { "def avg_pool2d_backward()": 155, "def avg_pool3d_backward()": 189, "def cat()": 123, "def index_put_impl_()": 125, "def make_pointwise()": 85, "def max_pool2d_with_indices_backward()": 140, "def scatter_reduce_()": 111, "def sdpa_constraint()": 132, "def searchsorted()": 84 }, "torch/_inductor/mkldnn_ir.py": { "class MkldnnRnnLayer": 114 }, "torch/_inductor/mkldnn_lowerings.py": { "def register_onednn_fusion_ops()": 1152 }, "torch/_inductor/mock_cache.py": { "class PatchCaches": 108 }, "torch/_inductor/pattern_matcher.py": { "class ReplacementPatternEntry": 196, "def ReplacementPatternEntry.replace_with_graph()": 177 }, "torch/_inductor/quantized_lowerings.py": { "def register_woq_mm_ops()": 136 }, "torch/_inductor/runtime/autotune_cache.py": { "class AutotuneCache": 190 }, "torch/_inductor/runtime/benchmarking.py": { "class InductorBenchmarker": 111 }, "torch/_inductor/scheduler.py": { "class BaseSchedulerNode": 697, "class BaseScheduling": 139, "class Scheduler": 2568, "class SchedulerBuffer": 103, "class SchedulerNode": 256 }, "torch/_inductor/select_algorithm.py": { "class AlgorithmSelectorCache": 694, "class TritonTemplate": 224, "class TritonTemplateKernel": 770, "def AlgorithmSelectorCache.log_results()": 92, "def AlgorithmSelectorCache.make_benchmark_fn[2]()": 145 }, "torch/_inductor/sizevars.py": { "class SizeVarAllocator": 780 }, "torch/_inductor/template_heuristics.py": { "class ROCmConfigHeuristic": 212 }, "torch/_inductor/utils.py": { "class IndentedBuffer": 136 }, "torch/_inductor/wrapper_benchmark.py": { "def parse_profile_event_list()": 119 } }