#!/usr/bin/env python

from __future__ import print_function


def extract_time(line, prefix):
    # Return the time value (in ms) from a benchmark output line of the form
    # "<prefix> <value> ms", or None if the line does not start with the prefix.
    if line.startswith(prefix):
        line = line[len(prefix):].lstrip()
        line = line[:line.index(" ms")].rstrip()
        return line


def convolution(mode, batch_size, input_channels, output_channels, image_size, kernel_size, padding, algorithm,
                transform_strategy=None, threads=None, verbose=False, use_selldr=False):
    import subprocess
    if use_selldr:
        import os
        import sys

        nacl_sdk_dir = os.getenv("NACL_SDK_ROOT")
        if nacl_sdk_dir is None:
            print("Error: cannot find Native Client SDK: set NACL_SDK_ROOT environment variable and try again",
                  file=sys.stderr)
            sys.exit(1)
        benchmark_args = [os.path.join(nacl_sdk_dir, "tools", "sel_ldr.py"), "--", "bin/convolution-benchmark"]
    else:
        benchmark_args = ["bin/convolution-benchmark"]
    benchmark_args += [
        "-m", mode,
        "-b", str(batch_size),
        "-ic", str(input_channels),
        "-oc", str(output_channels),
        "-is", str(image_size[0]), str(image_size[1]),
        "-ip", str(padding),
        "-ks", str(kernel_size[0]), str(kernel_size[1]),
        "-a", algorithm
    ]
    if mode == "inference" and transform_strategy is not None:
        benchmark_args += ["-ts", transform_strategy]
    if threads is not None:
        benchmark_args += ["-t", str(threads)]
    # universal_newlines=True makes the benchmark output a str under both Python 2 and Python 3
    benchmark = subprocess.Popen(benchmark_args, stdout=subprocess.PIPE, universal_newlines=True)
    benchmark_stdout, _ = benchmark.communicate()
    if benchmark.returncode == 0:
        output_lines = [line for line in benchmark_stdout.splitlines() if len(line)]
        total, input_transform, kernel_transform, output_transform, block_multiplication, overhead = \
            None, None, None, None, None, None
        for output_line in output_lines:
            total = total or extract_time(output_line, "Time:")
            input_transform = input_transform or extract_time(output_line, "Input transform:")
            kernel_transform = kernel_transform or extract_time(output_line, "Kernel transform:")
            output_transform = output_transform or extract_time(output_line, "Output transform:")
            block_multiplication = block_multiplication or extract_time(output_line, "Block multiplication:")
            overhead = overhead or extract_time(output_line, "Overhead:")
        if verbose:
            return (total, input_transform, kernel_transform, output_transform, block_multiplication, overhead)
        else:
            return (total,)


def fully_connected(mode, batch_size, input_channels, output_channels,
                    threads=None, verbose=False, use_selldr=False):
    import subprocess
    if use_selldr:
        import os
        import sys

        nacl_sdk_dir = os.getenv("NACL_SDK_ROOT")
        if nacl_sdk_dir is None:
            print("Error: cannot find Native Client SDK: set NACL_SDK_ROOT environment variable and try again",
                  file=sys.stderr)
            sys.exit(1)
        benchmark_args = [os.path.join(nacl_sdk_dir, "tools", "sel_ldr.py"), "--", "bin/fully-connected-benchmark"]
    else:
        benchmark_args = ["bin/fully-connected-benchmark"]
    benchmark_args += [
        "-m", mode,
        "-b", str(batch_size),
        "-ic", str(input_channels),
        "-oc", str(output_channels)
    ]
    if threads is not None:
        benchmark_args += ["-t", str(threads)]
    benchmark = subprocess.Popen(benchmark_args, stdout=subprocess.PIPE, universal_newlines=True)
    benchmark_stdout, _ = benchmark.communicate()
    if benchmark.returncode == 0:
        output_lines = [line for line in benchmark_stdout.splitlines() if len(line)]
        total, input_transform, kernel_transform, block_multiplication, overhead = None, None, None, None, None
        for output_line in output_lines:
            total = total or extract_time(output_line, "Time:")
            input_transform = input_transform or extract_time(output_line, "Input packing:")
            kernel_transform = kernel_transform or extract_time(output_line, "Kernel packing:")
            block_multiplication = block_multiplication or extract_time(output_line, "Block multiplication:")
extract_time(output_line, "Block multiplication:") overhead = overhead or extract_time(output_line, "Overhead:") if verbose: return (total, input_transform, kernel_transform, block_multiplication, overhead) else: return (total,) overfeat_fast_layers = [ ("conv2", 96, 256, (24, 24), (5, 5), 0), ("conv3", 256, 512, (12, 12), (3, 3), 1), ("conv4", 512, 1024, (12, 12), (3, 3), 1), ("conv5", 1024, 1024, (12, 12), (3, 3), 1), ("fc6", 36864, 3072), ("fc7", 3072, 4096), ("fc8", 4096, 1000), ] alexnet_layers = [ ("conv2", 64, 192, (27, 27), (5, 5), 2), ("conv3", 192, 384, (13, 13), (3, 3), 1), ("conv4", 384, 256, (13, 13), (3, 3), 1), ("conv5", 256, 256, (13, 13), (3, 3), 1), ("fc6", 12544, 4096), ("fc7", 4096, 4096), ("fc8", 4096, 1000), ] vgg_a_layers = [ ("conv1", 3, 64, (224, 224), (3, 3), 1), ("conv2", 64, 128, (112, 112), (3, 3), 1), ("conv3.1", 128, 256, (56, 56), (3, 3), 1), ("conv3.2", 256, 256, (56, 56), (3, 3), 1), ("conv4.1", 256, 512, (28, 28), (3, 3), 1), ("conv4.2", 512, 512, (28, 28), (3, 3), 1), ("conv5", 512, 512, (14, 14), (3, 3), 1), ("fc6", 25088, 4096), ("fc7", 4096, 4096), ("fc8", 4096, 1000), ] if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description="NNPACK benchmarking script") parser.add_argument("--enable-selldr", dest="use_selldr", action="store_true") parser.add_argument("-l", "--layer", dest="layer", required=True, choices=["convolution", "fully-connected", "pooling"]) parser.add_argument("-n", "--network", dest="network", required=True, choices=["vgg-a", "alexnet", "overfeat-fast"]) parser.add_argument("-m", "--mode", dest="mode", required=True, choices=["inference", "output", "input-gradient", "kernel-gradient"]) parser.add_argument("--transform-strategy", dest="transform_strategy", default="compute", choices=["compute", "precompute"]) parser.add_argument("-b", "--batch", dest="batch", type=int) parser.add_argument("-t", "--threads", dest="threads") parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False) options = parser.parse_args() network_layers, default_batch = { "vgg-a": (vgg_a_layers, 64), "alexnet": (alexnet_layers, 128), "overfeat-fast": (overfeat_fast_layers, 128) }[options.network] layer_prefix = { "convolution": "conv", "fully-connected": "fc", "pooling": "pool" }[options.layer] network_layers = [layer for layer in network_layers if layer[0].startswith(layer_prefix)] batch = default_batch if options.batch is not None: batch = options.batch if batch != 1 and options.mode == "inference": raise ValueError("Non-unit batch {batch} is not allowed in inference mode".format(batch=batch)) elif options.mode == "inference": batch = 1 if options.transform_strategy is not None: if options.layer != "convolution": raise ValueError("Transform strategy {transform_strategy} is meaningless for non-convolutional layers".format(transform_strategy=transform_strategy)) elif options.mode != "inference": raise ValueError("Transform strategy {transform_strategy} is meaningless in non-inference mode".format(transform_strategy=transform_strategy)) if options.layer == "convolution": for name, input_channels, output_channels, image_size, kernel_size, padding in network_layers: measurements = [name] for algorithm in ["implicit-gemm", "ft8x8", "ft16x16", "wt8x8"]: if algorithm.startswith("wt") and kernel_size != (3, 3): continue measurements += list(convolution(options.mode, batch, input_channels, output_channels, image_size, kernel_size, padding, algorithm, transform_strategy=options.transform_strategy, 
                                                 threads=options.threads, verbose=options.verbose,
                                                 use_selldr=options.use_selldr))
            print("\t".join(map(str, measurements)))
    elif options.layer == "fully-connected":
        for name, input_channels, output_channels in network_layers:
            measurements = fully_connected(options.mode, batch, input_channels, output_channels,
                                           threads=options.threads, verbose=options.verbose,
                                           use_selldr=options.use_selldr)
            print("{name}\t{measurements}".format(name=name, measurements="\t".join(map(str, measurements))))
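
# Example invocations (a sketch, not part of the original script): the file name
# "bench.py" below is assumed, and the benchmark binaries this script shells out to
# ("bin/convolution-benchmark", "bin/fully-connected-benchmark") must already be built
# relative to the working directory.
#
#   python bench.py -l convolution -n vgg-a -m inference -t 4 -v
#   python bench.py -l fully-connected -n alexnet -m output -b 128
#
# Output is tab-separated: the layer name followed by the measured times in ms
# (additional per-phase columns when --verbose is given).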