sglang_v0.5.2/pytorch_2.8.0/third_party/XNNPACK/scripts/generate-x16-packw.sh

41 lines
3.7 KiB
Bash
Executable File

#!/bin/sh
# Copyright 2023 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#################################### Scalar ###################################
### Generic C micro-kernels
tools/xngen src/x32-packw/scalar.c.in -D NR=8 -D KBLOCK=4 -D TYPE=uint16_t -o src/x16-packw/gen/x16-packw-x8-gemm-goi-scalar-int-u4.c &
tools/xngen src/x32-packw/scalar.c.in -D NR=16 -D KBLOCK=4 -D TYPE=uint16_t -o src/x16-packw/gen/x16-packw-x16-gemm-goi-scalar-int-u4.c &
# AVX needs NR 32 and NR 64 packw microkernels
tools/xngen src/x32-packw/scalar.c.in -D NR=32 -D KBLOCK=4 -D TYPE=uint16_t -o src/x16-packw/gen/x16-packw-x32-gemm-goi-scalar-int-u4.c &
tools/xngen src/x32-packw/scalar.c.in -D NR=64 -D KBLOCK=4 -D TYPE=uint16_t -o src/x16-packw/gen/x16-packw-x64-gemm-goi-scalar-int-u4.c &
################################### ARM NEON ##################################
### NR multiple of 4
tools/xngen src/x16-packw/neon.c.in -D NR=8 -D PREFETCH=0 -D KBLOCK=4 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-neon-ld4lane-u4.c &
tools/xngen src/x16-packw/neon.c.in -D NR=8 -D PREFETCH=1 -D KBLOCK=4 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-neon-ld4lane-u4-prfm.c &
tools/xngen src/x16-packw/neon.c.in -D NR=16 -D PREFETCH=0 -D KBLOCK=4 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-neon-ld4lane-u4.c &
tools/xngen src/x16-packw/neon.c.in -D NR=16 -D PREFETCH=1 -D KBLOCK=4 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-neon-ld4lane-u4-prfm.c &
tools/xngen src/x16-packw/neon.c.in -D NR=8 -D PREFETCH=0 -D KBLOCK=8 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-neon-ld4lane-u8.c &
tools/xngen src/x16-packw/neon.c.in -D NR=8 -D PREFETCH=1 -D KBLOCK=8 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-neon-ld4lane-u8-prfm.c &
tools/xngen src/x16-packw/neon.c.in -D NR=16 -D PREFETCH=0 -D KBLOCK=8 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-neon-ld4lane-u8.c &
tools/xngen src/x16-packw/neon.c.in -D NR=16 -D PREFETCH=1 -D KBLOCK=8 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-neon-ld4lane-u8-prfm.c &
tools/xngen src/x16-packw/neon.c.in -D NR=8 -D PREFETCH=0 -D KBLOCK=12 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-neon-ld4lane-u12.c &
tools/xngen src/x16-packw/neon.c.in -D NR=8 -D PREFETCH=1 -D KBLOCK=12 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-neon-ld4lane-u12-prfm.c &
tools/xngen src/x16-packw/neon.c.in -D NR=16 -D PREFETCH=0 -D KBLOCK=12 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-neon-ld4lane-u12.c &
tools/xngen src/x16-packw/neon.c.in -D NR=16 -D PREFETCH=1 -D KBLOCK=12 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-neon-ld4lane-u12-prfm.c &
tools/xngen src/x16-packw/neon.c.in -D NR=8 -D PREFETCH=0 -D KBLOCK=16 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-neon-ld4lane-u16.c &
tools/xngen src/x16-packw/neon.c.in -D NR=8 -D PREFETCH=1 -D KBLOCK=16 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-neon-ld4lane-u16-prfm.c &
tools/xngen src/x16-packw/neon.c.in -D NR=16 -D PREFETCH=0 -D KBLOCK=16 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-neon-ld4lane-u16.c &
tools/xngen src/x16-packw/neon.c.in -D NR=16 -D PREFETCH=1 -D KBLOCK=16 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-neon-ld4lane-u16-prfm.c &
################################### X86 AVX2 ##################################
tools/xngen src/x16-packw/avx.c.in -D NR=8 -D PREFETCH=0 -D KBLOCK=16 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-avx2-u16.c &
tools/xngen src/x16-packw/avx.c.in -D NR=8 -D PREFETCH=1 -D KBLOCK=16 -o src/x16-packw/gen/x16-packw-x8-gemm-goi-avx2-u16-prfm.c &
tools/xngen src/x16-packw/avx.c.in -D NR=16 -D PREFETCH=0 -D KBLOCK=16 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-avx2-u16.c &
tools/xngen src/x16-packw/avx.c.in -D NR=16 -D PREFETCH=1 -D KBLOCK=16 -o src/x16-packw/gen/x16-packw-x16-gemm-goi-avx2-u16-prfm.c &
wait