git.ipfire.org Git - thirdparty/gcc.git/commit
RISC-V: Optimize permutation codegen with vcompress
author Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Tue, 11 Jul 2023 06:38:28 +0000 (14:38 +0800)
committer Pan Li <pan2.li@intel.com>
Tue, 11 Jul 2023 23:34:19 +0000 (07:34 +0800)
commit 9aabf81f40f0ee130646ab5e60d158218d1276cc
tree dabcf2823c28c231cf280f40ed30aec6fc4279e5
parent 6726bca472645cdc91ae596a9f71de43a8158bbc
RISC-V: Optimize permutation codegen with vcompress

This patch recognizes a specific permutation pattern to which the compress approach (vcompress) can be applied.

Consider the following case:
typedef int8_t vnx64i __attribute__ ((vector_size (64)));
#define MASK_64                                                                \
  1, 2, 3, 5, 7, 9, 10, 11, 12, 14, 15, 17, 19, 21, 22, 23, 26, 28, 30, 31,    \
    37, 38, 41, 46, 47, 53, 54, 55, 60, 61, 62, 63, 76, 77, 78, 79, 80, 81,    \
    82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,    \
    100, 101, 102, 103, 104, 105, 106, 107
void __attribute__ ((noinline, noclone)) test_1 (int8_t *x, int8_t *y, int8_t *out)
{
  vnx64i v1 = *(vnx64i*)x;
  vnx64i v2 = *(vnx64i*)y;
  vnx64i v3 = __builtin_shufflevector (v1, v2, MASK_64);
  *(vnx64i*)out = v3;
}

https://godbolt.org/z/P33nev6cW

Before this patch:
        lui     a4,%hi(.LANCHOR0)
        addi    a4,a4,%lo(.LANCHOR0)
        vl4re8.v        v4,0(a4)
        li      a4,64
        vsetvli a5,zero,e8,m4,ta,mu
        vl4re8.v        v20,0(a0)
        vl4re8.v        v16,0(a1)
        vmv.v.x v12,a4
        vrgather.vv     v8,v20,v4
        vmsgeu.vv       v0,v4,v12
        vsub.vv v4,v4,v12
        vrgather.vv     v8,v16,v4,v0.t
        vs4r.v  v8,0(a2)
        ret

After this patch:
        lui a4,%hi(.LANCHOR0)
        addi a4,a4,%lo(.LANCHOR0)
        vsetvli a5,zero,e8,m4,ta,ma
        vl4re8.v v12,0(a1)
        vl4re8.v v8,0(a0)
        vlm.v v0,0(a4)
        vslideup.vi v4,v12,20
        vcompress.vm v4,v8,v0
        vs4r.v v4,0(a2)
        ret

gcc/ChangeLog:

* config/riscv/riscv-protos.h (enum insn_type): Add vcompress optimization.
* config/riscv/riscv-v.cc (emit_vlmax_compress_insn): Ditto.
(shuffle_compress_patterns): Ditto.
(expand_vec_perm_const_1): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-6.c: New test.
14 files changed:
gcc/config/riscv/riscv-protos.h
gcc/config/riscv/riscv-v.cc
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-6.c [new file with mode: 0644]