]> git.ipfire.org Git - thirdparty/gcc.git/commit
RISC-V: Use merge approach to optimize vector permutation
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>
Wed, 14 Jun 2023 04:24:09 +0000 (12:24 +0800)
committerPan Li <pan2.li@intel.com>
Thu, 15 Jun 2023 02:21:41 +0000 (10:21 +0800)
commit9e3607e19bcd34e1fc857ca44ae30a8a1a4f5e20
tree7e6919f1aa13544f06d94d5c68e5dff8c58a42c6
parent0ec3fbb5903ac3ad735b3154e814b46724fe1a27
RISC-V: Use merge approach to optimize vector permutation

This patch is to optimize the permuation case that is suiteable use
merge approach.

Consider this following case:
typedef int8_t vnx16qi __attribute__((vector_size (16)));

void __attribute__ ((noipa))
merge0 (vnx16qi x, vnx16qi y, vnx16qi *out)
{
  vnx16qi v = __builtin_shufflevector ((vnx16qi) x, (vnx16qi) y, MASK_16);
  *(vnx16qi*)out = v;
}

The gimple IR:
v_3 = VEC_PERM_EXPR <x_1(D), y_2(D), { 0, 17, 2, 19, 4, 21, 6, 23, 8, 9, 10, 27, 12, 29, 14, 31 }>;

Selector = { 0, 17, 2, 19, 4, 21, 6, 23, 8, 9, 10, 27, 12, 29, 14, 31 }, the common expression:
{ 0, nunits + 1, 2, nunits + 3, 4, nunits + 5, ...  }

For this selector, we can use vmsltu + vmerge to optimize the codegen.

Before this patch:
merge0:
        addi    a5,sp,16
        vl1re8.v        v3,0(a5)
        li      a5,31
        vsetivli        zero,16,e8,m1,ta,mu
        vmv.v.x v2,a5
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        vl1re8.v        v1,0(a5)
        vl1re8.v        v4,0(sp)
        vand.vv v1,v1,v2
        vmsgeu.vi       v0,v1,16
        vrgather.vv     v2,v4,v1
        vadd.vi v1,v1,-16
        vrgather.vv     v2,v3,v1,v0.t
        vs1r.v  v2,0(a0)
        ret

After this patch:
merge0:
        addi    a5,sp,16
        vl1re8.v        v1,0(a5)
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        vsetivli        zero,16,e8,m1,ta,ma
        vl1re8.v        v0,0(a5)
        vl1re8.v        v2,0(sp)
        vmsltu.vi       v0,v0,16
        vmerge.vvm      v1,v1,v2,v0
        vs1r.v  v1,0(a0)
        ret

The key of this optimization is that:
1. mask = vmsltu (selector, nunits)
2. result = vmerge (op0, op1, mask)

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_merge_patterns): New pattern.
(expand_vec_perm_const_1): Add merge optmization.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c: New test.
15 files changed:
gcc/config/riscv/riscv-v.cc
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c [new file with mode: 0644]