]> git.ipfire.org Git - thirdparty/gcc.git/commit
RISC-V: Support one more overlap for wv instructions
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>
Mon, 18 Dec 2023 11:35:21 +0000 (19:35 +0800)
committerPan Li <pan2.li@intel.com>
Mon, 18 Dec 2023 13:19:23 +0000 (21:19 +0800)
commitb3b2799b872bc4c1944629af9dfc8472c8ca5fe6
tree51583127ee497da8b37255250b6bd75ae463a8f4
parent5bca321faa30c4fb46225efbe2698a13b3271b1c
RISC-V: Support one more overlap for wv instructions

For 'wv' instructions, e.g. vwadd.wv vd,vs2,vs1.

vs2 has same EEW as vd.
vs1 has smaller than vd.

So, vs2 can overlap with vd, but vs1 can only overlap highest-number of vd
when LMUL of vs1 is greater than 1.

We already have supported overlap for vs1 LMUL >= 1.
But I forget vs1 LMUL < 1, vs2 can overlap vd even though vs1 totally can not overlap vd.

Consider the reduction auto-vectorization:

int64_t
reduc_plus_int (int *__restrict a, int n)
{
  int64_t r = 0;
  for (int i = 0; i < n; ++i)
    r += a[i];
  return r;
}

When we use --param=riscv-autovec-lmul=m2, the codegen is good to us because we already supported
overlap for source EEW32 LMUL1 -> dest EEW64 LMUL2.

--param=riscv-autovec-lmul=m2:

reduc_plus_int:
        ble     a1,zero,.L4
        vsetvli a5,zero,e64,m2,ta,ma
        vmv.v.i v2,0
.L3:
        vsetvli a5,a1,e32,m1,tu,ma
        slli    a4,a5,2
        sub     a1,a1,a5
        vle32.v v1,0(a0)
        add     a0,a0,a4
        vwadd.wv        v2,v2,v1
        bne     a1,zero,.L3
        li      a5,0
        vsetivli        zero,1,e64,m1,ta,ma
        vmv.s.x v1,a5
        vsetvli a5,zero,e64,m2,ta,ma
        vredsum.vs      v2,v2,v1
        vmv.x.s a0,v2
        ret
.L4:
        li      a0,0
        ret

However, default LMUL (--param=riscv-autovec-lmul=m1) generates redundant vmv1r since
it is EEW32 LMUL=MF2 -> EEW64 LMUL = 1

Before this patch:

reduc_plus_int:
        ble     a1,zero,.L4
        vsetvli a5,zero,e64,m1,ta,ma
        vmv.v.i v1,0
.L3:
        vsetvli a5,a1,e32,mf2,tu,ma
        slli    a4,a5,2
        sub     a1,a1,a5
        vle32.v v2,0(a0)
        vmv1r.v v3,v1                  ---->  This should be removed.
        add     a0,a0,a4
        vwadd.wv        v1,v3,v2       ---->  vs2 should be v1
        bne     a1,zero,.L3
        li      a5,0
        vsetivli        zero,1,e64,m1,ta,ma
        vmv.s.x v2,a5
        vsetvli a5,zero,e64,m1,ta,ma
        vredsum.vs      v1,v1,v2
        vmv.x.s a0,v1
        ret
.L4:
        li      a0,0
        ret

After this patch:

reduc_plus_int:
ble a1,zero,.L4
vsetvli a5,zero,e64,m1,ta,ma
vmv.v.i v1,0
.L3:
vsetvli a5,a1,e32,mf2,tu,ma
slli a4,a5,2
sub a1,a1,a5
vle32.v v2,0(a0)
add a0,a0,a4
vwadd.wv v1,v1,v2
bne a1,zero,.L3
li a5,0
vsetivli zero,1,e64,m1,ta,ma
vmv.s.x v2,a5
vsetvli a5,zero,e64,m1,ta,ma
vredsum.vs v1,v1,v2
vmv.x.s a0,v1
ret
.L4:
li a0,0
ret

PR target/112432

gcc/ChangeLog:

* config/riscv/riscv.md (none,W21,W42,W84,W43,W86,W87): Add W0.
(none,W21,W42,W84,W43,W86,W87,W0): Ditto.
* config/riscv/vector.md: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112432-42.c: New test.
gcc/config/riscv/riscv.md
gcc/config/riscv/vector.md
gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c [new file with mode: 0644]