RISC-V: Fix RVV register order
author Juzhe-Zhong <juzhe.zhong@rivai.ai>
Fri, 24 Mar 2023 06:57:25 +0000 (14:57 +0800)
committer Kito Cheng <kito.cheng@sifive.com>
Thu, 20 Apr 2023 13:34:23 +0000 (21:34 +0800)
This patch fixes the incorrect register order for RVV.
The new register order comes from Kito's original RVV GCC implementation.
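
For context, the table being changed is the hard-register allocation order in
riscv.h (REG_ALLOC_ORDER), where hard register numbers 96 through 127 stand for
v0 through v31, as the comments in the diff hunk below indicate.  A minimal
illustrative sketch of the vector slice of the new order, written as a plain C
array (the array name is made up for illustration; the authoritative table is
the one in the diff):

/* Illustrative only: the vector slice of the new allocation order.
   Hard register numbering follows riscv.h: 96 is v0, 97 is v1, ...,
   127 is v31.  v1 ~ v31 come first; v0, the only register that can
   hold a mask operand (the ",v0.t" forms in the assembly below), is
   tried last so it stays free for masks.  */
static const int rvv_reg_alloc_order_sketch[] = {
  /* v1 ~ v31.  */
  97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
  111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
  124, 125, 126, 127,
  /* v0, the vector mask register.  */
  96,
};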

Consider this case:
void f (void *base,void *base2,void *out,size_t vl, int n)
{
    vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100, vl);
    for (int i = 0; i < n; i++){
      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
      vuint64m8_t v = __riscv_vluxei64_v_u64m8_m(m,base,bindex,vl);
      vuint64m8_t v2 = __riscv_vle64_v_u64m8_tu (v, base2 + i, vl);
      vint8m1_t v3 = __riscv_vluxei64_v_i8m1_m(m,base,v,vl);
      vint8m1_t v4 = __riscv_vluxei64_v_i8m1_m(m,base,v2,vl);
      __riscv_vse8_v_i8m1 (out + 100*i,v3,vl);
      __riscv_vse8_v_i8m1 (out + 222*i,v4,vl);
    }
}

Before this patch:
f:
csrr    t0,vlenb
slli    t1,t0,3
sub     sp,sp,t1
addi    a5,a0,100
vsetvli zero,a3,e64,m8,ta,ma
vle64.v v24,0(a5)
vs8r.v  v24,0(sp)
ble     a4,zero,.L1
mv      a6,a0
add     a4,a4,a0
mv      a5,a2
.L3:
vsetvli zero,zero,e64,m8,ta,ma
vl8re64.v       v24,0(sp)
vlm.v   v0,0(a6)
vluxei64.v      v24,(a0),v24,v0.t
addi    a6,a6,1
vsetvli zero,zero,e8,m1,tu,ma
vmv8r.v v16,v24
vluxei64.v      v8,(a0),v24,v0.t
vle64.v v16,0(a1)
vluxei64.v      v24,(a0),v16,v0.t
vse8.v  v8,0(a2)
vse8.v  v24,0(a5)
addi    a1,a1,1
addi    a2,a2,100
addi    a5,a5,222
bne     a4,a6,.L3
.L1:
csrr    t0,vlenb
slli    t1,t0,3
add     sp,sp,t1
jr      ra

After this patch:
f:
addi    a5,a0,100
vsetvli zero,a3,e64,m8,ta,ma
vle64.v v24,0(a5)
ble     a4,zero,.L1
mv      a6,a0
add     a4,a4,a0
mv      a5,a2
.L3:
vsetvli zero,zero,e64,m8,ta,ma
vlm.v   v0,0(a6)
addi    a6,a6,1
vluxei64.v      v8,(a0),v24,v0.t
vsetvli zero,zero,e8,m1,tu,ma
vmv8r.v v16,v8
vluxei64.v      v2,(a0),v8,v0.t
vle64.v v16,0(a1)
vluxei64.v      v1,(a0),v16,v0.t
vse8.v  v2,0(a2)
vse8.v  v1,0(a5)
addi    a1,a1,1
addi    a2,a2,100
addi    a5,a5,222
bne     a4,a6,.L3
.L1:
ret

The redundant register spills are eliminated.
However, there is one more issue that needs to be addressed: the redundant
move instruction "vmv8r.v". That is another story, and it will be fixed by a
separate patch ("Fine tune RVV machine description RA constraint").

gcc/ChangeLog:

* config/riscv/riscv.h (enum reg_class): Fix RVV register order.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/spill-4.c: Adapt testcase.
* gcc.target/riscv/rvv/base/spill-6.c: Adapt testcase.
* gcc.target/riscv/rvv/base/reg_order-1.c: New test.

Signed-off-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Co-authored-by: kito-cheng <kito.cheng@sifive.com>
gcc/config/riscv/riscv.h
gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c
gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c

diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 66fb07d66521843fcab130c7640f483f8a4516ec..13038a39e5c2faa23df30090564d41ff991ad292 100644 (file)
@@ -553,13 +553,12 @@ enum reg_class
   60, 61, 62, 63,                                                      \
   /* Call-saved FPRs.  */                                              \
   40, 41, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,                      \
-  /* V24 ~ V31.  */                                                    \
-  120, 121, 122, 123, 124, 125, 126, 127,                              \
-  /* V8 ~ V23.  */                                                     \
-  104, 105, 106, 107, 108, 109, 110, 111,                              \
-  112, 113, 114, 115, 116, 117, 118, 119,                              \
-  /* V0 ~ V7.  */                                                      \
-  96, 97, 98, 99, 100, 101, 102, 103,                                  \
+  /* v1 ~ v31 vector registers.  */                                    \
+  97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,   \
+  111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,     \
+  124, 125, 126, 127,                                                  \
+  /* The vector mask register.  */                                     \
+  96,                                                                  \
   /* None of the remaining classes have defined call-saved             \
      registers.  */                                                    \
   64, 65, 66, 67                                                       \
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c
new file mode 100644 (file)
index 0000000..b33f914
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f (void *base,void *base2,void *out,size_t vl, int n)
+{
+    vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100, vl);
+    for (int i = 0; i < n; i++){
+      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
+      vuint64m8_t v = __riscv_vluxei64_v_u64m8_m(m,base,bindex,vl);
+      vuint64m8_t v2 = __riscv_vle64_v_u64m8_tu (v, base2 + i, vl);
+      vint8m1_t v3 = __riscv_vluxei64_v_i8m1_m(m,base,v,vl);
+      vint8m1_t v4 = __riscv_vluxei64_v_i8m1_m(m,base,v2,vl);
+      __riscv_vse8_v_i8m1 (out + 100*i,v3,vl);
+      __riscv_vse8_v_i8m1 (out + 222*i,v4,vl);
+    }
+}
+
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c
index 83c80b0b0456bf84ed3f2406cf1729bf1109e2aa..ad7592f30bc7f8ea2e0537c0f87385a24a2ef6c2 100644 (file)
@@ -10,7 +10,7 @@
 **  csrr\tt0,vlenb
 **  sub\tsp,sp,t0
 **  ...
-**  vs1r.v\tv24,0\(sp\)
+**  vs1r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl1re64.v\tv2,0\(sp\)
 **  vs1r.v\tv2,0\(a1\)
@@ -34,7 +34,7 @@ spill_4 (int64_t *in, int64_t *out)
 **  slli\tt1,t0,1
 **  sub\tsp,sp,t1
 **  ...
-**  vs2r.v\tv24,0\(sp\)
+**  vs2r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl2re64.v\tv4,0\(sp\)
 **  vs2r.v\tv4,0\(a1\)
@@ -58,10 +58,10 @@ spill_5 (int64_t *in, int64_t *out)
 **  slli\tt1,t0,2
 **  sub\tsp,sp,t1
 **  ...
-**  vs4r.v\tv24,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl4re64.v\tv8,0\(sp\)
-**  vs4r.v\tv8,0\(a1\)
+**  vl4re64.v\tv[0-9]+,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
@@ -82,10 +82,10 @@ spill_6 (int64_t *in, int64_t *out)
 **  slli\tt1,t0,3
 **  sub\tsp,sp,t1
 **  ...
-**  vs8r.v\tv24,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl8re64.v\tv16,0\(sp\)
-**  vs8r.v\tv16,0\(a1\)
+**  vl8re64.v\tv[0-9]+,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
@@ -105,7 +105,7 @@ spill_7 (int64_t *in, int64_t *out)
 **  csrr\tt0,vlenb
 **  sub\tsp,sp,t0
 **  ...
-**  vs1r.v\tv24,0\(sp\)
+**  vs1r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl1re64.v\tv2,0\(sp\)
 **  vs1r.v\tv2,0\(a1\)
@@ -129,7 +129,7 @@ spill_11 (uint64_t *in, uint64_t *out)
 **  slli\tt1,t0,1
 **  sub\tsp,sp,t1
 **  ...
-**  vs2r.v\tv24,0\(sp\)
+**  vs2r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl2re64.v\tv4,0\(sp\)
 **  vs2r.v\tv4,0\(a1\)
@@ -153,10 +153,10 @@ spill_12 (uint64_t *in, uint64_t *out)
 **  slli\tt1,t0,2
 **  sub\tsp,sp,t1
 **  ...
-**  vs4r.v\tv24,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl4re64.v\tv8,0\(sp\)
-**  vs4r.v\tv8,0\(a1\)
+**  vl4re64.v\tv[0-9]+,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
@@ -177,10 +177,10 @@ spill_13 (uint64_t *in, uint64_t *out)
 **  slli\tt1,t0,3
 **  sub\tsp,sp,t1
 **  ...
-**  vs8r.v\tv24,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl8re64.v\tv16,0\(sp\)
-**  vs8r.v\tv16,0\(a1\)
+**  vl8re64.v\tv[0-9]+,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c
index 340029da88b277155980405d2c22993258e34942..07eee61baa37b6a2c19a186452db86b95abed52b 100644 (file)
 **  csrr\tt0,vlenb
 **  sub\tsp,sp,t0
 **  ...
-**  vs1r.v\tv24,0\(sp\)
+**  vs1r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl1re64.v\tv2,0\(sp\)
-**  vs1r.v\tv2,0\(a1\)
+**  vl1re64.v\tv[0-9]+,0\(sp\)
+**  vs1r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */
@@ -34,7 +34,7 @@ spill_4 (double *in, double *out)
 **  slli\tt1,t0,1
 **  sub\tsp,sp,t1
 **  ...
-**  vs2r.v\tv24,0\(sp\)
+**  vs2r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl2re64.v\tv4,0\(sp\)
 **  vs2r.v\tv4,0\(a1\)
@@ -58,7 +58,7 @@ spill_5 (double *in, double *out)
 **  slli\tt1,t0,2
 **  sub\tsp,sp,t1
 **  ...
-**  vs4r.v\tv24,0\(sp\)
+**  vs4r.v\tv[0-9]+,0\(sp\)
 **  ...
 **  vl4re64.v\tv8,0\(sp\)
 **  vs4r.v\tv8,0\(a1\)
@@ -82,10 +82,10 @@ spill_6 (double *in, double *out)
 **  slli\tt1,t0,3
 **  sub\tsp,sp,t1
 **  ...
-**  vs8r.v\tv24,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(sp\)
 **  ...
-**  vl8re64.v\tv16,0\(sp\)
-**  vs8r.v\tv16,0\(a1\)
+**  vl8re64.v\tv[0-9]+,0\(sp\)
+**  vs8r.v\tv[0-9]+,0\(a1\)
 **  ...
 **  jr\tra
 */