Inspired by PR118103, the VXRM register should be treated almost the
same as the FRM register, aka cooperatively-managed global register.
Thus, add the VXRM to global_regs to avoid the elimination by the
late-combine pass.
For example as below code:
21 │
22 │ void compute ()
23 │ {
24 │ size_t vl = __riscv_vsetvl_e16m1 (N);
25 │ vuint16m1_t va = __riscv_vle16_v_u16m1 (a, vl);
26 │ vuint16m1_t vb = __riscv_vle16_v_u16m1 (b, vl);
27 │ vuint16m1_t vc = __riscv_vaaddu_vv_u16m1 (va, vb, __RISCV_VXRM_RDN, vl);
28 │
29 │ __riscv_vse16_v_u16m1 (c, vc, vl);
30 │ }
31 │
32 │ int main ()
33 │ {
34 │ initialize ();
35 │ compute();
36 │
37 │ return 0;
38 │ }
After compile with -march=rv64gcv -O3, we will have:
30 │ compute:
31 │ csrwi vxrm,2
32 │ lui a3,%hi(a)
33 │ lui a4,%hi(b)
34 │ addi a4,a4,%lo(b)
35 │ vsetivli zero,4,e16,m1,ta,ma
36 │ addi a3,a3,%lo(a)
37 │ vle16.v v2,0(a4)
38 │ vle16.v v1,0(a3)
39 │ lui a4,%hi(c)
40 │ addi a4,a4,%lo(c)
41 │ vaaddu.vv v1,v1,v2
42 │ vse16.v v1,0(a4)
43 │ ret
44 │ .size compute, .-compute
45 │ .section .text.startup,"ax",@progbits
46 │ .align 1
47 │ .globl main
48 │ .type main, @function
49 │ main:
| // csrwi vxrm,2 deleted after inline
50 │ addi sp,sp,-16
51 │ sd ra,8(sp)
52 │ call initialize
53 │ lui a3,%hi(a)
54 │ lui a4,%hi(b)
55 │ vsetivli zero,4,e16,m1,ta,ma
56 │ addi a4,a4,%lo(b)
57 │ addi a3,a3,%lo(a)
58 │ vle16.v v2,0(a4)
59 │ vle16.v v1,0(a3)
60 │ lui a4,%hi(c)
61 │ addi a4,a4,%lo(c)
62 │ li a0,0
63 │ vaaddu.vv v1,v1,v2
The below test suites are passed for this patch.
* The rv64gcv fully regression test.
PR target/118103
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_conditional_register_usage): Add
the VXRM as the global_regs.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/pr118103-2.c: New test.
* gcc.target/riscv/rvv/base/pr118103-run-2.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
call_used_regs[regno] = 1;
}
- if (!TARGET_VECTOR)
+ if (TARGET_VECTOR)
+ global_regs[VXRM_REGNUM] = 1;
+ else
{
for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
fixed_regs[regno] = call_used_regs[regno] = 1;
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d" } */
+
+#include "riscv_vector.h"
+
+#define N 4
+uint16_t a[N];
+uint16_t b[N];
+uint16_t c[N];
+
+void initialize ()
+{
+ uint16_t tmp_0[N] = { 0xfff, 3213, 238, 275, };
+
+ for (int i = 0; i < N; ++i)
+ a[i] = b[i] = tmp_0[i];
+
+ for (int i = 0; i < N; ++i)
+ c[i] = 0;
+}
+
+void compute ()
+{
+ size_t vl = __riscv_vsetvl_e16m1 (N);
+ vuint16m1_t va = __riscv_vle16_v_u16m1 (a, vl);
+ vuint16m1_t vb = __riscv_vle16_v_u16m1 (b, vl);
+ vuint16m1_t vc = __riscv_vaaddu_vv_u16m1 (va, vb, __RISCV_VXRM_RDN, vl);
+
+ __riscv_vse16_v_u16m1 (c, vc, vl);
+}
+
+int main ()
+{
+ initialize ();
+ compute();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times {csrwi\s+vxrm,\s*[01234]} 2 } } */
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O3" } */
+
+#include "riscv_vector.h"
+
+#define N 4
+uint16_t a[N];
+uint16_t b[N];
+uint16_t c[N];
+
+void initialize () {
+ uint16_t tmp_0[N] = { 0xfff, 3213, 238, 275, };
+ uint16_t tmp_1[N] = { 0x2, 823, 39, 9, };
+
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = tmp_0[i];
+ b[i] = tmp_1[i];
+ }
+
+ for (int i = 0; i < N; ++i)
+ c[i] = 0;
+}
+
+void compute ()
+{
+ size_t vl = __riscv_vsetvl_e16m1 (N);
+ vuint16m1_t va = __riscv_vle16_v_u16m1 (a, vl);
+ vuint16m1_t vb = __riscv_vle16_v_u16m1 (b, vl);
+ vuint16m1_t vc = __riscv_vaaddu_vv_u16m1 (va, vb, __RISCV_VXRM_RDN, vl);
+
+ __riscv_vse16_v_u16m1 (c, vc, vl);
+}
+
+int main ()
+{
+ initialize ();
+ compute ();
+
+ if (c[0] != 2048)
+ __builtin_abort ();
+
+ return 0;
+}