After we enabled the late-combine pass after the mode-switching pass, it
will try to combine the insn patterns below, and the result becomes a no-op. Aka:
(insn 40 5 41 2 (set (reg:SI 11 a1 [151])
(reg:SI 69 frm)) "pr118103-simple.c":67:15 2712 {frrmsi}
(nil))
(insn 41 40 7 2 (set (reg:SI 69 frm)
(const_int 2 [0x2])) "pr118103-simple.c":69:8 2710 {fsrmsi_restore}
(nil))
(insn 42 10 11 2 (set (reg:SI 69 frm)
(reg:SI 11 a1 [151])) "pr118103-simple.c":70:8 2710 {fsrmsi_restore}
(nil))
trying to combine definition of r11 in:
40: a1:SI=frm:SI
into:
42: frm:SI=a1:SI
instruction becomes a no-op:
(set (reg:SI 69 frm)
(reg:SI 69 frm))
original cost = 4 + 4 (weighted: 8.000000), replacement cost =
2147483647; keeping replacement
rescanning insn with uid = 42.
updating insn 42 in-place
verify found no changes in insn with uid = 42.
deleting insn 40
For example, we have code as below:
9 │ int test_exampe () {
10 │ test ();
11 │
12 │ size_t vl = 4;
13 │ vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
14 │ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
15 │ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
16 │
17 │ __riscv_vse16_v_f16m1(b, va, vl);
18 │
19 │ return 0;
20 │ }
it will be compiled to:
53 │ main:
54 │ addi sp,sp,-16
55 │ sd ra,8(sp)
56 │ call initialize
57 │ lui a6,%hi(b)
58 │ lui a2,%hi(a)
59 │ addi a3,a6,%lo(b)
60 │ addi a2,a2,%lo(a)
61 │ li a4,4
62 │ .L8:
63 │ fsrmi 2
64 │ vsetvli a5,a4,e16,m1,ta,ma
65 │ vle16.v v1,0(a2)
66 │ slli a1,a5,1
67 │ subw a4,a4,a5
68 │ add a2,a2,a1
69 │ vfnmadd.vv v1,v1,v1
>> The fsrm a0 insn is deleted by late-combine <<
70 │ vfmsub.vv v1,v1,v1
71 │ vse16.v v1,0(a3)
72 │ add a3,a3,a1
73 │ bgt a4,zero,.L8
74 │ lh a4,%lo(b)(a6)
75 │ li a5,-20480
76 │ addi a5,a5,-1382
77 │ bne a4,a5,.L14
78 │ ld ra,8(sp)
79 │ li a0,0
80 │ addi sp,sp,16
81 │ jr ra
This patch adds the FRM register to the global_regs, as it
is a cooperatively-managed global register. The fsrm insn will then
not be eliminated by late-combine. The related spec17 cam4 failure may
also be caused by this issue.
The test suites below passed for this patch.
* The rv64gcv full regression test.
PR target/118103
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_conditional_register_usage): Add
the FRM as the global_regs.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/pr118103-1.c: New test.
* gcc.target/riscv/rvv/base/pr118103-run-1.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
call_used_regs[r] = 1;
}
- if (!TARGET_HARD_FLOAT)
+ if (TARGET_HARD_FLOAT)
+ global_regs[FRM_REGNUM] = 1;
+ else
{
for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
fixed_regs[regno] = call_used_regs[regno] = 1;
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvfh -mabi=lp64d" } */
+
+#include "riscv_vector.h"
+
+#define N 4
+typedef _Float16 float16_t;
+float16_t a[N]; float16_t b[N];
+
+extern void test ();
+
+/* Compile-only reproducer: after the call to test (), loads N _Float16
+   elements from a, applies vfnmadd with the explicit __RISCV_FRM_RDN
+   rounding mode, then vfmsac with no rounding-mode argument, and stores
+   the result to b.  Always returns 0.  */
+int test_exampe () {
+ test ();
+
+ size_t vl = N;
+ vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
+ /* Only this intrinsic passes an explicit rounding mode.  */
+ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
+ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
+
+ __riscv_vse16_v_f16m1(b, va, vl);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 1 } } */
+/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 1 } } */
--- /dev/null
+/* { dg-do run { target { riscv_zvfh } } } */
+/* { dg-options "-O3 -fno-strict-aliasing" } */
+
+#include "riscv_vector.h"
+#define N 4
+typedef _Float16 float16_t;
+float16_t a[N]; float16_t b[N];
+
+/* Fill a[] with the raw 16-bit patterns listed in tmp_0 and zero b[].  */
+void initialize () {
+ uint16_t tmp_0[N] = {43883, 3213, 238, 275, };
+
+ for (int i = 0; i < N; ++i)
+ {
+ /* Reinterpret the integer bits as a float16_t through a union.  */
+ union { float16_t f16; uint16_t u16; } converter;
+ converter.u16 = tmp_0[i];
+ a[i] = converter.f16;
+ }
+
+ for (int i = 0; i < N; ++i)
+ b[i] = 0;
+}
+
+/* Process the N elements of a[] in chunks of vl elements: load a chunk,
+   apply vfnmadd with the explicit __RISCV_FRM_RDN rounding mode, then
+   vfmsac with no rounding-mode argument, and store the chunk to b[].  */
+void compute ()
+{
+ int avl = N;
+ float16_t* ptr_a = a; float16_t* ptr_b = b;
+
+ /* vl is assigned at the top of each iteration, before it is used to
+    decrement avl.  */
+ for (size_t vl; avl > 0; avl -= vl)
+ {
+ vl = __riscv_vsetvl_e16m1(avl);
+ vfloat16m1_t va = __riscv_vle16_v_f16m1(ptr_a, vl);
+ /* Only this intrinsic passes an explicit rounding mode.  */
+ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
+ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
+ __riscv_vse16_v_f16m1(ptr_b, va, vl);
+ /* Advance both pointers past the elements just processed.  */
+ ptr_a += vl; ptr_b += vl;
+ }
+}
+
+/* Run initialize () and compute (), then check the first element of b[]:
+   abort if its bits, read as a short, differ from -21862; otherwise
+   return 0.  */
+int main ()
+{
+ initialize();
+ compute();
+
+ /* Read the float16_t storage through a short pointer; the test is built
+    with -fno-strict-aliasing.  */
+ short *tmp = (short *)b;
+
+ /* NOTE(review): -21862 is presumably the result when the RDN rounding
+    mode is honoured for vfnmadd only -- confirm against the PR.  */
+ if (*tmp != -21862)
+ __builtin_abort ();
+
+ return 0;
+}