git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Add pattern for vector-scalar dual widening floating-point sub
author: Paul-Antoine Arras <parras@baylibre.com>
Tue, 9 Sep 2025 14:29:09 +0000 (16:29 +0200)
committer: Paul-Antoine Arras <parras@baylibre.com>
Tue, 9 Sep 2025 15:10:41 +0000 (17:10 +0200)
This pattern enables the combine pass (or late-combine, depending on the case)
to merge a float_extend'ed vec_duplicate into a minus RTL instruction. Both
minus operands are widened.

Before this patch, we have six instructions, e.g.:
  fcvt.d.s        fa0,fa0
  vsetvli         a5,zero,e64,m1,ta,ma
  vfmv.v.f        v3,fa0
  vfwcvt.f.f.v    v1,v2
  vsetvli         zero,zero,e64,m1,ta,ma
  vfsub.vv        v1,v1,v3

After this patch, we get only one:
  vfwsub.vf       v1,v2,fa0

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*vfwsub_vf_<mode>): New pattern to
combine float_extend + vec_duplicate + vfwsub.vv into vfwsub.vf.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwsub.vf.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
(DEF_VF_BINOP_WIDEN_CASE_0, DEF_VF_BINOP_WIDEN_CASE_1): Swap operands.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c: New test.

13 files changed:
gcc/config/riscv/autovec-opt.md
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c [new file with mode: 0644]

index 5512c46fa8e5ea6ae56a4d2472898c4385bb5885..02f19bc6a42f08e3399306355817e200742d71ec 100644 (file)
   [(set_attr "type" "vfwalu")]
 )
 
+;; vfwsub.vf
+(define_insn_and_split "*vfwsub_vf_<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand")
+    (minus:VWEXTF
+      (float_extend:VWEXTF
+       (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+      (vec_duplicate:VWEXTF
+       (float_extend:<VEL>
+         (match_operand:<VSUBEL> 2 "register_operand")))))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    riscv_vector::emit_vlmax_insn (code_for_pred_dual_widen_scalar (MINUS,
+                                                                   <MODE>mode),
+                                  riscv_vector::BINARY_OP_FRM_DYN, operands);
+
+    DONE;
+  }
+  [(set_attr "type" "vfwalu")]
+)
+
 ;; vfadd.vf
 (define_insn_and_split "*vfadd_vf_<mode>"
   [(set (match_operand:V_VLSF 0 "register_operand")
index fed5d3b60013479b7b53df21ce887f39e91ec117..20e809010d84a08f889cf3739c64020da21838cb 100644 (file)
@@ -27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max)
 DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max)
 DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, *, mul)
 DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, +, add)
+DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, -, sub)
 DEF_VF_BINOP_WIDEN_CASE_2 (_Float16, float, +, add)
 
 /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
@@ -50,4 +51,5 @@ DEF_VF_BINOP_WIDEN_CASE_2 (_Float16, float, +, add)
 /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
 /* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwsub.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */
index 82d64d11c87cd934905a6c8a57b045e31c270f6b..8ecd7d0fa002e47279974983523b8b80ab313db7 100644 (file)
@@ -27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_0_WRAP (float), max)
 DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max)
 DEF_VF_BINOP_WIDEN_CASE_0 (float, double, *, mul)
 DEF_VF_BINOP_WIDEN_CASE_0 (float, double, +, add)
+DEF_VF_BINOP_WIDEN_CASE_0 (float, double, -, sub)
 DEF_VF_BINOP_WIDEN_CASE_2 (float, double, +, add)
 
 /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
@@ -50,4 +51,5 @@ DEF_VF_BINOP_WIDEN_CASE_2 (float, double, +, add)
 /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
 /* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwsub.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */
index eef86749c505c2052b11eb0587375962445c3a20..8fe361f4f70e9a7221c0d1da4aa2056da09736bc 100644 (file)
@@ -24,5 +24,6 @@
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwsub.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.wf} } } */
-/* { dg-final { scan-assembler-times {fcvt.s.h} 7 } } */
+/* { dg-final { scan-assembler-times {fcvt.s.h} 8 } } */
index c5e93060e0274e37e18cb2f9eab8659e9e60c133..a1eaaa8b47fd1371dafab2f54fd6504338db7ee8 100644 (file)
@@ -24,5 +24,6 @@
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwsub.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.wf} } } */
-/* { dg-final { scan-assembler-times {fcvt.d.s} 7 } } */
+/* { dg-final { scan-assembler-times {fcvt.d.s} 8 } } */
index f0c6594533eeb64058de4567558d8f3f85914833..f799437d3ca41b01bf7a7ea06c35a0cfb4aae75a 100644 (file)
@@ -31,6 +31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max,
                          VF_BINOP_FUNC_BODY_X128)
 DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, *, mul)
 DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, +, add)
+DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, -, sub)
 DEF_VF_BINOP_WIDEN_CASE_3 (_Float16, float, +, add)
 
 /* { dg-final { scan-assembler {vfmadd.vf} } } */
@@ -54,4 +55,5 @@ DEF_VF_BINOP_WIDEN_CASE_3 (_Float16, float, +, add)
 /* { dg-final { scan-assembler {vfmax.vf} } } */
 /* { dg-final { scan-assembler {vfwmul.vf} } } */
 /* { dg-final { scan-assembler {vfwadd.vf} } } */
+/* { dg-final { scan-assembler {vfwsub.vf} } } */
 /* { dg-final { scan-assembler {vfwadd.wf} } } */
index 60617c3ec9b6dc89eae72bbaeb843dcda1dfc7ba..bb987e1edc0f70de2039acef6d18c207d347298e 100644 (file)
@@ -31,6 +31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max,
                          VF_BINOP_FUNC_BODY_X128)
 DEF_VF_BINOP_WIDEN_CASE_1 (float, double, *, mul)
 DEF_VF_BINOP_WIDEN_CASE_1 (float, double, +, add)
+DEF_VF_BINOP_WIDEN_CASE_1 (float, double, -, sub)
 DEF_VF_BINOP_WIDEN_CASE_3 (float, double, +, add)
 
 /* { dg-final { scan-assembler {vfmadd.vf} } } */
@@ -54,4 +55,5 @@ DEF_VF_BINOP_WIDEN_CASE_3 (float, double, +, add)
 /* { dg-final { scan-assembler {vfmax.vf} } } */
 /* { dg-final { scan-assembler {vfwmul.vf} } } */
 /* { dg-final { scan-assembler {vfwadd.vf} } } */
+/* { dg-final { scan-assembler {vfwsub.vf} } } */
 /* { dg-final { scan-assembler {vfwadd.wf} } } */
index 0650265b6c0ca2bacd66f99091f7af121d2be5fe..50a4968718be76e939d80a93dc375c76c065672e 100644 (file)
@@ -24,5 +24,6 @@
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwsub.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.wf} } } */
 /* { dg-final { scan-assembler {fcvt.s.h} } } */
index b43699deb83cb28d3109ce8e7103199aa45c59c3..2e7ef5382150c3b3fa76392525bcb5e3422c8d5c 100644 (file)
@@ -24,5 +24,6 @@
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwsub.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.wf} } } */
 /* { dg-final { scan-assembler {fcvt.d.s} } } */
index 2a55c9c6df93199286f994f2ee8f601a2d402f4f..479a6fa7222913e86b3532566d6f990ce56f54d9 100644 (file)
@@ -37,7 +37,7 @@
                                                   unsigned n)                 \
   {                                                                            \
     for (unsigned i = 0; i < n; i++)                                           \
-      out[i] = (T2) f OP (T2) in[i];                                           \
+      out[i] = (T2) in[i] OP (T2) f;                                           \
   }
 #define DEF_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, OP, NAME)                       \
   DEF_VF_BINOP_WIDEN_CASE_0 (T1, T2, OP, NAME)
@@ -246,10 +246,10 @@ DEF_MAX_1 (double)
   {                                                                            \
     for (int i = 0; i < n; i++)                                                \
       {                                                                        \
-       dst[i] = (TYPE2) * a OP (TYPE2) b[i];                                  \
-       dst2[i] = (TYPE2) * a2 OP (TYPE2) b[i];                                \
-       dst3[i] = (TYPE2) * a2 OP (TYPE2) a[i];                                \
-       dst4[i] = (TYPE2) * a OP (TYPE2) b2[i];                                \
+       dst[i] = (TYPE2) b[i] OP (TYPE2) * a;                                  \
+       dst2[i] = (TYPE2) b[i] OP (TYPE2) * a2;                                \
+       dst3[i] = (TYPE2) a[i] OP (TYPE2) * a2;                                \
+       dst4[i] = (TYPE2) b2[i] OP (TYPE2) * a;                                \
       }                                                                        \
   }
 
index 8748cda21aa9ae1168ebee08686c6c1782d1836c..1c9dc8c5e7b41e39fe4e051cac71b38ee3869702 100644 (file)
@@ -28,7 +28,7 @@ int main ()
   TEST_RUN (T1, T2, NAME, out, in, f, N);
 
   for (int i = 0; i < N; i++)
-    assert (out[i] == ((T2) f OP (T2) in[i]));
+    assert (out[i] == ((T2) in[i] OP (T2) f));
 
   return 0;
 }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c
new file mode 100644 (file)
index 0000000..6269073
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvfh" } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1    _Float16
+#define T2    float
+#define NAME sub
+#define OP -
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+
+#include "vf_binop_widen_run.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c
new file mode 100644 (file)
index 0000000..9e23db1
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1    float
+#define T2    double
+#define NAME sub
+#define OP -
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "vf_binop_widen_run.h"