]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Add pattern for vector-scalar widening floating-point multiply
authorPaul-Antoine Arras <parras@baylibre.com>
Wed, 3 Sep 2025 12:29:13 +0000 (14:29 +0200)
committerPaul-Antoine Arras <parras@baylibre.com>
Mon, 8 Sep 2025 10:26:09 +0000 (12:26 +0200)
This pattern enables the combine pass (or late-combine, depending on the case)
to merge a float_extend'ed vec_duplicate into a mult RTL instruction.

Before this patch, we have six instructions, e.g.:
  fcvt.d.s        fa0,fa0
  vsetvli         a5,zero,e64,m1,ta,ma
  vfmv.v.f        v3,fa0
  vfwcvt.f.f.v    v1,v2
  vsetvli         zero,zero,e64,m1,ta,ma
  vfmul.vv        v1,v3,v1

After, we get only one:
  vfwmul.vf       v1,v2,fa0

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*vfwmul_vf_<mode>): New pattern to
combine float_extend + vec_duplicate + vfmul.vv into vfwmul.vf.
* config/riscv/vector.md (*@pred_dual_widen_<optab><mode>_scalar):
Swap operands to match the RTL emitted by expand, i.e. first
float_extend then vec_duplicate.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwmul.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h: Add support for
widening variants.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h: New test
helper.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c: New test.

14 files changed:
gcc/config/riscv/autovec-opt.md
gcc/config/riscv/vector.md
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c [new file with mode: 0644]

index d2a89a5d63b4da0356fa9f1efaec702ed74190f0..82a5fa0fae902e01b8492fb84dfcf12a2dedd022 100644 (file)
   }
   [(set_attr "type" "vfminmax")]
 )
+
+;; vfwmul.vf
+;; Match a VWEXTF multiply whose first input is a float_extend'ed vector
+;; (operand 1) and whose second input is a float_extend'ed scalar (operand 2)
+;; broadcast with vec_duplicate.  The pattern is only valid while
+;; can_create_pseudo_p () holds.  The "#" output template means the insn is
+;; never printed as-is: the "&& 1" split condition rewrites every match into
+;; the insn selected by code_for_pred_dual_widen_scalar (MULT, <MODE>mode),
+;; emitted through emit_vlmax_insn with BINARY_OP_FRM_DYN.
+(define_insn_and_split "*vfwmul_vf_<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand")
+    (mult:VWEXTF
+      (float_extend:VWEXTF
+       (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+      (vec_duplicate:VWEXTF
+       (float_extend:<VEL>
+         (match_operand:<VSUBEL> 2 "register_operand")))))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    riscv_vector::emit_vlmax_insn (code_for_pred_dual_widen_scalar (MULT,
+                                                                   <MODE>mode),
+                                  riscv_vector::BINARY_OP_FRM_DYN, operands);
+
+    DONE;
+  }
+  [(set_attr "type" "vfwmul")]
+)
index 95d44baf6fddf7e8ecb54cc9a2294dc2eb888930..d62f8bb2cd2e2094f568ba33c9105618b33fe74d 100644 (file)
          (any_widen_binop:VWEXTF
            (float_extend:VWEXTF
              (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" "   vr,   vr"))
-           (float_extend:VWEXTF
-             (vec_duplicate:<V_DOUBLE_TRUNC>
+           (vec_duplicate:VWEXTF
+             (float_extend:<VEL>
                (match_operand:<VSUBEL> 4 "register_operand"       "    f,    f"))))
          (match_operand:VWEXTF 2 "vector_merge_operand"           "   vu,    0")))]
   "TARGET_VECTOR"
index 0be64f1fd6460084b895c9f259eb9ebf7b1af868..cbec87e6c0b332ab867d5a4aeeafc37ecf1ec3d1 100644 (file)
@@ -22,6 +22,7 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MIN_FUNC_0_WRAP (_Float16), min)
 DEF_VF_BINOP_CASE_2_WRAP (_Float16, MIN_FUNC_1_WRAP (_Float16), min)
 DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max)
 DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max)
+DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, *, mul)
 
 /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -39,3 +40,4 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max)
 /* { dg-final { scan-assembler-times {vfrdiv.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfmin.vf} 2 } } */
 /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
+/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
index a9cd38aebeb122d4d67174bcc135ff697e51424b..b6d94c650b0cd6eff20e1b5482f4185dd7cba650 100644 (file)
@@ -22,6 +22,7 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MIN_FUNC_0_WRAP (float), min)
 DEF_VF_BINOP_CASE_2_WRAP (float, MIN_FUNC_1_WRAP (float), min)
 DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_0_WRAP (float), max)
 DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max)
+DEF_VF_BINOP_WIDEN_CASE_0 (float, double, *, mul)
 
 /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -39,3 +40,4 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max)
 /* { dg-final { scan-assembler-times {vfrdiv.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfmin.vf} 2 } } */
 /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
+/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
index 0db3048688ce04c8f6aba6ae938a44b9a5b88b4f..3f31568825e4ad1b3caa36c89eb370db787eb510 100644 (file)
@@ -19,4 +19,5 @@
 /* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
 /* { dg-final { scan-assembler-not {vfmin.vf} } } */
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.s.h} 4 } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.s.h} 5 } } */
index 494b33e45b2969da95afcf2714e25d606196f1c1..21a3e1d16aa5d64cd4ffb5d6d11ed500d8e0fdea 100644 (file)
@@ -19,4 +19,5 @@
 /* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
 /* { dg-final { scan-assembler-not {vfmin.vf} } } */
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.d.s} 4 } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.d.s} 5 } } */
index c2c4f430b1547d3bf1f0c32753e0a40c3423c805..522b5d294649e8b138bcbf2053c6e7087366f4de 100644 (file)
@@ -26,6 +26,7 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max,
                          VF_BINOP_FUNC_BODY_X128)
 DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max,
                          VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, *, mul)
 
 /* { dg-final { scan-assembler {vfmadd.vf} } } */
 /* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -43,3 +44,4 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max,
 /* { dg-final { scan-assembler {vfrdiv.vf} } } */
 /* { dg-final { scan-assembler {vfmin.vf} } } */
 /* { dg-final { scan-assembler {vfmax.vf} } } */
+/* { dg-final { scan-assembler {vfwmul.vf} } } */
index f2582cad8b37c8c82b44311df5c8eb3babf1e3b3..8d05c54c7729918f13266b72c09a0d1c01ba9ce1 100644 (file)
@@ -26,6 +26,7 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_0_WRAP (float), max,
                          VF_BINOP_FUNC_BODY_X128)
 DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max,
                          VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_WIDEN_CASE_1 (float, double, *, mul)
 
 /* { dg-final { scan-assembler {vfmadd.vf} } } */
 /* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -43,3 +44,4 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max,
 /* { dg-final { scan-assembler {vfrdiv.vf} } } */
 /* { dg-final { scan-assembler {vfmin.vf} } } */
 /* { dg-final { scan-assembler {vfmax.vf} } } */
+/* { dg-final { scan-assembler {vfwmul.vf} } } */
index 3fa31504cfef2174ad814e8c0f27bb78dd0fea6f..3058367785e9acf69c5dc2901bc6245d6bf4c27a 100644 (file)
@@ -19,4 +19,5 @@
 /* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
 /* { dg-final { scan-assembler-not {vfmin.vf} } } */
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler {fcvt.s.h} } } */
index 3d526b56e01ab65412bfec5eacd7664bd6f506e0..d687f8f6f62475eeb74385923181ad9a6815fd48 100644 (file)
@@ -19,4 +19,5 @@
 /* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
 /* { dg-final { scan-assembler-not {vfmin.vf} } } */
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler {fcvt.d.s} } } */
index da02065dda825577695d86bfb76b07f9aab05aca..c9253e9867a4b1c854f21b4ac127dd8feef4eeb3 100644 (file)
 #define RUN_VF_BINOP_REVERSE_CASE_0_WRAP(T, NAME, out, in, f, n)               \
   RUN_VF_BINOP_REVERSE_CASE_0 (T, NAME, out, in, f, n)
 
+#define DEF_VF_BINOP_WIDEN_CASE_0(T1, T2, OP, NAME)                            \
+  void test_vf_binop_widen_##NAME##_##T1##_case_0 (T2 *restrict out,           \
+                                                  T1 *restrict in, T1 f,      \
+                                                  unsigned n)                 \
+  {                                                                            \
+    for (unsigned i = 0; i < n; i++)                                           \
+      out[i] = (T2) f OP (T2) in[i]; /* Convert both operands to T2 first.  */ \
+  }
+#define DEF_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, OP, NAME)                       \
+  DEF_VF_BINOP_WIDEN_CASE_0 (T1, T2, OP, NAME)
+#define RUN_VF_BINOP_WIDEN_CASE_0(T1, T2, NAME, out, in, f, n)                 \
+  test_vf_binop_widen_##NAME##_##T1##_case_0 (out, in, f, n)
+#define RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)            \
+  RUN_VF_BINOP_WIDEN_CASE_0 (T1, T2, NAME, out, in, f, n)
+
 #define VF_BINOP_BODY(op)                                                      \
   out[k + 0] = in[k + 0] op tmp;                                               \
   out[k + 1] = in[k + 1] op tmp;                                               \
 #define DEF_VF_BINOP_REVERSE_CASE_1_WRAP(T, OP, NAME, BODY)                    \
   DEF_VF_BINOP_REVERSE_CASE_1 (T, OP, NAME, BODY)
 
-#endif
-
 #define DEF_MIN_0(T)                                                           \
   static inline T test_##T##_min_0 (T a, T b) { return a > b ? b : a; }
 
@@ -224,3 +237,20 @@ DEF_MAX_1 (double)
 #define VF_BINOP_FUNC_BODY_X128(op)                                            \
   VF_BINOP_FUNC_BODY_X64 (op)                                                  \
   VF_BINOP_FUNC_BODY_X64 (op)
+
+#define DEF_VF_BINOP_WIDEN_CASE_1(TYPE1, TYPE2, OP, NAME)                      \
+  void test_vf_binop_widen_##NAME##_##TYPE1##_##TYPE2##_case_1 (               \
+    TYPE2 *__restrict dst, TYPE2 *__restrict dst2, TYPE2 *__restrict dst3,     \
+    TYPE2 *__restrict dst4, TYPE1 *__restrict a, TYPE1 *__restrict b,          \
+    TYPE1 *__restrict a2, TYPE1 *__restrict b2, int n)                         \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      {                                                                        \
+       dst[i] = (TYPE2) * a OP (TYPE2) b[i];                                  \
+       dst2[i] = (TYPE2) * a2 OP (TYPE2) b[i];                                \
+       dst3[i] = (TYPE2) * a2 OP (TYPE2) a[i];                                \
+       dst4[i] = (TYPE2) * a OP (TYPE2) b2[i];                                \
+      }                                                                        \
+  }
+
+#endif
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
new file mode 100644 (file)
index 0000000..929b2de
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef HAVE_DEFINED_VF_BINOP_WIDEN_RUN_H
+#define HAVE_DEFINED_VF_BINOP_WIDEN_RUN_H
+
+#include <assert.h>
+
+#define N 512
+
+int main ()
+{
+  T1 f;
+  T1 in[N];
+  T2 out[N];
+  T2 out2[N];
+
+  f = LIMIT % 8723;
+  for (int i = 0; i < N; i++)
+    {
+      in[i] = (LIMIT + i) & 1964;
+      out[i] = (LIMIT + i) & 628;
+      out2[i] = (LIMIT + i) & 628;
+      asm volatile ("" ::: "memory"); /* Compiler-level memory clobber.  */
+    }
+
+  TEST_RUN (T1, T2, NAME, out, in, f, N);
+
+  for (int i = 0; i < N; i++)
+    assert (out[i] == ((T2) f OP (T2) in[i]));
+
+  return 0;
+}
+
+#endif
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c
new file mode 100644 (file)
index 0000000..1ba84e5
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvfh" } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1    _Float16
+#define T2    float
+#define NAME mul
+#define OP *
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+
+#include "vf_binop_widen_run.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c
new file mode 100644 (file)
index 0000000..52fbeb3
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1    float
+#define T2    double
+#define NAME mul
+#define OP *
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "vf_binop_widen_run.h"