]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Support vfwmul.vv combine lowering
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>
Wed, 28 Jun 2023 04:15:12 +0000 (12:15 +0800)
committerLehua Ding <lehua.ding@rivai.ai>
Mon, 3 Jul 2023 09:22:28 +0000 (17:22 +0800)
Consider the following complicated case:
  __attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 (                         \
    TYPE1 *__restrict dst, TYPE1 *__restrict dst2, TYPE1 *__restrict dst3,     \
    TYPE1 *__restrict dst4, TYPE2 *__restrict a, TYPE2 *__restrict b,          \
    TYPE2 *__restrict a2, TYPE2 *__restrict b2, int n)                         \
  {                                                                            \
    for (int i = 0; i < n; i++)                                                \
      {                                                                        \
dst[i] = (TYPE1) a[i] * (TYPE1) b[i];                                  \
dst2[i] = (TYPE1) a2[i] * (TYPE1) b[i];                                \
dst3[i] = (TYPE1) a2[i] * (TYPE1) a[i];                                \
dst4[i] = (TYPE1) a[i] * (TYPE1) b2[i];                                \
      }                                                                        \
  }

TEST_TYPE (double, float)

In such a complicated situation, the combine pass cannot combine the extensions of both operands in one step.
Instead, it first combines one of the extensions into the multiply, and then combines
the other. The combine flow is as follows:

Original IR:
(set (reg 0) (float_extend: (reg 1)))
(set (reg 3) (float_extend: (reg 2)))
(set (reg 4) (mult: (reg 0) (reg 3)))

First step of combine:
(set (reg 3) (float_extend: (reg 2)))
(set (reg 4) (mult: (float_extend: (reg 1)) (reg 3)))

Second step of combine:
(set (reg 4) (mult: (float_extend: (reg 1)) (float_extend: (reg 2))))

So, to enhance the combine optimization, we add a "pseudo vfwmul.wv" RTL pattern in autovec-opt.md
which is (set (reg 0) (mult (float_extend (reg 1)) (reg 2))).

gcc/ChangeLog:

* config/riscv/autovec-opt.md (@pred_single_widen_mul<any_extend:su><mode>): Change "@"
into "*" in pattern name which simplifies build files.
(*pred_single_widen_mul<any_extend:su><mode>): Ditto.
(*pred_single_widen_mul<mode>): New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/widen/widen-3.c: Add floating-point.
* gcc.target/riscv/rvv/autovec/widen/widen-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/widen/widen_run-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/widen/widen_run-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c: New test.
* gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c: New test.

gcc/config/riscv/autovec-opt.md
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c [new file with mode: 0644]

index 80b85fa2de8840b3a30b7551e24fccf018ebd855..a362812bf05a78b96c1e2a43e8f6e53b4efa5bcf 100644 (file)
   [(set_attr "type" "vimovvx")
    (set_attr "mode" "<MODE>")])
 
+;; RVV has no vfwmul.wv instruction (unlike vfwadd.wv).  This pattern is a
+;; pseudo vfwmul.wv: an intermediate RTL form that lets combine fold one
+;; float_extend at a time; the split below expands it to extend + multiply.
+(define_insn_and_split "*pred_single_widen_mul<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand"                  "=&vr,  &vr")
+       (if_then_else:VWEXTF
+         (unspec:<VM>
+           [(match_operand:<VM> 1 "vector_mask_operand"           "vmWc1,vmWc1")
+            (match_operand 5 "vector_length_operand"              "   rK,   rK")
+            (match_operand 6 "const_int_operand"                  "    i,    i")
+            (match_operand 7 "const_int_operand"                  "    i,    i")
+            (match_operand 8 "const_int_operand"                  "    i,    i")
+            (match_operand 9 "const_int_operand"                  "    i,    i")
+            (reg:SI VL_REGNUM)
+            (reg:SI VTYPE_REGNUM)
+            (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
+         (mult:VWEXTF
+           (float_extend:VWEXTF
+             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr,   vr"))
+           (match_operand:VWEXTF 3 "register_operand"             "   vr,   vr"))
+         (match_operand:VWEXTF 2 "vector_merge_operand"           "   vu,    0")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    insn_code icode = code_for_pred_extend (<MODE>mode);
+    rtx tmp = gen_reg_rtx (<MODE>mode); /* New pseudo; hence can_create_pseudo_p () above.  */
+    rtx ops[] = {tmp, operands[4]};
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ops);
+    /* Multiply wide operand 3 by the widened operand 4 (TMP), keeping operands 5-9.  */
+    emit_insn (gen_pred (MULT, <MODE>mode, operands[0], operands[1], operands[2],
+                        operands[3], tmp, operands[5], operands[6],
+                        operands[7], operands[8], operands[9]));
+    DONE;
+  }
+  [(set_attr "type" "vfwmul")
+   (set_attr "mode" "<MODE>")])
+
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] VFWMACC
 ;; -------------------------------------------------------------------------
index 609a5c09f70dea2ca1f223f5b0223129b3eeaa65..b2b144059022f7cfd96981837b49d81488287940 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <stdint-gcc.h>
 
   TEST_TYPE (int32_t, int16_t)                                                 \
   TEST_TYPE (uint32_t, uint16_t)                                               \
   TEST_TYPE (int64_t, int32_t)                                                 \
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               \
+  TEST_TYPE (float, _Float16)                                                  \
+  TEST_TYPE (double, float)
 
 TEST_ALL ()
 
 /* { dg-final { scan-assembler-times {\tvwmul\.vv} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwmulu\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfwmul\.vv} 2 } } */
index cc43d9ba3fe7f5dc69fa9a47aca73f667f80a601..3806e8b98ee689566e05d34c003eed25a9aacdac 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <stdint-gcc.h>
 
   TEST_TYPE (int32_t, int16_t)                                                 \
   TEST_TYPE (uint32_t, uint16_t)                                               \
   TEST_TYPE (int64_t, int32_t)                                                 \
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               \
+  TEST_TYPE (float, _Float16)                                                  \
+  TEST_TYPE (double, float)
 
 TEST_ALL ()
 
 /* { dg-final { scan-assembler-times {\tvsext\.vf2} 3 } } */
 /* { dg-final { scan-assembler-times {\tvzext\.vf2} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfwcvt} 2 } } */
index e1fd79430c3878a0ef0abaf948752755cf3dc468..1515374890d6227c2504c963bd31e44bfab1116d 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <stdint-gcc.h>
 
   TEST_TYPE (int32_t, int16_t)                                                 \
   TEST_TYPE (uint32_t, uint16_t)                                               \
   TEST_TYPE (int64_t, int32_t)                                                 \
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               \
+  TEST_TYPE (float, _Float16)                                                  \
+  TEST_TYPE (double, float)
 
 TEST_ALL ()
 
 /* { dg-final { scan-assembler-times {\tvwmul\.vv} 12 } } */
 /* { dg-final { scan-assembler-times {\tvwmulu\.vv} 12 } } */
+/* { dg-final { scan-assembler-times {\tvfwmul\.vv} 8 } } */
index beb0cc2b58b15b4711e24f010310a97b8dd9d3cc..b7dd60fa8e8606e342034572f19fb0e0dbf0fa05 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <assert.h>
 #include "widen-3.c"
@@ -25,7 +25,8 @@
   RUN (int32_t, int16_t, -32768)                                               \
   RUN (uint32_t, uint16_t, 65535)                                              \
   RUN (int64_t, int32_t, -2147483648)                                          \
-  RUN (uint64_t, uint32_t, 4294967295)
+  RUN (uint64_t, uint32_t, 4294967295)                                         \
+  RUN (double, float, -2147483648)
 
 int
 main ()
index 4abddd5d7183314d1a9ba46c531bc4f7d378b018..ab29f4a0f7097fccb55c51a4db39f852baeec783 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <assert.h>
 #include "widen-7.c"
@@ -25,7 +25,8 @@
   RUN (int32_t, int16_t, -32768)                                               \
   RUN (uint32_t, uint16_t, 65535)                                              \
   RUN (int64_t, int32_t, -2147483648)                                          \
-  RUN (uint64_t, uint32_t, 4294967295)
+  RUN (uint64_t, uint32_t, 4294967295)                                         \
+  RUN (double, float, -2147483648)
 
 int
 main ()
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c
new file mode 100644 (file)
index 0000000..c3efd0b
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
+
+#include <assert.h>
+#include "widen-3.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, LIMIT)                                               \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE2 b##TYPE2[SZ];                                                          \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE2[i] = LIMIT + i % 8723;                                          \
+      b##TYPE2[i] = LIMIT + i & 1964;                                          \
+    }                                                                          \
+  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ);                  \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]));
+
+#define RUN_ALL() RUN (float, _Float16, -32768)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c
new file mode 100644 (file)
index 0000000..60e2401
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
+/* Run test: one operand is already float, the other is a widened _Float16.  */
+#include <assert.h>
+#include "widen-7.c"
+/* Element count of every array used by RUN.  */
+#define SZ 512
+/* A is the narrow (TYPE2) input, B is already wide (TYPE1); see the assert.  */
+#define RUN(TYPE1, TYPE2, LIMIT)                                               \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE1 b##TYPE1[SZ];                                                          \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE2[i] = LIMIT + i % LIMIT;                                         \
+      b##TYPE1[i] = LIMIT + i & LIMIT;                                         \
+    }                                                                          \
+  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE1, SZ);                  \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i] == (((TYPE1) a##TYPE2[i]) * b##TYPE1[i]));
+/* NOTE(review): "LIMIT + i & LIMIT" binds as "(LIMIT + i) & LIMIT".  */
+#define RUN_ALL() RUN (float, _Float16, -32768)
+/* Entry point; a mismatch aborts through assert.  */
+int
+main ()
+{
+  RUN_ALL ()
+}