RISC-V: Support vfwmacc combine lowering

author Juzhe-Zhong <juzhe.zhong@rivai.ai>

Wed, 28 Jun 2023 08:39:06 +0000 (16:39 +0800)

committer Pan Li <pan2.li@intel.com>

Wed, 28 Jun 2023 08:47:36 +0000 (16:47 +0800)
author Juzhe-Zhong <juzhe.zhong@rivai.ai>
Wed, 28 Jun 2023 08:39:06 +0000 (16:39 +0800)
committer Pan Li <pan2.li@intel.com>
Wed, 28 Jun 2023 08:47:36 +0000 (16:47 +0800)
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md

index 28040805b238f1cfa01f124b29fa02f311643bd2..80b85fa2de8840b3a30b7551e24fccf018ebd855 100644 (file)
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -405,3 +405,61 @@
    "vmv.x.s\t%0,%1"
    [(set_attr "type" "vimovvx")
     (set_attr "mode" "<MODE>")])
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] VFWMACC
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfwmacc.vv
+;; -------------------------------------------------------------------------
+
+;; Combine ext + ext + fma ===> widen fma.
+;; In most circumstances, the loop vectorizer will generate the following IR:
+;; vect__8.176_40 = (vector([2,2]) double) vect__7.175_41;
+;; vect__11.180_35 = (vector([2,2]) double) vect__10.179_36;
+;; vect__13.182_33 = .FMA (vect__11.180_35, vect__8.176_40, vect__4.172_45);
+(define_insn_and_split "*double_widen_fma<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand")
+       (fma:VWEXTF
+         (float_extend:VWEXTF
+           (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+         (float_extend:VWEXTF
+           (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
+         (match_operand:VWEXTF 1 "register_operand")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_widen_mul (PLUS, <MODE>mode),
+                                             riscv_vector::RVV_WIDEN_TERNOP, operands);
+    DONE;
+  }
+  [(set_attr "type" "vfwmuladd")
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+
+;; This helps to match ext + fma, i.e. an fma with only one extended multiplicand.
+(define_insn_and_split "*single_widen_fma<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand")
+       (fma:VWEXTF
+         (float_extend:VWEXTF
+           (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+         (match_operand:VWEXTF 3 "register_operand")
+         (match_operand:VWEXTF 1 "register_operand")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    insn_code icode = code_for_pred_extend (<MODE>mode);
+    rtx tmp = gen_reg_rtx (<MODE>mode);
+    rtx ext_ops[] = {tmp, operands[2]};
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ext_ops);
+
+    rtx dst = expand_ternary_op (<MODE>mode, fma_optab, tmp, operands[3],
+                                operands[1], operands[0], 0);
+    emit_move_insn (operands[0], dst);
+    DONE;
+  }
+  [(set_attr "type" "vfwmuladd")
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-8.c

index f3ca07c02e0bf6725a5562dee337605d91f1102f..8f41bdfdec2cd3f02bb6997e9768fe51994e3a0a 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-8.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
  
  #include <stdint-gcc.h>
  
@@ -19,9 +19,12 @@
    TEST_TYPE (int32_t, int16_t)                                                 \
    TEST_TYPE (uint32_t, uint16_t)                                               \
    TEST_TYPE (int64_t, int32_t)                                                 \
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               \
+  TEST_TYPE (float, _Float16)                                                  \
+  TEST_TYPE (double, float)
  
  TEST_ALL ()
  
  /* { dg-final { scan-assembler-times {\tvwmacc\.vv} 3 } } */
  /* { dg-final { scan-assembler-times {\tvwmaccu\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfwmacc\.vv} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c

index 187b6db21fde2f5e167f3cc26bf076a082f64919..3ff8483cde47ae3865e2369b91a64a7717bfe705 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
  
  #include <stdint-gcc.h>
  
@@ -24,9 +24,12 @@
    TEST_TYPE (int32_t, int16_t)                                                 \
    TEST_TYPE (uint32_t, uint16_t)                                               \
    TEST_TYPE (int64_t, int32_t)                                                 \
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               \
+  TEST_TYPE (float, _Float16)                                                  \
+  TEST_TYPE (double, float)
  
  TEST_ALL ()
  
  /* { dg-final { scan-assembler-times {\tvwmacc\.vv} 12 } } */
  /* { dg-final { scan-assembler-times {\tvwmaccu\.vv} 12 } } */
+/* { dg-final { scan-assembler-times {\tvfwmacc\.vv} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-8.c

index f4840d30dc22dd232ace51b831914b8cfbeaed62..15095002154c0879f6c2c4e97b4a0cdfad24d53b 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-8.c
@@ -1,5 +1,5 @@
  /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
  
  #include <assert.h>
  #include "widen-8.c"
@@ -29,7 +29,8 @@
    RUN (int32_t, int16_t, -32768)                                               \
    RUN (uint32_t, uint16_t, 65535)                                              \
    RUN (int64_t, int32_t, -2147483648)                                          \
-  RUN (uint64_t, uint32_t, 4294967295)
+  RUN (uint64_t, uint32_t, 4294967295)                                         \
+  RUN (double, float, -2147483648)
  
  int
  main ()
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-8.c

new file mode 100644 (file)

index 0000000..63563b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-8.c
@@ -0,0 +1,32 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
+
+#include <assert.h>
+#include "widen-8.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, LIMIT)                                               \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE2 b##TYPE2[SZ];                                                          \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  TYPE1 dst2##TYPE1[SZ];                                                       \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE2[i] = LIMIT + i % 8723;                                          \
+      b##TYPE2[i] = LIMIT + i & 1964;                                          \
+      dst##TYPE1[i] = LIMIT + i & 628;                                         \
+      dst2##TYPE1[i] = LIMIT + i & 628;                                        \
+    }                                                                          \
+  vwmacc_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ);                 \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i]                                                      \
+           == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]) + dst2##TYPE1[i]);
+
+#define RUN_ALL() RUN (float, _Float16, -32768)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>
	Wed, 28 Jun 2023 08:39:06 +0000 (16:39 +0800)
committer	Pan Li <pan2.li@intel.com>
	Wed, 28 Jun 2023 08:47:36 +0000 (16:47 +0800)
gcc/config/riscv/autovec-opt.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-8.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-8.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-8.c	[new file with mode: 0644]	patch \| blob