Combine the FADD(A, FMA(B, C, 0)) to FMA(B, C, A) and combine FADD(A, FMUL(B, C)) to FMA(B, C, A).

author konglin1 <lingling.kong@intel.com>

Tue, 19 Oct 2021 01:35:30 +0000 (09:35 +0800)

committer liuhongt <hongtao.liu@intel.com>

Mon, 25 Oct 2021 08:01:14 +0000 (16:01 +0800)
author konglin1 <lingling.kong@intel.com>
Tue, 19 Oct 2021 01:35:30 +0000 (09:35 +0800)
committer liuhongt <hongtao.liu@intel.com>
Mon, 25 Oct 2021 08:01:14 +0000 (16:01 +0800)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

index f37c5c0e7062151843c44084e51a3e452d299d9c..431236ab3a43326b7bc542f69017cc08c433ba38 100644 (file)
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5958,6 +5958,58 @@
    [(set_attr "type" "ssemuladd")
     (set_attr "mode" "<MODE>")])
  
+(define_insn_and_split "fma_<mode>_fadd_fmul"  ;; Fold (plus (FMUL op1 op2) op3) into a single complex-FMA unspec.
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")  ;; operand 0: destination
+       (plus:VF_AVX512FP16VL
+         (unspec:VF_AVX512FP16VL
+               [(match_operand:VF_AVX512FP16VL 1 "vector_operand")  ;; operand 1: first multiply input
+                (match_operand:VF_AVX512FP16VL 2 "vector_operand")]  ;; operand 2: second multiply input
+                UNSPEC_COMPLEX_FMUL)  ;; the multiply that gets absorbed into the FMA
+         (match_operand:VF_AVX512FP16VL 3 "vector_operand")))]  ;; operand 3: addend of the outer plus
+  "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
+  && ix86_pre_reload_split ()"  ;; Only under unsafe-math; presumably limited to before reload -- confirm against ix86_pre_reload_split.
+  "#"  ;; No assembly template of its own: the insn is expected to be split.
+  "&& 1"  ;; Split condition adds nothing beyond the insn condition above.
+  [(set (match_dup 0)
+       (unspec:VF_AVX512FP16VL
+         [(match_dup 1) (match_dup 2) (match_dup 3)]  ;; old addend (operand 3) becomes the third FMA input
+          UNSPEC_COMPLEX_FMA))])
+
+(define_insn_and_split "fma_<mode>_fadd_fcmul"  ;; Fold (plus (FCMUL op1 op2) op3) into a single complex-FCMA unspec.
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")  ;; operand 0: destination
+       (plus:VF_AVX512FP16VL
+         (unspec:VF_AVX512FP16VL
+               [(match_operand:VF_AVX512FP16VL 1 "vector_operand")  ;; operand 1: first multiply input
+                (match_operand:VF_AVX512FP16VL 2 "vector_operand")]  ;; operand 2: second multiply input
+                UNSPEC_COMPLEX_FCMUL)  ;; FCMUL counterpart of the FMUL case
+         (match_operand:VF_AVX512FP16VL 3 "vector_operand")))]  ;; operand 3: addend of the outer plus
+  "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
+  && ix86_pre_reload_split ()"  ;; Only under unsafe-math; presumably limited to before reload -- confirm against ix86_pre_reload_split.
+  "#"  ;; No assembly template of its own: the insn is expected to be split.
+  "&& 1"  ;; Split condition adds nothing beyond the insn condition above.
+  [(set (match_dup 0)
+       (unspec:VF_AVX512FP16VL
+         [(match_dup 1) (match_dup 2) (match_dup 3)]  ;; old addend (operand 3) becomes the third FCMA input
+          UNSPEC_COMPLEX_FCMA))])
+
+(define_insn_and_split "fma_<complexopname>_<mode>_fma_zero"  ;; Fold (plus (FMA op1 op2 0) op4) into (FMA op1 op2 op4).
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")  ;; operand 0: destination
+       (plus:VF_AVX512FP16VL
+         (unspec:VF_AVX512FP16VL
+               [(match_operand:VF_AVX512FP16VL 1 "vector_operand")  ;; operand 1: first multiply input
+                (match_operand:VF_AVX512FP16VL 2 "vector_operand")  ;; operand 2: second multiply input
+                (match_operand:VF_AVX512FP16VL 3 "const0_operand")]  ;; operand 3: inner addend, must be constant zero
+                UNSPEC_COMPLEX_F_C_MA)  ;; presumably an iterator over the FMA/FCMA unspecs (pairs with <complexopname>) -- confirm at its definition
+         (match_operand:VF_AVX512FP16VL 4 "vector_operand")))]  ;; operand 4: addend of the outer plus
+  "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
+  && ix86_pre_reload_split ()"  ;; Only under unsafe-math; presumably limited to before reload -- confirm against ix86_pre_reload_split.
+  "#"  ;; No assembly template of its own: the insn is expected to be split.
+  "&& 1"  ;; Split condition adds nothing beyond the insn condition above.
+  [(set (match_dup 0)
+       (unspec:VF_AVX512FP16VL
+         [(match_dup 1) (match_dup 2) (match_dup 4)]  ;; zero operand 3 is dropped; operand 4 takes its place
+          UNSPEC_COMPLEX_F_C_MA))])
+
  (define_insn "<avx512>_<complexopname>_<mode>_mask<round_name>"
    [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
         (vec_merge:VF_AVX512FP16VL
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c b/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c

new file mode 100644 (file)

index 0000000..2dfd369
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vfmaddcph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-not "vaddph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"} } */
+/* { dg-final { scan-assembler-not "vfmulcph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)"} } */
+/* { dg-final { scan-assembler-times "vfcmaddcph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+volatile __m512h x1, x2, res, a, b;
+void extern  /* Compile-only test body: four add-of-complex-multiply forms checked by the dg-final scans. */
+avx512f_test (void)
+{
+  res = _mm512_add_ph (x1, _mm512_fmadd_pch (a, b, _mm512_setzero_ph()));  /* add of fmadd with zero addend; scans expect vfmaddcph and no vaddph */
+  res = _mm512_add_ph (x1, _mm512_fcmadd_pch (a, b, _mm512_setzero_ph()));  /* add of fcmadd with zero addend; scans expect vfcmaddcph and no vaddph */
+
+  res = _mm512_add_ph (x1, _mm512_fmul_pch (a, b));  /* add of fmul; scans expect vfmaddcph and no vfmulcph/vaddph */
+  res = _mm512_add_ph (x1, _mm512_fcmul_pch (a, b));  /* add of fcmul; scans expect vfcmaddcph and no vaddph */
+}
author	konglin1 <lingling.kong@intel.com>
	Tue, 19 Oct 2021 01:35:30 +0000 (09:35 +0800)
committer	liuhongt <hongtao.liu@intel.com>
	Mon, 25 Oct 2021 08:01:14 +0000 (16:01 +0800)
gcc/config/i386/sse.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c	[new file with mode: 0644]	patch \| blob