]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386: Enable AVX512 memory broadcast for FNMSUB
authorH.J. Lu <hongjiu.lu@intel.com>
Sun, 21 Oct 2018 20:28:56 +0000 (20:28 +0000)
committerH.J. Lu <hjl@gcc.gnu.org>
Sun, 21 Oct 2018 20:28:56 +0000 (13:28 -0700)
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for FNMSUB operations.  In order to
support AVX512 memory broadcast for FNMSUB, FNMSUB builtin functions are
also added, instead of passing the negated value to FMA builtin functions.

gcc/

PR target/72782
* config/i386/avx512fintrin.h (_mm512_fnmsub_round_pd): Use
__builtin_ia32_vfnmsubpd512_mask.
(_mm512_mask_fnmsub_round_pd): Likewise.
(_mm512_fnmsub_pd): Likewise.
(_mm512_mask_fnmsub_pd): Likewise.
(_mm512_maskz_fnmsub_round_pd): Use
__builtin_ia32_vfnmsubpd512_maskz.
(_mm512_maskz_fnmsub_pd): Likewise.
(_mm512_fnmsub_round_ps): Use __builtin_ia32_vfnmsubps512_mask.
(_mm512_mask_fnmsub_round_ps): Likewise.
(_mm512_fnmsub_ps): Likewise.
(_mm512_mask_fnmsub_ps): Likewise.
(_mm512_maskz_fnmsub_round_ps): Use
__builtin_ia32_vfnmsubps512_maskz.
(_mm512_maskz_fnmsub_ps): Likewise.
* config/i386/avx512vlintrin.h (_mm256_mask_fnmsub_pd): Use
__builtin_ia32_vfnmsubpd256_mask.
(_mm256_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256_maskz.
(_mm_mask_fnmsub_pd): Use __builtin_ia32_vfmaddpd128_mask
(_mm_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd128_maskz.
(_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask.
(_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask.
(_mm256_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_maskz.
(_mm_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_mask.
(_mm_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_maskz.
* config/i386/fmaintrin.h (_mm_fnmsub_pd): Use
__builtin_ia32_vfnmsubpd.
(_mm256_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256.
(_mm_fnmsub_ps): Use __builtin_ia32_vfnmsubps.
(_mm256_fnmsub_ps): Use __builtin_ia32_vfnmsubps256.
(_mm_fnmsub_sd): Use __builtin_ia32_vfnmsubsd3.
(_mm_fnmsub_ss): Use __builtin_ia32_vfnmsubss3.
* config/i386/i386-builtin.def: Add
__builtin_ia32_vfnmsubpd256_mask,
__builtin_ia32_vfnmsubpd256_maskz,
__builtin_ia32_vfnmsubpd128_mask,
__builtin_ia32_vfnmsubpd128_maskz,
__builtin_ia32_vfnmsubps256_mask,
__builtin_ia32_vfnmsubps256_maskz,
__builtin_ia32_vfnmsubps128_mask,
__builtin_ia32_vfnmsubps128_maskz,
__builtin_ia32_vfnmsubpd512_mask,
__builtin_ia32_vfnmsubpd512_maskz,
__builtin_ia32_vfnmsubps512_mask,
__builtin_ia32_vfnmsubps512_maskz, __builtin_ia32_vfnmsubss3,
__builtin_ia32_vfnmsubsd3, __builtin_ia32_vfnmsubps,
__builtin_ia32_vfnmsubpd, __builtin_ia32_vfnmsubps256 and.
__builtin_ia32_vfnmsubpd256.
* config/i386/sse.md (fma4i_fnmsub_<mode>): New.
(<avx512>_fnmsub_<mode>_maskz<round_expand_name>): Likewise.
(*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1):
Likewise.
(*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2):
Likewise.
(*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3):
Likewise.
(fmai_vmfnmsub_<mode><round_name>): Likewise.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-fnmsub-df-zmm-1.c: New test.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c: Likewise.
* gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c: Likewise.

From-SVN: r265358

18 files changed:
gcc/ChangeLog
gcc/config/i386/avx512fintrin.h
gcc/config/i386/avx512vlintrin.h
gcc/config/i386/fmaintrin.h
gcc/config/i386/i386-builtin.def
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c [new file with mode: 0644]

index 1148b15809eee1432cf3ec315ad8aa04920d4a21..0d11aa8e4ed3551714b02580d70642a685513904 100644 (file)
@@ -1,3 +1,64 @@
+2018-10-21  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/72782
+       * config/i386/avx512fintrin.h (_mm512_fnmsub_round_pd): Use
+       __builtin_ia32_vfnmsubpd512_mask.
+       (_mm512_mask_fnmsub_round_pd): Likewise.
+       (_mm512_fnmsub_pd): Likewise.
+       (_mm512_mask_fnmsub_pd): Likewise.
+       (_mm512_maskz_fnmsub_round_pd): Use
+       __builtin_ia32_vfnmsubpd512_maskz.
+       (_mm512_maskz_fnmsub_pd): Likewise.
+       (_mm512_fnmsub_round_ps): Use __builtin_ia32_vfnmsubps512_mask.
+       (_mm512_mask_fnmsub_round_ps): Likewise.
+       (_mm512_fnmsub_ps): Likewise.
+       (_mm512_mask_fnmsub_ps): Likewise.
+       (_mm512_maskz_fnmsub_round_ps): Use
+       __builtin_ia32_vfnmsubps512_maskz.
+       (_mm512_maskz_fnmsub_ps): Likewise.
+       * config/i386/avx512vlintrin.h (_mm256_mask_fnmsub_pd): Use
+       __builtin_ia32_vfnmsubpd256_mask.
+       (_mm256_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256_maskz.
+       (_mm_mask_fnmsub_pd): Use __builtin_ia32_vfmaddpd128_mask
+       (_mm_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd128_maskz.
+       (_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask.
+       (_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask.
+       (_mm256_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_maskz.
+       (_mm_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_mask.
+       (_mm_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_maskz.
+       * config/i386/fmaintrin.h (_mm_fnmsub_pd): Use
+       __builtin_ia32_vfnmsubpd.
+       (_mm256_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256.
+       (_mm_fnmsub_ps): Use __builtin_ia32_vfnmsubps.
+       (_mm256_fnmsub_ps): Use __builtin_ia32_vfnmsubps256.
+       (_mm_fnmsub_sd): Use __builtin_ia32_vfnmsubsd3.
+       (_mm_fnmsub_ss): Use __builtin_ia32_vfnmsubss3.
+       * config/i386/i386-builtin.def: Add
+       __builtin_ia32_vfnmsubpd256_mask,
+       __builtin_ia32_vfnmsubpd256_maskz,
+       __builtin_ia32_vfnmsubpd128_mask,
+       __builtin_ia32_vfnmsubpd128_maskz,
+       __builtin_ia32_vfnmsubps256_mask,
+       __builtin_ia32_vfnmsubps256_maskz,
+       __builtin_ia32_vfnmsubps128_mask,
+       __builtin_ia32_vfnmsubps128_maskz,
+       __builtin_ia32_vfnmsubpd512_mask,
+       __builtin_ia32_vfnmsubpd512_maskz,
+       __builtin_ia32_vfnmsubps512_mask,
+       __builtin_ia32_vfnmsubps512_maskz, __builtin_ia32_vfnmsubss3,
+       __builtin_ia32_vfnmsubsd3, __builtin_ia32_vfnmsubps,
+       __builtin_ia32_vfnmsubpd, __builtin_ia32_vfnmsubps256 and.
+       __builtin_ia32_vfnmsubpd256.
+       * config/i386/sse.md (fma4i_fnmsub_<mode>): New.
+       (<avx512>_fnmsub_<mode>_maskz<round_expand_name>): Likewise.
+       (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1):
+       Likewise.
+       (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2):
+       Likewise.
+       (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3):
+       Likewise.
+       (fmai_vmfnmsub_<mode><round_name>): Likewise.
+
 2018-10-21  H.J. Lu  <hongjiu.lu@intel.com>
 
        PR target/72782
index 1445e9e8b3c2c2243956e73edbdaa797deab268f..001d610bc814c2f6018cd92771f6f80cba00d4cb 100644 (file)
@@ -3699,10 +3699,10 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
-                                                   (__v8df) __B,
-                                                   -(__v8df) __C,
-                                                   (__mmask8) -1, __R);
+  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) -1, __R);
 }
 
 extern __inline __m512d
@@ -3732,20 +3732,20 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
                              __m512d __C, const int __R)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-                                                    (__v8df) __B,
-                                                    -(__v8df) __C,
-                                                    (__mmask8) __U, __R);
+  return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U, __R);
 }
 
 extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
-                                                  (__v16sf) __B,
-                                                  -(__v16sf) __C,
-                                                  (__mmask16) -1, __R);
+  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) -1, __R);
 }
 
 extern __inline __m512
@@ -3775,10 +3775,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
                              __m512 __C, const int __R)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   (__mmask16) __U, __R);
+  return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U, __R);
 }
 #else
 #define _mm512_fmadd_round_pd(A, B, C, R)            \
@@ -3902,7 +3902,7 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
     (__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R)
 
 #define _mm512_fnmsub_round_pd(A, B, C, R)            \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
+    (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, -1, R)
 
 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R)    \
     (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
@@ -3911,10 +3911,10 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
     (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
 
 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R)   \
-    (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
+    (__m512d)__builtin_ia32_vfnmsubpd512_maskz(A, B, C, U, R)
 
 #define _mm512_fnmsub_round_ps(A, B, C, R)            \
-    (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
+    (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, -1, R)
 
 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R)    \
     (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
@@ -3923,7 +3923,7 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
     (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
 
 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R)   \
-    (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
+    (__m512)__builtin_ia32_vfnmsubps512_maskz(A, B, C, U, R)
 #endif
 
 extern __inline __m512i
@@ -12768,11 +12768,11 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
-                                                   (__v8df) __B,
-                                                   -(__v8df) __C,
-                                                   (__mmask8) -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512d
@@ -12801,22 +12801,22 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-                                                    (__v8df) __B,
-                                                    -(__v8df) __C,
-                                                    (__mmask8) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
-                                                  (__v16sf) __B,
-                                                  -(__v16sf) __C,
-                                                  (__mmask16) -1,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512
@@ -12845,11 +12845,11 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   (__mmask16) __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m256i
index b46c38e1f8197580684d70ec163c5e4874d0658e..7ff78010689c8c2e0de0de98a48614bd363b35aa 100644 (file)
@@ -4665,10 +4665,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
                        __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
-                                                    (__v4df) __B,
-                                                    -(__v4df) __C,
-                                                    (__mmask8) __U);
+  return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
 }
 
 extern __inline __m128d
@@ -4698,10 +4698,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
                     __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
-                                                    (__v2df) __B,
-                                                    -(__v2df) __C,
-                                                    (__mmask8) __U);
+  return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
 }
 
 extern __inline __m256
@@ -4731,10 +4731,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
                        __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
-                                                   (__v8sf) __B,
-                                                   -(__v8sf) __C,
-                                                   (__mmask8) __U);
+  return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
 }
 
 extern __inline __m128
@@ -4761,10 +4761,10 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
-                                                   (__v4sf) __B,
-                                                   -(__v4sf) __C,
-                                                   (__mmask8) __U);
+  return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
 }
 
 extern __inline __m128i
index 0a2f4a7f676f421bf046d853e38f13e710853ec1..4f13e81cf34652ca2a3b97073e19a65562b11a8d 100644 (file)
@@ -182,48 +182,48 @@ extern __inline __m128d
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B,
-                                           -(__v2df)__C);
+  return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B,
+                                           (__v2df)__C);
 }
 
 extern __inline __m256d
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B,
-                                              -(__v4df)__C);
+  return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B,
+                                              (__v4df)__C);
 }
 
 extern __inline __m128
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B,
-                                          -(__v4sf)__C);
+  return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B,
+                                          (__v4sf)__C);
 }
 
 extern __inline __m256
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B,
-                                             -(__v8sf)__C);
+  return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B,
+                                             (__v8sf)__C);
 }
 
 extern __inline __m128d
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B,
-                                            -(__v2df)__C);
+  return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B,
+                                            (__v2df)__C);
 }
 
 extern __inline __m128
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B,
-                                           -(__v4sf)__C);
+  return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B,
+                                           (__v4sf)__C);
 }
 
 extern __inline __m128d
index 74343db1cbebab84451dbe19597e2083f70ec6f9..df0f7e975ac22a0b2ac4ea7d8dc3410e2a1d9b59 100644 (file)
@@ -1931,12 +1931,16 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask3, "__builtin
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_maskz, "__builtin_ia32_vfnmaddps128_maskz", IX86_BUILTIN_VFNMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_maskz, "__builtin_ia32_vfnmsubpd256_maskz", IX86_BUILTIN_VFNMSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_maskz, "__builtin_ia32_vfnmsubpd128_maskz", IX86_BUILTIN_VFNMSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_maskz, "__builtin_ia32_vfnmsubps256_maskz", IX86_BUILTIN_VFNMSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_maskz, "__builtin_ia32_vfnmsubps128_maskz", IX86_BUILTIN_VFNMSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
@@ -2800,8 +2804,10 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__bu
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 
 /* AVX512ER */
 BDESC (OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
@@ -2885,6 +2891,8 @@ BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v4sf, "__builtin_ia32_vfmsubss
 BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v2df, "__builtin_ia32_vfmsubsd3", IX86_BUILTIN_VFMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
 BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v4sf, "__builtin_ia32_vfnmaddss3", IX86_BUILTIN_VFNMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
 BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v2df, "__builtin_ia32_vfnmaddsd3", IX86_BUILTIN_VFNMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
+BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmsub_v4sf, "__builtin_ia32_vfnmsubss3", IX86_BUILTIN_VFNMSUBSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
+BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmsub_v2df, "__builtin_ia32_vfnmsubsd3", IX86_BUILTIN_VFNMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
 
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF)
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF)
@@ -2898,6 +2906,10 @@ BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4sf, "
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v2df, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF)
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v8sf, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4df, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v4sf, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v2df, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v8sf, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v4df, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
 
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
 BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
index 0426da420e86e5534758cefdb53eb3febd393234..28cecbf9a091a79fa97247e7e102f92bcf4600a1 100644 (file)
          (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
          (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
 
+(define_expand "fma4i_fnmsub_<mode>"
+  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
+       (fma:FMAMODE_AVX512
+         (neg:FMAMODE_AVX512
+           (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
+         (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
+         (neg:FMAMODE_AVX512
+           (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
+
 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
   [(match_operand:VF_AVX512VL 0 "register_operand")
    (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
+(define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
+  [(match_operand:VF_AVX512VL 0 "register_operand")
+   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
+   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
+   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
+   (match_operand:<avx512fmaskmode> 4 "register_operand")]
+  "TARGET_AVX512F && <round_mode512bit_condition>"
+{
+  emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
+    operands[0], operands[1], operands[2], operands[3],
+    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
+  DONE;
+})
+
 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
   [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+       (fma:VF_AVX512
+         (neg:VF_AVX512
+           (match_operand:VF_AVX512 1 "register_operand" "0,v"))
+         (match_operand:VF_AVX512 2 "register_operand" "v,0")
+         (neg:VF_AVX512
+           (vec_duplicate:VF_AVX512
+             (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
+  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+  "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+       (fma:VF_AVX512
+         (neg:VF_AVX512
+           (vec_duplicate:VF_AVX512
+             (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
+         (match_operand:VF_AVX512 2 "register_operand" "0,v")
+         (neg:VF_AVX512
+           (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
+  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+  "@
+   vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
+   vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+       (fma:VF_AVX512
+         (neg:VF_AVX512
+           (match_operand:VF_AVX512 1 "register_operand" "0,v"))
+         (vec_duplicate:VF_AVX512
+           (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
+         (neg:VF_AVX512
+           (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
+  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+  "@
+   vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
+   vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
   [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (const_int 1)))]
   "TARGET_FMA")
 
+(define_expand "fmai_vmfnmsub_<mode><round_name>"
+  [(set (match_operand:VF_128 0 "register_operand")
+       (vec_merge:VF_128
+         (fma:VF_128
+           (neg:VF_128
+             (match_operand:VF_128 2 "<round_nimm_predicate>"))
+           (match_operand:VF_128 1 "<round_nimm_predicate>")
+           (neg:VF_128
+             (match_operand:VF_128 3 "<round_nimm_predicate>")))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_FMA")
+
 (define_insn "*fmai_fmadd_<mode>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
         (vec_merge:VF_128
index 649a8384c9290def4a5036dcd2dd910f2beaa499..f594beab31c0ef6514a6440089886cae7c6703b3 100644 (file)
@@ -1,3 +1,18 @@
+2018-10-21  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/72782
+       * gcc.target/i386/avx512f-fnmsub-df-zmm-1.c: New test.
+       * gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c: Likewise.
+       * gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c: Likewise.
+       * gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c: Likewise.
+       * gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c: Likewise.
+       * gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c: Likewise.
+       * gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c: Likewise.
+       * gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c: Likewise.
+       * gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c: Likewise.
+       * gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c: Likewise.
+       * gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c: Likewise.
+
 2018-10-21  H.J. Lu  <hongjiu.lu@intel.com>
 
        PR target/72782
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c
new file mode 100644 (file)
index 0000000..4d4d31e
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512d
+#define vec 512
+#define op fnmsub
+#define suffix pd
+#define SCALAR double
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c
new file mode 100644 (file)
index 0000000..ff9632a
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c
new file mode 100644 (file)
index 0000000..4256350
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-2.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c
new file mode 100644 (file)
index 0000000..6966cea
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-3.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c
new file mode 100644 (file)
index 0000000..0748c12
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-4.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c
new file mode 100644 (file)
index 0000000..d24a80f
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-5.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c
new file mode 100644 (file)
index 0000000..ee36dc9
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-6.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c
new file mode 100644 (file)
index 0000000..7815251
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[^\n\]*%zmm\[0-9\]+" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-7.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c
new file mode 100644 (file)
index 0000000..8bd669e
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+%zmm\[0-9\]+, %zmm\[0-9\]+, %zmm0" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-8.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c
new file mode 100644 (file)
index 0000000..0a63df8
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */
+
+#define type __m128
+#define vec
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c
new file mode 100644 (file)
index 0000000..d8d48d6
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */
+
+#define type __m256
+#define vec 256
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"