AVX512FP16: Add scalar fma instructions.
author    liuhongt <hongtao.liu@intel.com>
          Tue, 9 Apr 2019 06:38:33 +0000 (14:38 +0800)
committer liuhongt <hongtao.liu@intel.com>
          Sat, 18 Sep 2021 07:00:12 +0000 (15:00 +0800)
Add vfmadd[132,213,231]sh/vfnmadd[132,213,231]sh/
vfmsub[132,213,231]sh/vfnmsub[132,213,231]sh.

gcc/ChangeLog:

* config/i386/avx512fp16intrin.h (_mm_fmadd_sh):
New intrinsic.
(_mm_mask_fmadd_sh): Likewise.
(_mm_mask3_fmadd_sh): Likewise.
(_mm_maskz_fmadd_sh): Likewise.
(_mm_fmadd_round_sh): Likewise.
(_mm_mask_fmadd_round_sh): Likewise.
(_mm_mask3_fmadd_round_sh): Likewise.
(_mm_maskz_fmadd_round_sh): Likewise.
(_mm_fnmadd_sh): Likewise.
(_mm_mask_fnmadd_sh): Likewise.
(_mm_mask3_fnmadd_sh): Likewise.
(_mm_maskz_fnmadd_sh): Likewise.
(_mm_fnmadd_round_sh): Likewise.
(_mm_mask_fnmadd_round_sh): Likewise.
(_mm_mask3_fnmadd_round_sh): Likewise.
(_mm_maskz_fnmadd_round_sh): Likewise.
(_mm_fmsub_sh): Likewise.
(_mm_mask_fmsub_sh): Likewise.
(_mm_mask3_fmsub_sh): Likewise.
(_mm_maskz_fmsub_sh): Likewise.
(_mm_fmsub_round_sh): Likewise.
(_mm_mask_fmsub_round_sh): Likewise.
(_mm_mask3_fmsub_round_sh): Likewise.
(_mm_maskz_fmsub_round_sh): Likewise.
(_mm_fnmsub_sh): Likewise.
(_mm_mask_fnmsub_sh): Likewise.
(_mm_mask3_fnmsub_sh): Likewise.
(_mm_maskz_fnmsub_sh): Likewise.
(_mm_fnmsub_round_sh): Likewise.
(_mm_mask_fnmsub_round_sh): Likewise.
(_mm_mask3_fnmsub_round_sh): Likewise.
(_mm_maskz_fnmsub_round_sh): Likewise.
* config/i386/i386-builtin-types.def
(V8HF_FTYPE_V8HF_V8HF_V8HF_INT): New builtin type.
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-expand.c (ix86_expand_round_builtin):
Handle new builtin type.
* config/i386/sse.md (fmai_vmfmadd_<mode><round_name>):
Adjust to support FP16.
(fmai_vmfmsub_<mode><round_name>): Ditto.
(fmai_vmfnmadd_<mode><round_name>): Ditto.
(fmai_vmfnmsub_<mode><round_name>): Ditto.
(*fmai_fmadd_<mode>): Ditto.
(*fmai_fmsub_<mode>): Ditto.
(*fmai_fnmadd_<mode><round_name>): Ditto.
(*fmai_fnmsub_<mode><round_name>): Ditto.
(avx512f_vmfmadd_<mode>_mask<round_name>): Ditto.
(avx512f_vmfmadd_<mode>_mask3<round_name>): Ditto.
(avx512f_vmfmadd_<mode>_maskz<round_expand_name>): Ditto.
(avx512f_vmfmadd_<mode>_maskz_1<round_name>): Ditto.
(*avx512f_vmfmsub_<mode>_mask<round_name>): Ditto.
(avx512f_vmfmsub_<mode>_mask3<round_name>): Ditto.
(*avx512f_vmfmsub_<mode>_maskz_1<round_name>): Ditto.
(*avx512f_vmfnmsub_<mode>_mask<round_name>): Ditto.
(*avx512f_vmfnmsub_<mode>_mask3<round_name>): Ditto.
(*avx512f_vmfnmsub_<mode>_maskz_1<round_name>): Ditto.
(*avx512f_vmfnmadd_<mode>_mask<round_name>): Renamed to ...
(avx512f_vmfnmadd_<mode>_mask<round_name>): ... this, and
adjust to support FP16.
(avx512f_vmfnmadd_<mode>_mask3<round_name>): Ditto.
(avx512f_vmfnmadd_<mode>_maskz_1<round_name>): Ditto.
(avx512f_vmfnmadd_<mode>_maskz<round_expand_name>): New
expander.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add test for new builtins.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/sse-14.c: Add test for new intrinsics.
* gcc.target/i386/sse-22.c: Ditto.

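For reference, a minimal usage sketch of the new scalar intrinsics
(illustrative only, not part of the patch; assumes a compiler built
with this support and the -mavx512fp16 option):

#include <immintrin.h>

/* Low element: w[0]*a[0] + b[0]; elements 1..7 are copied from w.  */
__m128h
fma_low (__m128h w, __m128h a, __m128h b)
{
  return _mm_fmadd_sh (w, a, b);
}
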
gcc/config/i386/avx512fp16intrin.h
gcc/config/i386/i386-builtin-types.def
gcc/config/i386/i386-builtin.def
gcc/config/i386/i386-expand.c
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx-1.c
gcc/testsuite/gcc.target/i386/sse-13.c
gcc/testsuite/gcc.target/i386/sse-14.c
gcc/testsuite/gcc.target/i386/sse-22.c
gcc/testsuite/gcc.target/i386/sse-23.c

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 9ccbc463a4752325ef411c2a1f9cd2a28d6b32d9..47146967e40bbfe9975daf7fb2f761ec08dde2dc 100644
@@ -5703,6 +5703,418 @@ _mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
 
 #endif /* __OPTIMIZE__ */
 
+/* Intrinsics vfmadd[132,213,231]sh.  */
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 (__v8hf) __B,
+                                                 (__mmask8) -1,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 (__v8hf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 (__v8hf) __B,
+                                                 (__mmask8) -1,
+                                                 __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+                        const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 (__v8hf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+                         __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fmadd_round_sh(A, B, C, R)                                 \
+  ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
+#define _mm_mask_fmadd_round_sh(A, U, B, C, R)                         \
+  ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
+#define _mm_mask3_fmadd_round_sh(A, B, C, U, R)                                \
+  ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
+#define _mm_maskz_fmadd_round_sh(U, A, B, C, R)                                \
+  ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vfnmadd[132,213,231]sh.  */
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) -1,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 (__v8hf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) -1,
+                                                  __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+                        const int __R)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 (__v8hf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+                         __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fnmadd_round_sh(A, B, C, R)                                        \
+  ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
+#define _mm_mask_fnmadd_round_sh(A, U, B, C, R)                                \
+  ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
+#define _mm_mask3_fnmadd_round_sh(A, B, C, U, R)                       \
+  ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
+#define _mm_maskz_fnmadd_round_sh(U, A, B, C, R)                       \
+  ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vfmsub[132,213,231]sh.  */
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 -(__v8hf) __B,
+                                                 (__mmask8) -1,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 -(__v8hf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  -(__v8hf) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 -(__v8hf) __B,
+                                                 (__mmask8) -1,
+                                                 __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+                        const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 (__v8hf) __A,
+                                                 -(__v8hf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+                         __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+                                                  (__v8hf) __A,
+                                                  -(__v8hf) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fmsub_round_sh(A, B, C, R)                                 \
+  ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
+#define _mm_mask_fmsub_round_sh(A, U, B, C, R)                         \
+  ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
+#define _mm_mask3_fmsub_round_sh(A, B, C, U, R)                                \
+  ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
+#define _mm_maskz_fmsub_round_sh(U, A, B, C, R)                                \
+  ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vfnmsub[132,213,231]sh.  */
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 -(__v8hf) __A,
+                                                 -(__v8hf) __B,
+                                                 (__mmask8) -1,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 -(__v8hf) __A,
+                                                 -(__v8hf) __B,
+                                                 (__mmask8) __U,
+                                                 _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+                                                  -(__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+                                                  -(__v8hf) __A,
+                                                  -(__v8hf) __B,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 -(__v8hf) __A,
+                                                 -(__v8hf) __B,
+                                                 (__mmask8) -1,
+                                                 __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+                        const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+                                                 -(__v8hf) __A,
+                                                 -(__v8hf) __B,
+                                                 (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+                         const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+                                                  -(__v8hf) __A,
+                                                  (__v8hf) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+                         __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+                                                  -(__v8hf) __A,
+                                                  -(__v8hf) __B,
+                                                  (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fnmsub_round_sh(A, B, C, R)                                        \
+  ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
+#define _mm_mask_fnmsub_round_sh(A, U, B, C, R)                                \
+  ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
+#define _mm_mask3_fnmsub_round_sh(A, B, C, U, R)                       \
+  ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
+#define _mm_maskz_fnmsub_round_sh(U, A, B, C, R)                       \
+  ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
+
+#endif /* __OPTIMIZE__ */
+
 #ifdef __DISABLE_AVX512FP16__
 #undef __DISABLE_AVX512FP16__
 #pragma GCC pop_options
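
A hedged sketch of the merge semantics implemented above, following the
usual AVX-512 scalar conventions (only bit 0 of the mask is consulted;
the wrapper names are illustrative):

#include <immintrin.h>

/* Each call computes w*a + b in the low _Float16 element when bit 0
   of u is set; otherwise the low element comes from the merge source:
   w for mask, b for mask3, +0.0 for maskz.  The upper seven elements
   come from w (mask and maskz) or b (mask3).  */
__m128h
fma_mask (__m128h w, __mmask8 u, __m128h a, __m128h b)
{
  return _mm_mask_fmadd_sh (w, u, a, b);
}

__m128h
fma_mask3 (__m128h w, __m128h a, __m128h b, __mmask8 u)
{
  return _mm_mask3_fmadd_sh (w, a, b, u);
}

__m128h
fma_maskz (__mmask8 u, __m128h w, __m128h a, __m128h b)
{
  return _mm_maskz_fmadd_sh (u, w, a, b);
}
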
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 7fd4286ef2653feab7c649b2747c00eb0e01147d..5eae4d0376a7f9aa1d80f91e718ea8bc9e7c7a08 100644
@@ -1342,6 +1342,7 @@ DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT)
 DEF_FUNCTION_TYPE (V8HF, V8HF, INT, V8HF, UQI)
 DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, INT)
 DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI, INT)
 DEF_FUNCTION_TYPE (V8DI, V8HF, V8DI, UQI, INT)
 DEF_FUNCTION_TYPE (V8DF, V8HF, V8DF, UQI, INT)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 56d23b02658c9c83b1cfa2f19018faadad484b46..5950d5e5773e535938d6182d2a3aed756ab7e048 100644
@@ -3194,6 +3194,13 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask_round, "__builtin_ia32_vfmaddsh3_mask", IX86_BUILTIN_VFMADDSH3_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask3_round, "__builtin_ia32_vfmaddsh3_mask3", IX86_BUILTIN_VFMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_maskz_round, "__builtin_ia32_vfmaddsh3_maskz", IX86_BUILTIN_VFMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask_round, "__builtin_ia32_vfnmaddsh3_mask", IX86_BUILTIN_VFNMADDSH3_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask3_round, "__builtin_ia32_vfnmaddsh3_mask3", IX86_BUILTIN_VFNMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_maskz_round, "__builtin_ia32_vfnmaddsh3_maskz", IX86_BUILTIN_VFNMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmsub_v8hf_mask3_round, "__builtin_ia32_vfmsubsh3_mask3", IX86_BUILTIN_VFMSUBSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
 
 BDESC_END (ROUND_ARGS, MULTI_ARG)
 
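Each BDESC entry above binds a builtin name to an insn pattern and a
prototype; a hedged reading of the first entry:

/* name:    __builtin_ia32_vfmaddsh3_mask
   pattern: CODE_FOR_avx512f_vmfmadd_v8hf_mask_round
            (avx512f_vmfmadd_<mode>_mask<round_name> in sse.md,
            instantiated for V8HF via the VFH_128 iterator)
   type:    V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT, i.e.
            __m128h f (__m128h, __m128h, __m128h, __mmask8, int)  */
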
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 3ec032b999407aa3e056eb8e546ebfdcc97afc0a..c88cb14bd72a1dbb93ec40baeefc761c12c25cc9 100644
@@ -10738,6 +10738,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
     case V8HF_FTYPE_V8DI_V8HF_UQI_INT:
     case V8HF_FTYPE_V8DF_V8HF_UQI_INT:
     case V16HF_FTYPE_V16SF_V16HF_UHI_INT:
+    case V8HF_FTYPE_V8HF_V8HF_V8HF_INT:
       nargs = 4;
       break;
     case V4SF_FTYPE_V4SF_V4SF_INT_INT:
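
The new V8HF_FTYPE_V8HF_V8HF_V8HF_INT case takes four arguments, the
last being the rounding immediate.  A hedged example of the explicit
rounding forms (the immediate must be a compile-time constant; the
value 8 equals _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, matching
the tests below):

#include <immintrin.h>

/* Round-to-nearest with exceptions suppressed, low element only.  */
__m128h
fma_low_rne (__m128h w, __m128h a, __m128h b)
{
  return _mm_fmadd_round_sh (w, a, b,
                             _MM_FROUND_TO_NEAREST_INT
                             | _MM_FROUND_NO_EXC);
}
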
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index bcded09d6f9f1383599cc70be1de80adaec6d9a4..0016c027f0c36f1c26685fa4108fac3bad8dcbb6 100644
 ;; high-order elements from the destination register.
 
 (define_expand "fmai_vmfmadd_<mode><round_name>"
-  [(set (match_operand:VF_128 0 "register_operand")
-       (vec_merge:VF_128
-         (fma:VF_128
-           (match_operand:VF_128 1 "register_operand")
-           (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
-           (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
+  [(set (match_operand:VFH_128 0 "register_operand")
+       (vec_merge:VFH_128
+         (fma:VFH_128
+           (match_operand:VFH_128 1 "register_operand")
+           (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>")
+           (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>"))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_FMA")
 
 (define_expand "fmai_vmfmsub_<mode><round_name>"
-  [(set (match_operand:VF_128 0 "register_operand")
-       (vec_merge:VF_128
-         (fma:VF_128
-           (match_operand:VF_128 1 "register_operand")
-           (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
-           (neg:VF_128
-             (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
+  [(set (match_operand:VFH_128 0 "register_operand")
+       (vec_merge:VFH_128
+         (fma:VFH_128
+           (match_operand:VFH_128 1 "register_operand")
+           (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>")
+           (neg:VFH_128
+             (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>")))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_FMA")
 
 (define_expand "fmai_vmfnmadd_<mode><round_name>"
-  [(set (match_operand:VF_128 0 "register_operand")
-       (vec_merge:VF_128
-         (fma:VF_128
-           (neg:VF_128
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
-           (match_operand:VF_128 1 "register_operand")
-           (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
+  [(set (match_operand:VFH_128 0 "register_operand")
+       (vec_merge:VFH_128
+         (fma:VFH_128
+           (neg:VFH_128
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>"))
+           (match_operand:VFH_128 1 "register_operand")
+           (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>"))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_FMA")
 
 (define_expand "fmai_vmfnmsub_<mode><round_name>"
-  [(set (match_operand:VF_128 0 "register_operand")
-       (vec_merge:VF_128
-         (fma:VF_128
-           (neg:VF_128
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
-           (match_operand:VF_128 1 "register_operand")
-           (neg:VF_128
-             (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
+  [(set (match_operand:VFH_128 0 "register_operand")
+       (vec_merge:VFH_128
+         (fma:VFH_128
+           (neg:VFH_128
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>"))
+           (match_operand:VFH_128 1 "register_operand")
+           (neg:VFH_128
+             (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>")))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_FMA")
 
 (define_insn "*fmai_fmadd_<mode>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-        (vec_merge:VF_128
-         (fma:VF_128
-           (match_operand:VF_128 1 "register_operand" "0,0")
-           (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
-           (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+        (vec_merge:VFH_128
+         (fma:VFH_128
+           (match_operand:VFH_128 1 "register_operand" "0,0")
+           (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
+           (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_FMA || TARGET_AVX512F"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*fmai_fmsub_<mode>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-        (vec_merge:VF_128
-         (fma:VF_128
-           (match_operand:VF_128   1 "register_operand" "0,0")
-           (match_operand:VF_128   2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
-           (neg:VF_128
-             (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+        (vec_merge:VFH_128
+         (fma:VFH_128
+           (match_operand:VFH_128   1 "register_operand" "0,0")
+           (match_operand:VFH_128   2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
+           (neg:VFH_128
+             (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_FMA || TARGET_AVX512F"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*fmai_fnmadd_<mode><round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-        (vec_merge:VF_128
-         (fma:VF_128
-           (neg:VF_128
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
-           (match_operand:VF_128   1 "register_operand" "0,0")
-           (match_operand:VF_128   3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+        (vec_merge:VFH_128
+         (fma:VFH_128
+           (neg:VFH_128
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
+           (match_operand:VFH_128   1 "register_operand" "0,0")
+           (match_operand:VFH_128   3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_FMA || TARGET_AVX512F"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*fmai_fnmsub_<mode><round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-        (vec_merge:VF_128
-         (fma:VF_128
-           (neg:VF_128
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
-           (match_operand:VF_128   1 "register_operand" "0,0")
-           (neg:VF_128
-             (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+        (vec_merge:VFH_128
+         (fma:VFH_128
+           (neg:VFH_128
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
+           (match_operand:VFH_128   1 "register_operand" "0,0")
+           (neg:VFH_128
+             (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_FMA || TARGET_AVX512F"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (match_operand:VF_128 1 "register_operand" "0,0")
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
-             (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (match_operand:VFH_128 1 "register_operand" "0,0")
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
+             (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
            (match_dup 1)
            (match_operand:QI 4 "register_operand" "Yk,Yk"))
          (match_dup 1)
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
-             (match_operand:VF_128 3 "register_operand" "0"))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
+             (match_operand:VFH_128 3 "register_operand" "0"))
            (match_dup 3)
            (match_operand:QI 4 "register_operand" "Yk"))
          (match_dup 3)
    (set_attr "mode" "<MODE>")])
 
 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
-  [(match_operand:VF_128 0 "register_operand")
-   (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
-   (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
-   (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
+  [(match_operand:VFH_128 0 "register_operand")
+   (match_operand:VFH_128 1 "<round_expand_nimm_predicate>")
+   (match_operand:VFH_128 2 "<round_expand_nimm_predicate>")
+   (match_operand:VFH_128 3 "<round_expand_nimm_predicate>")
    (match_operand:QI 4 "register_operand")]
   "TARGET_AVX512F"
 {
 })
 
 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (match_operand:VF_128 1 "register_operand" "0,0")
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
-             (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
-           (match_operand:VF_128 4 "const0_operand" "C,C")
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (match_operand:VFH_128 1 "register_operand" "0,0")
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
+             (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
+           (match_operand:VFH_128 4 "const0_operand" "C,C")
            (match_operand:QI 5 "register_operand" "Yk,Yk"))
          (match_dup 1)
          (const_int 1)))]
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (match_operand:VF_128 1 "register_operand" "0,0")
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
-             (neg:VF_128
-               (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (match_operand:VFH_128 1 "register_operand" "0,0")
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
+             (neg:VFH_128
+               (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
            (match_dup 1)
            (match_operand:QI 4 "register_operand" "Yk,Yk"))
          (match_dup 1)
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
-             (neg:VF_128
-               (match_operand:VF_128 3 "register_operand" "0")))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
+             (neg:VFH_128
+               (match_operand:VFH_128 3 "register_operand" "0")))
            (match_dup 3)
            (match_operand:QI 4 "register_operand" "Yk"))
          (match_dup 3)
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (match_operand:VF_128 1 "register_operand" "0,0")
-             (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
-             (neg:VF_128
-               (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
-           (match_operand:VF_128 4 "const0_operand" "C,C")
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (match_operand:VFH_128 1 "register_operand" "0,0")
+             (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
+             (neg:VFH_128
+               (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
+           (match_operand:VFH_128 4 "const0_operand" "C,C")
            (match_operand:QI 5 "register_operand" "Yk,Yk"))
          (match_dup 1)
          (const_int 1)))]
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (neg:VF_128
-               (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
-             (match_operand:VF_128 1 "register_operand" "0,0")
-             (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
+(define_insn "avx512f_vmfnmadd_<mode>_mask<round_name>"
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (neg:VFH_128
+               (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
+             (match_operand:VFH_128 1 "register_operand" "0,0")
+             (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
            (match_dup 1)
            (match_operand:QI 4 "register_operand" "Yk,Yk"))
          (match_dup 1)
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (neg:VF_128
-               (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
-             (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
-             (match_operand:VF_128 3 "register_operand" "0"))
+(define_insn "avx512f_vmfnmadd_<mode>_mask3<round_name>"
+  [(set (match_operand:VFH_128 0 "register_operand" "=v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (neg:VFH_128
+               (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
+             (match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
+             (match_operand:VFH_128 3 "register_operand" "0"))
            (match_dup 3)
            (match_operand:QI 4 "register_operand" "Yk"))
          (match_dup 3)
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (neg:VF_128
-               (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
-             (match_operand:VF_128 1 "register_operand" "0,0")
-             (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
-           (match_operand:VF_128 4 "const0_operand" "C,C")
+(define_expand "avx512f_vmfnmadd_<mode>_maskz<round_expand_name>"
+  [(match_operand:VFH_128 0 "register_operand")
+   (match_operand:VFH_128 1 "<round_expand_nimm_predicate>")
+   (match_operand:VFH_128 2 "<round_expand_nimm_predicate>")
+   (match_operand:VFH_128 3 "<round_expand_nimm_predicate>")
+   (match_operand:QI 4 "register_operand")]
+  "TARGET_AVX512F"
+{
+  emit_insn (gen_avx512f_vmfnmadd_<mode>_maskz_1<round_expand_name> (
+    operands[0], operands[1], operands[2], operands[3],
+    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
+  DONE;
+})
+
+(define_insn "avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (neg:VFH_128
+               (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
+             (match_operand:VFH_128 1 "register_operand" "0,0")
+             (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
+           (match_operand:VFH_128 4 "const0_operand" "C,C")
            (match_operand:QI 5 "register_operand" "Yk,Yk"))
          (match_dup 1)
          (const_int 1)))]
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (neg:VF_128
-               (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
-             (match_operand:VF_128 1 "register_operand" "0,0")
-             (neg:VF_128
-               (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (neg:VFH_128
+               (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
+             (match_operand:VFH_128 1 "register_operand" "0,0")
+             (neg:VFH_128
+               (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
            (match_dup 1)
            (match_operand:QI 4 "register_operand" "Yk,Yk"))
          (match_dup 1)
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (neg:VF_128
-               (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
-             (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
-             (neg:VF_128
-               (match_operand:VF_128 3 "register_operand" "0")))
+  [(set (match_operand:VFH_128 0 "register_operand" "=v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (neg:VFH_128
+               (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
+             (match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
+             (neg:VFH_128
+               (match_operand:VFH_128 3 "register_operand" "0")))
            (match_dup 3)
            (match_operand:QI 4 "register_operand" "Yk"))
          (match_dup 3)
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-       (vec_merge:VF_128
-         (vec_merge:VF_128
-           (fma:VF_128
-             (neg:VF_128
-               (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
-             (match_operand:VF_128 1 "register_operand" "0,0")
-             (neg:VF_128
-               (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
-           (match_operand:VF_128 4 "const0_operand" "C,C")
+  [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
+       (vec_merge:VFH_128
+         (vec_merge:VFH_128
+           (fma:VFH_128
+             (neg:VFH_128
+               (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
+             (match_operand:VFH_128 1 "register_operand" "0,0")
+             (neg:VFH_128
+               (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
+           (match_operand:VFH_128 4 "const0_operand" "C,C")
            (match_operand:QI 5 "register_operand" "Yk,Yk"))
          (match_dup 1)
          (const_int 1)))]
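
Worth noting: the header implements most fmsub/fnmsub variants by
negating operands of the vfmaddsh3 builtins, so the negations are
matched against the dedicated patterns above; only the mask3 forms get
a separate vfmsubsh3_mask3 builtin, because their merge source is the
un-negated addend.  A hedged illustration:

#include <immintrin.h>

/* Low element: w*a - b; upper elements from w.  Expands through
   __builtin_ia32_vfmaddsh3_mask with a negated third operand.  */
__m128h
fms_low (__m128h w, __m128h a, __m128h b)
{
  return _mm_fmsub_sh (w, a, b);
}
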
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 8e2428a2476d01ec0d399f9a4500411b3952feab..a04c678dc3725080a3b5eded9b45e80bd269f42d 100644
 #define __builtin_ia32_vfnmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsh3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsh3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsh3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsh3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddsh3_mask(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddsh3_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddsh3_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsh3_mask(A, B, C, D, E) __builtin_ia32_vfmsubsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsh3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsh3_maskz(A, B, C, D, E) __builtin_ia32_vfmsubsh3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubsh3_mask(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubsh3_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubsh3_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_maskz(A, B, C, D, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 7f4b2a3b28587f38d5322a17ad5cc24587d5818a..e9a838edf70d0123850645fce1f2249c2946d2d5 100644
 #define __builtin_ia32_vfnmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsh3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsh3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsh3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsh3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddsh3_mask(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddsh3_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddsh3_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsh3_mask(A, B, C, D, E) __builtin_ia32_vfmsubsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsh3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsh3_maskz(A, B, C, D, E) __builtin_ia32_vfmsubsh3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubsh3_mask(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubsh3_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubsh3_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_maskz(A, B, C, D, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 9151e50afd2ba720c5388253e3260e1f30a6ec9b..01ac4e041735d9117240edfcd8ac668ae96acfac 100644
@@ -842,6 +842,10 @@ test_3 (_mm512_fmadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9)
 test_3 (_mm512_fnmadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9)
 test_3 (_mm512_fmsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9)
 test_3 (_mm512_fnmsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9)
+test_3 (_mm_fmadd_round_sh, __m128h, __m128h, __m128h, __m128h, 9)
+test_3 (_mm_fnmadd_round_sh, __m128h, __m128h, __m128h, __m128h, 9)
+test_3 (_mm_fmsub_round_sh, __m128h, __m128h, __m128h, __m128h, 9)
+test_3 (_mm_fnmsub_round_sh, __m128h, __m128h, __m128h, __m128h, 9)
 test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
 test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
 test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
@@ -892,6 +896,18 @@ test_4 (_mm512_maskz_fmsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m51
 test_4 (_mm512_mask_fnmsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9)
 test_4 (_mm512_mask3_fnmsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9)
 test_4 (_mm512_maskz_fnmsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9)
+test_4 (_mm_mask_fmadd_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 9)
+test_4 (_mm_mask3_fmadd_round_sh, __m128h, __m128h, __m128h, __m128h, __mmask8, 9)
+test_4 (_mm_maskz_fmadd_round_sh, __m128h, __mmask8, __m128h, __m128h, __m128h, 9)
+test_4 (_mm_mask_fnmadd_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 9)
+test_4 (_mm_mask3_fnmadd_round_sh, __m128h, __m128h, __m128h, __m128h, __mmask8, 9)
+test_4 (_mm_maskz_fnmadd_round_sh, __m128h, __mmask8, __m128h, __m128h, __m128h, 9)
+test_4 (_mm_mask_fmsub_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 9)
+test_4 (_mm_mask3_fmsub_round_sh, __m128h, __m128h, __m128h, __m128h, __mmask8, 9)
+test_4 (_mm_maskz_fmsub_round_sh, __m128h, __mmask8, __m128h, __m128h, __m128h, 9)
+test_4 (_mm_mask_fnmsub_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 9)
+test_4 (_mm_mask3_fnmsub_round_sh, __m128h, __m128h, __m128h, __m128h, __mmask8, 9)
+test_4 (_mm_maskz_fnmsub_round_sh, __m128h, __mmask8, __m128h, __m128h, __m128h, 9)
 test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
 test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
 test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 892b6334ae2dc29e8061b802224a87ab18c8cfa9..79e3f35ab86930bb0f027870c16a4a9ee4ad5505 100644
@@ -945,6 +945,10 @@ test_3 (_mm512_fmadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9)
 test_3 (_mm512_fnmadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9)
 test_3 (_mm512_fmsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9)
 test_3 (_mm512_fnmsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9)
+test_3 (_mm_fmadd_round_sh, __m128h, __m128h, __m128h, __m128h, 9)
+test_3 (_mm_fnmadd_round_sh, __m128h, __m128h, __m128h, __m128h, 9)
+test_3 (_mm_fmsub_round_sh, __m128h, __m128h, __m128h, __m128h, 9)
+test_3 (_mm_fnmsub_round_sh, __m128h, __m128h, __m128h, __m128h, 9)
 test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
 test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
 test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
@@ -994,6 +998,18 @@ test_4 (_mm512_maskz_fmsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m51
 test_4 (_mm512_mask_fnmsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9)
 test_4 (_mm512_mask3_fnmsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9)
 test_4 (_mm512_maskz_fnmsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9)
+test_4 (_mm_mask_fmadd_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 9)
+test_4 (_mm_mask3_fmadd_round_sh, __m128h, __m128h, __m128h, __m128h, __mmask8, 9)
+test_4 (_mm_maskz_fmadd_round_sh, __m128h, __mmask8, __m128h, __m128h, __m128h, 9)
+test_4 (_mm_mask_fnmadd_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 9)
+test_4 (_mm_mask3_fnmadd_round_sh, __m128h, __m128h, __m128h, __m128h, __mmask8, 9)
+test_4 (_mm_maskz_fnmadd_round_sh, __m128h, __mmask8, __m128h, __m128h, __m128h, 9)
+test_4 (_mm_mask_fmsub_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 9)
+test_4 (_mm_mask3_fmsub_round_sh, __m128h, __m128h, __m128h, __m128h, __mmask8, 9)
+test_4 (_mm_maskz_fmsub_round_sh, __m128h, __mmask8, __m128h, __m128h, __m128h, 9)
+test_4 (_mm_mask_fnmsub_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 9)
+test_4 (_mm_mask3_fnmsub_round_sh, __m128h, __m128h, __m128h, __m128h, __mmask8, 9)
+test_4 (_mm_maskz_fnmsub_round_sh, __m128h, __mmask8, __m128h, __m128h, __m128h, 9)
 test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
 test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
 test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 2eb5a649fe43fded6d3cd80754d55eb35746c121..4be2c1e16288648557f613c73320fed2f7a2cfb9 100644
 #define __builtin_ia32_vfnmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsh3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsh3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmaddsh3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsh3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddsh3_mask(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddsh3_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddsh3_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsh3_mask(A, B, C, D, E) __builtin_ia32_vfmsubsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsh3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubsh3_maskz(A, B, C, D, E) __builtin_ia32_vfmsubsh3_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubsh3_mask(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubsh3_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubsh3_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_maskz(A, B, C, D, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
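
Assuming a built GCC tree, the updated tests can be exercised with the
usual target-specific harness invocation (a sketch, not part of the
patch):

make check-gcc RUNTESTFLAGS="i386.exp=sse-14.c"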