]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
AVX10.2: Support convert instructions
author: Levy Hsu <admin@levyhsu.com>
Mon, 26 Aug 2024 02:53:41 +0000 (10:53 +0800)
committer: Haochen Jiang <haochen.jiang@intel.com>
Mon, 26 Aug 2024 03:13:38 +0000 (11:13 +0800)
gcc/ChangeLog:

* config.gcc: Add avx10_2-512convertintrin.h and
avx10_2convertintrin.h.
* config/i386/i386-builtin-types.def: Add new DEF_POINTER_TYPE
and DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_args_builtin):
Handle AVX10.2.
(ix86_expand_round_builtin): Ditto.
* config/i386/immintrin.h: Include avx10_2-512convertintrin.h,
avx10_2convertintrin.h.
* config/i386/sse.md (VHF_AVX10_2): New iterator.
(bf16_ph): Add 512 bit mode.
(avx10_2_cvt2ps2phx_<mode><mask_name><round_name>): New define_insn.
(ssebvecmode): New iterator.
(UNSPEC_NECONVERTFP8_PACK): Ditto.
(neconvertfp8_pack): Ditto.
(vcvt<neconvertfp8_pack><mode><mask_name>): New define_insn.
(ssebvecmode_2): New iterator.
(UNSPEC_VCVTBIASPH2FP8_PACK): Ditto.
(biasph2fp8_pack): Ditto.
(vcvt<biasph2fp8_pack>v8hf): New expander.
(vcvt<biasph2fp8_pack>v8hf_mask): Ditto.
(*vcvt<biasph2fp8_pack>v8hf): New define_insn.
(*vcvt<biasph2fp8_pack>v8hf_mask): Ditto.
(VHF_AVX10_2_2): New iterator.
(vcvt<biasph2fp8_pack><mode><mask_name>): New define_insn.
(VHF_256_512): New iterator.
(ph2fp8suff): Ditto.
(UNSPEC_NECONVERTPH2FP8_PACK): Ditto.
(neconvertph2fp8): Ditto.
(vcvt<neconvertph2fp8>v8hf_mask): New expander.
(*vcvt<neconvertph2fp8>v8hf): New define_insn.
(*vcvt<neconvertph2fp8>v8hf_mask): Ditto.
(vcvt<neconvertph2fp8><mode><mask_name>): Ditto.
(vcvthf82ph<mode><mask_name>): Ditto.
* config/i386/avx10_2-512convertintrin.h: New file.
* config/i386/avx10_2convertintrin.h: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add macros for const.
* gcc.target/i386/avx-2.c: Ditto.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-512-convert-1.c: New test.
* gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbiasph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbiasph2hf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbiasph2hf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvthf82ph-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-convert-1.c: Ditto.
* gcc.target/i386/avx10_2-vcvt2ps2phx-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtbiasph2bf8-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtbiasph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtbiasph2hf8-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtbiasph2hf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvthf82ph-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtneph2bf8-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtneph2hf8-2.c: Ditto.
* gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c: Ditto.
* gcc.target/i386/fp8-helper.h: New helper file.

Co-authored-by: Levy Hsu <admin@levyhsu.com>
Co-authored-by: Kong Lingling <lingling.kong@intel.com>
45 files changed:
gcc/config.gcc
gcc/config/i386/avx10_2-512convertintrin.h [new file with mode: 0644]
gcc/config/i386/avx10_2convertintrin.h [new file with mode: 0644]
gcc/config/i386/i386-builtin-types.def
gcc/config/i386/i386-builtin.def
gcc/config/i386/i386-expand.cc
gcc/config/i386/immintrin.h
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx-1.c
gcc/testsuite/gcc.target/i386/avx-2.c
gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2hf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2hf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvthf82ph-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ps2phx-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2bf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2bf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2hf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2hf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvthf82ph-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/fp8-helper.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse-13.c
gcc/testsuite/gcc.target/i386/sse-14.c
gcc/testsuite/gcc.target/i386/sse-22.c
gcc/testsuite/gcc.target/i386/sse-23.c

index 22353f2d69e6b33b722d79abd5ae563362a83541..5e9c36a2aadf80a0da687f8bda5caba508956ef4 100644 (file)
@@ -453,7 +453,8 @@ i[34567]86-*-* | x86_64-*-*)
                       raointintrin.h amxcomplexintrin.h avxvnniint16intrin.h
                       sm3intrin.h sha512intrin.h sm4intrin.h
                       usermsrintrin.h avx10_2roundingintrin.h
-                      avx10_2mediaintrin.h avx10_2-512mediaintrin.h"
+                      avx10_2mediaintrin.h avx10_2-512mediaintrin.h
+                      avx10_2convertintrin.h avx10_2-512convertintrin.h"
        ;;
 ia64-*-*)
        extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h
new file mode 100644 (file)
index 0000000..4ad339b
--- /dev/null
@@ -0,0 +1,548 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2-512convertintrin.h> directly; include <immintrin.h> instead."
+#endif // _IMMINTRIN_H_INCLUDED
+
+#ifndef __AVX10_2_512CONVERTINTRIN_H_INCLUDED
+#define __AVX10_2_512CONVERTINTRIN_H_INCLUDED
+
+#ifndef __AVX10_2_512__
+#pragma GCC push_options
+#pragma GCC target("avx10.2-512")
+#define __DISABLE_AVX10_2_512__
+#endif /* __AVX10_2_512__ */
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx2ps_ph (__m512 __A, __m512 __B)
+{
+  return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+                                                            (__v16sf) __B,
+                                                            (__v32hf)
+                                                            _mm512_setzero_ph (),
+                                                            (__mmask32) -1,
+                                                            _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
+                         __m512 __B)
+{
+  return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+                                                            (__v16sf) __B,
+                                                            (__v32hf) __W,
+                                                            (__mmask32) __U,
+                                                            _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B)
+{
+  return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+                                                            (__v16sf) __B,
+                                                            (__v32hf)
+                                                            _mm512_setzero_ph (),
+                                                            (__mmask32) __U,
+                                                            _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R)
+{
+  return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+                                                           (__v16sf) __B,
+                                                           (__v32hf)
+                                                           _mm512_setzero_ph (),
+                                                           (__mmask32) -1,
+                                                           __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
+                            __m512 __B, const int __R)
+{
+  return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+                                                           (__v16sf) __B,
+                                                           (__v32hf) __W,
+                                                           (__mmask32) __U,
+                                                           __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A,
+                             __m512 __B, const int __R)
+{
+  return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+                                                           (__v16sf) __B,
+                                                           (__v32hf)
+                                                           _mm512_setzero_ph (),
+                                                           (__mmask32) __U,
+                                                           __R);
+}
+
+#else
+#define _mm512_cvtx_round2ps_ph(A, B, R) \
+  ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+                                                      (__v16sf) (B), \
+                                                      (__v32hf) \
+                                                      (_mm512_setzero_ph ()), \
+                                                      (__mmask32) (-1), \
+                                                      (R)))
+#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \
+  ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+                                                      (__v16sf) (B), \
+                                                      (__v32hf) (W), \
+                                                      (__mmask32) (U), \
+                                                      (R)))
+#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \
+  ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+                                                      (__v16sf) (B), \
+                                                      (__v32hf) \
+                                                      (_mm512_setzero_ph ()), \
+                                                      (__mmask32) (U), \
+                                                      (R)))
+#endif  /* __OPTIMIZE__  */
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiasph_pbf8 (__m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+                                                         (__v32hf) __B,
+                                                         (__v32qi)(__m256i)
+                                                         _mm256_undefined_si256 (),
+                                                         (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiasph_pbf8 (__m256i __W, __mmask32 __U,
+                           __m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+                                                         (__v32hf) __B,
+                                                         (__v32qi)(__m256i) __W,
+                                                         (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiasph_pbf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+                                                         (__v32hf) __B,
+                                                         (__v32qi)(__m256i)
+                                                         _mm256_setzero_si256 (),
+                                                         (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiassph_pbf8 (__m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+                                                          (__v32hf) __B,
+                                                          (__v32qi)(__m256i)
+                                                          _mm256_undefined_si256 (),
+                                                          (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiassph_pbf8 (__m256i __W, __mmask32 __U,
+                            __m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+                                                          (__v32hf) __B,
+                                                          (__v32qi)(__m256i) __W,
+                                                          (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiassph_pbf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+                                                          (__v32hf) __B,
+                                                          (__v32qi)(__m256i)
+                                                          _mm256_setzero_si256 (),
+                                                          (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiasph_phf8 (__m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+                                                         (__v32hf) __B,
+                                                         (__v32qi)(__m256i)
+                                                         _mm256_undefined_si256 (),
+                                                         (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiasph_phf8 (__m256i __W, __mmask32 __U, __m512i __A,
+                               __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+                                                         (__v32hf) __B,
+                                                         (__v32qi)(__m256i) __W,
+                                                         (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiasph_phf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+                                                         (__v32hf) __B,
+                                                         (__v32qi)(__m256i)
+                                                         _mm256_setzero_si256 (),
+                                                         (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiassph_phf8 (__m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+                                                          (__v32hf) __B,
+                                                          (__v32qi)(__m256i)
+                                                          _mm256_undefined_si256 (),
+                                                          (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiassph_phf8 (__m256i __W, __mmask32 __U,
+                            __m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+                                                          (__v32hf) __B,
+                                                          (__v32qi)(__m256i) __W,
+                                                          (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiassph_phf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+  return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+                                                          (__v32hf) __B,
+                                                          (__v32qi)(__m256i)
+                                                          _mm256_setzero_si256 (),
+                                                          (__mmask32) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtne2ph_pbf8 (__m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2bf8512_mask ((__v32hf) __A,
+                                                        (__v32hf) __B,
+                                                        (__v64qi)
+                                                        _mm512_setzero_si512 (),
+                                                        (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtne2ph_pbf8 (__m512i __W, __mmask64 __U, 
+                          __m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2bf8512_mask ((__v32hf) __A,
+                                                        (__v32hf) __B,
+                                                        (__v64qi) __W,
+                                                        (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtne2ph_pbf8 (__mmask64 __U,  __m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2bf8512_mask ((__v32hf) __A,
+                                                        (__v32hf) __B,
+                                                        (__v64qi)
+                                                        _mm512_setzero_si512 (),
+                                                        (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtnes2ph_pbf8 (__m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2bf8s512_mask ((__v32hf) __A,
+                                                         (__v32hf) __B,
+                                                         (__v64qi)
+                                                         _mm512_setzero_si512 (),
+                                                         (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtnes2ph_pbf8 (__m512i __W, __mmask64 __U,
+                           __m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2bf8s512_mask ((__v32hf) __A,
+                                                         (__v32hf) __B,
+                                                         (__v64qi) __W,
+                                                         (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtnes2ph_pbf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2bf8s512_mask ((__v32hf) __A,
+                                                         (__v32hf) __B,
+                                                         (__v64qi)
+                                                         _mm512_setzero_si512 (),
+                                                         (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtne2ph_phf8 (__m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2hf8512_mask ((__v32hf) __A,
+                                                        (__v32hf) __B,
+                                                        (__v64qi)
+                                                        _mm512_setzero_si512 (),
+                                                        (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtne2ph_phf8 (__m512i __W, __mmask64 __U,
+                          __m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2hf8512_mask ((__v32hf) __A,
+                                                        (__v32hf) __B,
+                                                        (__v64qi) __W,
+                                                        (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtne2ph_phf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2hf8512_mask ((__v32hf) __A,
+                                                        (__v32hf) __B,
+                                                        (__v64qi)
+                                                        _mm512_setzero_si512 (),
+                                                        (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtnes2ph_phf8 (__m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2hf8s512_mask ((__v32hf) __A,
+                                                         (__v32hf) __B,
+                                                         (__v64qi)
+                                                         _mm512_setzero_si512 (),
+                                                         (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtnes2ph_phf8 (__m512i __W, __mmask64 __U, 
+                           __m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2hf8s512_mask ((__v32hf) __A,
+                                                         (__v32hf) __B,
+                                                         (__v64qi) __W,
+                                                         (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtnes2ph_phf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+  return (__m512i) __builtin_ia32_vcvtne2ph2hf8s512_mask ((__v32hf) __A,
+                                                         (__v32hf) __B,
+                                                         (__v64qi)
+                                                         _mm512_setzero_si512 (),
+                                                         (__mmask64) __U);
+}
+
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvthf8_ph (__m256i __A)
+{
+  return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+                                                     (__v32hf) (__m512h)
+                                                     _mm512_undefined_ph (),
+                                                     (__mmask32) -1);
+}
+
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A)
+{
+  return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+                                                     (__v32hf) (__m512h) __W,
+                                                     (__mmask32) __U);
+}
+
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A)
+{
+  return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+                                                     (__v32hf) (__m512h)
+                                                     _mm512_setzero_ph (),
+                                                     (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtneph_pbf8 (__m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2bf8512_mask ((__v32hf) __A,
+                                                       (__v32qi) (__m256i)
+                                                       _mm256_undefined_si256 (),
+                                                       (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtneph_pbf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2bf8512_mask ((__v32hf) __A,
+                                                       (__v32qi) (__m256i) __W,
+                                                       (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtneph_pbf8 (__mmask32 __U, __m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2bf8512_mask ((__v32hf) __A,
+                                                       (__v32qi) (__m256i)
+                                                       _mm256_setzero_si256 (),
+                                                       (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtnesph_pbf8 (__m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2bf8s512_mask ((__v32hf) __A,
+                                                        (__v32qi) (__m256i)
+                                                        _mm256_undefined_si256 (),
+                                                        (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtnesph_pbf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2bf8s512_mask ((__v32hf) __A,
+                                                        (__v32qi) (__m256i) __W,
+                                                        (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtnesph_pbf8 (__mmask32 __U, __m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2bf8s512_mask ((__v32hf) __A,
+                                                        (__v32qi) (__m256i)
+                                                        _mm256_setzero_si256 (),
+                                                        (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtneph_phf8 (__m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2hf8512_mask ((__v32hf) __A,
+                                                       (__v32qi) (__m256i)
+                                                       _mm256_undefined_si256 (),
+                                                       (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtneph_phf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2hf8512_mask ((__v32hf) __A,
+                                                       (__v32qi)(__m256i) __W,
+                                                       (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtneph_phf8 (__mmask32 __U, __m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2hf8512_mask ((__v32hf) __A,
+                                                       (__v32qi) (__m256i)
+                                                       _mm256_setzero_si256 (),
+                                                       (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtnesph_phf8 (__m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2hf8s512_mask ((__v32hf) __A,
+                                                        (__v32qi) (__m256i)
+                                                        _mm256_undefined_si256 (),
+                                                        (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtnesph_phf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2hf8s512_mask ((__v32hf) __A,
+                                                        (__v32qi) (__m256i) __W,
+                                                        (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtnesph_phf8 (__mmask32 __U, __m512h __A)
+{
+  return (__m256i) __builtin_ia32_vcvtneph2hf8s512_mask ((__v32hf) __A,
+                                                        (__v32qi) (__m256i)
+                                                        _mm256_setzero_si256 (),
+                                                        (__mmask32) __U);
+}
+
+#ifdef __DISABLE_AVX10_2_512__
+#undef __DISABLE_AVX10_2_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX10_2_512__ */
+
+#endif /* __AVX10_2_512CONVERTINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h
new file mode 100644 (file)
index 0000000..ac62d12
--- /dev/null
@@ -0,0 +1,978 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2CONVERTINTRIN_H_INCLUDED
+#define _AVX10_2CONVERTINTRIN_H_INCLUDED
+
+#if !defined(__AVX10_2_256__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2")
+#define __DISABLE_AVX10_2_256__
+#endif /* __AVX10_2_256__ */
+
+/* Unmasked form: calls the builtin on __A and __B with a zeroed third operand
+   and an all-ones mask.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtx2ps_ph (__m128 __A, __m128 __B)
+{
+  __v4sf __op1 = (__v4sf) __A;
+  __v4sf __op2 = (__v4sf) __B;
+  __v8hf __op3 = (__v8hf) _mm_setzero_ph ();
+  __mmask8 __k = (__mmask8) -1;
+
+  return (__m128h)
+    __builtin_ia32_vcvt2ps2phx128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtx2ps_ph (__m128h __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+  __v4sf __op1 = (__v4sf) __A;
+  __v4sf __op2 = (__v4sf) __B;
+  __v8hf __op3 = (__v8hf) __W;
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128h)
+    __builtin_ia32_vcvt2ps2phx128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtx2ps_ph (__mmask8 __U, __m128 __A, __m128 __B)
+{
+  __v4sf __op1 = (__v4sf) __A;
+  __v4sf __op2 = (__v4sf) __B;
+  __v8hf __op3 = (__v8hf) _mm_setzero_ph ();
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128h)
+    __builtin_ia32_vcvt2ps2phx128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: zeroed third operand, all-ones mask, and
+   _MM_FROUND_CUR_DIRECTION as the rounding argument.  The rounding argument
+   is written directly in the call rather than held in a local.  */
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtx2ps_ph (__m256 __A, __m256 __B)
+{
+  __v8sf __op1 = (__v8sf) __A;
+  __v8sf __op2 = (__v8sf) __B;
+  __v16hf __op3 = (__v16hf) _mm256_setzero_ph ();
+  __mmask16 __k = (__mmask16) -1;
+
+  return (__m256h)
+    __builtin_ia32_vcvt2ps2phx256_mask_round (__op1, __op2, __op3, __k,
+                                              _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth operands;
+   the rounding argument is _MM_FROUND_CUR_DIRECTION.  */
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtx2ps_ph (__m256h __W, __mmask16 __U, __m256 __A, __m256 __B)
+{
+  __v8sf __op1 = (__v8sf) __A;
+  __v8sf __op2 = (__v8sf) __B;
+  __v16hf __op3 = (__v16hf) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m256h)
+    __builtin_ia32_vcvt2ps2phx256_mask_round (__op1, __op2, __op3, __k,
+                                              _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands; the rounding argument is _MM_FROUND_CUR_DIRECTION.  */
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtx2ps_ph (__mmask16 __U, __m256 __A, __m256 __B)
+{
+  __v8sf __op1 = (__v8sf) __A;
+  __v8sf __op2 = (__v8sf) __B;
+  __v16hf __op3 = (__v16hf) _mm256_setzero_ph ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m256h)
+    __builtin_ia32_vcvt2ps2phx256_mask_round (__op1, __op2, __op3, __k,
+                                              _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+/* Unmasked form with caller-supplied rounding argument __R, which is handed
+   to the builtin unchanged.  */
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtx_round2ps_ph (__m256 __A, __m256 __B, const int __R)
+{
+  __v8sf __op1 = (__v8sf) __A;
+  __v8sf __op2 = (__v8sf) __B;
+  __v16hf __op3 = (__v16hf) _mm256_setzero_ph ();
+  __mmask16 __k = (__mmask16) -1;
+
+  return (__m256h)
+    __builtin_ia32_vcvt2ps2phx256_mask_round (__op1, __op2, __op3, __k, __R);
+}
+
+/* Masked form with caller-supplied rounding argument __R: __W and __U become
+   the builtin's third and fourth operands.  */
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtx_round2ps_ph (__m256h __W, __mmask16 __U, __m256 __A,
+                              __m256 __B, const int __R)
+{
+  __v8sf __op1 = (__v8sf) __A;
+  __v8sf __op2 = (__v8sf) __B;
+  __v16hf __op3 = (__v16hf) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m256h)
+    __builtin_ia32_vcvt2ps2phx256_mask_round (__op1, __op2, __op3, __k, __R);
+}
+
+/* Zero-masked form with caller-supplied rounding argument __R: a zeroed
+   vector and __U become the builtin's third and fourth operands.  */
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtx_round2ps_ph (__mmask16 __U, __m256 __A,
+                               __m256 __B, const int __R)
+{
+  __v8sf __op1 = (__v8sf) __A;
+  __v8sf __op2 = (__v8sf) __B;
+  __v16hf __op3 = (__v16hf) _mm256_setzero_ph ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m256h)
+    __builtin_ia32_vcvt2ps2phx256_mask_round (__op1, __op2, __op3, __k, __R);
+}
+
+#else
+/* Macro form, selected when __OPTIMIZE__ is not defined; expands directly to
+   the builtin call with a zeroed third operand and an all-ones mask.  */
+#define _mm256_cvtx_round2ps_ph(A, B, R) \
+  ((__m256h) \
+   __builtin_ia32_vcvt2ps2phx256_mask_round \
+     ((__v8sf) (A), (__v8sf) (B), (__v16hf) (_mm256_setzero_ph ()), \
+      (__mmask16) (-1), (R)))
+
+/* Macro form, selected when __OPTIMIZE__ is not defined; W and U become the
+   builtin's third and fourth operands.  */
+#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R) \
+  ((__m256h) \
+   __builtin_ia32_vcvt2ps2phx256_mask_round \
+     ((__v8sf) (A), (__v8sf) (B), (__v16hf) (W), (__mmask16) (U), (R)))
+
+/* Macro form, selected when __OPTIMIZE__ is not defined; a zeroed vector and
+   U become the builtin's third and fourth operands.  */
+#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R) \
+  ((__m256h) \
+   __builtin_ia32_vcvt2ps2phx256_mask_round \
+     ((__v8sf) (A), (__v8sf) (B), (__v16hf) (_mm256_setzero_ph ()), \
+      (__mmask16) (U), (R)))
+#endif  /* __OPTIMIZE__  */
+
+/* Thin wrapper: casts __A and __B and hands them to
+   __builtin_ia32_vcvtbiasph2bf8128.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtbiasph_pbf8 (__m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+
+  return (__m128i) __builtin_ia32_vcvtbiasph2bf8128 (__op1, __op2);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtbiasph_pbf8 (__m128i __W, __mmask8 __U, __m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2bf8128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtbiasph_pbf8 (__mmask8 __U, __m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2bf8128_mask (__op1, __op2, __op3, __k);
+}
+
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtbiasph_pbf8 (__m256i __A, __m256h __B)
+{
+  return (__m128i) __builtin_ia32_vcvtbiasph2bf8256_mask ((__v32qi) __A,
+                                                         (__v16hf) __B,
+                                                         (__v16qi)(__m128i)
+                                                         _mm_undefined_si128 (),
+                                                         (__mmask16) -1);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtbiasph_pbf8 (__m128i __W, __mmask16 __U,
+                            __m256i __A, __m256h __B)
+{
+  __v32qi __op1 = (__v32qi) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2bf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtbiasph_pbf8 (__mmask16 __U, __m256i __A, __m256h __B)
+{
+  __v32qi __op1 = (__v32qi) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2bf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Thin wrapper: casts __A and __B and hands them to
+   __builtin_ia32_vcvtbiasph2bf8s128.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtbiassph_pbf8 (__m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+
+  return (__m128i) __builtin_ia32_vcvtbiasph2bf8s128 (__op1, __op2);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtbiassph_pbf8 (__m128i __W, __mmask8 __U, __m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2bf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtbiassph_pbf8 (__mmask8 __U, __m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2bf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtbiassph_pbf8 (__m256i __A, __m256h __B)
+{
+  return (__m128i) __builtin_ia32_vcvtbiasph2bf8s256_mask ((__v32qi) __A,
+                                                          (__v16hf) __B,
+                                                          (__v16qi)(__m128i)
+                                                          _mm_undefined_si128 (),
+                                                          (__mmask16) -1);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtbiassph_pbf8 (__m128i __W, __mmask16 __U,
+                             __m256i __A, __m256h __B)
+{
+  __v32qi __op1 = (__v32qi) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2bf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtbiassph_pbf8 (__mmask16 __U, __m256i __A, __m256h __B)
+{
+  __v32qi __op1 = (__v32qi) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2bf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Thin wrapper: casts __A and __B and hands them to
+   __builtin_ia32_vcvtbiasph2hf8128.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtbiasph_phf8 (__m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+
+  return (__m128i) __builtin_ia32_vcvtbiasph2hf8128 (__op1, __op2);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtbiasph_phf8 (__m128i __W, __mmask8 __U, __m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2hf8128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtbiasph_phf8 (__mmask8 __U, __m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2hf8128_mask (__op1, __op2, __op3, __k);
+}
+
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtbiasph_phf8 (__m256i __A, __m256h __B)
+{
+  return (__m128i) __builtin_ia32_vcvtbiasph2hf8256_mask ((__v32qi) __A,
+                                                         (__v16hf) __B,
+                                                         (__v16qi)(__m128i)
+                                                         _mm_undefined_si128 (),
+                                                         (__mmask16) -1);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtbiasph_phf8 (__m128i __W, __mmask16 __U,
+                            __m256i __A, __m256h __B)
+{
+  __v32qi __op1 = (__v32qi) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2hf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtbiasph_phf8 (__mmask16 __U, __m256i __A, __m256h __B)
+{
+  __v32qi __op1 = (__v32qi) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2hf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Thin wrapper: casts __A and __B and hands them to
+   __builtin_ia32_vcvtbiasph2hf8s128.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtbiassph_phf8 (__m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+
+  return (__m128i) __builtin_ia32_vcvtbiasph2hf8s128 (__op1, __op2);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtbiassph_phf8 (__m128i __W, __mmask8 __U, __m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2hf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtbiassph_phf8 (__mmask8 __U, __m128i __A, __m128h __B)
+{
+  __v16qi __op1 = (__v16qi) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask8 __k = (__mmask8) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2hf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtbiassph_phf8 (__m256i __A, __m256h __B)
+{
+  return (__m128i) __builtin_ia32_vcvtbiasph2hf8s256_mask ((__v32qi) __A,
+                                                          (__v16hf) __B,
+                                                          (__v16qi)(__m128i)
+                                                          _mm_undefined_si128 (),
+                                                          (__mmask16) -1);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtbiassph_phf8 (__m128i __W, __mmask16 __U,
+                             __m256i __A, __m256h __B)
+{
+  __v32qi __op1 = (__v32qi) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2hf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtbiassph_phf8 (__mmask16 __U, __m256i __A, __m256h __B)
+{
+  __v32qi __op1 = (__v32qi) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtbiasph2hf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: calls the mask builtin with a zeroed third operand and an
+   all-ones mask.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtne2ph_pbf8 (__m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) -1;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2bf8128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtne2ph_pbf8 (__m128i __W, __mmask16 __U, __m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2bf8128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtne2ph_pbf8 (__mmask16 __U, __m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2bf8128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: calls the mask builtin with a zeroed third operand and an
+   all-ones mask.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtne2ph_pbf8 (__m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) _mm256_setzero_si256 ();
+  __mmask32 __k = (__mmask32) -1;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2bf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtne2ph_pbf8 (__m256i __W, __mmask32 __U,
+                           __m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) __W;
+  __mmask32 __k = (__mmask32) __U;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2bf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtne2ph_pbf8 (__mmask32 __U, __m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) _mm256_setzero_si256 ();
+  __mmask32 __k = (__mmask32) __U;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2bf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: calls the mask builtin with a zeroed third operand and an
+   all-ones mask.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtnes2ph_pbf8 (__m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) -1;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2bf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtnes2ph_pbf8 (__m128i __W, __mmask16 __U, __m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2bf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtnes2ph_pbf8 (__mmask16 __U, __m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2bf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: calls the mask builtin with a zeroed third operand and an
+   all-ones mask.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtnes2ph_pbf8 (__m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) _mm256_setzero_si256 ();
+  __mmask32 __k = (__mmask32) -1;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2bf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtnes2ph_pbf8 (__m256i __W, __mmask32 __U,
+                            __m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) __W;
+  __mmask32 __k = (__mmask32) __U;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2bf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtnes2ph_pbf8 (__mmask32 __U, __m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) _mm256_setzero_si256 ();
+  __mmask32 __k = (__mmask32) __U;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2bf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: calls the mask builtin with a zeroed third operand and an
+   all-ones mask.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtne2ph_phf8 (__m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) -1;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2hf8128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtne2ph_phf8 (__m128i __W, __mmask16 __U, __m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2hf8128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtne2ph_phf8 (__mmask16 __U, __m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2hf8128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: calls the mask builtin with a zeroed third operand and an
+   all-ones mask.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtne2ph_phf8 (__m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) _mm256_setzero_si256 ();
+  __mmask32 __k = (__mmask32) -1;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2hf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtne2ph_phf8 (__m256i __W, __mmask32 __U,
+                           __m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) __W;
+  __mmask32 __k = (__mmask32) __U;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2hf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtne2ph_phf8 (__mmask32 __U, __m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) _mm256_setzero_si256 ();
+  __mmask32 __k = (__mmask32) __U;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2hf8256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: calls the mask builtin with a zeroed third operand and an
+   all-ones mask.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtnes2ph_phf8 (__m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) -1;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2hf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtnes2ph_phf8 (__m128i __W, __mmask16 __U, __m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) __W;
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2hf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtnes2ph_phf8 (__mmask16 __U, __m128h __A, __m128h __B)
+{
+  __v8hf __op1 = (__v8hf) __A;
+  __v8hf __op2 = (__v8hf) __B;
+  __v16qi __op3 = (__v16qi) _mm_setzero_si128 ();
+  __mmask16 __k = (__mmask16) __U;
+
+  return (__m128i)
+    __builtin_ia32_vcvtne2ph2hf8s128_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: calls the mask builtin with a zeroed third operand and an
+   all-ones mask.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtnes2ph_phf8 (__m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) _mm256_setzero_si256 ();
+  __mmask32 __k = (__mmask32) -1;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2hf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Masked form: __W and __U become the builtin's third and fourth
+   operands.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtnes2ph_phf8 (__m256i __W, __mmask32 __U,
+                            __m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) __W;
+  __mmask32 __k = (__mmask32) __U;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2hf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Zero-masked form: a zeroed vector and __U become the builtin's third and
+   fourth operands.  */
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtnes2ph_phf8 (__mmask32 __U, __m256h __A, __m256h __B)
+{
+  __v16hf __op1 = (__v16hf) __A;
+  __v16hf __op2 = (__v16hf) __B;
+  __v32qi __op3 = (__v32qi) _mm256_setzero_si256 ();
+  __mmask32 __k = (__mmask32) __U;
+
+  return (__m256i)
+    __builtin_ia32_vcvtne2ph2hf8s256_mask (__op1, __op2, __op3, __k);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvthf82ph128_mask on __A with an
+   all-ones mask and an undefined pass-through operand.  */
+extern __inline__ __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvthf8_ph (__m128i __A)
+{
+  const __v16qi __src = (__v16qi) __A;
+  const __v8hf __undef = (__v8hf) (__m128h) _mm_undefined_ph ();
+
+  return (__m128h)
+    __builtin_ia32_vcvthf82ph128_mask (__src, __undef, (__mmask8) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvthf82ph128_mask on __A with
+   write mask __U and __W as the pass-through operand.  */
+extern __inline__ __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvthf8_ph (__m128h __W, __mmask8 __U, __m128i __A)
+{
+  const __v16qi __src = (__v16qi) __A;
+  const __v8hf __passthru = (__v8hf) (__m128h) __W;
+
+  return (__m128h)
+    __builtin_ia32_vcvthf82ph128_mask (__src, __passthru, (__mmask8) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvthf82ph128_mask on __A with
+   write mask __U and an all-zero pass-through operand.  */
+extern __inline__ __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvthf8_ph (__mmask8 __U, __m128i __A)
+{
+  const __v16qi __src = (__v16qi) __A;
+  const __v8hf __zero = (__v8hf) (__m128h) _mm_setzero_ph ();
+
+  return (__m128h)
+    __builtin_ia32_vcvthf82ph128_mask (__src, __zero, (__mmask8) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvthf82ph256_mask on the 128-bit
+   input __A with an all-ones mask and an undefined pass-through operand.  */
+extern __inline__ __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvthf8_ph (__m128i __A)
+{
+  const __v16qi __src = (__v16qi) __A;
+  const __v16hf __undef = (__v16hf) (__m256h) _mm256_undefined_ph ();
+
+  return (__m256h)
+    __builtin_ia32_vcvthf82ph256_mask (__src, __undef, (__mmask16) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvthf82ph256_mask on the 128-bit
+   input __A with write mask __U and __W as the pass-through operand.  */
+extern __inline__ __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvthf8_ph (__m256h __W, __mmask16 __U, __m128i __A)
+{
+  const __v16qi __src = (__v16qi) __A;
+  const __v16hf __passthru = (__v16hf) (__m256h) __W;
+
+  return (__m256h)
+    __builtin_ia32_vcvthf82ph256_mask (__src, __passthru, (__mmask16) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvthf82ph256_mask on the 128-bit
+   input __A with write mask __U and an all-zero pass-through operand.  */
+extern __inline__ __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvthf8_ph (__mmask16 __U, __m128i __A)
+{
+  const __v16qi __src = (__v16qi) __A;
+  const __v16hf __zero = (__v16hf) (__m256h) _mm256_setzero_ph ();
+
+  return (__m256h)
+    __builtin_ia32_vcvthf82ph256_mask (__src, __zero, (__mmask16) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvtneph2bf8128_mask on __A with an
+   all-ones mask and an undefined pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtneph_pbf8 (__m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __undef = (__v16qi) (__m128i) _mm_undefined_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8128_mask (__src, __undef, (__mmask8) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvtneph2bf8128_mask on __A with
+   write mask __U and __W as the pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtneph_pbf8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __passthru = (__v16qi) (__m128i) __W;
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8128_mask (__src, __passthru, (__mmask8) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvtneph2bf8128_mask on __A with
+   write mask __U and an all-zero pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtneph_pbf8 (__mmask8 __U, __m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __zero = (__v16qi) (__m128i) _mm_setzero_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8128_mask (__src, __zero, (__mmask8) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvtneph2bf8256_mask on the 256-bit
+   input __A with an all-ones mask and an undefined pass-through operand.
+   The result is a 128-bit vector.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtneph_pbf8 (__m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __undef = (__v16qi) (__m128i) _mm_undefined_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8256_mask (__src, __undef, (__mmask16) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvtneph2bf8256_mask on the
+   256-bit input __A with write mask __U and the 128-bit __W as the
+   pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtneph_pbf8 (__m128i __W, __mmask16 __U, __m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __passthru = (__v16qi) (__m128i) __W;
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8256_mask (__src, __passthru, (__mmask16) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvtneph2bf8256_mask on the
+   256-bit input __A with write mask __U and an all-zero 128-bit
+   pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtneph_pbf8 (__mmask16 __U, __m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __zero = (__v16qi) (__m128i) _mm_setzero_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8256_mask (__src, __zero, (__mmask16) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvtneph2bf8s128_mask on __A with an
+   all-ones mask and an undefined pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtnesph_pbf8 (__m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __undef = (__v16qi) (__m128i) _mm_undefined_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8s128_mask (__src, __undef, (__mmask8) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvtneph2bf8s128_mask on __A with
+   write mask __U and __W as the pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtnesph_pbf8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __passthru = (__v16qi) (__m128i) __W;
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8s128_mask (__src, __passthru, (__mmask8) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvtneph2bf8s128_mask on __A with
+   write mask __U and an all-zero pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtnesph_pbf8 (__mmask8 __U, __m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __zero = (__v16qi) (__m128i) _mm_setzero_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8s128_mask (__src, __zero, (__mmask8) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvtneph2bf8s256_mask on the 256-bit
+   input __A with an all-ones mask and an undefined pass-through operand.
+   The result is a 128-bit vector.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtnesph_pbf8 (__m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __undef = (__v16qi) (__m128i) _mm_undefined_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8s256_mask (__src, __undef, (__mmask16) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvtneph2bf8s256_mask on the
+   256-bit input __A with write mask __U and the 128-bit __W as the
+   pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtnesph_pbf8 (__m128i __W, __mmask16 __U, __m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __passthru = (__v16qi) (__m128i) __W;
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8s256_mask (__src, __passthru, (__mmask16) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvtneph2bf8s256_mask on the
+   256-bit input __A with write mask __U and an all-zero 128-bit
+   pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtnesph_pbf8 (__mmask16 __U, __m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __zero = (__v16qi) (__m128i) _mm_setzero_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2bf8s256_mask (__src, __zero, (__mmask16) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvtneph2hf8128_mask on __A with an
+   all-ones mask and an undefined pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtneph_phf8 (__m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __undef = (__v16qi) (__m128i) _mm_undefined_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8128_mask (__src, __undef, (__mmask8) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvtneph2hf8128_mask on __A with
+   write mask __U and __W as the pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtneph_phf8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __passthru = (__v16qi) (__m128i) __W;
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8128_mask (__src, __passthru, (__mmask8) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvtneph2hf8128_mask on __A with
+   write mask __U and an all-zero pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtneph_phf8 (__mmask8 __U, __m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __zero = (__v16qi) (__m128i) _mm_setzero_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8128_mask (__src, __zero, (__mmask8) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvtneph2hf8256_mask on the 256-bit
+   input __A with an all-ones mask and an undefined pass-through operand.
+   The result is a 128-bit vector.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtneph_phf8 (__m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __undef = (__v16qi) (__m128i) _mm_undefined_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8256_mask (__src, __undef, (__mmask16) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvtneph2hf8256_mask on the
+   256-bit input __A with write mask __U and the 128-bit __W as the
+   pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtneph_phf8 (__m128i __W, __mmask16 __U, __m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __passthru = (__v16qi) (__m128i) __W;
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8256_mask (__src, __passthru, (__mmask16) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvtneph2hf8256_mask on the
+   256-bit input __A with write mask __U and an all-zero 128-bit
+   pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtneph_phf8 (__mmask16 __U, __m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __zero = (__v16qi) (__m128i) _mm_setzero_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8256_mask (__src, __zero, (__mmask16) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvtneph2hf8s128_mask on __A with an
+   all-ones mask and an undefined pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtnesph_phf8 (__m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __undef = (__v16qi) (__m128i) _mm_undefined_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8s128_mask (__src, __undef, (__mmask8) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvtneph2hf8s128_mask on __A with
+   write mask __U and __W as the pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtnesph_phf8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __passthru = (__v16qi) (__m128i) __W;
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8s128_mask (__src, __passthru, (__mmask8) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvtneph2hf8s128_mask on __A with
+   write mask __U and an all-zero pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtnesph_phf8 (__mmask8 __U, __m128h __A)
+{
+  const __v8hf __src = (__v8hf) __A;
+  const __v16qi __zero = (__v16qi) (__m128i) _mm_setzero_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8s128_mask (__src, __zero, (__mmask8) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vcvtneph2hf8s256_mask on the 256-bit
+   input __A with an all-ones mask and an undefined pass-through operand.
+   The result is a 128-bit vector.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtnesph_phf8 (__m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __undef = (__v16qi) (__m128i) _mm_undefined_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8s256_mask (__src, __undef, (__mmask16) -1);
+}
+
+/* Merge-masked form: calls __builtin_ia32_vcvtneph2hf8s256_mask on the
+   256-bit input __A with write mask __U and the 128-bit __W as the
+   pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtnesph_phf8 (__m128i __W, __mmask16 __U, __m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __passthru = (__v16qi) (__m128i) __W;
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8s256_mask (__src, __passthru, (__mmask16) __U);
+}
+
+/* Zero-masked form: calls __builtin_ia32_vcvtneph2hf8s256_mask on the
+   256-bit input __A with write mask __U and an all-zero 128-bit
+   pass-through operand.  */
+extern __inline__ __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtnesph_phf8 (__mmask16 __U, __m256h __A)
+{
+  const __v16hf __src = (__v16hf) __A;
+  const __v16qi __zero = (__v16qi) (__m128i) _mm_setzero_si128 ();
+
+  return (__m128i)
+    __builtin_ia32_vcvtneph2hf8s256_mask (__src, __zero, (__mmask16) __U);
+}
+
+#ifdef __DISABLE_AVX10_2_256__
+#undef __DISABLE_AVX10_2_256__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX10_2_256__ */
+
+#endif /* __AVX10_2CONVERTINTRIN_H_INCLUDED */
index f5fa2544cc5b08bd609790394a49967045683779..63b65846c8fba94ea15dbbb6c2d0757417c98d56 100644 (file)
@@ -1453,3 +1453,24 @@ DEF_FUNCTION_TYPE (V16HF, V16HF, INT, V16HF, UHI, INT)
 DEF_FUNCTION_TYPE (V8SF, V8SF, INT, V8SF, UQI, INT)
 DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, INT, V4DF, UQI, INT)
 DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT, V8SF, UQI, INT)
+DEF_FUNCTION_TYPE (V32HF, V16SF, V16SF, V32HF, USI, INT)
+DEF_FUNCTION_TYPE (V16HF, V8SF, V8SF, V16HF, UHI, INT)
+DEF_FUNCTION_TYPE (V32HF, V16SF, V16SF, V32HF, USI)
+DEF_FUNCTION_TYPE (V16HF, V8SF, V8SF, V16HF, UHI)
+DEF_FUNCTION_TYPE (V8HF, V4SF, V4SF, V8HF, UQI)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V8HF)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V8HF, V16QI, UHI)
+DEF_FUNCTION_TYPE (V16QI, V32QI, V16HF, V16QI, UHI)
+DEF_FUNCTION_TYPE (V32QI, V64QI, V32HF, V32QI, USI)
+DEF_FUNCTION_TYPE (V64QI, V64QI, V32HF, V32HF)
+DEF_FUNCTION_TYPE (V32HF, V32QI, V32HF, USI)
+DEF_FUNCTION_TYPE (V32QI, V32QI, V16HF, V16HF)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V8HF, V8HF)
+DEF_FUNCTION_TYPE (V8HF, V16QI, V8HF, UQI)
+DEF_FUNCTION_TYPE (V16HF, V16QI, V16HF, UHI)
+DEF_FUNCTION_TYPE (V16QI, V8HF, V8HF, V16QI, UHI)
+DEF_FUNCTION_TYPE (V32QI, V16HF, V16HF, V32QI, USI)
+DEF_FUNCTION_TYPE (V64QI, V32HF, V32HF, V64QI, UDI)
+DEF_FUNCTION_TYPE (V16QI, V8HF, V16QI, UQI)
+DEF_FUNCTION_TYPE (V16QI, V16HF, V16QI, UHI)
+DEF_FUNCTION_TYPE (V32QI, V32HF, V32QI, USI)
index cdf28cd261c0ea36fd69803123445d7eb8d4d08e..6f5ab32dd0d07a96fd19a237595e67e8c09a9bea 100644 (file)
@@ -3115,6 +3115,50 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_mpsadbw, "__builtin_ia3
 BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_mpsadbw_mask, "__builtin_ia32_mpsadbw512_mask", IX86_BUILTIN_VMPSADBW_V32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI)
 BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx2_mpsadbw_mask, "__builtin_ia32_mpsadbw256_mask", IX86_BUILTIN_VMPSADBW_V16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI)
 BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_sse4_1_mpsadbw_mask, "__builtin_ia32_mpsadbw128_mask", IX86_BUILTIN_VMPSADBW_V8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvt2ps2phx_v8hf_mask, "__builtin_ia32_vcvt2ps2phx128_mask", IX86_BUILTIN_VCVT2PS2PHX_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V4SF_V4SF_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2bf8v8hf, "__builtin_ia32_vcvtbiasph2bf8128", IX86_BUILTIN_VCVTBIASPH2BF8128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2bf8v8hf_mask, "__builtin_ia32_vcvtbiasph2bf8128_mask", IX86_BUILTIN_VCVTBIASPH2BF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2bf8v16hf_mask, "__builtin_ia32_vcvtbiasph2bf8256_mask", IX86_BUILTIN_VCVTBIASPH2BF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V32QI_V16HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtbiasph2bf8v32hf_mask, "__builtin_ia32_vcvtbiasph2bf8512_mask", IX86_BUILTIN_VCVTBIASPH2BF8512_MASK, UNKNOWN, (int) V32QI_FTYPE_V64QI_V32HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2bf8sv8hf, "__builtin_ia32_vcvtbiasph2bf8s128", IX86_BUILTIN_VCVTBIASPH2BF8S128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2bf8sv8hf_mask, "__builtin_ia32_vcvtbiasph2bf8s128_mask", IX86_BUILTIN_VCVTBIASPH2BF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2bf8sv16hf_mask, "__builtin_ia32_vcvtbiasph2bf8s256_mask", IX86_BUILTIN_VCVTBIASPH2BF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V32QI_V16HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtbiasph2bf8sv32hf_mask, "__builtin_ia32_vcvtbiasph2bf8s512_mask", IX86_BUILTIN_VCVTBIASPH2BF8S512_MASK, UNKNOWN, (int) V32QI_FTYPE_V64QI_V32HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2hf8v8hf, "__builtin_ia32_vcvtbiasph2hf8128", IX86_BUILTIN_VCVTBIASPH2HF8128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2hf8v8hf_mask, "__builtin_ia32_vcvtbiasph2hf8128_mask", IX86_BUILTIN_VCVTBIASPH2HF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2hf8v16hf_mask, "__builtin_ia32_vcvtbiasph2hf8256_mask", IX86_BUILTIN_VCVTBIASPH2HF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V32QI_V16HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtbiasph2hf8v32hf_mask, "__builtin_ia32_vcvtbiasph2hf8512_mask", IX86_BUILTIN_VCVTBIASPH2HF8512_MASK, UNKNOWN, (int) V32QI_FTYPE_V64QI_V32HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2hf8sv8hf, "__builtin_ia32_vcvtbiasph2hf8s128", IX86_BUILTIN_VCVTBIASPH2HF8S128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2hf8sv8hf_mask, "__builtin_ia32_vcvtbiasph2hf8s128_mask", IX86_BUILTIN_VCVTBIASPH2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2hf8sv16hf_mask, "__builtin_ia32_vcvtbiasph2hf8s256_mask", IX86_BUILTIN_VCVTBIASPH2HF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V32QI_V16HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtbiasph2hf8sv32hf_mask, "__builtin_ia32_vcvtbiasph2hf8s512_mask", IX86_BUILTIN_VCVTBIASPH2HF8S512_MASK, UNKNOWN, (int) V32QI_FTYPE_V64QI_V32HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2bf8v8hf_mask, "__builtin_ia32_vcvtne2ph2bf8128_mask", IX86_BUILTIN_VCVTNE2PH2BF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2bf8v16hf_mask, "__builtin_ia32_vcvtne2ph2bf8256_mask", IX86_BUILTIN_VCVTNE2PH2BF8256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtne2ph2bf8v32hf_mask, "__builtin_ia32_vcvtne2ph2bf8512_mask", IX86_BUILTIN_VCVTNE2PH2BF8512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2bf8sv8hf_mask, "__builtin_ia32_vcvtne2ph2bf8s128_mask", IX86_BUILTIN_VCVTNE2PH2BF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2bf8sv16hf_mask, "__builtin_ia32_vcvtne2ph2bf8s256_mask", IX86_BUILTIN_VCVTNE2PH2BF8S256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtne2ph2bf8sv32hf_mask, "__builtin_ia32_vcvtne2ph2bf8s512_mask", IX86_BUILTIN_VCVTNE2PH2BF8S512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2hf8v8hf_mask, "__builtin_ia32_vcvtne2ph2hf8128_mask", IX86_BUILTIN_VCVTNE2PH2HF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2hf8v16hf_mask, "__builtin_ia32_vcvtne2ph2hf8256_mask", IX86_BUILTIN_VCVTNE2PH2HF8256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtne2ph2hf8v32hf_mask, "__builtin_ia32_vcvtne2ph2hf8512_mask", IX86_BUILTIN_VCVTNE2PH2HF8512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2hf8sv8hf_mask, "__builtin_ia32_vcvtne2ph2hf8s128_mask", IX86_BUILTIN_VCVTNE2PH2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2hf8sv16hf_mask, "__builtin_ia32_vcvtne2ph2hf8s256_mask", IX86_BUILTIN_VCVTNE2PH2HF8S256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtne2ph2hf8sv32hf_mask, "__builtin_ia32_vcvtne2ph2hf8s512_mask", IX86_BUILTIN_VCVTNE2PH2HF8S512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8v8hf_mask, "__builtin_ia32_vcvtneph2bf8128_mask", IX86_BUILTIN_VCVTNEPH2BF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8v16hf_mask, "__builtin_ia32_vcvtneph2bf8256_mask", IX86_BUILTIN_VCVTNEPH2BF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2bf8v32hf_mask, "__builtin_ia32_vcvtneph2bf8512_mask", IX86_BUILTIN_VCVTNEPH2BF8512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8sv8hf_mask, "__builtin_ia32_vcvtneph2bf8s128_mask", IX86_BUILTIN_VCVTNEPH2BF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8sv16hf_mask, "__builtin_ia32_vcvtneph2bf8s256_mask", IX86_BUILTIN_VCVTNEPH2BF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2bf8sv32hf_mask, "__builtin_ia32_vcvtneph2bf8s512_mask", IX86_BUILTIN_VCVTNEPH2BF8S512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2hf8v8hf_mask, "__builtin_ia32_vcvtneph2hf8128_mask", IX86_BUILTIN_VCVTNEPH2HF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2hf8v16hf_mask, "__builtin_ia32_vcvtneph2hf8256_mask", IX86_BUILTIN_VCVTNEPH2HF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2hf8v32hf_mask, "__builtin_ia32_vcvtneph2hf8512_mask", IX86_BUILTIN_VCVTNEPH2HF8512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2hf8sv8hf_mask, "__builtin_ia32_vcvtneph2hf8s128_mask", IX86_BUILTIN_VCVTNEPH2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2hf8sv16hf_mask, "__builtin_ia32_vcvtneph2hf8s256_mask", IX86_BUILTIN_VCVTNEPH2HF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2hf8sv32hf_mask, "__builtin_ia32_vcvtneph2hf8s512_mask", IX86_BUILTIN_VCVTNEPH2HF8S512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvthf82phv8hf_mask, "__builtin_ia32_vcvthf82ph128_mask", IX86_BUILTIN_VCVTHF82PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V16QI_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvthf82phv16hf_mask, "__builtin_ia32_vcvthf82ph256_mask", IX86_BUILTIN_VCVTHF82PH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16QI_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvthf82phv32hf_mask, "__builtin_ia32_vcvthf82ph512_mask", IX86_BUILTIN_VCVTHF82PH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32QI_V32HF_USI)
 
 /* Builtins with rounding support.  */
 BDESC_END (ARGS, ROUND_ARGS)
@@ -3573,6 +3617,8 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256,  CODE_FOR_avx_sqrtv8sf2_mask_round, "__b
 BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_subv4df3_mask_round, "__builtin_ia32_subpd256_mask_round", IX86_BUILTIN_VSUBPD256_MASK_ROUND, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_subv16hf3_mask_round, "__builtin_ia32_subph256_mask_round", IX86_BUILTIN_VSUBPH256_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_subv8sf3_mask_round, "__builtin_ia32_subps256_mask_round", IX86_BUILTIN_VSUBPS256_MASK_ROUND, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvt2ps2phx_v32hf_mask_round, "__builtin_ia32_vcvt2ps2phx512_mask_round", IX86_BUILTIN_VCVT2PS2PHX_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V16SF_V16SF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvt2ps2phx_v16hf_mask_round, "__builtin_ia32_vcvt2ps2phx256_mask_round", IX86_BUILTIN_VCVT2PS2PHX_V16HF_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V8SF_V8SF_V16HF_UHI_INT)
 
 BDESC_END (ROUND_ARGS, MULTI_ARG)
 
index 86a9ca566cfd99412d3800ca96f9f40af93f7cfe..41d6eb836f22a98187cb7097e05d2a0817813d37 100644 (file)
@@ -11345,6 +11345,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V16BF_FTYPE_V16SF_UHI:
     case V8BF_FTYPE_V8SF_UQI:
     case V8BF_FTYPE_V4SF_UQI:
+    case V16QI_FTYPE_V16QI_V8HF:
       nargs = 2;
       break;
     case V2DI_FTYPE_V2DI_INT_CONVERT:
@@ -11560,6 +11561,15 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V16SF_FTYPE_V16SF_V32BF_V32BF:
     case V8SF_FTYPE_V8SF_V16BF_V16BF:
     case V4SF_FTYPE_V4SF_V8BF_V8BF:
+    case V16QI_FTYPE_V16QI_V8HF_V8HF:
+    case V32QI_FTYPE_V32QI_V16HF_V16HF:
+    case V64QI_FTYPE_V64QI_V32HF_V32HF:
+    case V16QI_FTYPE_V8HF_V16QI_UQI:
+    case V16QI_FTYPE_V16HF_V16QI_UHI:
+    case V32QI_FTYPE_V32HF_V32QI_USI:
+    case V8HF_FTYPE_V16QI_V8HF_UQI:
+    case V16HF_FTYPE_V16QI_V16HF_UHI:
+    case V32HF_FTYPE_V32QI_V32HF_USI:
       nargs = 3;
       break;
     case V32QI_FTYPE_V32QI_V32QI_INT:
@@ -11709,6 +11719,15 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V32BF_FTYPE_V16SF_V16SF_V32BF_USI:
     case V16BF_FTYPE_V8SF_V8SF_V16BF_UHI:
     case V8BF_FTYPE_V4SF_V4SF_V8BF_UQI:
+    case V32HF_FTYPE_V16SF_V16SF_V32HF_USI:
+    case V16HF_FTYPE_V8SF_V8SF_V16HF_UHI:
+    case V8HF_FTYPE_V4SF_V4SF_V8HF_UQI:
+    case V16QI_FTYPE_V8HF_V8HF_V16QI_UHI:
+    case V32QI_FTYPE_V16HF_V16HF_V32QI_USI:
+    case V64QI_FTYPE_V32HF_V32HF_V64QI_UDI:
+    case V16QI_FTYPE_V16QI_V8HF_V16QI_UHI:
+    case V16QI_FTYPE_V32QI_V16HF_V16QI_UHI:
+    case V32QI_FTYPE_V64QI_V32HF_V32QI_USI:
       nargs = 4;
       break;
     case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
@@ -12462,6 +12481,8 @@ ix86_expand_round_builtin (const struct builtin_description *d,
     case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT:
     case V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT:
     case V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT:
+    case V16HF_FTYPE_V8SF_V8SF_V16HF_UHI_INT:
+    case V32HF_FTYPE_V16SF_V16SF_V32HF_USI_INT:
       nargs = 5;
       break;
     case V32HF_FTYPE_V32HF_INT_V32HF_USI_INT:
index ce8437d00c2fb04f0e76ccef50d57992af677835..fea55a298fc49d4582e9249247a69912ab8e6c08 100644 (file)
 
 #include <avx10_2-512mediaintrin.h>
 
+#include <avx10_2convertintrin.h>
+
+#include <avx10_2-512convertintrin.h>
+
 #endif /* _IMMINTRIN_H_INCLUDED */
index c172d44a93a37aa623fdc6270ab39ab945547b20..622873b5a6f93d91ab15768e17d34cef186ec885 100644 (file)
 
   ;; For AVX10.2 support
   UNSPEC_VDPPHPS
+  UNSPEC_VCVTBIASPH2BF8
+  UNSPEC_VCVTBIASPH2BF8S
+  UNSPEC_VCVTBIASPH2HF8
+  UNSPEC_VCVTBIASPH2HF8S
+  UNSPEC_VCVTNE2PH2BF8
+  UNSPEC_VCVTNE2PH2BF8S
+  UNSPEC_VCVTNE2PH2HF8
+  UNSPEC_VCVTNE2PH2HF8S
+  UNSPEC_VCVTNEPH2BF8
+  UNSPEC_VCVTNEPH2BF8S
+  UNSPEC_VCVTNEPH2HF8
+  UNSPEC_VCVTNEPH2HF8S
+  UNSPEC_VCVTHF82PH
 ])
 
 (define_c_enum "unspecv" [
   [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
    (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
 
+;; Half-float vector modes iterated over by the AVX10.2 convert patterns:
+;; V16HF and V8HF unconditionally, V32HF only under TARGET_AVX10_2_512.
+(define_mode_iterator VHF_AVX10_2
+  [(V32HF "TARGET_AVX10_2_512") V16HF V8HF])
+
 ;; All vector integer modes
 (define_mode_iterator VI
   [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_mode_attr bf16_ph
-  [(V8HF "ph") (V16HF "ph")
-   (V8BF "bf16") (V16BF "bf16")])
+  [(V8HF "ph") (V16HF "ph") (V32HF "ph")
+   (V8BF "bf16") (V16BF "bf16") (V32BF "bf16")])
 
 (define_insn "vcvtnee<bf16_ph>2ps_<mode>"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
    (set_attr "addr" "gpr16")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; vcvt2ps2phx: float_truncate two <ssePSmode> inputs to <ssehalfvecmode>
+;; and vec_concat them into one half-float vector.  Operand 2 (the one that
+;; takes the <round_nimm_predicate>/<round_constraint> form) supplies the
+;; first half of the concatenation and operand 1 the second.  The
+;; <mask_name> and <round_name> parts of the name generate the masked and
+;; embedded-rounding variants.
+(define_insn "avx10_2_cvt2ps2phx_<mode><mask_name><round_name>"
+  [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v")
+       (vec_concat:VHF_AVX10_2
+         (float_truncate:<ssehalfvecmode>
+           (match_operand:<ssePSmode> 2 "<round_nimm_predicate>" "<round_constraint>"))
+         (float_truncate:<ssehalfvecmode>
+           (match_operand:<ssePSmode> 1 "register_operand" "v"))))]
+  "TARGET_AVX10_2_256 && <round_mode_condition>"
+  "vcvt2ps2phx\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}")
+
+;; Maps a half-float vector mode to the QImode vector mode of the same
+;; total width (twice as many elements).
+(define_mode_attr ssebvecmode
+  [(V8HF "V16QI") (V16HF "V32QI") (V32HF "V64QI")])
+
+;; Unspecs of the two-source PH -> FP8 conversions: BF8 and HF8 results,
+;; each in a plain and an "S"-suffixed flavour.
+(define_int_iterator UNSPEC_NECONVERTFP8_PACK
+   [UNSPEC_VCVTNE2PH2BF8 UNSPEC_VCVTNE2PH2BF8S
+    UNSPEC_VCVTNE2PH2HF8 UNSPEC_VCVTNE2PH2HF8S])
+
+;; Name fragment for each UNSPEC_NECONVERTFP8_PACK member; it is used both
+;; in the pattern name and, after "vcvt", as the emitted mnemonic.
+(define_int_attr neconvertfp8_pack
+   [(UNSPEC_VCVTNE2PH2BF8 "ne2ph2bf8")
+    (UNSPEC_VCVTNE2PH2BF8S "ne2ph2bf8s")
+    (UNSPEC_VCVTNE2PH2HF8 "ne2ph2hf8")
+    (UNSPEC_VCVTNE2PH2HF8S "ne2ph2hf8s")])
+
+;; Two-source PH -> FP8 conversion: both VHF_AVX10_2 inputs (operands 1 and
+;; 2) are converted into a single <ssebvecmode> byte vector.  Because the
+;; pattern has two inputs, the masked variant generated through <mask_name>
+;; must print its mask with <mask_operand3> in BOTH the AT&T and the Intel
+;; half of the template; using <mask_operand2> on the Intel side would
+;; reference the wrong operands.
+(define_insn "vcvt<neconvertfp8_pack><mode><mask_name>"
+  [(set (match_operand:<ssebvecmode> 0 "register_operand" "=v")
+       (unspec:<ssebvecmode>
+         [(match_operand:VHF_AVX10_2 1 "register_operand" "v")
+          (match_operand:VHF_AVX10_2 2 "nonimmediate_operand" "vm")]
+         UNSPEC_NECONVERTFP8_PACK))]
+  "TARGET_AVX10_2_256"
+  "vcvt<neconvertfp8_pack>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+  [(set_attr "prefix" "evex")])
+
+;; Maps a half-float vector mode to the QImode vector used on the FP8 side
+;; of the one-byte-per-element conversions: V16HF -> V16QI and
+;; V32HF -> V32QI, while V8HF still maps to the full V16QI.
+(define_mode_attr ssebvecmode_2
+  [(V8HF "V16QI") (V16HF "V16QI") (V32HF "V32QI")])
+
+;; Unspecs of the biased PH -> FP8 conversions: BF8 and HF8 results, each
+;; in a plain and an "S"-suffixed flavour.
+(define_int_iterator UNSPEC_VCVTBIASPH2FP8_PACK
+   [UNSPEC_VCVTBIASPH2BF8 UNSPEC_VCVTBIASPH2BF8S
+    UNSPEC_VCVTBIASPH2HF8 UNSPEC_VCVTBIASPH2HF8S])
+
+;; Name fragment for each UNSPEC_VCVTBIASPH2FP8_PACK member; it is used
+;; both in the pattern name and, after "vcvt", as the emitted mnemonic.
+(define_int_attr biasph2fp8_pack
+   [(UNSPEC_VCVTBIASPH2BF8 "biasph2bf8")
+    (UNSPEC_VCVTBIASPH2BF8S "biasph2bf8s")
+    (UNSPEC_VCVTBIASPH2HF8 "biasph2hf8")
+    (UNSPEC_VCVTBIASPH2HF8S "biasph2hf8s")])
+
+;; Expander for the 128-bit (V8HF) biased conversion.  The unspec yields
+;; only a V8QI, so it is concatenated with a zero V8QI (operands[3], set in
+;; the preparation statement) to form the V16QI destination.
+(define_expand "vcvt<biasph2fp8_pack>v8hf"
+  [(set (match_operand:V16QI 0 "register_operand")
+       (vec_concat:V16QI
+         (unspec:V8QI
+           [(match_operand:V16QI 1 "register_operand")
+            (match_operand:V8HF 2 "nonimmediate_operand")]
+             UNSPEC_VCVTBIASPH2FP8_PACK)
+         (match_dup 3)))]
+  "TARGET_AVX10_2_256"
+  "operands[3] = CONST0_RTX (V8QImode);")
+
+;; Insn for the 128-bit (V8HF) biased conversion: the V8QI unspec result is
+;; concatenated with operand 3, which must be a zero V8QI.  Emits the
+;; unmasked three-operand form.
+(define_insn "*vcvt<biasph2fp8_pack>v8hf"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+       (vec_concat:V16QI
+         (unspec:V8QI
+           [(match_operand:V16QI 1 "register_operand" "v")
+            (match_operand:V8HF 2 "nonimmediate_operand" "vm")]
+             UNSPEC_VCVTBIASPH2FP8_PACK)
+         (match_operand:V8QI 3 "const0_operand")))]
+  "TARGET_AVX10_2_256"
+  "vcvt<biasph2fp8_pack>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "HF")])
+
+;; Expander for the masked 128-bit (V8HF) biased conversion.  The V8QI
+;; unspec result is vec_merged, under mask operand 4, with the low eight
+;; bytes of operand 3; the upper V8QI of the destination is the zero vector
+;; created in the preparation statement (operands[5]).
+;; Operand 4 carries no constraint string: this is an expander, and the
+;; matching insn supplies the real ("Yk") constraint.
+(define_expand "vcvt<biasph2fp8_pack>v8hf_mask"
+  [(set (match_operand:V16QI 0 "register_operand")
+       (vec_concat:V16QI
+         (vec_merge:V8QI
+           (unspec:V8QI
+             [(match_operand:V16QI 1 "register_operand")
+              (match_operand:V8HF 2 "nonimmediate_operand")]
+             UNSPEC_VCVTBIASPH2FP8_PACK)
+           (vec_select:V8QI
+             (match_operand:V16QI 3 "nonimm_or_0_operand")
+             (parallel [(const_int 0) (const_int 1)
+                        (const_int 2) (const_int 3)
+                        (const_int 4) (const_int 5)
+                        (const_int 6) (const_int 7)]))
+           (match_operand:QI 4 "register_operand"))
+         (match_dup 5)))]
+  "TARGET_AVX10_2_256"
+  "operands[5] = CONST0_RTX (V8QImode);")
+
+;; Insn for the masked 128-bit (V8HF) biased conversion.  Under mask
+;; operand 4 the V8QI unspec result is merged with the low eight bytes of
+;; operand 3, which is either tied to the destination ("0") or a constant
+;; accepted by "C"; operand 5 is the zero upper half.  The template prints
+;; the mask as {%4} followed by %N3.
+(define_insn "*vcvt<biasph2fp8_pack>v8hf_mask"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+       (vec_concat:V16QI
+         (vec_merge:V8QI
+           (unspec:V8QI
+             [(match_operand:V16QI 1 "register_operand" "v")
+              (match_operand:V8HF 2 "nonimmediate_operand" "vm")]
+             UNSPEC_VCVTBIASPH2FP8_PACK)
+           (vec_select:V8QI
+             (match_operand:V16QI 3 "nonimm_or_0_operand" "0C")
+             (parallel [(const_int 0) (const_int 1)
+                        (const_int 2) (const_int 3)
+                        (const_int 4) (const_int 5)
+                        (const_int 6) (const_int 7)]))
+           (match_operand:QI 4 "register_operand" "Yk"))
+         (match_operand:V8QI 5 "const0_operand")))]
+  "TARGET_AVX10_2_256"
+  "vcvt<biasph2fp8_pack>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+  [(set_attr "prefix" "evex")])
+
+;; The half-float vector modes other than V8HF: V16HF unconditionally and
+;; V32HF only under TARGET_AVX10_2_512.
+(define_mode_iterator VHF_AVX10_2_2
+  [(V32HF "TARGET_AVX10_2_512") V16HF])
+
+;; Biased PH -> FP8 conversion for V16HF/V32HF.  Operand 1 is a
+;; <ssebvecmode> byte vector, operand 2 the half-float source, and the
+;; result is a <ssebvecmode_2> byte vector.  <mask_name> generates the
+;; masked variant, printed with <mask_operand3>.
+(define_insn "vcvt<biasph2fp8_pack><mode><mask_name>"
+  [(set (match_operand:<ssebvecmode_2> 0 "register_operand" "=v")
+       (unspec:<ssebvecmode_2>
+         [(match_operand:<ssebvecmode> 1 "register_operand" "v")
+          (match_operand:VHF_AVX10_2_2 2 "nonimmediate_operand" "vm")]
+         UNSPEC_VCVTBIASPH2FP8_PACK))]
+  "TARGET_AVX10_2_256"
+  "vcvt<biasph2fp8_pack>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+  [(set_attr "prefix" "evex")])
+
+;; 256-bit and 512-bit half-float vector modes; V32HF is only available
+;; under TARGET_AVX10_2_512.
+(define_mode_iterator VHF_256_512
+  [V16HF (V32HF "TARGET_AVX10_2_512")])
+
+;; Text appended to the single-source PH -> FP8 mnemonic, keyed by the
+;; source mode: nothing for V32HF, "{y}" for V16HF and "{x}" for V8HF.
+(define_mode_attr ph2fp8suff
+  [(V32HF "") (V16HF "{y}") (V8HF "{x}")])
+
+;; Unspecs of the single-source PH -> FP8 conversions: BF8 and HF8 results,
+;; each in a plain and an "S"-suffixed flavour.
+(define_int_iterator UNSPEC_NECONVERTPH2FP8
+   [UNSPEC_VCVTNEPH2BF8 UNSPEC_VCVTNEPH2BF8S
+    UNSPEC_VCVTNEPH2HF8 UNSPEC_VCVTNEPH2HF8S])
+
+;; Name fragment for each UNSPEC_NECONVERTPH2FP8 member; it is used both in
+;; the pattern name and, after "vcvt", as the emitted mnemonic.
+(define_int_attr neconvertph2fp8
+   [(UNSPEC_VCVTNEPH2BF8 "neph2bf8")
+    (UNSPEC_VCVTNEPH2BF8S "neph2bf8s")
+    (UNSPEC_VCVTNEPH2HF8 "neph2hf8")
+    (UNSPEC_VCVTNEPH2HF8S "neph2hf8s")])
+
+;; Expander for the 128-bit (V8HF) single-source conversion.  The unspec
+;; yields only a V8QI, so it is concatenated with a zero V8QI (operands[2],
+;; set in the preparation statement) to form the V16QI destination.
+(define_expand "vcvt<neconvertph2fp8>v8hf"
+  [(set (match_operand:V16QI 0 "register_operand")
+       (vec_concat:V16QI
+         (unspec:V8QI
+           [(match_operand:V8HF 1 "nonimmediate_operand")]
+             UNSPEC_NECONVERTPH2FP8)
+         (match_dup 2)))]
+  "TARGET_AVX10_2_256"
+  "operands[2] = CONST0_RTX (V8QImode);")
+
+;; Insn for the 128-bit (V8HF) single-source conversion: the V8QI unspec
+;; result is concatenated with operand 2, which must be a zero V8QI.  The
+;; mnemonic carries the "{x}" suffix text.
+(define_insn "*vcvt<neconvertph2fp8>v8hf"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+       (vec_concat:V16QI
+         (unspec:V8QI
+           [(match_operand:V8HF 1 "nonimmediate_operand" "vm")]
+             UNSPEC_NECONVERTPH2FP8)
+         (match_operand:V8QI 2 "const0_operand")))]
+  "TARGET_AVX10_2_256"
+  "vcvt<neconvertph2fp8>{x}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "HF")])
+
+;; Expander for the masked 128-bit (V8HF) single-source conversion.  The
+;; V8QI unspec result is vec_merged, under mask operand 3, with the low
+;; eight bytes of operand 2; the upper V8QI of the destination is the zero
+;; vector created in the preparation statement (operands[4]).
+(define_expand "vcvt<neconvertph2fp8>v8hf_mask"
+  [(set (match_operand:V16QI 0 "register_operand")
+       (vec_concat:V16QI
+         (vec_merge:V8QI
+           (unspec:V8QI
+             [(match_operand:V8HF 1 "nonimmediate_operand")]
+             UNSPEC_NECONVERTPH2FP8)
+           (vec_select:V8QI
+             (match_operand:V16QI 2 "nonimm_or_0_operand")
+             (parallel [(const_int 0) (const_int 1)
+                        (const_int 2) (const_int 3)
+                        (const_int 4) (const_int 5)
+                        (const_int 6) (const_int 7)]))
+           (match_operand:QI 3 "register_operand"))
+         (match_dup 4)))]
+  "TARGET_AVX10_2_256"
+  "operands[4] = CONST0_RTX (V8QImode);")
+
+;; Insn for the masked 128-bit (V8HF) single-source conversion.  Under
+;; mask operand 3 the V8QI unspec result is merged with the low eight bytes
+;; of operand 2, which is either tied to the destination ("0") or a
+;; constant accepted by "C"; operand 4 is the zero upper half.  The
+;; template prints the mask as {%3} followed by %N2.
+(define_insn "*vcvt<neconvertph2fp8>v8hf_mask"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+       (vec_concat:V16QI
+         (vec_merge:V8QI
+           (unspec:V8QI
+             [(match_operand:V8HF 1 "nonimmediate_operand" "vm")]
+             UNSPEC_NECONVERTPH2FP8)
+           (vec_select:V8QI
+             (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
+             (parallel [(const_int 0) (const_int 1)
+                        (const_int 2) (const_int 3)
+                        (const_int 4) (const_int 5)
+                        (const_int 6) (const_int 7)]))
+           (match_operand:QI 3 "register_operand" "Yk"))
+         (match_operand:V8QI 4 "const0_operand")))]
+  "TARGET_AVX10_2_256"
+  "vcvt<neconvertph2fp8>{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+  [(set_attr "prefix" "evex")])
+
+;; Single-source PH -> FP8 conversion for V16HF/V32HF: the half-float
+;; operand 1 becomes a <ssebvecmode_2> byte vector.  The mnemonic gets the
+;; <ph2fp8suff> text for the source mode; <mask_name> generates the masked
+;; variant, printed with <mask_operand2>.
+(define_insn "vcvt<neconvertph2fp8><mode><mask_name>"
+  [(set (match_operand:<ssebvecmode_2> 0 "register_operand" "=v")
+       (unspec:<ssebvecmode_2>
+         [(match_operand:VHF_256_512 1 "nonimmediate_operand" "vm")]
+         UNSPEC_NECONVERTPH2FP8))]
+  "TARGET_AVX10_2_256"
+  "vcvt<neconvertph2fp8><ph2fp8suff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  [(set_attr "prefix" "evex")])
+
+;; vcvthf82ph: converts a <ssebvecmode_2> byte vector (operand 1) into a
+;; half-float vector of mode VHF_AVX10_2.  <mask_name> generates the masked
+;; variant, printed with <mask_operand2>.
+(define_insn "vcvthf82ph<mode><mask_name>"
+  [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v")
+       (unspec:VHF_AVX10_2
+         [(match_operand:<ssebvecmode_2> 1 "nonimmediate_operand" "vm")]
+         UNSPEC_VCVTHF82PH))]
+  "TARGET_AVX10_2_256"
+  "vcvthf82ph\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  [(set_attr "prefix" "evex")])
+
 (define_int_iterator VPDPWPROD
   [UNSPEC_VPDPWUSD
    UNSPEC_VPDPWUSDS
index 5fc84234b574d5f32cd8c1bf36924704503153ae..4a47e3130965ba6973ddfb481cff357a698f6023 100644 (file)
 #define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E)
 #define __builtin_ia32_mpsadbw256_mask(A, B, C, D, E) __builtin_ia32_mpsadbw256_mask (A, B, 1, D, E)
 
+/* avx10_2convertintrin.h */
+#define __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, 8)
+
+/* avx10_2-512convertintrin.h */
+#define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
+
 #include <wmmintrin.h>
 #include <immintrin.h>
 #include <mm3dnow.h>
index fb0ef9e2aa5e15daedbba1e1748c165d3bafcd8a..3f4d7353c620e36a2e5ff639aa544c7282cb576a 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx10.2-512" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul" } */
 /* { dg-add-options bind_pic_locally } */
 
 #include <mm_malloc.h>
@@ -160,4 +160,3 @@ test_2 (_m_pinsrw, __m64, __m64, int, 1)
 test_1 (_mm_shuffle_pi16, __m64, __m64, 1)
 test_1 (_m_pshufw, __m64, __m64, 1)
 test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA)
-
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c
new file mode 100644 (file)
index 0000000..bbbff18
--- /dev/null
@@ -0,0 +1,176 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2-512 -O2" } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Operands and results shared by the test functions below.  They are
+   volatile globals, presumably so that -O2 cannot fold or drop the
+   intrinsic calls the scan-assembler directives count.  */
+volatile __m256i x256i;
+volatile __m512i x512i;
+volatile __m512 x, a1, b1;
+volatile __m512h y, x512h;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+volatile __mmask64 m64;
+const void *a;
+__m512bh *c;
+__m512h *d;
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtx2ps_ph, then the
+   same three forms of the _round variant with rounding argument 8.  The
+   dg-final directives at the top of the file expect exactly one
+   vcvt2ps2phx per form, the last three carrying {rn-sae}.  */
+void extern
+avx10_2_512_test (void)
+{ 
+  y = _mm512_cvtx2ps_ph (a1, b1);
+  y = _mm512_mask_cvtx2ps_ph (y, m32, a1, b1);
+  y = _mm512_maskz_cvtx2ps_ph (m32, a1, b1);
+
+  y = _mm512_cvtx_round2ps_ph (a1, b1, 8);
+  y = _mm512_mask_cvtx_round2ps_ph (y, m32, a1, b1, 8);
+  y = _mm512_maskz_cvtx_round2ps_ph (m32, a1, b1, 8);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtbiasph_pbf8; meant
+   to be matched by the three vcvtbiasph2bf8 scan-assembler-times
+   directives (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtbiasph2bf8_test (void)
+{
+  x256i = _mm512_cvtbiasph_pbf8 (x512i, x512h);
+  x256i = _mm512_mask_cvtbiasph_pbf8 (x256i, m32, x512i, x512h);
+  x256i = _mm512_maskz_cvtbiasph_pbf8 (m32, x512i, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtbiassph_pbf8; meant
+   to be matched by the three vcvtbiasph2bf8s scan-assembler-times
+   directives (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtbiasph2bf8s_test (void)
+{
+  x256i = _mm512_cvtbiassph_pbf8 (x512i, x512h);
+  x256i = _mm512_mask_cvtbiassph_pbf8 (x256i, m32, x512i, x512h);
+  x256i = _mm512_maskz_cvtbiassph_pbf8 (m32, x512i, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtbiasph_phf8; meant
+   to be matched by the three vcvtbiasph2hf8 scan-assembler-times
+   directives (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtbiasph2hf8_test (void)
+{
+  x256i = _mm512_cvtbiasph_phf8 (x512i, x512h);
+  x256i = _mm512_mask_cvtbiasph_phf8 (x256i, m32, x512i, x512h);
+  x256i = _mm512_maskz_cvtbiasph_phf8 (m32, x512i, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtbiassph_phf8; meant
+   to be matched by the three vcvtbiasph2hf8s scan-assembler-times
+   directives (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtbiasph2hf8s_test (void)
+{
+  x256i = _mm512_cvtbiassph_phf8 (x512i, x512h);
+  x256i = _mm512_mask_cvtbiassph_phf8 (x256i, m32, x512i, x512h);
+  x256i = _mm512_maskz_cvtbiassph_phf8 (m32, x512i, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtne2ph_pbf8; meant to
+   be matched by the three vcvtne2ph2bf8 scan-assembler-times directives
+   (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtne2ph2bf8_test (void)
+{
+  x512i = _mm512_cvtne2ph_pbf8 (x512h, x512h);
+  x512i = _mm512_mask_cvtne2ph_pbf8 (x512i, m64, x512h, x512h);
+  x512i = _mm512_maskz_cvtne2ph_pbf8 (m64, x512h, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtnes2ph_pbf8; meant
+   to be matched by the three vcvtne2ph2bf8s scan-assembler-times
+   directives (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtne2ph2bf8s_test (void)
+{
+  x512i = _mm512_cvtnes2ph_pbf8 (x512h, x512h);
+  x512i = _mm512_mask_cvtnes2ph_pbf8 (x512i, m64, x512h, x512h);
+  x512i = _mm512_maskz_cvtnes2ph_pbf8 (m64, x512h, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtne2ph_phf8; meant to
+   be matched by the three vcvtne2ph2hf8 scan-assembler-times directives
+   (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtne2ph2hf8_test (void)
+{
+  x512i = _mm512_cvtne2ph_phf8 (x512h, x512h);
+  x512i = _mm512_mask_cvtne2ph_phf8 (x512i, m64, x512h, x512h);
+  x512i = _mm512_maskz_cvtne2ph_phf8 (m64, x512h, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtnes2ph_phf8; meant
+   to be matched by the three vcvtne2ph2hf8s scan-assembler-times
+   directives (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtne2ph2hf8s_test (void)
+{
+  x512i = _mm512_cvtnes2ph_phf8 (x512h, x512h);
+  x512i = _mm512_mask_cvtnes2ph_phf8 (x512i, m64, x512h, x512h);
+  x512i = _mm512_maskz_cvtnes2ph_phf8 (m64, x512h, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvthf8_ph; meant to be
+   matched by the three vcvthf82ph scan-assembler-times directives (no
+   mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvthf82ph_test (void)
+{
+  x512h = _mm512_cvthf8_ph (x256i);
+  x512h = _mm512_mask_cvthf8_ph (x512h, m32, x256i);
+  x512h = _mm512_maskz_cvthf8_ph (m32, x256i);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtneph_pbf8; meant to
+   be matched by the three vcvtneph2bf8 scan-assembler-times directives
+   (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtneph2bf8_test (void)
+{
+  x256i = _mm512_cvtneph_pbf8 (x512h);
+  x256i = _mm512_mask_cvtneph_pbf8 (x256i, m32, x512h);
+  x256i = _mm512_maskz_cvtneph_pbf8 (m32, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtnesph_pbf8; meant to
+   be matched by the three vcvtneph2bf8s scan-assembler-times directives
+   (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtneph2bf8s_test (void)
+{
+  x256i = _mm512_cvtnesph_pbf8 (x512h);
+  x256i = _mm512_mask_cvtnesph_pbf8 (x256i, m32, x512h);
+  x256i = _mm512_maskz_cvtnesph_pbf8 (m32, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtneph_phf8; meant to
+   be matched by the three vcvtneph2hf8 scan-assembler-times directives
+   (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtneph2hf8_test (void)
+{
+  x256i = _mm512_cvtneph_phf8 (x512h);
+  x256i = _mm512_mask_cvtneph_phf8 (x256i, m32, x512h);
+  x256i = _mm512_maskz_cvtneph_phf8 (m32, x512h);
+}
+
+/* Calls the plain, mask and maskz forms of _mm512_cvtnesph_phf8; meant to
+   be matched by the three vcvtneph2hf8s scan-assembler-times directives
+   (no mask, {%k}, {%k}{z}).  */
+void extern
+avx10_2_512_vcvtneph2hf8s_test (void)
+{
+  x256i = _mm512_cvtnesph_phf8 (x512h);
+  x256i = _mm512_mask_cvtnesph_phf8 (x256i, m32, x512h);
+  x256i = _mm512_maskz_cvtnesph_phf8 (m32, x512h);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c
new file mode 100644 (file)
index 0000000..40dbe18
--- /dev/null
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <stdint.h>
+
+#define SIZE_RES (AVX512F_LEN / 16)
+
+/* Generate the inputs and the expected output together.  Element I of
+   RES_REF holds 2*I + 7 + I*0.5 (rounded through float).  The first
+   SIZE_RES / 2 values are also stored to SRC2 and the remaining ones to
+   SRC1, so SRC2 feeds the first half of the result and SRC1 the second.  */
+static void
+CALC (_Float16 *res_ref, float *src1, float *src2)
+{
+  const int half = SIZE_RES / 2;
+  int idx;
+
+  for (idx = 0; idx < SIZE_RES; idx++)
+    {
+      float val = (float) 2 * idx + 7 + idx * 0.5;
+      float *slot = idx < half ? &src2[idx] : &src1[idx - half];
+
+      res_ref[idx] = val;
+      *slot = val;
+    }
+}
+
+/* Run-time check of the _cvtx2ps_ph intrinsic: CALC fills both float
+   sources and the reference result, then the intrinsic's output must
+   compare equal under UNION_CHECK or the test aborts.  */
+void
+TEST (void)
+{
+  int i;
+  UNION_TYPE (AVX512F_LEN, h) res1;
+  UNION_TYPE (AVX512F_LEN, ) src1, src2;
+  _Float16 res_ref[SIZE_RES];
+
+  /* Pre-fill the destination so that a lane the intrinsic fails to write
+     cannot match the reference by accident.  */
+  for (i = 0; i < SIZE_RES; i++)
+    res1.a[i] = 5;
+
+  CALC (res_ref, src1.a, src2.a);
+
+  res1.x = INTRINSIC (_cvtx2ps_ph) (src1.x, src2.x);
+  if (UNION_CHECK (AVX512F_LEN, h) (res1, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c
new file mode 100644 (file)
index 0000000..9ce3c90
--- /dev/null
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SRC_F8_I8 (AVX512F_LEN / 8)
+#define SRC_F16 (AVX512F_LEN / 16)
+#define DST_F8_I8 (AVX512F_LEN_HALF / 8)
+#define DST_F16 (AVX512F_LEN_HALF / 16)
+
+/* Reference model for the biased PH -> BF8 conversion without saturation
+   (hf8_bf8 = 1, saturate = 0).  R[i] is convert_fp16_to_fp8 of SRC2[i],
+   with the bias byte taken from SRC1[2 * i], i.e. one byte per 16-bit
+   source lane.  */
+void
+CALC (unsigned char *r, char *src1, _Float16 *src2)
+{
+  int i, hf8_bf8, saturate;
+
+  hf8_bf8 = 1;
+  saturate = 0;
+
+  /* Only SRC_F16 source elements exist.  In the 128-bit configuration the
+     destination has more byte lanes than that, so bound the loop by both
+     counts instead of reading past the end of SRC1/SRC2.  */
+  for (i = 0; i < DST_F8_I8 && i < SRC_F16; i++)
+    r[i] = convert_fp16_to_fp8 (src2[i], src1[2 * i], hf8_bf8, saturate);
+
+  /* The 128-bit form only produces DST_F16 bytes; the rest must be zero.  */
+  if (AVX512F_LEN == 128)
+    for (i = DST_F16; i < DST_F8_I8; i++)
+      r[i] = 0;
+}
+
+/* Run-time check of the _cvtbiasph_pbf8 intrinsic against the CALC
+   reference model; aborts on any mismatch.  */
+void
+TEST (void)
+{
+  int i,sign;
+  UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+  UNION_TYPE (AVX512F_LEN, i_b) src1;
+  UNION_TYPE (AVX512F_LEN, h) src2;
+  unsigned char res_ref[DST_F8_I8];
+
+  /* Give the bias vector a defined, reproducible value; both the
+     intrinsic and CALC read it.  */
+  for (i = 0; i < SRC_F8_I8; i++)
+    src1.a[i] = (char) i;
+
+  /* Sources alternate in sign and cycle through 2.5, 5 and 10.  */
+  sign = 1;
+  for (i = 0; i < SRC_F16; i++)
+    {
+      src2.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3))));
+      sign = -sign;
+    }
+
+  res.x = INTRINSIC (_cvtbiasph_pbf8) (src1.x, src2.x);
+  CALC(res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8s-2.c
new file mode 100644 (file)
index 0000000..5e33b8d
--- /dev/null
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SRC_F8_I8 (AVX512F_LEN / 8)
+#define SRC_F16 (AVX512F_LEN / 16)
+#define DST_F8_I8 (AVX512F_LEN_HALF / 8)
+#define DST_F16 (AVX512F_LEN_HALF / 16)
+
+/* Reference model for the biased PH -> BF8 conversion with saturation
+   (hf8_bf8 = 1, saturate = 1).  R[i] is convert_fp16_to_fp8 of SRC2[i],
+   with the bias byte taken from SRC1[2 * i], i.e. one byte per 16-bit
+   source lane.  */
+void
+CALC (unsigned char *r, char *src1, _Float16 *src2)
+{
+  int i, hf8_bf8, saturate;
+
+  hf8_bf8 = 1;
+  saturate = 1;
+
+  /* Only SRC_F16 source elements exist.  In the 128-bit configuration the
+     destination has more byte lanes than that, so bound the loop by both
+     counts instead of reading past the end of SRC1/SRC2.  */
+  for (i = 0; i < DST_F8_I8 && i < SRC_F16; i++)
+    r[i] = convert_fp16_to_fp8 (src2[i], src1[2 * i], hf8_bf8, saturate);
+
+  /* The 128-bit form only produces DST_F16 bytes; the rest must be zero.  */
+  if (AVX512F_LEN == 128)
+    for (i = DST_F16; i < DST_F8_I8; i++)
+      r[i] = 0;
+}
+
+/* Run-time check of the _cvtbiassph_pbf8 intrinsic against the CALC
+   reference model; aborts on any mismatch.  */
+void
+TEST (void)
+{
+  int i,sign;
+  UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+  UNION_TYPE (AVX512F_LEN, i_b) src1;
+  UNION_TYPE (AVX512F_LEN, h) src2;
+  unsigned char res_ref[DST_F8_I8];
+
+  /* Give the bias vector a defined, reproducible value; both the
+     intrinsic and CALC read it.  */
+  for (i = 0; i < SRC_F8_I8; i++)
+    src1.a[i] = (char) i;
+
+  /* Sources alternate in sign and cycle through 2.5, 5 and 10.  */
+  sign = 1;
+  for (i = 0; i < SRC_F16; i++)
+    {
+      src2.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3))));
+      sign = -sign;
+    }
+
+  res.x = INTRINSIC (_cvtbiassph_pbf8) (src1.x, src2.x);
+  CALC(res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2hf8-2.c
new file mode 100644 (file)
index 0000000..96d1a33
--- /dev/null
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SRC_F8_I8 (AVX512F_LEN / 8)
+#define SRC_F16 (AVX512F_LEN / 16)
+#define DST_F8_I8 (AVX512F_LEN_HALF / 8)
+#define DST_F16 (AVX512F_LEN_HALF / 16)
+
+/* Reference model for the biased PH -> HF8 conversion without saturation
+   (hf8_bf8 = 0, saturate = 0).  R[i] is convert_fp16_to_fp8 of SRC2[i],
+   with the bias byte taken from SRC1[2 * i], i.e. one byte per 16-bit
+   source lane.  */
+void
+CALC (unsigned char *r, char *src1, _Float16 *src2)
+{
+  int i, hf8_bf8, saturate;
+
+  hf8_bf8 = 0;
+  saturate = 0;
+
+  /* Only SRC_F16 source elements exist.  In the 128-bit configuration the
+     destination has more byte lanes than that, so bound the loop by both
+     counts instead of reading past the end of SRC1/SRC2.  */
+  for (i = 0; i < DST_F8_I8 && i < SRC_F16; i++)
+    r[i] = convert_fp16_to_fp8 (src2[i], src1[2 * i], hf8_bf8, saturate);
+
+  /* The 128-bit form only produces DST_F16 bytes; the rest must be zero.  */
+  if (AVX512F_LEN == 128)
+    for (i = DST_F16; i < DST_F8_I8; i++)
+      r[i] = 0;
+}
+
+/* Run-time check of the _cvtbiasph_phf8 intrinsic against the CALC
+   reference model; aborts on any mismatch.  */
+void
+TEST (void)
+{
+  int i,sign;
+  UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+  UNION_TYPE (AVX512F_LEN, i_b) src1;
+  UNION_TYPE (AVX512F_LEN, h) src2;
+  unsigned char res_ref[DST_F8_I8];
+
+  /* Give the bias vector a defined, reproducible value; both the
+     intrinsic and CALC read it.  */
+  for (i = 0; i < SRC_F8_I8; i++)
+    src1.a[i] = (char) i;
+
+  /* Sources alternate in sign and cycle through 2.5, 5 and 10.  */
+  sign = 1;
+  for (i = 0; i < SRC_F16; i++)
+    {
+      src2.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3))));
+      sign = -sign;
+    }
+
+  res.x = INTRINSIC (_cvtbiasph_phf8) (src1.x, src2.x);
+  CALC(res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2hf8s-2.c
new file mode 100644 (file)
index 0000000..e66b952
--- /dev/null
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SRC_F8_I8 (AVX512F_LEN / 8)
+#define SRC_F16 (AVX512F_LEN / 16)
+#define DST_F8_I8 (AVX512F_LEN_HALF / 8)
+#define DST_F16 (AVX512F_LEN_HALF / 16)
+
+/* Reference model for the biased PH -> HF8 conversion with saturation
+   (hf8_bf8 = 0, saturate = 1).  R[i] is convert_fp16_to_fp8 of SRC2[i],
+   with the bias byte taken from SRC1[2 * i], i.e. one byte per 16-bit
+   source lane.  */
+void
+CALC (unsigned char *r, char *src1, _Float16 *src2)
+{
+  int i, hf8_bf8, saturate;
+
+  hf8_bf8 = 0;
+  saturate = 1;
+
+  /* Only SRC_F16 source elements exist.  In the 128-bit configuration the
+     destination has more byte lanes than that, so bound the loop by both
+     counts instead of reading past the end of SRC1/SRC2.  */
+  for (i = 0; i < DST_F8_I8 && i < SRC_F16; i++)
+    r[i] = convert_fp16_to_fp8 (src2[i], src1[2 * i], hf8_bf8, saturate);
+
+  /* The 128-bit form only produces DST_F16 bytes; the rest must be zero.  */
+  if (AVX512F_LEN == 128)
+    for (i = DST_F16; i < DST_F8_I8; i++)
+      r[i] = 0;
+}
+
+/* Run-time check of the _cvtbiassph_phf8 intrinsic against the CALC
+   reference model; aborts on any mismatch.  */
+void
+TEST (void)
+{
+  int i,sign;
+  UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+  UNION_TYPE (AVX512F_LEN, i_b) src1;
+  UNION_TYPE (AVX512F_LEN, h) src2;
+  unsigned char res_ref[DST_F8_I8];
+
+  /* Give the bias vector a defined, reproducible value; both the
+     intrinsic and CALC read it.  */
+  for (i = 0; i < SRC_F8_I8; i++)
+    src1.a[i] = (char) i;
+
+  /* Sources alternate in sign and cycle through 2.5, 5 and 10.  */
+  sign = 1;
+  for (i = 0; i < SRC_F16; i++)
+    {
+      src2.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3))));
+      sign = -sign;
+    }
+
+  res.x = INTRINSIC (_cvtbiassph_phf8) (src1.x, src2.x);
+  CALC(res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvthf82ph-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvthf82ph-2.c
new file mode 100644 (file)
index 0000000..6b9f07f
--- /dev/null
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SIZE_SRC (AVX512F_LEN_HALF / 8)
+#define SIZE_RES (AVX512F_LEN / 16)
+
+/* Reference model: each of the SIZE_RES result lanes is the
+   convert_hf8_to_fp16 conversion of the matching source byte.  */
+void
+CALC (_Float16 *r, unsigned char *s)
+{
+  int lane = 0;
+
+  while (lane < SIZE_RES)
+    {
+      r[lane] = convert_hf8_to_fp16(s[lane]);
+      lane++;
+    }
+}
+
+/* Run-time check of the _cvthf8_ph intrinsic: fill the byte source with an
+   alternating-sign pattern, convert it with the intrinsic and with CALC,
+   and abort when the two differ by more than the tolerance passed to
+   UNION_ROUGH_CHECK.  */
+void
+TEST (void)
+{
+  int i;
+  UNION_TYPE (AVX512F_LEN, h) res;
+  UNION_TYPE (AVX512F_LEN_HALF, i_b) src;
+  _Float16 res_ref[SIZE_RES];
+
+  for (i = 0; i < SIZE_SRC; i++)
+    {
+      /* Even lanes are positive, odd lanes negative.  */
+      int sign = (i % 2 == 0) ? 1 : -1;
+
+      src.a[i] = sign * (2.5 * (1 << (i % 3)));
+    }
+
+  res.x = INTRINSIC (_cvthf8_ph) (src.x);
+  CALC(res_ref, src.a);
+
+  if (UNION_ROUGH_CHECK (AVX512F_LEN, h) (res, res_ref, 0.0009765625))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c
new file mode 100644 (file)
index 0000000..96fa7c1
--- /dev/null
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SIZE_SRC (AVX512F_LEN / 16)
+#define SIZE_RES (AVX512F_LEN / 8)
+
+/* Scalar reference for the two-source conversion: result elements
+   [0, SIZE_SRC) are converted from s2 and elements [SIZE_SRC, SIZE_RES)
+   from s1, each through convert_fp16_to_fp8 with a second argument of 0,
+   hf8_bf8 = 1 and saturate = 0.  */
+void
+CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
+{
+  const int hf8_bf8 = 1;
+  const int saturate = 0;
+  int i;
+
+  for (i = 0; i < SIZE_RES; i++)
+    {
+      /* Pick the element by value; no staging union is needed, which
+         also avoids reading an uninitialized _Float16.  */
+      _Float16 elem = i < SIZE_SRC ? s2[i] : s1[i - SIZE_SRC];
+
+      r[i] = convert_fp16_to_fp8 (elem, 0, hf8_bf8, saturate);
+    }
+}
+
+/* Run the _cvtne2ph_pbf8 intrinsic on two _Float16 vectors and abort if
+   the result differs from the scalar reference produced by CALC.  */
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_b) res;
+  UNION_TYPE (AVX512F_LEN, h) src1, src2;
+  unsigned char res_ref[SIZE_RES];
+  int i, sign;
+
+  /* src1 and src2 carry opposite signs; magnitudes cycle through
+     1.5/3/6 and 2.5/5/10 respectively.  */
+  for (i = 0, sign = 1; i < SIZE_SRC; i++, sign = -sign)
+    {
+      const double scale = 1 << (i % 3);
+
+      src1.a[i] = (_Float16) (sign * (1.5 * scale));
+      src2.a[i] = (_Float16) (-sign * (2.5 * scale));
+    }
+
+  res.x = INTRINSIC (_cvtne2ph_pbf8) (src1.x, src2.x);
+  CALC (res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c
new file mode 100644 (file)
index 0000000..cead411
--- /dev/null
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SIZE_SRC (AVX512F_LEN / 16)
+#define SIZE_RES (AVX512F_LEN / 8)
+
+/* Scalar reference for the two-source conversion: result elements
+   [0, SIZE_SRC) are converted from s2 and elements [SIZE_SRC, SIZE_RES)
+   from s1, each through convert_fp16_to_fp8 with a second argument of 0,
+   hf8_bf8 = 1 and saturate = 1.  */
+void
+CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
+{
+  const int hf8_bf8 = 1;
+  const int saturate = 1;
+  int i;
+
+  for (i = 0; i < SIZE_RES; i++)
+    {
+      /* Pick the element by value; no staging union is needed, which
+         also avoids reading an uninitialized _Float16.  */
+      _Float16 elem = i < SIZE_SRC ? s2[i] : s1[i - SIZE_SRC];
+
+      r[i] = convert_fp16_to_fp8 (elem, 0, hf8_bf8, saturate);
+    }
+}
+
+/* Run the _cvtnes2ph_pbf8 intrinsic on two _Float16 vectors and abort if
+   the result differs from the scalar reference produced by CALC.  */
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_b) res;
+  UNION_TYPE (AVX512F_LEN, h) src1, src2;
+  unsigned char res_ref[SIZE_RES];
+  int i, sign;
+
+  /* src1 and src2 carry opposite signs; magnitudes cycle through
+     1.5/3/6 and 2.5/5/10 respectively.  */
+  for (i = 0, sign = 1; i < SIZE_SRC; i++, sign = -sign)
+    {
+      const double scale = 1 << (i % 3);
+
+      src1.a[i] = (_Float16) (sign * (1.5 * scale));
+      src2.a[i] = (_Float16) (-sign * (2.5 * scale));
+    }
+
+  res.x = INTRINSIC (_cvtnes2ph_pbf8) (src1.x, src2.x);
+  CALC (res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c
new file mode 100644 (file)
index 0000000..6887b40
--- /dev/null
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SIZE_SRC (AVX512F_LEN / 16)
+#define SIZE_RES (AVX512F_LEN / 8)
+
+/* Scalar reference for the two-source conversion: result elements
+   [0, SIZE_SRC) are converted from s2 and elements [SIZE_SRC, SIZE_RES)
+   from s1, each through convert_fp16_to_fp8 with a second argument of 0,
+   hf8_bf8 = 0 and saturate = 0.  */
+void
+CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
+{
+  const int hf8_bf8 = 0;
+  const int saturate = 0;
+  int i;
+
+  for (i = 0; i < SIZE_RES; i++)
+    {
+      /* Pick the element by value; no staging union is needed, which
+         also avoids reading an uninitialized _Float16.  */
+      _Float16 elem = i < SIZE_SRC ? s2[i] : s1[i - SIZE_SRC];
+
+      r[i] = convert_fp16_to_fp8 (elem, 0, hf8_bf8, saturate);
+    }
+}
+
+/* Run the _cvtne2ph_phf8 intrinsic on two _Float16 vectors and abort if
+   the result differs from the scalar reference produced by CALC.  */
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_b) res;
+  UNION_TYPE (AVX512F_LEN, h) src1, src2;
+  unsigned char res_ref[SIZE_RES];
+  int i, sign;
+
+  /* src1 and src2 carry opposite signs; magnitudes cycle through
+     1.5/3/6 and 2.5/5/10 respectively.  */
+  for (i = 0, sign = 1; i < SIZE_SRC; i++, sign = -sign)
+    {
+      const double scale = 1 << (i % 3);
+
+      src1.a[i] = (_Float16) (sign * (1.5 * scale));
+      src2.a[i] = (_Float16) (-sign * (2.5 * scale));
+    }
+
+  res.x = INTRINSIC (_cvtne2ph_phf8) (src1.x, src2.x);
+  CALC (res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c
new file mode 100644 (file)
index 0000000..6637d5e
--- /dev/null
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SIZE_SRC (AVX512F_LEN / 16)
+#define SIZE_RES (AVX512F_LEN / 8)
+
+/* Scalar reference for the two-source conversion: result elements
+   [0, SIZE_SRC) are converted from s2 and elements [SIZE_SRC, SIZE_RES)
+   from s1, each through convert_fp16_to_fp8 with a second argument of 0,
+   hf8_bf8 = 0 and saturate = 1.  */
+void
+CALC (unsigned char *r, _Float16 *s1, _Float16 *s2)
+{
+  const int hf8_bf8 = 0;
+  const int saturate = 1;
+  int i;
+
+  for (i = 0; i < SIZE_RES; i++)
+    {
+      /* Pick the element by value; no staging union is needed, which
+         also avoids reading an uninitialized _Float16.  */
+      _Float16 elem = i < SIZE_SRC ? s2[i] : s1[i - SIZE_SRC];
+
+      r[i] = convert_fp16_to_fp8 (elem, 0, hf8_bf8, saturate);
+    }
+}
+
+/* Run the _cvtnes2ph_phf8 intrinsic on two _Float16 vectors and abort if
+   the result differs from the scalar reference produced by CALC.  */
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_b) res;
+  UNION_TYPE (AVX512F_LEN, h) src1, src2;
+  unsigned char res_ref[SIZE_RES];
+  int i, sign;
+
+  /* src1 and src2 carry opposite signs; magnitudes cycle through
+     1.5/3/6 and 2.5/5/10 respectively.  */
+  for (i = 0, sign = 1; i < SIZE_SRC; i++, sign = -sign)
+    {
+      const double scale = 1 << (i % 3);
+
+      src1.a[i] = (_Float16) (sign * (1.5 * scale));
+      src2.a[i] = (_Float16) (-sign * (2.5 * scale));
+    }
+
+  res.x = INTRINSIC (_cvtnes2ph_phf8) (src1.x, src2.x);
+  CALC (res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c
new file mode 100644 (file)
index 0000000..253b842
--- /dev/null
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SIZE_SRC (AVX512F_LEN / 16)
+#define SIZE_RES (AVX512F_LEN_HALF / 8)
+
+/* Scalar reference: the first SIZE_SRC result bytes are s[i] converted
+   by convert_fp16_to_fp8 (second argument 0, hf8_bf8 = 1, saturate = 0);
+   any remaining bytes up to SIZE_RES are zero.  */
+void
+CALC (unsigned char *r, _Float16 *s)
+{
+  const int hf8_bf8 = 1, saturate = 0;
+  int i;
+
+  for (i = 0; i < SIZE_RES; i++)
+    {
+      if (i >= SIZE_SRC)
+        {
+          r[i] = 0;
+          continue;
+        }
+      r[i] = convert_fp16_to_fp8 (s[i], 0, hf8_bf8, saturate);
+    }
+}
+
+/* Run the _cvtneph_pbf8 intrinsic on a _Float16 vector and abort if the
+   result differs from the scalar reference produced by CALC.  */
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+  UNION_TYPE (AVX512F_LEN, h) src;
+  unsigned char res_ref[SIZE_RES];
+  int i, sign = 1;
+
+  /* Alternating-sign inputs cycling through 2.5, 5 and 10.  */
+  for (i = 0; i < SIZE_SRC; i++, sign = -sign)
+    src.a[i] = (_Float16) (sign * (2.5 * (1 << (i % 3))));
+
+  res.x = INTRINSIC (_cvtneph_pbf8) (src.x);
+  CALC (res_ref, src.a);
+
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c
new file mode 100644 (file)
index 0000000..b7f9944
--- /dev/null
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SIZE_SRC (AVX512F_LEN / 16)
+#define SIZE_RES (AVX512F_LEN_HALF / 8)
+
+/* Scalar reference: the first SIZE_SRC result bytes are s[i] converted
+   by convert_fp16_to_fp8 (second argument 0, hf8_bf8 = 1, saturate = 1);
+   any remaining bytes up to SIZE_RES are zero.  */
+void
+CALC (unsigned char *r, _Float16 *s)
+{
+  const int hf8_bf8 = 1, saturate = 1;
+  int i;
+
+  for (i = 0; i < SIZE_RES; i++)
+    {
+      if (i >= SIZE_SRC)
+        {
+          r[i] = 0;
+          continue;
+        }
+      r[i] = convert_fp16_to_fp8 (s[i], 0, hf8_bf8, saturate);
+    }
+}
+
+/* Run the _cvtnesph_pbf8 intrinsic on a _Float16 vector and abort if the
+   result differs from the scalar reference produced by CALC.  */
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+  UNION_TYPE (AVX512F_LEN, h) src;
+  unsigned char res_ref[SIZE_RES];
+  int i, sign = 1;
+
+  /* Alternating-sign inputs cycling through 2.5, 5 and 10.  */
+  for (i = 0; i < SIZE_SRC; i++, sign = -sign)
+    src.a[i] = (_Float16) (sign * (2.5 * (1 << (i % 3))));
+
+  res.x = INTRINSIC (_cvtnesph_pbf8) (src.x);
+  CALC (res_ref, src.a);
+
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c
new file mode 100644 (file)
index 0000000..75f1292
--- /dev/null
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SIZE_SRC (AVX512F_LEN / 16)
+#define SIZE_RES (AVX512F_LEN_HALF / 8)
+
+/* Scalar reference: the first SIZE_SRC result bytes are s[i] converted
+   by convert_fp16_to_fp8 (second argument 0, hf8_bf8 = 0, saturate = 0);
+   any remaining bytes up to SIZE_RES are zero.  */
+void
+CALC (unsigned char *r, _Float16 *s)
+{
+  const int hf8_bf8 = 0, saturate = 0;
+  int i;
+
+  for (i = 0; i < SIZE_RES; i++)
+    {
+      if (i >= SIZE_SRC)
+        {
+          r[i] = 0;
+          continue;
+        }
+      r[i] = convert_fp16_to_fp8 (s[i], 0, hf8_bf8, saturate);
+    }
+}
+
+/* Run the _cvtneph_phf8 intrinsic on a _Float16 vector and abort if the
+   result differs from the scalar reference produced by CALC.  */
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+  UNION_TYPE (AVX512F_LEN, h) src;
+  unsigned char res_ref[SIZE_RES];
+  int i, sign = 1;
+
+  /* Alternating-sign inputs cycling through 2.5, 5 and 10.  */
+  for (i = 0; i < SIZE_SRC; i++, sign = -sign)
+    src.a[i] = (_Float16) (sign * (2.5 * (1 << (i % 3))));
+
+  res.x = INTRINSIC (_cvtneph_phf8) (src.x);
+  CALC (res_ref, src.a);
+
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c
new file mode 100644 (file)
index 0000000..b0f3cb0
--- /dev/null
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+
+#include "avx10-helper.h"
+#include "fp8-helper.h"
+
+#define SIZE_SRC (AVX512F_LEN / 16)
+#define SIZE_RES (AVX512F_LEN_HALF / 8)
+
+/* Scalar reference: the first SIZE_SRC result bytes are s[i] converted
+   by convert_fp16_to_fp8 (second argument 0, hf8_bf8 = 0, saturate = 1);
+   any remaining bytes up to SIZE_RES are zero.  */
+void
+CALC (unsigned char *r, _Float16 *s)
+{
+  const int hf8_bf8 = 0, saturate = 1;
+  int i;
+
+  for (i = 0; i < SIZE_RES; i++)
+    {
+      if (i >= SIZE_SRC)
+        {
+          r[i] = 0;
+          continue;
+        }
+      r[i] = convert_fp16_to_fp8 (s[i], 0, hf8_bf8, saturate);
+    }
+}
+
+/* Run the _cvtnesph_phf8 intrinsic on a _Float16 vector and abort if the
+   result differs from the scalar reference produced by CALC.  */
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN_HALF, i_b) res;
+  UNION_TYPE (AVX512F_LEN, h) src;
+  unsigned char res_ref[SIZE_RES];
+  int i, sign = 1;
+
+  /* Alternating-sign inputs cycling through 2.5, 5 and 10.  */
+  for (i = 0; i < SIZE_SRC; i++, sign = -sign)
+    src.a[i] = (_Float16) (sign * (2.5 * (1 << (i % 3))));
+
+  res.x = INTRINSIC (_cvtnesph_phf8) (src.x);
+  CALC (res_ref, src.a);
+
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c
new file mode 100644 (file)
index 0000000..015474f
--- /dev/null
@@ -0,0 +1,274 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2 -O2" } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2bf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1,a1,b1;
+volatile __m256 x2,a2,b2;
+volatile __m128h y,x128h;
+volatile __m256h y2,x256h;
+volatile __m128i x128i;
+volatile __m256i x256i;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+const void *a;
+__m128bh *b;
+__m256bh *c;
+__m128h *d;
+__m256h *e;
+
+/* Compile-only coverage for the cvtx2ps_ph intrinsics: calls the plain,
+   _mask_ and _maskz_ variants of the _mm and _mm256 forms, plus the
+   _mm256 cvtx_round2ps_ph variants with a rounding argument of 8.
+   Results are stored to the volatile globals y and y2.  */
+void extern
+avx10_2_test (void)
+{
+  y = _mm_cvtx2ps_ph (a1, b1);
+  y = _mm_mask_cvtx2ps_ph (y, m8, a1, b1);
+  y = _mm_maskz_cvtx2ps_ph (m8, a1, b1);
+
+  y2 = _mm256_cvtx2ps_ph (a2, b2);
+  y2 = _mm256_mask_cvtx2ps_ph (y2, m16, a2, b2);
+  y2 = _mm256_maskz_cvtx2ps_ph (m16, a2, b2);
+
+  y2 = _mm256_cvtx_round2ps_ph (a2, b2, 8);
+  y2 = _mm256_mask_cvtx_round2ps_ph (y2, m16, a2, b2, 8);
+  y2 = _mm256_maskz_cvtx_round2ps_ph (m16, a2, b2, 8);
+}
+
+/* Compile-only coverage for the cvtbiasph_pbf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm and _mm256 forms.  Every result is
+   stored to the volatile global x128i.  */
+void extern
+avx10_2_vcvtbiasph2bf8_test (void)
+{
+  x128i = _mm_cvtbiasph_pbf8 (x128i, x128h);
+  x128i = _mm_mask_cvtbiasph_pbf8 (x128i, m8, x128i, x128h);
+  x128i = _mm_maskz_cvtbiasph_pbf8 (m8, x128i, x128h);
+
+  x128i = _mm256_cvtbiasph_pbf8 (x256i, x256h);
+  x128i = _mm256_mask_cvtbiasph_pbf8 (x128i, m16, x256i, x256h);
+  x128i = _mm256_maskz_cvtbiasph_pbf8 (m16, x256i, x256h);
+}
+
+/* Compile-only coverage for the cvtbiassph_pbf8 intrinsics: plain,
+   _mask_ and _maskz_ variants of the _mm and _mm256 forms.  Every result
+   is stored to the volatile global x128i.  */
+void extern
+avx10_2_vcvtbiasph2bf8s_test (void)
+{
+  x128i = _mm_cvtbiassph_pbf8 (x128i, x128h);
+  x128i = _mm_mask_cvtbiassph_pbf8 (x128i, m8, x128i, x128h);
+  x128i = _mm_maskz_cvtbiassph_pbf8 (m8, x128i, x128h);
+
+  x128i = _mm256_cvtbiassph_pbf8 (x256i, x256h);
+  x128i = _mm256_mask_cvtbiassph_pbf8 (x128i, m16, x256i, x256h);
+  x128i = _mm256_maskz_cvtbiassph_pbf8 (m16, x256i, x256h);
+}
+
+/* Compile-only coverage for the cvtbiasph_phf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm and _mm256 forms.  Every result is
+   stored to the volatile global x128i.  */
+void extern
+avx10_2_vcvtbiasph2hf8_test (void)
+{
+  x128i = _mm_cvtbiasph_phf8 (x128i, x128h);
+  x128i = _mm_mask_cvtbiasph_phf8 (x128i, m8, x128i, x128h);
+  x128i = _mm_maskz_cvtbiasph_phf8 (m8, x128i, x128h);
+
+  x128i = _mm256_cvtbiasph_phf8 (x256i, x256h);
+  x128i = _mm256_mask_cvtbiasph_phf8 (x128i, m16, x256i, x256h);
+  x128i = _mm256_maskz_cvtbiasph_phf8 (m16, x256i, x256h);
+}
+
+/* Compile-only coverage for the cvtbiassph_phf8 intrinsics: plain,
+   _mask_ and _maskz_ variants of the _mm and _mm256 forms.  Every result
+   is stored to the volatile global x128i.  */
+void extern
+avx10_2_vcvtbiasph2hf8s_test (void)
+{
+  x128i = _mm_cvtbiassph_phf8 (x128i, x128h);
+  x128i = _mm_mask_cvtbiassph_phf8 (x128i, m8, x128i, x128h);
+  x128i = _mm_maskz_cvtbiassph_phf8 (m8, x128i, x128h);
+
+  x128i = _mm256_cvtbiassph_phf8 (x256i, x256h);
+  x128i = _mm256_mask_cvtbiassph_phf8 (x128i, m16, x256i, x256h);
+  x128i = _mm256_maskz_cvtbiassph_phf8 (m16, x256i, x256h);
+}
+
+/* Compile-only coverage for the cvtne2ph_pbf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm forms (stored to x128i) and the _mm256
+   forms (stored to x256i).  */
+void extern
+avx10_2_vcvtne2ph2bf8_test (void)
+{
+  x128i = _mm_cvtne2ph_pbf8 (x128h, x128h);
+  x128i = _mm_mask_cvtne2ph_pbf8 (x128i, m16, x128h, x128h);
+  x128i = _mm_maskz_cvtne2ph_pbf8 (m16, x128h, x128h);
+  x256i = _mm256_cvtne2ph_pbf8 (x256h, x256h);
+  x256i = _mm256_mask_cvtne2ph_pbf8 (x256i, m32, x256h, x256h);
+  x256i = _mm256_maskz_cvtne2ph_pbf8 (m32, x256h, x256h);
+}
+
+/* Compile-only coverage for the cvtnes2ph_pbf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm forms (stored to x128i) and the _mm256
+   forms (stored to x256i).  */
+void extern
+avx10_2_vcvtne2ph2bf8s_test (void)
+{
+  x128i = _mm_cvtnes2ph_pbf8 (x128h, x128h);
+  x128i = _mm_mask_cvtnes2ph_pbf8 (x128i, m16, x128h, x128h);
+  x128i = _mm_maskz_cvtnes2ph_pbf8 (m16, x128h, x128h);
+  x256i = _mm256_cvtnes2ph_pbf8 (x256h, x256h);
+  x256i = _mm256_mask_cvtnes2ph_pbf8 (x256i, m32, x256h, x256h);
+  x256i = _mm256_maskz_cvtnes2ph_pbf8 (m32, x256h, x256h);
+}
+
+/* Compile-only coverage for the cvtne2ph_phf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm forms (stored to x128i) and the _mm256
+   forms (stored to x256i).  */
+void extern
+avx10_2_vcvtne2ph2hf8_test (void)
+{
+  x128i = _mm_cvtne2ph_phf8 (x128h, x128h);
+  x128i = _mm_mask_cvtne2ph_phf8 (x128i, m16, x128h, x128h);
+  x128i = _mm_maskz_cvtne2ph_phf8 (m16, x128h, x128h);
+  x256i = _mm256_cvtne2ph_phf8 (x256h, x256h);
+  x256i = _mm256_mask_cvtne2ph_phf8 (x256i, m32, x256h, x256h);
+  x256i = _mm256_maskz_cvtne2ph_phf8 (m32, x256h, x256h);
+}
+
+/* Compile-only coverage for the cvtnes2ph_phf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm forms (stored to x128i) and the _mm256
+   forms (stored to x256i).  */
+void extern
+avx10_2_vcvtne2ph2hf8s_test (void)
+{
+  x128i = _mm_cvtnes2ph_phf8 (x128h, x128h);
+  x128i = _mm_mask_cvtnes2ph_phf8 (x128i, m16, x128h, x128h);
+  x128i = _mm_maskz_cvtnes2ph_phf8 (m16, x128h, x128h);
+  x256i = _mm256_cvtnes2ph_phf8 (x256h, x256h);
+  x256i = _mm256_mask_cvtnes2ph_phf8 (x256i, m32, x256h, x256h);
+  x256i = _mm256_maskz_cvtnes2ph_phf8 (m32, x256h, x256h);
+}
+
+/* Compile-only coverage for the cvthf8_ph intrinsics: plain, _mask_ and
+   _maskz_ variants of the _mm forms (stored to x128h) and the _mm256
+   forms (stored to x256h); all take x128i as the source.  */
+void extern
+avx10_2_vcvthf82ph_test (void)
+{
+  x128h = _mm_cvthf8_ph (x128i);
+  x128h = _mm_mask_cvthf8_ph (x128h, m8, x128i);
+  x128h = _mm_maskz_cvthf8_ph (m8, x128i);
+
+  x256h = _mm256_cvthf8_ph (x128i);
+  x256h = _mm256_mask_cvthf8_ph (x256h, m16, x128i);
+  x256h = _mm256_maskz_cvthf8_ph (m16, x128i);
+}
+
+/* Compile-only coverage for the cvtneph_pbf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm and _mm256 forms.  Every result is
+   stored to the volatile global x128i.  */
+void extern
+avx10_2_vcvtneph2bf8_test (void)
+{
+  x128i = _mm_cvtneph_pbf8 (x128h);
+  x128i = _mm_mask_cvtneph_pbf8 (x128i, m8, x128h);
+  x128i = _mm_maskz_cvtneph_pbf8 (m8, x128h);
+
+  x128i = _mm256_cvtneph_pbf8 (x256h);
+  x128i = _mm256_mask_cvtneph_pbf8 (x128i, m16, x256h);
+  x128i = _mm256_maskz_cvtneph_pbf8 (m16, x256h);
+}
+
+/* Compile-only coverage for the cvtnesph_pbf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm and _mm256 forms.  Every result is
+   stored to the volatile global x128i.  */
+void extern
+avx10_2_vcvtneph2bf8s_test (void)
+{
+  x128i = _mm_cvtnesph_pbf8 (x128h);
+  x128i = _mm_mask_cvtnesph_pbf8 (x128i, m8, x128h);
+  x128i = _mm_maskz_cvtnesph_pbf8 (m8, x128h);
+
+  x128i = _mm256_cvtnesph_pbf8 (x256h);
+  x128i = _mm256_mask_cvtnesph_pbf8 (x128i, m16, x256h);
+  x128i = _mm256_maskz_cvtnesph_pbf8 (m16, x256h);
+}
+
+/* Compile-only coverage for the cvtneph_phf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm and _mm256 forms.  Every result is
+   stored to the volatile global x128i.  */
+void extern
+avx10_2_vcvtneph2hf8_test (void)
+{
+  x128i = _mm_cvtneph_phf8 (x128h);
+  x128i = _mm_mask_cvtneph_phf8 (x128i, m8, x128h);
+  x128i = _mm_maskz_cvtneph_phf8 (m8, x128h);
+
+  x128i = _mm256_cvtneph_phf8 (x256h);
+  x128i = _mm256_mask_cvtneph_phf8 (x128i, m16, x256h);
+  x128i = _mm256_maskz_cvtneph_phf8 (m16, x256h);
+}
+
+/* Compile-only coverage for the cvtnesph_phf8 intrinsics: plain, _mask_
+   and _maskz_ variants of the _mm and _mm256 forms.  Every result is
+   stored to the volatile global x128i.  */
+void extern
+avx10_2_vcvtneph2hf8s_test (void)
+{
+  x128i = _mm_cvtnesph_phf8 (x128h);
+  x128i = _mm_mask_cvtnesph_phf8 (x128i, m8, x128h);
+  x128i = _mm_maskz_cvtnesph_phf8 (m8, x128h);
+
+  x128i = _mm256_cvtnesph_phf8 (x256h);
+  x128i = _mm256_mask_cvtnesph_phf8 (x128i, m16, x256h);
+  x128i = _mm256_maskz_cvtnesph_phf8 (m16, x256h);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ps2phx-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ps2phx-2.c
new file mode 100644 (file)
index 0000000..ba3a30c
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvt2ps2phx-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvt2ps2phx-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2bf8-2.c
new file mode 100644 (file)
index 0000000..b33d465
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtbiasph2bf8-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtbiasph2bf8-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2bf8s-2.c
new file mode 100644 (file)
index 0000000..dcf0d39
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtbiasph2bf8s-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtbiasph2bf8s-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2hf8-2.c
new file mode 100644 (file)
index 0000000..93b80c7
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtbiasph2hf8-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtbiasph2hf8-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtbiasph2hf8s-2.c
new file mode 100644 (file)
index 0000000..ed35bf0
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtbiasph2hf8s-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtbiasph2hf8s-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvthf82ph-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvthf82ph-2.c
new file mode 100644 (file)
index 0000000..d0d9a8d
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvthf82ph-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvthf82ph-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c
new file mode 100644 (file)
index 0000000..50948cf
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtne2ph2bf8-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtne2ph2bf8-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c
new file mode 100644 (file)
index 0000000..dda859c
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtne2ph2bf8s-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtne2ph2bf8s-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c
new file mode 100644 (file)
index 0000000..5db139f
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtne2ph2hf8-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtne2ph2hf8-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c
new file mode 100644 (file)
index 0000000..84bd9b2
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtne2ph2hf8s-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtne2ph2hf8s-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8-2.c
new file mode 100644 (file)
index 0000000..96deb4c
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtneph2bf8-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtneph2bf8-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c
new file mode 100644 (file)
index 0000000..ea34459
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtneph2bf8s-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtneph2bf8s-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8-2.c
new file mode 100644 (file)
index 0000000..e43c608
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtneph2hf8-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtneph2hf8-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c
new file mode 100644 (file)
index 0000000..109df51
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtneph2hf8s-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvtneph2hf8s-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/fp8-helper.h b/gcc/testsuite/gcc.target/i386/fp8-helper.h
new file mode 100644 (file)
index 0000000..b486db5
--- /dev/null
@@ -0,0 +1,135 @@
+#ifndef FP8_HELPER_UNCLUDED
+#define FP8_HELPER_UNCLUDED
+
+/* Overlay of a _Float16 and an unsigned short, used by the helpers in
+   this header to read and to build the raw bits of a half-precision
+   value.  */
+typedef union 
+{
+  _Float16 f16;
+  unsigned short u16;
+} Float16Union;
+
+/* Reference encoder from half precision to an 8-bit format made of 1 sign
+   bit, 4 exponent bits (bias 7) and 3 mantissa bits.
+
+   X is the value to encode.  B is a bias byte: when it is nonzero, B >> 1
+   is added to the raw bits of X before the exponent and mantissa of a
+   normal result are extracted; when it is zero the low mantissa bits are
+   simply dropped.  S selects the overflow result: nonzero yields mantissa
+   0x6, zero yields mantissa 0x7, both with an all-ones exponent.
+
+   The range classification below looks only at the unbiased exponent and
+   mantissa of X; the biased pattern is used for normal results alone.
+   Returns the encoded byte.  */
+static unsigned char
+convert_fp16_to_hf8 (_Float16 x, unsigned char b, int s)
+{
+  Float16Union ux = { .f16 = x };
+  const unsigned short fp16_bias = 15, hf8_bias = 7;
+  /* Move the fp16 sign bit (bit 15) down to bit 7 of the result.  */
+  unsigned short sign = (ux.u16 & 0x8000) >> 8;
+  unsigned short e_fp16 = (ux.u16 & 0x7c00) >> 10;
+  unsigned short m_fp16 = ux.u16 & 0x03ff;
+
+  /* Apply the bias, if any, to the raw bits.  E and M are taken from the
+     biased pattern; M keeps the top 3 of the 10 mantissa bits.  */
+  unsigned short x_bias = b ? ux.u16 + (b >> 1) : ux.u16;
+  unsigned short e = (x_bias & 0x7c00) >> 10;
+  unsigned short m = (x_bias & 0x03ff) >> 7;
+
+  if (e_fp16 == 0x1f)
+  {
+    /* Special value: NaN or Infinity.  Both map to the pattern with all
+       exponent and mantissa bits set.  */
+    return (0xf << 3) | 0x7 | sign;
+  }
+  else if ((e_fp16 > (fp16_bias - hf8_bias + 15))
+          || ((e_fp16 == (fp16_bias - hf8_bias + 15))
+          && (m_fp16 > 0x0300)))
+  {
+    /* Overflow: Return Max or NaN.  Taken when the fp16 exponent field is
+       above 23, or equals 23 with a mantissa field above 0x0300.  */
+    return (0xf << 3) | (s ? 0x6 : 0x7) | sign;
+  }
+  else if (e_fp16 < fp16_bias - hf8_bias - 3)
+  {
+    /* Value too small: Return zero (with the sign of X).  */
+    return sign;
+  }
+  else if (e_fp16 <= fp16_bias - hf8_bias)
+  {
+    /* Denormalized value: Adjust mantissa.
+       NOTE(review): the adjusted mantissa is computed but never used;
+       this branch returns only the sign, i.e. a signed zero.  Confirm
+       against the instruction specification whether the mantissa is
+       meant to be part of the result.  */
+    m = ((m_fp16 | 0x0400) >> ((fp16_bias - hf8_bias) + 1 - e_fp16))
+        | (((m_fp16 & 0x007f) + 0x007f) >> 7);
+    return sign;
+  }
+  else
+  {
+    /* Normal value: Adjust exponent and mantissa.  */
+    e -= (fp16_bias - hf8_bias);
+    return (e << 3) | m | sign;
+  }
+}
+
+/* Reference encoder from half precision to an 8-bit format that keeps
+   the upper byte of the 16-bit pattern.
+
+   X is the value to encode.  B, when nonzero, is the increment added to
+   the raw bits before the upper byte is taken; when zero, the increment
+   is 0x7F plus the lowest bit of the upper byte.  With S nonzero, a
+   result whose low seven bits equal 0x7C is replaced by 0x7B with the
+   sign bit kept.  Returns the encoded byte.  */
+static unsigned char
+convert_fp16_to_bf8 (_Float16 x, unsigned char b, int s)
+{
+  Float16Union bits = { .f16 = x };
+  unsigned short upper = (bits.u16 >> 8) & 0xFF;
+
+  /* NaN and infinity keep their upper byte unrounded; NaN additionally
+     gets bit 1 set.  */
+  if (__builtin_isnan (x))
+    return upper | 0x02;
+  if (__builtin_isinf (x))
+    return upper;
+
+  unsigned short increment = b ? b : (upper & 0x1) + 0x7F;
+  unsigned short rounded = bits.u16 + increment;
+  unsigned short encoded = (rounded >> 8) & 0xFF;
+
+  if (s && (encoded & 0x7F) == 0x7C)
+    encoded = (encoded & 0x80) | 0x7B;
+
+  return encoded;
+}
+
+/* Encode X with one of the two 8-bit encoders: a nonzero Y selects
+   convert_fp16_to_bf8, zero selects convert_fp16_to_hf8.  X, B and S are
+   forwarded unchanged and the encoder's result is returned.  */
+static unsigned char
+convert_fp16_to_fp8 (_Float16 x, unsigned char b, int y, int s)
+{
+  if (y)
+    return convert_fp16_to_bf8 (x, b, s);
+
+  return convert_fp16_to_hf8 (x, b, s);
+}
+
+/* Widen the 8-bit encoding X to half precision by placing it in the
+   upper byte of the 16-bit pattern; the lower byte is zero.  */
+static _Float16
+convert_bf8_to_fp16 (unsigned char x)
+{
+  Float16Union widened;
+
+  widened.u16 = (unsigned short) x << 8;
+  return widened.f16;
+}
+
+/* Decode the 8-bit value X, laid out as 1 sign bit, 4 exponent bits
+   (bias 7) and 3 mantissa bits, into a half-precision value.  */
+static _Float16
+convert_hf8_to_fp16 (unsigned char x)
+{
+  const unsigned short fp16_bias = 15, hf8_bias = 7;
+  unsigned short sign = (x & 0x80) << 8;
+  unsigned short exp8 = (x & 0x78) >> 3;
+  unsigned short frac = x & 0x07;
+  unsigned short exp16 = exp8 + fp16_bias - hf8_bias;
+  Float16Union out;
+
+  if (exp8 == 0)
+  {
+    if (frac == 0)
+    {
+      /* Zero keeps an all-zero exponent field.  */
+      exp16 = 0;
+    }
+    else
+    {
+      /* Denormal input: shift the leading set bit out of the 3-bit
+         mantissa and lower the exponent by the same distance, giving a
+         normal fp16 number.  */
+      unsigned short shift = (frac > 0x3) ? 0 : (frac > 0x1) ? 1 : 2;
+
+      exp16 -= shift;
+      frac = (frac << (shift + 1)) & 0x07;
+    }
+  }
+  else if (exp8 == 0xf && frac == 0x7)
+  {
+    /* All exponent and mantissa bits set: produce an all-ones fp16
+       exponent with only the top kept mantissa bit set.  */
+    exp16 = 0x1f;
+    frac = 0x4;
+  }
+
+  /* The 3 mantissa bits become the top 3 of the 10 fp16 mantissa bits.  */
+  out.u16 = (exp16 << 10) | (frac << 7) | sign;
+  return out.f16;
+}
+
+#endif
index 6b1c9e545f0026ea7c622497e31f751583361f7c..a5ba3decc97d8cd7d7fae10f355dca81a6437086 100644 (file)
 #define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E)
 #define __builtin_ia32_mpsadbw256_mask(A, B, C, D, E) __builtin_ia32_mpsadbw256_mask (A, B, 1, D, E)
 
+/* avx10_2convertintrin.h */
+#define __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, 8)
+
+/* avx10_2-512convertintrin.h */
+#define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
+
 #include <x86intrin.h>
index 6dfdaa96c76cc208a0052056ac06d8f0e7073bfd..9253e5eb905566ed185cbfe38475135b14d11d42 100644 (file)
@@ -1382,3 +1382,9 @@ test_3 (_mm_maskz_mpsadbw_epu8, __m128i, __mmask8, __m128i, __m128i, 1)
 test_3 (_mm256_maskz_mpsadbw_epu8, __m256i, __mmask16, __m256i, __m256i, 1)
 test_4 (_mm_mask_mpsadbw_epu8, __m128i, __m128i, __mmask8, __m128i, __m128i, 1)
 test_4 (_mm256_mask_mpsadbw_epu8, __m256i, __m256i, __mmask16, __m256i, __m256i, 1)
+
+/* avx10_2convertintrin.h */
+test_2 (_mm256_cvtx_round2ps_ph, __m256h, __m256, __m256, 4)
+
+/* avx10_2-512convertintrin.h */
+test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4)
index 102b6b878c878f0853c1a91fe63a96159c46f336..d57bbc41a49b1fcfd4f6c978cd5c8127ce08f23e 100644 (file)
@@ -1421,3 +1421,9 @@ test_3 (_mm_maskz_mpsadbw_epu8, __m128i, __mmask8, __m128i, __m128i, 1)
 test_3 (_mm256_maskz_mpsadbw_epu8, __m256i, __mmask16, __m256i, __m256i, 1)
 test_4 (_mm_mask_mpsadbw_epu8, __m128i, __m128i, __mmask8, __m128i, __m128i, 1)
 test_4 (_mm256_mask_mpsadbw_epu8, __m256i, __m256i, __mmask16, __m256i, __m256i, 1)
+
+/* avx10_2convertintrin.h */
+test_2 (_mm256_cvtx_round2ps_ph, __m256h, __m256, __m256, 4)
+
+/* avx10_2-512convertintrin.h */
+test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4)
index 962b9507283b8f2236387dbdd0332f1933a4c734..438974cb0c64df0d68f0c22e63a1babf42b9ebe9 100644 (file)
 #define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E)
 #define __builtin_ia32_mpsadbw256_mask(A, B, C, D, E) __builtin_ia32_mpsadbw256_mask (A, B, 1, D, E)
 
+/* avx10_2convertintrin.h */
+#define __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, 8)
+
+/* avx10_2-512convertintrin.h */
+#define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
+
 #pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
 
 #include <x86intrin.h>