#endif /* __OPTIMIZE__ */
+/* Intrinsics vcvtsh2si, vcvtsh2usi. */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_i32 (__m128h __A)
+{
+ return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_u32 (__m128h __A)
+{
+ return (unsigned)
+ __builtin_ia32_vcvtsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_i32 (__m128h __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_u32 (__m128h __A, const int __R)
+{
+ return (unsigned) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
+}
+
+#else
+#define _mm_cvt_roundsh_i32(A, B) \
+ ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
+#define _mm_cvt_roundsh_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+
+#ifdef __x86_64__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_i64 (__m128h __A)
+{
+ return (long long)
+ __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_u64 (__m128h __A)
+{
+ return (unsigned long long)
+ __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_i64 (__m128h __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_u64 (__m128h __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
+}
+
+#else
+#define _mm_cvt_roundsh_i64(A, B) \
+ ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
+#define _mm_cvt_roundsh_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+#endif /* __x86_64__ */
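As a quick illustration of the new half-to-integer scalar conversions above, here is a minimal usage sketch (illustration only, not part of the patch); it assumes a translation unit that includes <immintrin.h> and is compiled with -mavx512fp16, and the function name is made up for the example.

#include <immintrin.h>

/* Convert the low _Float16 element of X to 32-bit integers, once with
   the current MXCSR rounding mode and once with explicit embedded
   rounding (round-to-nearest, exceptions suppressed, i.e. the value 8
   used throughout the tests below).  */
int
cvtsh_example (__m128h x)
{
  int i = _mm_cvtsh_i32 (x);
  unsigned u = _mm_cvt_roundsh_u32 (x, _MM_FROUND_TO_NEAREST_INT
				       | _MM_FROUND_NO_EXC);
  return i + (int) u;
}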
+
+/* Intrinsics vcvtsi2sh, vcvtusi2sh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti32_sh (__m128h __A, int __B)
+{
+ return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_sh (__m128h __A, unsigned int __B)
+{
+ return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
+{
+ return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
+{
+ return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
+}
+
+#else
+#define _mm_cvt_roundi32_sh(A, B, C) \
+ (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
+#define _mm_cvt_roundu32_sh(A, B, C) \
+ (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
+
+#endif /* __OPTIMIZE__ */
+
+#ifdef __x86_64__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti64_sh (__m128h __A, long long __B)
+{
+ return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_sh (__m128h __A, unsigned long long __B)
+{
+ return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
+{
+ return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
+{
+ return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
+}
+
+#else
+#define _mm_cvt_roundi64_sh(A, B, C) \
+ (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
+#define _mm_cvt_roundu64_sh(A, B, C) \
+ (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
+
+#endif /* __OPTIMIZE__ */
+#endif /* __x86_64__ */
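And the reverse direction, a minimal sketch of the integer-to-half conversions (again illustration only, not part of the patch, same assumptions as above):

#include <immintrin.h>

/* Write S into element 0 of SRC (elements 1..7 are preserved), then
   overwrite element 0 again with U converted under explicit
   round-to-nearest with exceptions suppressed.  */
__m128h
cvtsi2sh_example (__m128h src, int s, unsigned u)
{
  __m128h a = _mm_cvti32_sh (src, s);
  return _mm_cvt_roundu32_sh (a, u, _MM_FROUND_TO_NEAREST_INT
				    | _MM_FROUND_NO_EXC);
}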
+
#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
DEF_FUNCTION_TYPE (QI, V8HF, INT, UQI)
DEF_FUNCTION_TYPE (HI, V16HF, INT, UHI)
DEF_FUNCTION_TYPE (SI, V32HF, INT, USI)
+DEF_FUNCTION_TYPE (INT, V8HF, INT)
+DEF_FUNCTION_TYPE (INT64, V8HF, INT)
+DEF_FUNCTION_TYPE (UINT, V8HF, INT)
+DEF_FUNCTION_TYPE (UINT64, V8HF, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF)
DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI)
DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, V8HF, INT, INT)
+DEF_FUNCTION_TYPE (V8HF, V8HF, INT64, INT)
+DEF_FUNCTION_TYPE (V8HF, V8HF, UINT, INT)
+DEF_FUNCTION_TYPE (V8HF, V8HF, UINT64, INT)
DEF_FUNCTION_TYPE (V2DI, V8HF, V2DI, UQI)
DEF_FUNCTION_TYPE (V4DI, V8HF, V4DI, UQI)
DEF_FUNCTION_TYPE (V4SI, V8HF, V4SI, UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usiq_round, "__builtin_ia32_vcvtsh2usi64_round", IX86_BUILTIN_VCVTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__builtin_ia32_vcvtsi2sh32_round", IX86_BUILTIN_VCVTSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT)
BDESC_END (ROUND_ARGS, MULTI_ARG)
{
case UINT64_FTYPE_V2DF_INT:
case UINT64_FTYPE_V4SF_INT:
+ case UINT64_FTYPE_V8HF_INT:
case UINT_FTYPE_V2DF_INT:
case UINT_FTYPE_V4SF_INT:
+ case UINT_FTYPE_V8HF_INT:
case INT64_FTYPE_V2DF_INT:
case INT64_FTYPE_V4SF_INT:
+ case INT64_FTYPE_V8HF_INT:
case INT_FTYPE_V2DF_INT:
case INT_FTYPE_V4SF_INT:
+ case INT_FTYPE_V8HF_INT:
nargs = 2;
break;
case V32HF_FTYPE_V32HF_V32HF_INT:
case V8HF_FTYPE_V8HF_V8HF_INT:
+ case V8HF_FTYPE_V8HF_INT_INT:
+ case V8HF_FTYPE_V8HF_UINT_INT:
+ case V8HF_FTYPE_V8HF_INT64_INT:
+ case V8HF_FTYPE_V8HF_UINT64_INT:
case V4SF_FTYPE_V4SF_UINT_INT:
case V4SF_FTYPE_V4SF_UINT64_INT:
case V2DF_FTYPE_V2DF_UINT64_INT:
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
+(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48
+ [(vec_select:HF
+ (match_operand:V8HF 1 "register_operand" "v")
+ (parallel [(const_int 0)]))]
+ UNSPEC_US_FIX_NOTRUNC))]
+ "TARGET_AVX512FP16"
+ "vcvtsh2<sseintconvertsignprefix>si\t{<round_op2>%1, %0|%0, %1<round_op2>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
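+;; Variant of the pattern above that converts an HFmode value directly
+;; (from a register or from memory) instead of selecting element 0 of a
+;; V8HF operand, and without an embedded-rounding operand.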
+(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2"
+ [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+ (unspec:SWI48
+ [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
+ UNSPEC_US_FIX_NOTRUNC))]
+ "TARGET_AVX512FP16"
+ "vcvtsh2<sseintconvertsignprefix>si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_mode_attr sseicvtsuffix
+ [(SI "l") (DI "q")])
+(define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>"
+ [(set (match_operand:V8HF 0 "register_operand" "=v")
+ (vec_merge:V8HF
+ (vec_duplicate:V8HF
+ (any_float:HF
+ (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
+ (match_operand:V8HF 1 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512FP16"
+ "vcvt<floatsuffix>si2sh{<sseicvtsuffix>}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "HF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
#define __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8)
+#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
#define __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8)
+#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8)
test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
+test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
+test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
+#ifdef __x86_64__
+test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
+test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
+test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
+test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
+#endif
test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1)
test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8)
test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
+test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8)
+test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8)
test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
+test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
+test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
+#ifdef __x86_64__
+test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
+test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
+test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
+test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
+#endif
test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1)
test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8)
test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
+test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8)
+test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
#define __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8)
+#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)