(__mmask16) __U,
__R);
}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
#else
#define _mm512_ipcvt_roundph_epi16(A, R) \
((__m512i) \
(_mm512_setzero_si512 ()), \
(__mmask16) (U), \
(R)))
+
+#define _mm512_cvtts_roundpd_epi32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
#endif
#ifdef __DISABLE_AVX10_2_512__
(__mmask8) __U);
}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttspd_epi32 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dqs128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttspd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dqs128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttspd_epi32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dqs128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttspd_epi64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qqs128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttspd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qqs128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttspd_epi64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qqs128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttspd_epu32 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udqs128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttspd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udqs128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttspd_epu32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udqs128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttspd_epu64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqqs128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttspd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqqs128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttspd_epu64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqqs128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsps_epi32 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dqs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttsps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dqs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttsps_epi32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dqs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsps_epi64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qqs128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttsps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qqs128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttsps_epi64 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qqs128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsps_epu32 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udqs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttsps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udqs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttsps_epu32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udqs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsps_epu64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqqs128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttsps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqqs128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttsps_epu64 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqqs128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
(__mmask8) __U,
__R);
}
-#else
-#define _mm256_ipcvt_roundph_epi16(A, R) \
- ((__m256i) \
- __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \
- (__v16hi) \
- (_mm256_undefined_si256 ()), \
- (__mmask16) (-1), \
- (R)))
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundpd_epi32 (__m256d __A, const int __R)
+{
+ return
+ (__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1,
+ __R);
+}
+
-#define _mm256_mask_ipcvt_roundph_epi16(W, U, A, R) \
- ((__m256i) __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \
- (__v16hi) (W), \
- (__mmask16) (U), \
- (R)))
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A,
+ const int __R)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
-#define _mm256_maskz_ipcvt_roundph_epi16(U, A, R) \
- ((__m256i) \
- __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \
- (__v16hi) \
- (_mm256_setzero_si256 ()), \
- (__mmask16) (U), \
- (R)))
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m256d __A, const int __R)
+{
+ return
+ (__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U,
+ __R);
+}
+
-#define _mm256_ipcvt_roundph_epu16(A, R) \
- ((__m256i) \
- __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \
- (__v16hi) \
- (_mm256_undefined_si256 ()), \
- (__mmask16) (-1), \
- (R)))
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundpd_epi64 (__m256d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) __A,
+ (__v4di)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
-#define _mm256_mask_ipcvt_roundph_epu16(W, U, A, R) \
- ((__m256i) __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \
- (__v16hi) (W), \
- (__mmask16) (U), \
- (R)))
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
-#define _mm256_maskz_ipcvt_roundph_epu16(U, A, R) \
- ((__m256i) \
- __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \
- (__v16hi) \
- (_mm256_setzero_si256 ()), \
- (__mmask16) (U), \
- (R)))
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m256d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundpd_epu32 (__m256d __A, const int __R)
+{
+ return
+ (__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A,
+ const int __R)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m256d __A, const int __R)
+{
+ return
+ (__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundpd_epu64 (__m256d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) __A,
+ (__v4di)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m256d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundps_epi32 (__m256 __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundps_epi64 (__m128 __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) __A,
+ (__v4di)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundps_epi64 (__m256i __W, __mmask8 __U, __m128 __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m128 __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundps_epu32 (__m256 __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundps_epu32 (__m256i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundps_epu64 (__m128 __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) __A,
+ (__v4di)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundps_epu64 (__m256i __W, __mmask8 __U, __m128 __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m128 __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundsd_epi32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_cvttsd2sis32_round ((__v2df) __A,
+ __R);
+}
+
+extern __inline unsigned int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundsd_epu32 (__m128d __A, const int __R)
+{
+ return (unsigned int) __builtin_ia32_cvttsd2usis32_round ((__v2df) __A,
+ __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundss_epi32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_cvttss2sis32_round ((__v4sf) __A,
+ __R);
+}
+
+extern __inline unsigned int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundss_epu32 (__m128 __A, const int __R)
+{
+ return (unsigned int) __builtin_ia32_cvttss2usis32_round ((__v4sf) __A,
+ __R);
+}
+#else
+
+#define _mm256_ipcvt_roundph_epi16(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \
+ (__v16hi) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm256_mask_ipcvt_roundph_epi16(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \
+ (__v16hi) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm256_maskz_ipcvt_roundph_epi16(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \
+ (__v16hi) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm256_ipcvt_roundph_epu16(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \
+ (__v16hi) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm256_mask_ipcvt_roundph_epu16(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \
+ (__v16hi) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm256_maskz_ipcvt_roundph_epu16(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \
+ (__v16hi) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask16) (U), \
+ (R)))
#define _mm256_ipcvt_roundps_epi32(A, R) \
((__m256i) \
(_mm256_setzero_si256 ()), \
(__mmask8) (U), \
(R)))
+
+#define _mm256_cvtts_roundpd_epi32(A, R) \
+ ((__m128i) \
+ __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) (A), \
+ (__v4si) \
+ (_mm_undefined_si128 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) (A), \
+ (__v4si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) (A), \
+ (__v4si) \
+ (_mm_setzero_si128 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundpd_epi64(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) (A), \
+ (__v4di) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) (A), \
+ (__v4di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) (A), \
+ (__v4di) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundpd_epu32(A, R) \
+ ((__m128i) \
+ __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) (A), \
+ (__v4si) \
+ (_mm_undefined_si128 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) (A), \
+ (__v4si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m128i) \
+ __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) (A), \
+ (__v4si) (_mm_setzero_si128 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundpd_epu64(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) (A), \
+ (__v4di) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) (A), \
+ (__v4di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) (A), \
+ (__v4di) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundps_epi32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundps_epi64(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) (A), \
+ (__v4di) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) (A), \
+ (__v4di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) (A), \
+ (__v4di) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundps_epu32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundps_epu64(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) (A), \
+ (__v4di) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) (A), \
+ (__v4di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) (A), \
+ (__v4di) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm_cvtts_roundsd_epi32(A, R) \
+ ((int) __builtin_ia32_cvttsd2sis32_round ((__v2df) (A), \
+ (R)))
+
+#define _mm_cvtts_roundsd_epu32(A, R) \
+ ((unsigned int) __builtin_ia32_cvttsd2usis32_round ((__v2df) (A), \
+ (R)))
+
+#define _mm_cvtts_roundss_epi32(A, R) \
+ ((int) __builtin_ia32_cvttss2sis32_round ((__v4sf) (A), \
+ (R)))
+
+#define _mm_cvtts_roundss_epu32(A, R) \
+ ((unsigned int) __builtin_ia32_cvttss2usis32_round ((__v4sf) (A), \
+ (R)))
+#endif
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundsd_epi64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_cvttsd2sis64_round ((__v2df) __A,
+ __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundsd_epu64 (__m128d __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_cvttsd2usis64_round ((__v2df) __A,
+ __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundss_epi64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_cvttss2sis64_round ((__v4sf) __A,
+ __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundss_epu64 (__m128 __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_cvttss2usis64_round ((__v4sf) __A,
+ __R);
+}
+#else
+
+#define _mm_cvtts_roundsd_epi64(A, R) \
+ ((long long) __builtin_ia32_cvttsd2sis64_round ((__v2df) (A), \
+ (R)))
+
+#define _mm_cvtts_roundsd_epu64(A, R) \
+ ((unsigned long long) __builtin_ia32_cvttsd2usis64_round ((__v2df) (A), \
+ (R)))
+
+#define _mm_cvtts_roundss_epi64(A, R) \
+ ((long long) __builtin_ia32_cvttss2sis64_round ((__v4sf) (A), \
+ (R)))
+
+#define _mm_cvtts_roundss_epu64(A, R) \
+ ((unsigned long long) __builtin_ia32_cvttss2usis64_round ((__v4sf) (A), \
+ (R)))
#endif
+#endif /* __x86_64__ */
#ifdef __DISABLE_AVX10_2_256__
#undef __DISABLE_AVX10_2_256__
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttph2iubsv8hf_mask, "__builtin_ia32_cvttph2iubs128_mask", IX86_BUILTIN_CVTTPH2IUBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2ibsv4sf_mask, "__builtin_ia32_cvttps2ibs128_mask", IX86_BUILTIN_CVTTPS2IBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2iubsv4sf_mask, "__builtin_ia32_cvttps2iubs128_mask", IX86_BUILTIN_CVTTPS2IUBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2dqsv2df_mask, "__builtin_ia32_cvttpd2dqs128_mask", IX86_BUILTIN_VCVTTPD2DQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2qqsv2df_mask, "__builtin_ia32_cvttpd2qqs128_mask", IX86_BUILTIN_VCVTTPD2QQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2udqsv2df_mask, "__builtin_ia32_cvttpd2udqs128_mask", IX86_BUILTIN_VCVTTPD2UDQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2uqqsv2df_mask, "__builtin_ia32_cvttpd2uqqs128_mask", IX86_BUILTIN_VCVTTPD2UQQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2dqsv4sf_mask, "__builtin_ia32_cvttps2dqs128_mask", IX86_BUILTIN_VCVTTPS2DQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2qqsv2di_mask, "__builtin_ia32_cvttps2qqs128_mask", IX86_BUILTIN_VCVTTPS2QQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2udqsv4sf_mask, "__builtin_ia32_cvttps2udqs128_mask", IX86_BUILTIN_VCVTTPS2UDQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2uqqsv2di_mask, "__builtin_ia32_cvttps2uqqs128_mask", IX86_BUILTIN_VCVTTPS2UQQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2ibsv16sf_mask_round, "__builtin_ia32_cvttps2ibs512_mask_round", IX86_BUILTIN_CVTTPS2IBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2iubsv8sf_mask_round, "__builtin_ia32_cvttps2iubs256_mask_round", IX86_BUILTIN_CVTTPS2IUBS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2iubsv16sf_mask_round, "__builtin_ia32_cvttps2iubs512_mask_round", IX86_BUILTIN_CVTTPS2IUBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2dqsv4df_mask_round, "__builtin_ia32_cvttpd2dqs256_mask_round", IX86_BUILTIN_VCVTTPD2DQS256_MASK_ROUND, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2dqsv8df_mask_round, "__builtin_ia32_cvttpd2dqs512_mask_round", IX86_BUILTIN_VCVTTPD2DQS512_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2qqsv4df_mask_round, "__builtin_ia32_cvttpd2qqs256_mask_round", IX86_BUILTIN_VCVTTPD2QQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2qqsv8df_mask_round, "__builtin_ia32_cvttpd2qqs512_mask_round", IX86_BUILTIN_VCVTTPD2QQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2udqsv4df_mask_round, "__builtin_ia32_cvttpd2udqs256_mask_round", IX86_BUILTIN_VCVTTPD2UDQS256_MASK_ROUND, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2udqsv8df_mask_round, "__builtin_ia32_cvttpd2udqs512_mask_round", IX86_BUILTIN_VCVTTPD2UDQS512_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2uqqsv4df_mask_round, "__builtin_ia32_cvttpd2uqqs256_mask_round", IX86_BUILTIN_VCVTTPD2UQQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2uqqsv8df_mask_round, "__builtin_ia32_cvttpd2uqqs512_mask_round", IX86_BUILTIN_VCVTTPD2UQQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2dqsv8sf_mask_round, "__builtin_ia32_cvttps2dqs256_mask_round", IX86_BUILTIN_VCVTTPS2DQS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2dqsv16sf_mask_round, "__builtin_ia32_cvttps2dqs512_mask_round", IX86_BUILTIN_VCVTTPS2DQS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2qqsv4di_mask_round, "__builtin_ia32_cvttps2qqs256_mask_round", IX86_BUILTIN_VCVTTPS2QQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2qqsv8di_mask_round, "__builtin_ia32_cvttps2qqs512_mask_round", IX86_BUILTIN_VCVTTPS2QQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2udqsv8sf_mask_round, "__builtin_ia32_cvttps2udqs256_mask_round", IX86_BUILTIN_VCVTTPS2UDQS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2udqsv16sf_mask_round, "__builtin_ia32_cvttps2udqs512_mask_round", IX86_BUILTIN_VCVTTPS2UDQS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2uqqsv4di_mask_round, "__builtin_ia32_cvttps2uqqs256_mask_round", IX86_BUILTIN_VCVTTPS2UQQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2uqqsv8di_mask_round, "__builtin_ia32_cvttps2uqqs512_mask_round", IX86_BUILTIN_VCVTTPS2UQQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2sissi_round, "__builtin_ia32_cvttsd2sis32_round", IX86_BUILTIN_VCVTTSD2SIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V2DF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2sisdi_round, "__builtin_ia32_cvttsd2sis64_round", IX86_BUILTIN_VCVTTSD2SIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V2DF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2usissi_round, "__builtin_ia32_cvttsd2usis32_round", IX86_BUILTIN_VCVTTSD2USIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V2DF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2usisdi_round, "__builtin_ia32_cvttsd2usis64_round", IX86_BUILTIN_VCVTTSD2USIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V2DF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2sissi_round, "__builtin_ia32_cvttss2sis32_round", IX86_BUILTIN_VCVTTSS2SIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V4SF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2sisdi_round, "__builtin_ia32_cvttss2sis64_round", IX86_BUILTIN_VCVTTSS2SIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usissi_round, "__builtin_ia32_cvttss2usis32_round", IX86_BUILTIN_VCVTTSS2USIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V4SF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usisdi_round, "__builtin_ia32_cvttss2usis64_round", IX86_BUILTIN_VCVTTSS2USIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
BDESC_END (ROUND_ARGS, MULTI_ARG)
UNSPEC_VCVTTPH2IUBS
UNSPEC_VCVTTPS2IBS
UNSPEC_VCVTTPS2IUBS
+ UNSPEC_SFIX_SATURATION
+ UNSPEC_UFIX_SATURATION
])
(define_c_enum "unspecv" [
(V4DF "TARGET_AVX512DQ && TARGET_AVX512VL")
(V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")])
+(define_mode_iterator VF1_VF2_AVX10_2
+ [(V16SF "TARGET_AVX10_2_512") V8SF V4SF
+ (V8DF "TARGET_AVX10_2_512") V4DF V2DF])
+
(define_mode_iterator VFH
[(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(define_mode_iterator VF2
[(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
+(define_mode_iterator VF2_AVX10_2
+ [(V8DF "TARGET_AVX10_2_512") V4DF V2DF])
+
;; All DFmode & HFmode vector float modes
(define_mode_iterator VF2H
[(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
(define_mode_iterator VI8
[(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
+(define_mode_iterator VI8_AVX10_2
+ [(V8DI "TARGET_AVX10_2_512") V4DI V2DI])
+
(define_mode_iterator VI8_FVL
[(V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI (V2DI "TARGET_AVX512VL")])
(UNSPEC_VCVTPS2IBS "")
(UNSPEC_VCVTPS2IUBS "u")
(UNSPEC_VCVTTPS2IBS "")
- (UNSPEC_VCVTTPS2IUBS "u")])
+ (UNSPEC_VCVTTPS2IUBS "u")
+ (UNSPEC_SFIX_SATURATION "")
+ (UNSPEC_UFIX_SATURATION "u")])
(define_int_attr sat_cvt_trunc_prefix
[(UNSPEC_VCVTNEBF162IBS "")
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+
+(define_int_iterator UNSPEC_SAT_CVT_DS_SIGN_ITER
+ [UNSPEC_SFIX_SATURATION
+ UNSPEC_UFIX_SATURATION])
+
+(define_mode_attr pd2dqssuff
+ [(V16SF "") (V8SF "") (V4SF "")
+ (V8DF "") (V4DF "{y}") (V2DF "{x}")])
+
+(define_insn "avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:<VEC_GATHER_IDXSI> 0 "register_operand" "=v")
+ (unspec:<VEC_GATHER_IDXSI>
+ [(match_operand:VF1_VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>"
+ "vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<pd2dqssuff>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:<VEC_GATHER_IDXDI> 0 "register_operand" "=v")
+ (unspec:<VEC_GATHER_IDXDI>
+ [(match_operand:VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>"
+ "vcvttpd2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VI8_AVX10_2 0 "register_operand" "=v")
+ (unspec:VI8_AVX10_2
+ [(match_operand:<vpckfloat_temp_mode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>"
+ "vcvttps2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_vcvttsd2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48
+ [(vec_select:DF
+ (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256"
+ "vcvttsd2<sat_cvt_sign_prefix>sis\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx10_2_vcvttss2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48
+ [(vec_select:SF
+ (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256"
+ "vcvttss2<sat_cvt_sign_prefix>sis\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
#define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8)
/* avx10_2satcvtintrin.h */
#define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8)
+#define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8)
+#define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8)
+#define __builtin_ia32_cvttss2usis32_round(A, B) __builtin_ia32_cvttss2usis32_round(A, 8)
+#ifdef __x86_64__
+#define __builtin_ia32_cvttsd2sis64_round(A, B) __builtin_ia32_cvttsd2sis64_round(A, 8)
+#define __builtin_ia32_cvttsd2usis64_round(A, B) __builtin_ia32_cvttsd2usis64_round(A, 8)
+#define __builtin_ia32_cvttss2sis64_round(A, B) __builtin_ia32_cvttss2sis64_round(A, 8)
+#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
+#endif
#include <wmmintrin.h>
#include <immintrin.h>
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
+volatile __m256 hx;
+volatile __m256i hxi;
volatile __m512 x;
volatile __m512h xh;
volatile __m512i xi;
+volatile __m512d xd;
volatile __m512bh xbh;
volatile __mmask8 m8;
volatile __mmask16 m16;
xi = _mm512_ipcvttnebf16_epu16 (xbh);
xi = _mm512_mask_ipcvttnebf16_epu16 (xi, m32, xbh);
xi = _mm512_maskz_ipcvttnebf16_epu16 (m32, xbh);
+
+ hxi = _mm512_cvtts_roundpd_epi32 (xd, 8);
+ hxi = _mm512_mask_cvtts_roundpd_epi32 (hxi, m8, xd, 8);
+ hxi = _mm512_maskz_cvtts_roundpd_epi32 (m8, xd, 8);
+
+ xi = _mm512_cvtts_roundpd_epi64 (xd, 8);
+ xi = _mm512_mask_cvtts_roundpd_epi64 (xi, m8, xd, 8);
+ xi = _mm512_maskz_cvtts_roundpd_epi64 (m8, xd, 8);
+
+ hxi = _mm512_cvtts_roundpd_epu32 (xd, 8);
+ hxi = _mm512_mask_cvtts_roundpd_epu32 (hxi, m8, xd, 8);
+ hxi = _mm512_maskz_cvtts_roundpd_epu32 (m8, xd, 8);
+
+ xi = _mm512_cvtts_roundpd_epu64 (xd, 8);
+ xi = _mm512_mask_cvtts_roundpd_epu64 (xi, m8, xd, 8);
+ xi = _mm512_maskz_cvtts_roundpd_epu64 (m8, xd, 8);
+
+ xi = _mm512_cvtts_roundps_epi32 (x, 8);
+ xi = _mm512_mask_cvtts_roundps_epi32 (xi, m16, x, 8);
+ xi = _mm512_maskz_cvtts_roundps_epi32 (m16, x, 8);
+
+ xi = _mm512_cvtts_roundps_epi64 (hx, 8);
+ xi = _mm512_mask_cvtts_roundps_epi64 (xi, m8, hx, 8);
+ xi = _mm512_maskz_cvtts_roundps_epi64 (m8, hx, 8);
+
+ xi = _mm512_cvtts_roundps_epu32 (x, 8);
+ xi = _mm512_mask_cvtts_roundps_epu32 (xi, m16, x, 8);
+ xi = _mm512_maskz_cvtts_roundps_epu32 (m16, x, 8);
+
+ xi = _mm512_cvtts_roundps_epu64 (hx, 8);
+ xi = _mm512_mask_cvtts_roundps_epu64 (xi, m8, hx, 8);
+ xi = _mm512_maskz_cvtts_roundps_epu64 (m8, hx, 8);
}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+
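+/* Reference result: truncate toward zero and saturate out-of-range values to INT_MIN/INT_MAX, as the saturating conversion does.  */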
+static void
+CALC (double *s, int *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > INT_MAX)
+ r[i] = INT_MAX;
+ else if (s[i] < INT_MIN)
+ r[i] = INT_MIN;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttspd_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttspd_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttspd_epi32) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundpd_epi32) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundpd_epi32) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundpd_epi32) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (double *s, long long *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > LLONG_MAX)
+ r[i] = LLONG_MAX;
+ else if (s[i] < LLONG_MIN)
+ r[i] = LLONG_MIN;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttspd_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttspd_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttspd_epi64) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundpd_epi64) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundpd_epi64) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundpd_epi64) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+
+static void
+CALC (double *s, unsigned int *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > UINT_MAX)
+ r[i] = UINT_MAX;
+ else if (s[i] < 0)
+ r[i] = 0;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN_HALF, i_ud) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttspd_epu32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttspd_epu32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttspd_epu32) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundpd_epu32) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundpd_epu32) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundpd_epu32) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_ud) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_ud) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (double *s, unsigned long long *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > ULLONG_MAX)
+ r[i] = ULLONG_MAX;
+ else if (s[i] < 0)
+ r[i] = 0;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN, i_uq) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttspd_epu64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttspd_epu64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttspd_epu64) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundpd_epu64) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundpd_epu64) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundpd_epu64) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_uq) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_uq) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 32)
+#define DST_SIZE ((AVX512F_LEN) / 32)
+
+static void
+CALC (float *s, int *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > INT_MAX)
+ r[i] = INT_MAX;
+ else if (s[i] < INT_MIN)
+ r[i] = INT_MIN;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttsps_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttsps_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttsps_epi32) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundps_epi32) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundps_epi32) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundps_epi32) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN_HALF) / 32)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (float *s, long long *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ if (s[i] > LLONG_MAX)
+ r[i] = LLONG_MAX;
+ else if (s[i] < LLONG_MIN)
+ r[i] = LLONG_MIN;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, ) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttsps_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttsps_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttsps_epi64) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundps_epi64) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundps_epi64) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundps_epi64) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 32)
+#define DST_SIZE ((AVX512F_LEN) / 32)
+
+static void
+CALC (float *s, unsigned int *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > UINT_MAX)
+ r[i] = UINT_MAX;
+ else if (s[i] < 0)
+ r[i] = 0;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s;
+ UNION_TYPE (AVX512F_LEN, i_ud) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttsps_epu32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttsps_epu32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttsps_epu32) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundps_epu32) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundps_epu32) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundps_epu32) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_ud) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_ud) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_ud) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_ud) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_ud) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN_HALF) / 32)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (float *s, unsigned long long *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ if (s[i] > ULLONG_MAX)
+ r[i] = ULLONG_MAX;
+ else if (s[i] < 0)
+ r[i] = 0;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, ) s;
+ UNION_TYPE (AVX512F_LEN, i_uq) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttsps_epu64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttsps_epu64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttsps_epu64) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundps_epu64) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundps_epu64) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundps_epu64) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_uq) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_uq) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res3, res_ref))
+ abort ();
+}
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttss2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttss2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttsd2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttss2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttss2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
#include <immintrin.h>
volatile __m128 hx;
volatile __m128i hxi;
volatile __m128h hxh;
+volatile __m128d hxd;
volatile __m128bh hxbh;
volatile __m256 x;
volatile __m256h xh;
volatile __m256i xi;
+volatile __m256d xd;
volatile __m256bh xbh;
volatile __mmask8 m8;
volatile __mmask16 m16;
+volatile int i;
+volatile unsigned int ui;
+volatile long long ll;
+volatile unsigned long long ull;
void extern
avx10_2_test (void)
hxi = _mm_ipcvttnebf16_epu16 (hxbh);
hxi = _mm_mask_ipcvttnebf16_epu16 (hxi, m8, hxbh);
hxi = _mm_maskz_ipcvttnebf16_epu16 (m8, hxbh);
+
+ hxi = _mm256_cvtts_roundpd_epi32 (xd, 8);
+ hxi = _mm256_mask_cvtts_roundpd_epi32 (hxi, m8, xd, 8);
+ hxi = _mm256_maskz_cvtts_roundpd_epi32 (m8, xd, 8);
+
+ xi = _mm256_cvtts_roundpd_epi64 (xd, 8);
+ xi = _mm256_mask_cvtts_roundpd_epi64 (xi, m8, xd, 8);
+ xi = _mm256_maskz_cvtts_roundpd_epi64 (m8, xd, 8);
+
+ hxi = _mm256_cvtts_roundpd_epu32 (xd, 8);
+ hxi = _mm256_mask_cvtts_roundpd_epu32 (hxi, m8, xd, 8);
+ hxi = _mm256_maskz_cvtts_roundpd_epu32 (m8, xd, 8);
+
+ xi = _mm256_cvtts_roundpd_epu64 (xd, 8);
+ xi = _mm256_mask_cvtts_roundpd_epu64 (xi, m8, xd, 8);
+ xi = _mm256_maskz_cvtts_roundpd_epu64 (m8, xd, 8);
+
+ xi = _mm256_cvtts_roundps_epi32 (x, 8);
+ xi = _mm256_mask_cvtts_roundps_epi32 (xi, m16, x, 8);
+ xi = _mm256_maskz_cvtts_roundps_epi32 (m16, x, 8);
+
+ xi = _mm256_cvtts_roundps_epi64 (hx, 8);
+ xi = _mm256_mask_cvtts_roundps_epi64 (xi, m8, hx, 8);
+ xi = _mm256_maskz_cvtts_roundps_epi64 (m8, hx, 8);
+
+ xi = _mm256_cvtts_roundps_epu32 (x, 8);
+ xi = _mm256_mask_cvtts_roundps_epu32 (xi, m16, x, 8);
+ xi = _mm256_maskz_cvtts_roundps_epu32 (m16, x, 8);
+
+ xi = _mm256_cvtts_roundps_epu64 (hx, 8);
+ xi = _mm256_mask_cvtts_roundps_epu64 (xi, m8, hx, 8);
+ xi = _mm256_maskz_cvtts_roundps_epu64 (m8, hx, 8);
+
+ hxi = _mm_cvttspd_epi32 (hxd);
+ hxi = _mm_mask_cvttspd_epi32 (hxi, m8, hxd);
+ hxi = _mm_maskz_cvttspd_epi32 (m8, hxd);
+
+ hxi = _mm_cvttspd_epi64 (hxd);
+ hxi = _mm_mask_cvttspd_epi64 (hxi, m8, hxd);
+ hxi = _mm_maskz_cvttspd_epi64 (m8, hxd);
+
+ hxi = _mm_cvttspd_epu32 (hxd);
+ hxi = _mm_mask_cvttspd_epu32 (hxi, m8, hxd);
+ hxi = _mm_maskz_cvttspd_epu32 (m8, hxd);
+
+ hxi = _mm_cvttspd_epu64 (hxd);
+ hxi = _mm_mask_cvttspd_epu64 (hxi, m8, hxd);
+ hxi = _mm_maskz_cvttspd_epu64 (m8, hxd);
+
+ hxi = _mm_cvttsps_epi32 (hx);
+ hxi = _mm_mask_cvttsps_epi32 (hxi, m8, hx);
+ hxi = _mm_maskz_cvttsps_epi32 (m8, hx);
+
+ hxi = _mm_cvttsps_epi64 (hx);
+ hxi = _mm_mask_cvttsps_epi64 (hxi, m8, hx);
+ hxi = _mm_maskz_cvttsps_epi64 (m8, hx);
+
+ hxi = _mm_cvttsps_epu32 (hx);
+ hxi = _mm_mask_cvttsps_epu32 (hxi, m8, hx);
+ hxi = _mm_maskz_cvttsps_epu32 (m8, hx);
+
+ hxi = _mm_cvttsps_epu64 (hx);
+ hxi = _mm_mask_cvttsps_epu64 (hxi, m8, hx);
+ hxi = _mm_maskz_cvttsps_epu64 (m8, hx);
+
+ i = _mm_cvtts_roundsd_epi32 (hxd, 8);
+ ui = _mm_cvtts_roundsd_epu32 (hxd, 8);
+ i = _mm_cvtts_roundss_epi32 (hx, 8);
+ ui = _mm_cvtts_roundss_epu32 (hx, 8);
+
+#ifdef __x86_64__
+ ll = _mm_cvtts_roundsd_epi64 (hxd, 8);
+ ull = _mm_cvtts_roundsd_epu64 (hxd, 8);
+ ll = _mm_cvtts_roundss_epi64 (hx, 8);
+ ull = _mm_cvtts_roundss_epu64 (hx, 8);
+#endif
}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
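+/* Reuse the 512-bit test body at 256-bit and 128-bit vector lengths.  */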
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2dqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2dqs-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2qqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2qqs-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2udqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2udqs-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2uqqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2uqqs-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2dqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2dqs-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2qqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2qqs-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2udqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2udqs-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2uqqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2uqqs-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+ UNION_TYPE (128, d) s;
+ int res1;
+ long long res2;
+ int res1_ref = 0;
+ long long res2_ref = 0;
+
+ s.a[0] = 2.46;
+
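+ /* The saturating scalar conversion truncates toward zero; the reference value below is clamped to the same range.  */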
+ res1 = _mm_cvtts_roundsd_epi32 (s.x, 8);
+
+ if (s.a[0] > INT_MAX)
+ res1_ref = INT_MAX;
+ else if (s.a[0] < INT_MIN)
+ res1_ref = INT_MIN;
+ else
+ res1_ref = s.a[0];
+
+ if (res1 != res1_ref)
+ abort();
+
+#ifdef __x86_64__
+ res2 = _mm_cvtts_roundsd_epi64 (s.x, 8);
+
+ if (s.a[0] > LLONG_MAX)
+ res2_ref = LLONG_MAX;
+ else if (s.a[0] < LLONG_MIN)
+ res2_ref = LLONG_MIN;
+ else
+ res2_ref = s.a[0];
+
+ if (res2 != res2_ref)
+ abort();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+ UNION_TYPE (128, d) s;
+ unsigned int res1;
+ unsigned long long res2;
+ unsigned int res1_ref = 0;
+ unsigned long long res2_ref = 0;
+
+ s.a[0] = 2.46;
+
+ res1 = _mm_cvtts_roundsd_epu32 (s.x, 8);
+
+ if (s.a[0] > UINT_MAX)
+ res1_ref = UINT_MAX;
+ else if (s.a[0] < 0)
+ res1_ref = 0;
+ else
+ res1_ref = s.a[0];
+
+ if (res1 != res1_ref)
+ abort();
+
+#ifdef __x86_64__
+ res2 = _mm_cvtts_roundsd_epu64 (s.x, 8);
+
+ if (s.a[0] > ULLONG_MAX)
+ res2_ref = ULLONG_MAX;
+ else if (s.a[0] < 0)
+ res2_ref = 0;
+ else
+ res2_ref = s.a[0];
+
+ if (res2 != res2_ref)
+ abort();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+ UNION_TYPE (128, ) s;
+ int res1;
+ long long res2;
+ int res1_ref = 0;
+ long long res2_ref = 0;
+
+ s.a[0] = 2.46;
+
+ res1 = _mm_cvtts_roundss_epi32 (s.x, 8);
+
+ if (s.a[0] > INT_MAX)
+ res1_ref = INT_MAX;
+ else if (s.a[0] < INT_MIN)
+ res1_ref = INT_MIN;
+ else
+ res1_ref = s.a[0];
+
+ if (res1 != res1_ref)
+ abort();
+
+#ifdef __x86_64__
+ res2 = _mm_cvtts_roundss_epi64 (s.x, 8);
+
+ if (s.a[0] > LLONG_MAX)
+ res2_ref = LLONG_MAX;
+ else if (s.a[0] < LLONG_MIN)
+ res2_ref = LLONG_MIN;
+ else
+ res2_ref = s.a[0];
+
+ if (res2 != res2_ref)
+ abort();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+ UNION_TYPE (128, ) s;
+ unsigned int res1;
+ unsigned long long res2;
+ unsigned int res1_ref = 0;
+ unsigned long long res2_ref = 0;
+
+ s.a[0] = 2.46;
+
+ res1 = _mm_cvtts_roundss_epu32 (s.x, 8);
+
+ if (s.a[0] > UINT_MAX)
+ res1_ref = UINT_MAX;
+ else if (s.a[0] < 0)
+ res1_ref = 0;
+ else
+ res1_ref = s.a[0];
+
+ if (res1 != res1_ref)
+ abort();
+
+#ifdef __x86_64__
+ res2 = _mm_cvtts_roundss_epu64 (s.x, 8);
+
+ if (s.a[0] > ULLONG_MAX)
+ res2_ref = ULLONG_MAX;
+ else if (s.a[0] < 0)
+ res2_ref = 0;
+ else
+ res2_ref = s.a[0];
+
+ if (res2 != res2_ref)
+ abort();
+#endif
+}
#define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8)
/* avx10_2satcvtintrin.h */
#define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8)
+#define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8)
+#define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8)
+#define __builtin_ia32_cvttss2usis32_round(A, B) __builtin_ia32_cvttss2usis32_round(A, 8)
+#ifdef __x86_64__
+#define __builtin_ia32_cvttsd2sis64_round(A, B) __builtin_ia32_cvttsd2sis64_round(A, 8)
+#define __builtin_ia32_cvttsd2usis64_round(A, B) __builtin_ia32_cvttsd2usis64_round(A, 8)
+#define __builtin_ia32_cvttss2sis64_round(A, B) __builtin_ia32_cvttss2sis64_round(A, 8)
+#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
+#endif
#include <x86intrin.h>
test_3 (_mm512_mask_ipcvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8)
test_3 (_mm512_mask_ipcvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
test_3 (_mm512_mask_ipcvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundpd_epi32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epi64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundps_epi32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epi64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi64, __m512i, __m512i, __mmask8, __m256, 8)
+test_1 (_mm512_cvtts_roundps_epu32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epu64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu64, __m512i, __m512i, __mmask8, __m256, 8)
/* avx10_2satcvtintrin.h */
test_1 (_mm256_ipcvt_roundph_epi16, __m256i, __m256h, 8)
test_3 (_mm256_mask_ipcvtt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8)
test_3 (_mm256_mask_ipcvtt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
test_3 (_mm256_mask_ipcvtt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundpd_epi32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epi64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundps_epi32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epi64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm256_cvtts_roundps_epu32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epu64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm_cvtts_roundsd_epi32, int, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu32, unsigned int, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi32, int, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu32, unsigned int, __m128, 8)
+#ifdef __x86_64__
+test_1 (_mm_cvtts_roundsd_epi64, long long, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu64, unsigned long long, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8)
+#endif
test_3 (_mm512_mask_ipcvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8)
test_3 (_mm512_mask_ipcvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
test_3 (_mm512_mask_ipcvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundpd_epi32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epi64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundps_epi32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epi64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi64, __m512i, __m512i, __mmask8, __m256, 8)
+test_1 (_mm512_cvtts_roundps_epu32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epu64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu64, __m512i, __m512i, __mmask8, __m256, 8)
/* avx10_2satcvtintrin.h */
test_1 (_mm256_ipcvt_roundph_epi16, __m256i, __m256h, 8)
test_3 (_mm256_mask_ipcvtt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8)
test_3 (_mm256_mask_ipcvtt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
test_3 (_mm256_mask_ipcvtt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundpd_epi32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epi64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundps_epi32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epi64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm256_cvtts_roundps_epu32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epu64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm_cvtts_roundsd_epi32, int, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu32, unsigned int, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi32, int, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu32, unsigned int, __m128, 8)
+#ifdef __x86_64__
+test_1 (_mm_cvtts_roundsd_epi64, long long, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu64, unsigned long long, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8)
+#endif
#define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8)
/* avx10_2satcvtintrin.h */
#define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8)
+#define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8)
+#define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8)
+#define __builtin_ia32_cvttss2usis32_round(A, B) __builtin_ia32_cvttss2usis32_round(A, 8)
+#ifdef __x86_64__
+#define __builtin_ia32_cvttsd2sis64_round(A, B) __builtin_ia32_cvttsd2sis64_round(A, 8)
+#define __builtin_ia32_cvttsd2usis64_round(A, B) __builtin_ia32_cvttsd2usis64_round(A, 8)
+#define __builtin_ia32_cvttss2sis64_round(A, B) __builtin_ia32_cvttss2sis64_round(A, 8)
+#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
+#endif
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")