__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtx2ps_ph (__m256 __A, __m256 __B)
{
- return (__m256h) __builtin_ia32_vcvt2ps2phx256_mask_round ((__v8sf) __A,
- (__v8sf) __B,
- (__v16hf)
- _mm256_setzero_ph (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m256h) __builtin_ia32_vcvt2ps2phx256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v16hf)
+ _mm256_setzero_ph (),
+ (__mmask16) -1);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtx2ps_ph (__m256h __W, __mmask16 __U, __m256 __A, __m256 __B)
{
- return (__m256h) __builtin_ia32_vcvt2ps2phx256_mask_round ((__v8sf) __A,
- (__v8sf) __B,
- (__v16hf) __W,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m256h) __builtin_ia32_vcvt2ps2phx256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v16hf) __W,
+ (__mmask16) __U);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtx2ps_ph ( __mmask16 __U, __m256 __A, __m256 __B)
{
- return (__m256h) __builtin_ia32_vcvt2ps2phx256_mask_round ((__v8sf) __A,
- (__v8sf) __B,
- (__v16hf)
- _mm256_setzero_ph (),
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtx_round2ps_ph (__m256 __A, __m256 __B, const int __R)
-{
- return (__m256h) __builtin_ia32_vcvt2ps2phx256_mask_round ((__v8sf) __A,
- (__v8sf) __B,
- (__v16hf)
- _mm256_setzero_ph (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m256h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtx_round2ps_ph (__m256h __W, __mmask16 __U, __m256 __A,
- __m256 __B, const int __R)
-{
- return (__m256h) __builtin_ia32_vcvt2ps2phx256_mask_round ((__v8sf) __A,
- (__v8sf) __B,
- (__v16hf) __W,
- (__mmask16) __U,
- __R);
+ return (__m256h) __builtin_ia32_vcvt2ps2phx256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v16hf)
+ _mm256_setzero_ph (),
+ (__mmask16) __U);
}
-extern __inline __m256h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtx_round2ps_ph (__mmask16 __U, __m256 __A,
- __m256 __B, const int __R)
-{
- return (__m256h) __builtin_ia32_vcvt2ps2phx256_mask_round ((__v8sf) __A,
- (__v8sf) __B,
- (__v16hf)
- _mm256_setzero_ph (),
- (__mmask16) __U,
- __R);
-}
-
-#else
-#define _mm256_cvtx_round2ps_ph(A, B, R) \
- ((__m256h) __builtin_ia32_vcvt2ps2phx256_mask_round ((__v8sf) (A), \
- (__v8sf) (B), \
- (__v16hf) \
- (_mm256_setzero_ph ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R) \
- ((__m256h) __builtin_ia32_vcvt2ps2phx256_mask_round ((__v8sf) (A), \
- (__v8sf) (B), \
- (__v16hf) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R) \
- ((__m256h) __builtin_ia32_vcvt2ps2phx256_mask_round ((__v8sf) (A), \
- (__v8sf) (B), \
- (__v16hf) \
- (_mm256_setzero_ph ()), \
- (__mmask16) (U), \
- (R)))
-#endif /* __OPTIMIZE__ */
-
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbiasph_bf8 (__m128i __A, __m128h __B)
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_minmax_pd (__m256d __A, __m256d __B, const int __C)
{
- return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ return (__m256d) __builtin_ia32_minmaxpd256_mask (
(__v4df) __A, (__v4df) __B, __C,
(__v4df) (__m256d) _mm256_undefined_pd (),
- (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
+ (__mmask8) -1);
}
extern __inline __m256d
_mm256_mask_minmax_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B,
const int __C)
{
- return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ return (__m256d) __builtin_ia32_minmaxpd256_mask (
(__v4df) __A, (__v4df) __B, __C, (__v4df) __W,
- (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+ (__mmask8) __U);
}
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_minmax_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C)
{
- return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ return (__m256d) __builtin_ia32_minmaxpd256_mask (
(__v4df) __A, (__v4df) __B, __C,
(__v4df) (__m256d) _mm256_setzero_pd (),
- (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_minmax_round_pd (__m256d __A, __m256d __B, const int __C, const int __R)
-{
- return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
- (__v4df) __A, (__v4df) __B, __C,
- (__v4df) (__m256d) _mm256_undefined_pd (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_minmax_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
- __m256d __B, const int __C, const int __R)
-{
- return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
- (__v4df) __A, (__v4df) __B, __C, (__v4df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_minmax_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
- const int __C, const int __R)
-{
- return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
- (__v4df) __A, (__v4df) __B, __C,
- (__v4df) (__m256d) _mm256_setzero_pd (),
- (__mmask8) __U, __R);
+ (__mmask8) __U);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_minmax_ph (__m256h __A, __m256h __B, const int __C)
{
- return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ return (__m256h) __builtin_ia32_minmaxph256_mask (
(__v16hf) __A, (__v16hf) __B, __C,
(__v16hf) (__m256h) _mm256_undefined_ph (),
- (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
+ (__mmask16) -1);
}
extern __inline __m256h
_mm256_mask_minmax_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B,
const int __C)
{
- return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ return (__m256h) __builtin_ia32_minmaxph256_mask (
(__v16hf) __A, (__v16hf) __B, __C, (__v16hf) __W,
- (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
+ (__mmask16) __U);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_minmax_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __C)
{
- return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ return (__m256h) __builtin_ia32_minmaxph256_mask (
(__v16hf) __A, (__v16hf) __B, __C,
(__v16hf) (__m256h) _mm256_setzero_ph (),
- (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_minmax_round_ph (__m256h __A, __m256h __B, const int __C, const int __R)
-{
- return (__m256h) __builtin_ia32_minmaxph256_mask_round (
- (__v16hf) __A, (__v16hf) __B, __C,
- (__v16hf) (__m256h) _mm256_undefined_ph (),
- (__mmask16) -1, __R);
-}
-
-extern __inline __m256h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_minmax_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
- __m256h __B, const int __C, const int __R)
-{
- return (__m256h) __builtin_ia32_minmaxph256_mask_round (
- (__v16hf) __A, (__v16hf) __B, __C, (__v16hf) __W,
- (__mmask16) __U, __R);
-}
-
-extern __inline __m256h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_minmax_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
- const int __C, const int __R)
-{
- return (__m256h) __builtin_ia32_minmaxph256_mask_round (
- (__v16hf) __A, (__v16hf) __B, __C,
- (__v16hf) (__m256h) _mm256_setzero_ph (),
- (__mmask16) __U, __R);
+ (__mmask16) __U);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_minmax_ps (__m256 __A, __m256 __B, const int __C)
{
- return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ return (__m256) __builtin_ia32_minmaxps256_mask (
(__v8sf) __A, (__v8sf) __B, __C,
(__v8sf) (__m256) _mm256_undefined_ps (),
- (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
+ (__mmask8) -1);
}
extern __inline __m256
_mm256_mask_minmax_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
const int __C)
{
- return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ return (__m256) __builtin_ia32_minmaxps256_mask (
(__v8sf) __A, (__v8sf) __B, __C, (__v8sf) __W,
- (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+ (__mmask8) __U);
}
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_minmax_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C)
{
- return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ return (__m256) __builtin_ia32_minmaxps256_mask (
(__v8sf) __A, (__v8sf) __B, __C,
(__v8sf) (__m256) _mm256_setzero_ps (),
- (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_minmax_round_ps (__m256 __A, __m256 __B, const int __C, const int __R)
-{
- return (__m256) __builtin_ia32_minmaxps256_mask_round (
- (__v8sf) __A, (__v8sf) __B, __C,
- (__v8sf) (__m256) _mm256_undefined_ps (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_minmax_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
- const int __C, const int __R)
-{
- return (__m256) __builtin_ia32_minmaxps256_mask_round (
- (__v8sf) __A, (__v8sf) __B, __C, (__v8sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_minmax_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
- const int __C, const int __R)
-{
- return (__m256) __builtin_ia32_minmaxps256_mask_round (
- (__v8sf) __A, (__v8sf) __B, __C,
- (__v8sf) (__m256) _mm256_setzero_ps (),
- (__mmask8) __U, __R);
+ (__mmask8) __U);
}
extern __inline __m128d
(__mmask8) (U)))
#define _mm256_minmax_pd(A, B, C) \
- ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
- (__v4df) (B), \
- (int) (C), \
- (__v4df) (__m256d) \
- _mm256_undefined_pd (), \
- (__mmask8) (-1), \
- _MM_FROUND_CUR_DIRECTION))
+ ((__m256d) __builtin_ia32_minmaxpd256_mask ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_undefined_pd (), \
+ (__mmask8) (-1)))
#define _mm256_mask_minmax_pd(W, U, A, B, C) \
- ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
- (__v4df) (B), \
- (int) (C), \
- (__v4df) (__m256d) (W), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
+ ((__m256d) __builtin_ia32_minmaxpd256_mask ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) (W), \
+ (__mmask8) (U)))
#define _mm256_maskz_minmax_pd(U, A, B, C) \
- ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
- (__v4df) (B), \
- (int) (C), \
- (__v4df) (__m256d) \
- _mm256_setzero_pd (), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm256_minmax_round_pd(A, B, C, R) \
- ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
- (__v4df) (B), \
- (int) (C), \
- (__v4df) (__m256d) \
- _mm256_undefined_pd (), \
- (__mmask8) (-1), \
- (int) (R)))
-
-#define _mm256_mask_minmax_round_pd(W, U, A, B, C, R) \
- ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
- (__v4df) (B), \
- (int) (C), \
- (__v4df) (__m256d) (W), \
- (__mmask8) (U), \
- (int) (R)))
-
-#define _mm256_maskz_minmax_round_pd(U, A, B, C, R) \
- ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
- (__v4df) (B), \
- (int) (C), \
- (__v4df) (__m256d) \
- _mm256_setzero_pd (), \
- (__mmask8) (U), \
- (int) (R)))
+ ((__m256d) __builtin_ia32_minmaxpd256_mask ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_setzero_pd (), \
+ (__mmask8) (U)))
#define _mm_minmax_ph(A, B, C) \
((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
(__mmask8) (U)))
#define _mm256_minmax_ph(A, B, C) \
- ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
- (__v16hf) (B), \
- (int) (C), \
- (__v16hf) (__m256h) \
- _mm256_undefined_ph (), \
- (__mmask16) (-1), \
- _MM_FROUND_CUR_DIRECTION))
+ ((__m256h) __builtin_ia32_minmaxph256_mask ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_undefined_ph (), \
+ (__mmask16) (-1)))
#define _mm256_mask_minmax_ph(W, U, A, B, C) \
- ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
- (__v16hf) (B), \
- (int) (C), \
- (__v16hf) (__m256h) (W), \
- (__mmask16) (U), \
- _MM_FROUND_CUR_DIRECTION))
+ ((__m256h) __builtin_ia32_minmaxph256_mask ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) (W), \
+ (__mmask16) (U)))
#define _mm256_maskz_minmax_ph(U, A, B, C) \
- ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
- (__v16hf) (B), \
- (int) (C), \
- (__v16hf) (__m256h) \
- _mm256_setzero_ph (), \
- (__mmask16) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm256_minmax_round_ph(A, B, C, R) \
- ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
- (__v16hf) (B), \
- (int) (C), \
- (__v16hf) (__m256h) \
- _mm256_undefined_ph (), \
- (__mmask16) (-1), \
- (int) (R)))
-
-#define _mm256_mask_minmax_round_ph(W, U, A, B, C, R) \
- ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
- (__v16hf) (B), \
- (int) (C), \
- (__v16hf) (__m256h) (W), \
- (__mmask16) (U), \
- (int) (R)))
-
-#define _mm256_maskz_minmax_round_ph(U, A, B, C, R) \
- ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
- (__v16hf) (B), \
- (int) (C), \
- (__v16hf) (__m256h) \
- _mm256_setzero_ph (), \
- (__mmask16) (U), \
- (int) (R)))
+ ((__m256h) __builtin_ia32_minmaxph256_mask ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_setzero_ph (), \
+ (__mmask16) (U)))
#define _mm_minmax_ps(A, B, C) \
((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
(__mmask8) (U)))
#define _mm256_minmax_ps(A, B, C) \
- ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
- (__v8sf) (B), \
- (int) (C), \
- (__v8sf) (__m256) \
- _mm256_undefined_ps (), \
- (__mmask8) (-1), \
- _MM_FROUND_CUR_DIRECTION))
+ ((__m256) __builtin_ia32_minmaxps256_mask ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_undefined_ps (), \
+ (__mmask8) (-1)))
#define _mm256_mask_minmax_ps(W, U, A, B, C) \
- ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
- (__v8sf) (B), \
- (int) (C), \
- (__v8sf) (__m256) (W), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
+ ((__m256) __builtin_ia32_minmaxps256_mask ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) (W), \
+ (__mmask8) (U)))
#define _mm256_maskz_minmax_ps(U, A, B, C) \
- ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
- (__v8sf) (B), \
- (int) (C), \
- (__v8sf) (__m256) \
- _mm256_setzero_ps (), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm256_minmax_round_ps(A, B, C, R) \
- ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
- (__v8sf) (B), \
- (int) (C), \
- (__v8sf) (__m256) \
- _mm256_undefined_ps (), \
- (__mmask8) (-1), \
- (int) (R)))
-
-#define _mm256_mask_minmax_round_ps(W, U, A, B, C, R) \
- ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
- (__v8sf) (B), \
- (int) (C), \
- (__v8sf) (__m256) (W), \
- (__mmask8) (U), \
- (int) (R)))
-
-#define _mm256_maskz_minmax_round_ps(U, A, B, C, R) \
- ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
- (__v8sf) (B), \
- (int) (C), \
- (__v8sf) (__m256) \
- _mm256_setzero_ps (), \
- (__mmask8) (U), \
- (int) (R)))
+ ((__m256) __builtin_ia32_minmaxps256_mask ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_setzero_ps (), \
+ (__mmask8) (U)))
#define _mm_minmax_round_sd(A, B, C, R) \
((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, INT, V4DF, UQI, INT)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT, V8SF, UQI, INT)
DEF_FUNCTION_TYPE (V32HF, V16SF, V16SF, V32HF, USI, INT)
-DEF_FUNCTION_TYPE (V16HF, V8SF, V8SF, V16HF, UHI, INT)
DEF_FUNCTION_TYPE (V32HF, V16SF, V16SF, V32HF, USI)
DEF_FUNCTION_TYPE (V16HF, V8SF, V8SF, V16HF, UHI)
DEF_FUNCTION_TYPE (V8HF, V4SF, V4SF, V8HF, UQI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT, V32HF, USI, INT)
-DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT)
+DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT)
DEF_FUNCTION_TYPE (V8DI, V8SF, V8DI, UQI)
DEF_FUNCTION_TYPE (V8DI, V8DF, V8DI, UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_mpsadbw_mask, "__builtin_ia32_mpsadbw512_mask", IX86_BUILTIN_VMPSADBW_V32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx2_mpsadbw_mask, "__builtin_ia32_mpsadbw256_mask", IX86_BUILTIN_VMPSADBW_V16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_sse4_1_mpsadbw_mask, "__builtin_ia32_mpsadbw128_mask", IX86_BUILTIN_VMPSADBW_V8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvt2ps2phx_v16hf_mask, "__builtin_ia32_vcvt2ps2phx256_mask", IX86_BUILTIN_VCVT2PS2PHX_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V8SF_V8SF_V16HF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvt2ps2phx_v8hf_mask, "__builtin_ia32_vcvt2ps2phx128_mask", IX86_BUILTIN_VCVT2PS2PHX_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V4SF_V4SF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2bf8v8hf, "__builtin_ia32_vcvtbiasph2bf8128", IX86_BUILTIN_VCVTBIASPH2BF8128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2bf8v8hf_mask, "__builtin_ia32_vcvtbiasph2bf8128_mask", IX86_BUILTIN_VCVTBIASPH2BF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF_V16QI_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxbf16_v8bf_mask, "__builtin_ia32_minmaxbf16128_mask", IX86_BUILTIN_MINMAXBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_INT_V8BF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxbf16_v16bf_mask, "__builtin_ia32_minmaxbf16256_mask", IX86_BUILTIN_MINMAXBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_INT_V16BF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxbf16_v32bf_mask, "__builtin_ia32_minmaxbf16512_mask", IX86_BUILTIN_MINMAXBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_INT_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv4df_mask, "__builtin_ia32_minmaxpd256_mask", IX86_BUILTIN_MINMAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv16hf_mask, "__builtin_ia32_minmaxph256_mask", IX86_BUILTIN_MINMAXPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_INT_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv8sf_mask, "__builtin_ia32_minmaxps256_mask", IX86_BUILTIN_MINMAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv2df_mask, "__builtin_ia32_minmaxpd128_mask", IX86_BUILTIN_MINMAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv8hf_mask, "__builtin_ia32_minmaxph128_mask", IX86_BUILTIN_MINMAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv4sf_mask, "__builtin_ia32_minmaxps128_mask", IX86_BUILTIN_MINMAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_subv16hf3_mask_round, "__builtin_ia32_subph256_mask_round", IX86_BUILTIN_VSUBPH256_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_subv8sf3_mask_round, "__builtin_ia32_subps256_mask_round", IX86_BUILTIN_VSUBPS256_MASK_ROUND, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvt2ps2phx_v32hf_mask_round, "__builtin_ia32_vcvt2ps2phx512_mask_round", IX86_BUILTIN_VCVT2PS2PHX_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V16SF_V16SF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvt2ps2phx_v16hf_mask_round, "__builtin_ia32_vcvt2ps2phx256_mask_round", IX86_BUILTIN_VCVT2PS2PHX_V16HF_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V8SF_V8SF_V16HF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtph2ibsv16hf_mask_round, "__builtin_ia32_cvtph2ibs256_mask_round", IX86_BUILTIN_CVTPH2IBS256_MASK_ROUND, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvtph2ibsv32hf_mask_round, "__builtin_ia32_cvtph2ibs512_mask_round", IX86_BUILTIN_CVTPH2IBS512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtph2iubsv16hf_mask_round, "__builtin_ia32_cvtph2iubs256_mask_round", IX86_BUILTIN_CVTPH2IUBS256_MASK_ROUND, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxpv8df_mask_round, "__builtin_ia32_minmaxpd512_mask_round", IX86_BUILTIN_MINMAXPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxpv32hf_mask_round, "__builtin_ia32_minmaxph512_mask_round", IX86_BUILTIN_MINMAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxpv16sf_mask_round, "__builtin_ia32_minmaxps512_mask_round", IX86_BUILTIN_MINMAXPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv4df_mask_round, "__builtin_ia32_minmaxpd256_mask_round", IX86_BUILTIN_MINMAXPD256_MASK_ROUND, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv16hf_mask_round, "__builtin_ia32_minmaxph256_mask_round", IX86_BUILTIN_MINMAXPH256_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_INT_V16HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv8sf_mask_round, "__builtin_ia32_minmaxps256_mask_round", IX86_BUILTIN_MINMAXPS256_MASK_ROUND, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxsv2df_mask_round, "__builtin_ia32_minmaxsd_mask_round", IX86_BUILTIN_MINMAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxsv8hf_mask_round, "__builtin_ia32_minmaxsh_mask_round", IX86_BUILTIN_MINMAXSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxsv4sf_mask_round, "__builtin_ia32_minmaxss_mask_round", IX86_BUILTIN_MINMAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
case V8BF_FTYPE_V8BF_V8BF_INT_V8BF_UQI:
case V16BF_FTYPE_V16BF_V16BF_INT_V16BF_UHI:
case V32BF_FTYPE_V32BF_V32BF_INT_V32BF_USI:
+ case V16HF_FTYPE_V16HF_V16HF_INT_V16HF_UHI:
case V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI:
nargs = 5;
mask_pos = 1;
case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT:
case V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT:
case V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT:
- case V16HF_FTYPE_V8SF_V8SF_V16HF_UHI_INT:
case V32HF_FTYPE_V16SF_V16SF_V32HF_USI_INT:
nargs = 5;
break;
case V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT:
case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI_INT:
case V32HF_FTYPE_V32HF_V32HF_INT_V32HF_USI_INT:
- case V16HF_FTYPE_V16HF_V16HF_INT_V16HF_UHI_INT:
case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI_INT:
nargs = 6;
nargs_constant = 4;
#define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E)
#define __builtin_ia32_mpsadbw256_mask(A, B, C, D, E) __builtin_ia32_mpsadbw256_mask (A, B, 1, D, E)
-/* avx10_2convertintrin.h */
-#define __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, 8)
-
/* avx10_2-512convertintrin.h */
#define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E)
-#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxpd256_mask(A, B, C, D, E) __builtin_ia32_minmaxpd256_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E)
-#define __builtin_ia32_minmaxph256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph256_mask(A, B, C, D, E) __builtin_ia32_minmaxph256_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E)
-#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps256_mask(A, B, C, D, E) __builtin_ia32_minmaxps256_mask (A, B, 4, D, E)
#include <wmmintrin.h>
#include <immintrin.h>
/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcvt2ps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtbiasph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
y2 = _mm256_mask_cvtx2ps_ph (y2, m16, a2, b2);
y2 = _mm256_maskz_cvtx2ps_ph (m16, a2, b2);
- y2 = _mm256_cvtx_round2ps_ph (a2, b2, 8);
- y2 = _mm256_mask_cvtx_round2ps_ph (y2, m16, a2, b2, 8);
- y2 = _mm256_maskz_cvtx_round2ps_ph (m16, a2, b2, 8);
}
void extern
/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
y2 = _mm256_minmax_ph (y2, y2, 100);
y2 = _mm256_mask_minmax_ph (y2, m16, y2, y2, 100);
y2 = _mm256_maskz_minmax_ph (m16, y2, y2, 100);
- y2 = _mm256_minmax_round_ph (y2, y2, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
- y2 = _mm256_mask_minmax_round_ph (y2, m16, y2, y2, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
- y2 = _mm256_maskz_minmax_round_ph (m16, y2, y2, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
x3 = _mm_minmax_ps (x3, x3, 100);
x3 = _mm_mask_minmax_ps (x3, m8, x3, x3, 100);
x3 = _mm_maskz_minmax_ps (m8, x3, x3, 100);
y3 = _mm256_minmax_ps (y3, y3, 100);
y3 = _mm256_mask_minmax_ps (y3, m8, y3, y3, 100);
y3 = _mm256_maskz_minmax_ps (m8, y3, y3, 100);
- y3 = _mm256_minmax_round_ps (y3, y3, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
- y3 = _mm256_mask_minmax_round_ps (y3, m8, y3, y3, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
- y3 = _mm256_maskz_minmax_round_ps (m8, y3, y3, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
x4 = _mm_minmax_pd (x4, x4, 100);
x4 = _mm_mask_minmax_pd (x4, m8, x4, x4, 100);
x4 = _mm_maskz_minmax_pd (m8, x4, x4, 100);
y4 = _mm256_minmax_pd (y4, y4, 100);
y4 = _mm256_mask_minmax_pd (y4, m8, y4, y4, 100);
y4 = _mm256_maskz_minmax_pd (m8, y4, y4, 100);
- y4 = _mm256_minmax_round_pd (y4, y4, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
- y4 = _mm256_mask_minmax_round_pd (y4, m8, y4, y4, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
- y4 = _mm256_maskz_minmax_round_pd (m8, y4, y4, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
x2 = _mm_minmax_sh (x2, x2, 1);
x2 = _mm_mask_minmax_sh (x2, m8, x2, x2, 1);
x2 = _mm_maskz_minmax_sh (m8, x2, x2, 1);
#define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E)
#define __builtin_ia32_mpsadbw256_mask(A, B, C, D, E) __builtin_ia32_mpsadbw256_mask (A, B, 1, D, E)
-/* avx10_2convertintrin.h */
-#define __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, 8)
-
/* avx10_2-512convertintrin.h */
#define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E)
-#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxpd256_mask(A, B, C, D, E) __builtin_ia32_minmaxpd256_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E)
-#define __builtin_ia32_minmaxph256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph256_mask(A, B, C, D, E) __builtin_ia32_minmaxph256_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E)
-#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps256_mask(A, B, C, D, E) __builtin_ia32_minmaxps256_mask (A, B, 4, D, E)
#include <x86intrin.h>
test_4 (_mm_mask_mpsadbw_epu8, __m128i, __m128i, __mmask8, __m128i, __m128i, 1)
test_4 (_mm256_mask_mpsadbw_epu8, __m256i, __m256i, __mmask16, __m256i, __m256i, 1)
-/* avx10_2convertintrin */
-test_2 (_mm256_cvtx_round2ps_ph, __m256h, __m256, __m256, 4)
-
/* avx10_2-512convertintrin.h */
test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4)
test_2 (_mm256_minmax_pbh, __m256bh, __m256bh, __m256bh, 100)
test_3 (_mm256_maskz_minmax_pbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
test_4 (_mm256_mask_minmax_pbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
-test_2x (_mm256_minmax_round_pd, __m256d, __m256d, __m256d, 100, 4)
-test_3x (_mm256_maskz_minmax_round_pd, __m256d, __mmask8, __m256d, __m256d, 100, 4)
-test_4x (_mm256_mask_minmax_round_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100, 4)
-test_2x (_mm256_minmax_round_ps, __m256, __m256, __m256, 100, 4)
-test_3x (_mm256_maskz_minmax_round_ps, __m256, __mmask8, __m256, __m256, 100, 4)
-test_4x (_mm256_mask_minmax_round_ps, __m256, __m256, __mmask8, __m256, __m256, 100, 4)
-test_2x (_mm256_minmax_round_ph, __m256h, __m256h, __m256h, 100, 4)
-test_3x (_mm256_maskz_minmax_round_ph, __m256h, __mmask16, __m256h, __m256h, 100, 4)
-test_4x (_mm256_mask_minmax_round_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100, 4)
test_2 (_mm256_minmax_pd, __m256d, __m256d, __m256d, 100)
test_3 (_mm256_maskz_minmax_pd, __m256d, __mmask8, __m256d, __m256d, 100)
test_4 (_mm256_mask_minmax_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100)
test_4 (_mm_mask_mpsadbw_epu8, __m128i, __m128i, __mmask8, __m128i, __m128i, 1)
test_4 (_mm256_mask_mpsadbw_epu8, __m256i, __m256i, __mmask16, __m256i, __m256i, 1)
-/* avx10_2convertintrin */
-test_2 (_mm256_cvtx_round2ps_ph, __m256h, __m256, __m256, 4)
-
/* avx10_2-512convertintrin.h */
test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4)
test_2 (_mm256_minmax_pbh, __m256bh, __m256bh, __m256bh, 100)
test_3 (_mm256_maskz_minmax_pbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
test_4 (_mm256_mask_minmax_pbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
-test_2x (_mm256_minmax_round_pd, __m256d, __m256d, __m256d, 100, 4)
-test_3x (_mm256_maskz_minmax_round_pd, __m256d, __mmask8, __m256d, __m256d, 100, 4)
-test_4x (_mm256_mask_minmax_round_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100, 4)
-test_2x (_mm256_minmax_round_ps, __m256, __m256, __m256, 100, 4)
-test_3x (_mm256_maskz_minmax_round_ps, __m256, __mmask8, __m256, __m256, 100, 4)
-test_4x (_mm256_mask_minmax_round_ps, __m256, __m256, __mmask8, __m256, __m256, 100, 4)
-test_2x (_mm256_minmax_round_ph, __m256h, __m256h, __m256h, 100, 4)
-test_3x (_mm256_maskz_minmax_round_ph, __m256h, __mmask16, __m256h, __m256h, 100, 4)
-test_4x (_mm256_mask_minmax_round_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100, 4)
test_2 (_mm256_minmax_pd, __m256d, __m256d, __m256d, 100)
test_3 (_mm256_maskz_minmax_pd, __m256d, __mmask8, __m256d, __m256d, 100)
test_4 (_mm256_mask_minmax_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100)
#define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E)
#define __builtin_ia32_mpsadbw256_mask(A, B, C, D, E) __builtin_ia32_mpsadbw256_mask (A, B, 1, D, E)
-/* avx10_2convertintrin.h */
-#define __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx256_mask_round(A, B, C, D, 8)
-
/* avx10_2-512convertintrin.h */
#define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 100, D, E)
-#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxpd256_mask(A, B, C, D, E) __builtin_ia32_minmaxpd256_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 100, D, E)
-#define __builtin_ia32_minmaxph256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph256_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxph256_mask(A, B, C, D, E) __builtin_ia32_minmaxph256_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E)
-#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxps256_mask(A, B, C, D, E) __builtin_ia32_minmaxps256_mask (A, B, 100, D, E)
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs,amx-movrs")