#ifdef __OPTIMIZE__
extern __inline __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_nepbh (__m512bh __A, __m512bh __B, const int __C)
+_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C)
{
- return (__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf)(__m512bh)
- _mm512_setzero_si512 (),
- (__mmask32) -1);
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) -1);
}
extern __inline __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_nepbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B, const int __C)
+_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B, const int __C)
{
- return (__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf) __W,
- (__mmask32) __U);
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf) __W,
+ (__mmask32) __U);
}
extern __inline __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_nepbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, const int __C)
+_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, const int __C)
{
- return (__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf)(__m512bh)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
}
extern __inline __m512d
}
#else
-#define _mm512_minmax_nepbh(A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) \
- _mm512_setzero_si512 (), \
- (__mmask32) (-1)))
-
-#define _mm512_mask_minmax_nepbh(W, U, A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) (W), \
- (__mmask32) (U)))
-
-#define _mm512_maskz_minmax_nepbh(U, A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) \
- _mm512_setzero_si512 (), \
- (__mmask32) (U)))
+#define _mm512_minmax_pbh(A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (-1)))
+
+#define _mm512_mask_minmax_pbh(W, U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) (W), \
+ (__mmask32) (U)))
+
+#define _mm512_maskz_minmax_pbh(U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (U)))
#define _mm512_minmax_round_pd(A, B, C, R) \
((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
#ifdef __OPTIMIZE__
extern __inline __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_minmax_nepbh (__m128bh __A, __m128bh __B, const int __C)
+_mm_minmax_pbh (__m128bh __A, __m128bh __B, const int __C)
{
- return (__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) __A,
- (__v8bf) __B,
- __C,
- (__v8bf)(__m128bh)
- _mm_setzero_si128 (),
- (__mmask8) -1);
+ return (__m128bh) __builtin_ia32_minmaxbf16128_mask ((__v8bf) __A,
+ (__v8bf) __B,
+ __C,
+ (__v8bf)(__m128bh)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
}
extern __inline __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_minmax_nepbh (__m128bh __W, __mmask8 __U, __m128bh __A,
- __m128bh __B, const int __C)
+_mm_mask_minmax_pbh (__m128bh __W, __mmask8 __U, __m128bh __A,
+ __m128bh __B, const int __C)
{
- return (__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) __A,
- (__v8bf) __B,
- __C,
- (__v8bf) __W,
- (__mmask8) __U);
+ return (__m128bh) __builtin_ia32_minmaxbf16128_mask ((__v8bf) __A,
+ (__v8bf) __B,
+ __C,
+ (__v8bf) __W,
+ (__mmask8) __U);
}
extern __inline __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_minmax_nepbh (__mmask8 __U, __m128bh __A, __m128bh __B, const int __C)
+_mm_maskz_minmax_pbh (__mmask8 __U, __m128bh __A, __m128bh __B, const int __C)
{
- return (__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) __A,
- (__v8bf) __B,
- __C,
- (__v8bf)(__m128bh)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return (__m128bh) __builtin_ia32_minmaxbf16128_mask ((__v8bf) __A,
+ (__v8bf) __B,
+ __C,
+ (__v8bf)(__m128bh)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
}
extern __inline __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_minmax_nepbh (__m256bh __A, __m256bh __B, const int __C)
+_mm256_minmax_pbh (__m256bh __A, __m256bh __B, const int __C)
{
- return (__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) __A,
- (__v16bf) __B,
- __C,
- (__v16bf)(__m256bh)
- _mm256_setzero_si256 (),
- (__mmask16) -1);
+ return (__m256bh) __builtin_ia32_minmaxbf16256_mask ((__v16bf) __A,
+ (__v16bf) __B,
+ __C,
+ (__v16bf)(__m256bh)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
}
extern __inline __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_minmax_nepbh (__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B,
- const int __C)
+_mm256_mask_minmax_pbh (__m256bh __W, __mmask16 __U, __m256bh __A,
+ __m256bh __B, const int __C)
{
- return (__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) __A,
- (__v16bf) __B,
- __C,
- (__v16bf) __W,
- (__mmask16) __U);
+ return (__m256bh) __builtin_ia32_minmaxbf16256_mask ((__v16bf) __A,
+ (__v16bf) __B,
+ __C,
+ (__v16bf) __W,
+ (__mmask16) __U);
}
extern __inline __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_minmax_nepbh (__mmask16 __U, __m256bh __A, __m256bh __B, const int __C)
+_mm256_maskz_minmax_pbh (__mmask16 __U, __m256bh __A,
+ __m256bh __B, const int __C)
{
- return (__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) __A,
- (__v16bf) __B,
- __C,
- (__v16bf)(__m256bh)
- _mm256_setzero_si256 (),
- (__mmask16) __U);
+ return (__m256bh) __builtin_ia32_minmaxbf16256_mask ((__v16bf) __A,
+ (__v16bf) __B,
+ __C,
+ (__v16bf)(__m256bh)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
}
extern __inline __m128d
}
#else
-#define _mm_minmax_nepbh(A, B, C) \
- ((__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) (A), \
- (__v8bf) (B), \
- (int) (C), \
- (__v8bf) (__m128bh) \
- _mm_setzero_si128 (), \
- (__mmask8) (-1)))
+#define _mm_minmax_pbh(A, B, C) \
+ ((__m128bh) __builtin_ia32_minmaxbf16128_mask ((__v8bf) (A), \
+ (__v8bf) (B), \
+ (int) (C), \
+ (__v8bf) (__m128bh) \
+ _mm_setzero_si128 (), \
+ (__mmask8) (-1)))
-#define _mm_mask_minmax_nepbh(W, U, A, B, C) \
- ((__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) (A), \
- (__v8bf) (B), \
- (int) (C), \
- (__v8bf) (__m128bh) (W), \
- (__mmask8) (U)))
+#define _mm_mask_minmax_pbh(W, U, A, B, C) \
+ ((__m128bh) __builtin_ia32_minmaxbf16128_mask ((__v8bf) (A), \
+ (__v8bf) (B), \
+ (int) (C), \
+ (__v8bf) (__m128bh) (W), \
+ (__mmask8) (U)))
-#define _mm_maskz_minmax_nepbh(U, A, B, C) \
- ((__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) (A), \
- (__v8bf) (B), \
- (int) (C), \
- (__v8bf) (__m128bh) \
- _mm_setzero_si128 (), \
- (__mmask8) (U)))
+#define _mm_maskz_minmax_pbh(U, A, B, C) \
+ ((__m128bh) __builtin_ia32_minmaxbf16128_mask ((__v8bf) (A), \
+ (__v8bf) (B), \
+ (int) (C), \
+ (__v8bf) (__m128bh) \
+ _mm_setzero_si128 (), \
+ (__mmask8) (U)))
-#define _mm256_minmax_nepbh(A, B, C) \
- ((__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) (A), \
- (__v16bf) (B), \
- (int) (C), \
- (__v16bf) (__m256bh) \
- _mm256_setzero_si256 (), \
- (__mmask16) (-1)))
+#define _mm256_minmax_pbh(A, B, C) \
+ ((__m256bh) __builtin_ia32_minmaxbf16256_mask ((__v16bf) (A), \
+ (__v16bf) (B), \
+ (int) (C), \
+ (__v16bf) (__m256bh) \
+ _mm256_setzero_si256 (), \
+ (__mmask16) (-1)))
-#define _mm256_mask_minmax_nepbh(W, U, A, B, C) \
- ((__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) (A), \
- (__v16bf) (B), \
- (int) (C), \
- (__v16bf) (__m256bh) (W), \
- (__mmask16) (U)))
+#define _mm256_mask_minmax_pbh(W, U, A, B, C) \
+ ((__m256bh) __builtin_ia32_minmaxbf16256_mask ((__v16bf) (A), \
+ (__v16bf) (B), \
+ (int) (C), \
+ (__v16bf) (__m256bh) (W), \
+ (__mmask16) (U)))
-#define _mm256_maskz_minmax_nepbh(U, A, B, C) \
- ((__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) (A), \
- (__v16bf) (B), \
- (int) (C), \
- (__v16bf) (__m256bh) \
- _mm256_setzero_si256 (), \
- (__mmask16) (U)))
+#define _mm256_maskz_minmax_pbh(U, A, B, C) \
+ ((__m256bh) __builtin_ia32_minmaxbf16256_mask ((__v16bf) (A), \
+ (__v16bf) (B), \
+ (int) (C), \
+ (__v16bf) (__m256bh) \
+ _mm256_setzero_si256 (), \
+ (__mmask16) (U)))
#define _mm_minmax_pd(A, B, C) \
((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2qqsv2di_mask, "__builtin_ia32_cvttps2qqs128_mask", IX86_BUILTIN_VCVTTPS2QQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2udqsv4sf_mask, "__builtin_ia32_cvttps2udqs128_mask", IX86_BUILTIN_VCVTTPS2UDQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2uqqsv2di_mask, "__builtin_ia32_cvttps2uqqs128_mask", IX86_BUILTIN_VCVTTPS2UQQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxnepbf16_v8bf_mask, "__builtin_ia32_minmaxnepbf16128_mask", IX86_BUILTIN_MINMAXNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_INT_V8BF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxnepbf16_v16bf_mask, "__builtin_ia32_minmaxnepbf16256_mask", IX86_BUILTIN_MINMAXNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_INT_V16BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxnepbf16_v32bf_mask, "__builtin_ia32_minmaxnepbf16512_mask", IX86_BUILTIN_MINMAXNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_INT_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxbf16_v8bf_mask, "__builtin_ia32_minmaxbf16128_mask", IX86_BUILTIN_MINMAXBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_INT_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxbf16_v16bf_mask, "__builtin_ia32_minmaxbf16256_mask", IX86_BUILTIN_MINMAXBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_INT_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxbf16_v32bf_mask, "__builtin_ia32_minmaxbf16512_mask", IX86_BUILTIN_MINMAXBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_INT_V32BF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv2df_mask, "__builtin_ia32_minmaxpd128_mask", IX86_BUILTIN_MINMAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv8hf_mask, "__builtin_ia32_minmaxph128_mask", IX86_BUILTIN_MINMAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv4sf_mask, "__builtin_ia32_minmaxps128_mask", IX86_BUILTIN_MINMAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI)
UNSPEC_VCVTTPS2IUBS
UNSPEC_SFIX_SATURATION
UNSPEC_UFIX_SATURATION
- UNSPEC_MINMAXNEPBF16
+ UNSPEC_MINMAXBF16
UNSPEC_MINMAX
;; For MOVRS support
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx10_2_minmaxnepbf16_<mode><mask_name>"
+(define_insn "avx10_2_minmaxbf16_<mode><mask_name>"
[(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
(unspec:VBF_AVX10_2
[(match_operand:VBF_AVX10_2 1 "register_operand" "v")
(match_operand:VBF_AVX10_2 2 "bcst_vector_operand" "vmBr")
(match_operand:SI 3 "const_0_to_255_operand")]
- UNSPEC_MINMAXNEPBF16))]
+ UNSPEC_MINMAXBF16))]
"TARGET_AVX10_2_256"
- "vminmaxnepbf16\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
+ "vminmaxbf16\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
#define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 4, D, E, 4)
-#define __builtin_ia32_minmaxnepbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxnepbf16512_mask (A, B, 4, W, U)
+#define __builtin_ia32_minmaxbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxbf16512_mask (A, B, 4, W, U)
/* avx10_2minmaxintrin.h */
#define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 4, D, E, 4)
-#define __builtin_ia32_minmaxnepbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16128_mask (A, B, 4, D, E)
-#define __builtin_ia32_minmaxnepbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16256_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E)
/* { dg-do compile } */
/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */
-/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 2 } } */
void extern
avx10_2_512_test (void)
{
- x1 = _mm512_minmax_nepbh (x1, x1, 100);
- x1 = _mm512_mask_minmax_nepbh (x1, m32, x1, x1, 100);
- x1 = _mm512_maskz_minmax_nepbh (m32, x1, x1, 100);
+ x1 = _mm512_minmax_pbh (x1, x1, 100);
+ x1 = _mm512_mask_minmax_pbh (x1, m32, x1, x1, 100);
+ x1 = _mm512_maskz_minmax_pbh (m32, x1, x1, 100);
x2 = _mm512_minmax_ph (x2, x2, 1);
x2 = _mm512_mask_minmax_ph (x2, m32, x2, x2, 1);
x2 = _mm512_maskz_minmax_ph (m32, x2, x2, 1);
MASK_TYPE mask = MASK_VALUE;
__bf16 res_ref[SIZE];
- UNIT_TEST(0, nepbh, bf16_bf, __bf16);
- UNIT_TEST(1, nepbh, bf16_bf, __bf16);
- UNIT_TEST(4, nepbh, bf16_bf, __bf16);
- UNIT_TEST(5, nepbh, bf16_bf, __bf16);
- UNIT_TEST(16, nepbh, bf16_bf, __bf16);
- UNIT_TEST(17, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(0, pbh, bf16_bf, __bf16);
+ UNIT_TEST(1, pbh, bf16_bf, __bf16);
+ UNIT_TEST(4, pbh, bf16_bf, __bf16);
+ UNIT_TEST(5, pbh, bf16_bf, __bf16);
+ UNIT_TEST(16, pbh, bf16_bf, __bf16);
+ UNIT_TEST(17, pbh, bf16_bf, __bf16);
}
/* { dg-do compile } */
/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */
-/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
void extern
avx10_2_test (void)
{
- x1 = _mm_minmax_nepbh (x1, x1, 100);
- x1 = _mm_mask_minmax_nepbh (x1, m8, x1, x1, 100);
- x1 = _mm_maskz_minmax_nepbh (m8, x1, x1, 100);
- y1_ = _mm256_minmax_nepbh (y1_, y1_, 100);
- y1_ = _mm256_mask_minmax_nepbh (y1_, m16, y1_, y1_, 100);
- y1_ = _mm256_maskz_minmax_nepbh (m16, y1_, y1_, 100);
+ x1 = _mm_minmax_pbh (x1, x1, 100);
+ x1 = _mm_mask_minmax_pbh (x1, m8, x1, x1, 100);
+ x1 = _mm_maskz_minmax_pbh (m8, x1, x1, 100);
+ y1_ = _mm256_minmax_pbh (y1_, y1_, 100);
+ y1_ = _mm256_mask_minmax_pbh (y1_, m16, y1_, y1_, 100);
+ y1_ = _mm256_maskz_minmax_pbh (m16, y1_, y1_, 100);
x2 = _mm_minmax_ph (x2, x2, 100);
x2 = _mm_mask_minmax_ph (x2, m8, x2, x2, 100);
x2 = _mm_maskz_minmax_ph (m8, x2, x2, 100);
#define AVX10_2
#define AVX512VL
#define AVX512F_LEN 256
-#include "avx10_2-512-vminmaxnepbf16-2.c"
+#include "avx10_2-512-vminmaxbf16-2.c"
#undef AVX512F_LEN
#define AVX512F_LEN 128
-#include "avx10_2-512-vminmaxnepbf16-2.c"
+#include "avx10_2-512-vminmaxbf16-2.c"
#define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 4, D, E, 4)
-#define __builtin_ia32_minmaxnepbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxnepbf16512_mask (A, B, 4, W, U)
+#define __builtin_ia32_minmaxbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxbf16512_mask (A, B, 4, W, U)
/* avx10_2minmaxintrin.h */
#define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 4, D, E, 4)
-#define __builtin_ia32_minmaxnepbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16128_mask (A, B, 4, D, E)
-#define __builtin_ia32_minmaxnepbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16256_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E)
#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 4, D, E, 4)
#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E)
#endif
/* avx10_2-512minmaxintrin.h */
-test_2 (_mm512_minmax_nepbh, __m512bh, __m512bh, __m512bh, 100)
-test_3 (_mm512_maskz_minmax_nepbh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
-test_4 (_mm512_mask_minmax_nepbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_2 (_mm512_minmax_pbh, __m512bh, __m512bh, __m512bh, 100)
+test_3 (_mm512_maskz_minmax_pbh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_4 (_mm512_mask_minmax_pbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
test_2x (_mm512_minmax_round_pd, __m512d, __m512d, __m512d, 100, 4)
test_3x (_mm512_maskz_minmax_round_pd, __m512d, __mmask8, __m512d, __m512d, 100, 4)
test_4x (_mm512_mask_minmax_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100, 4)
test_4 (_mm512_mask_minmax_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100)
/* avx10_2minmaxintrin.h */
-test_2 (_mm256_minmax_nepbh, __m256bh, __m256bh, __m256bh, 100)
-test_3 (_mm256_maskz_minmax_nepbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
-test_4 (_mm256_mask_minmax_nepbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_2 (_mm256_minmax_pbh, __m256bh, __m256bh, __m256bh, 100)
+test_3 (_mm256_maskz_minmax_pbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_4 (_mm256_mask_minmax_pbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
test_2x (_mm256_minmax_round_pd, __m256d, __m256d, __m256d, 100, 4)
test_3x (_mm256_maskz_minmax_round_pd, __m256d, __mmask8, __m256d, __m256d, 100, 4)
test_4x (_mm256_mask_minmax_round_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100, 4)
test_2 (_mm256_minmax_ph, __m256h, __m256h, __m256h, 100)
test_3 (_mm256_maskz_minmax_ph, __m256h, __mmask16, __m256h, __m256h, 100)
test_4 (_mm256_mask_minmax_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100)
-test_2 (_mm_minmax_nepbh, __m128bh, __m128bh, __m128bh, 100)
-test_3 (_mm_maskz_minmax_nepbh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
-test_4 (_mm_mask_minmax_nepbh, __m128bh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_2 (_mm_minmax_pbh, __m128bh, __m128bh, __m128bh, 100)
+test_3 (_mm_maskz_minmax_pbh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_4 (_mm_mask_minmax_pbh, __m128bh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
test_2 (_mm_minmax_pd, __m128d, __m128d, __m128d, 100)
test_3 (_mm_maskz_minmax_pd, __m128d, __mmask8, __m128d, __m128d, 100)
test_4 (_mm_mask_minmax_pd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100)
#endif
/* avx10_2-512minmaxintrin.h */
-test_2 (_mm512_minmax_nepbh, __m512bh, __m512bh, __m512bh, 100)
-test_3 (_mm512_maskz_minmax_nepbh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
-test_4 (_mm512_mask_minmax_nepbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_2 (_mm512_minmax_pbh, __m512bh, __m512bh, __m512bh, 100)
+test_3 (_mm512_maskz_minmax_pbh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_4 (_mm512_mask_minmax_pbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
test_2x (_mm512_minmax_round_pd, __m512d, __m512d, __m512d, 100, 4)
test_3x (_mm512_maskz_minmax_round_pd, __m512d, __mmask8, __m512d, __m512d, 100, 4)
test_4x (_mm512_mask_minmax_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100, 4)
test_4 (_mm512_mask_minmax_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100)
/* avx10_2minmaxintrin.h */
-test_2 (_mm256_minmax_nepbh, __m256bh, __m256bh, __m256bh, 100)
-test_3 (_mm256_maskz_minmax_nepbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
-test_4 (_mm256_mask_minmax_nepbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_2 (_mm256_minmax_pbh, __m256bh, __m256bh, __m256bh, 100)
+test_3 (_mm256_maskz_minmax_pbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_4 (_mm256_mask_minmax_pbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
test_2x (_mm256_minmax_round_pd, __m256d, __m256d, __m256d, 100, 4)
test_3x (_mm256_maskz_minmax_round_pd, __m256d, __mmask8, __m256d, __m256d, 100, 4)
test_4x (_mm256_mask_minmax_round_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100, 4)
test_2 (_mm256_minmax_ph, __m256h, __m256h, __m256h, 100)
test_3 (_mm256_maskz_minmax_ph, __m256h, __mmask16, __m256h, __m256h, 100)
test_4 (_mm256_mask_minmax_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100)
-test_2 (_mm_minmax_nepbh, __m128bh, __m128bh, __m128bh, 100)
-test_3 (_mm_maskz_minmax_nepbh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
-test_4 (_mm_mask_minmax_nepbh, __m128bh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_2 (_mm_minmax_pbh, __m128bh, __m128bh, __m128bh, 100)
+test_3 (_mm_maskz_minmax_pbh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_4 (_mm_mask_minmax_pbh, __m128bh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
test_2 (_mm_minmax_pd, __m128d, __m128d, __m128d, 100)
test_3 (_mm_maskz_minmax_pd, __m128d, __mmask8, __m128d, __m128d, 100)
test_4 (_mm_mask_minmax_pd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100)
#define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 100, D, E, 4)
#define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 100, D, E, 4)
#define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 100, D, E, 4)
-#define __builtin_ia32_minmaxnepbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxnepbf16512_mask (A, B, 100, W, U)
+#define __builtin_ia32_minmaxbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxbf16512_mask (A, B, 100, W, U)
/* avx10_2-minmaxintrin.h */
#define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 100, D, E, 4)
#define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 100, D, E, 4)
#define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 100, D, E, 4)
-#define __builtin_ia32_minmaxnepbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16128_mask (A, B, 100, D, E)
-#define __builtin_ia32_minmaxnepbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16256_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 100, D, E, 4)
#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 100, D, E)