__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_pbh (__m512bh __A, __m512bh __B)
{
- return (__m512bh) __builtin_ia32_maxpbf16512 (__A, __B);
+ return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B);
}
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
+ __m512bh __A, __m512bh __B)
{
return (__m512bh)
- __builtin_ia32_maxpbf16512_mask (__A, __B, __W, __U);
+ __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U);
}
extern __inline__ __m512bh
_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
{
return (__m512bh)
- __builtin_ia32_maxpbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
+ __builtin_ia32_maxbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
}
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_pbh (__m512bh __A, __m512bh __B)
{
- return (__m512bh) __builtin_ia32_minpbf16512 (__A, __B);
+ return (__m512bh) __builtin_ia32_minbf16512 (__A, __B);
}
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
+ __m512bh __A, __m512bh __B)
{
return (__m512bh)
- __builtin_ia32_minpbf16512_mask (__A, __B, __W, __U);
+ __builtin_ia32_minbf16512_mask (__A, __B, __W, __U);
}
extern __inline__ __m512bh
_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
{
return (__m512bh)
- __builtin_ia32_minpbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
+ __builtin_ia32_minbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
}
extern __inline__ __m512bh
#endif /* __OPIMTIZE__ */
-/* Intrinsics vcmppbf16. */
+/* Intrinsics vcmpbf16. */
#ifdef __OPTIMIZE__
extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
const int __imm)
{
return (__mmask32)
- __builtin_ia32_cmppbf16512_mask (__A, __B, __imm, __U);
+ __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U);
}
extern __inline __mmask32
_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm)
{
return (__mmask32)
- __builtin_ia32_cmppbf16512_mask (__A, __B, __imm,
- (__mmask32) -1);
+ __builtin_ia32_cmpbf16512_mask (__A, __B, __imm,
+ (__mmask32) -1);
}
#else
#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \
- ((__mmask32) __builtin_ia32_cmppbf16512_mask ((B), (C), (D), (A)))
+ ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A)))
#define _mm512_cmp_pbh_mask(A, B, C) \
- ((__mmask32) __builtin_ia32_cmppbf16512_mask ((A), (B), (C), (-1)))
+ ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1)))
#endif /* __OPIMTIZE__ */
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_pbh (__m256bh __W, __mmask16 __U,
- __m256bh __A, __m256bh __B)
+ __m256bh __A, __m256bh __B)
{
return (__m256bh)
- __builtin_ia32_maxpbf16256_mask (__A, __B, __W, __U);
+ __builtin_ia32_maxbf16256_mask (__A, __B, __W, __U);
}
extern __inline__ __m256bh
_mm256_maskz_max_pbh (__mmask16 __U, __m256bh __A, __m256bh __B)
{
return (__m256bh)
- __builtin_ia32_maxpbf16256_mask (__A, __B,
- (__v16bf) _mm256_setzero_si256 (),
- __U);
+ __builtin_ia32_maxbf16256_mask (__A, __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
}
extern __inline__ __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pbh (__m128bh __A, __m128bh __B)
{
- return (__m128bh) __builtin_ia32_maxpbf16128 (__A, __B);
+ return (__m128bh) __builtin_ia32_maxbf16128 (__A, __B);
}
extern __inline__ __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_pbh (__m128bh __W, __mmask8 __U,
- __m128bh __A, __m128bh __B)
+ __m128bh __A, __m128bh __B)
{
return (__m128bh)
- __builtin_ia32_maxpbf16128_mask (__A, __B, __W, __U);
+ __builtin_ia32_maxbf16128_mask (__A, __B, __W, __U);
}
extern __inline__ __m128bh
_mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
{
return (__m128bh)
- __builtin_ia32_maxpbf16128_mask (__A, __B,
- (__v8bf) _mm_setzero_si128 (),
- __U);
+ __builtin_ia32_maxbf16128_mask (__A, __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
}
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_pbh (__m256bh __A, __m256bh __B)
{
- return (__m256bh) __builtin_ia32_minpbf16256 (__A, __B);
+ return (__m256bh) __builtin_ia32_minbf16256 (__A, __B);
}
extern __inline__ __m256bh
__m256bh __A, __m256bh __B)
{
return (__m256bh)
- __builtin_ia32_minpbf16256_mask (__A, __B, __W, __U);
+ __builtin_ia32_minbf16256_mask (__A, __B, __W, __U);
}
extern __inline__ __m256bh
_mm256_maskz_min_pbh (__mmask16 __U, __m256bh __A, __m256bh __B)
{
return (__m256bh)
- __builtin_ia32_minpbf16256_mask (__A, __B,
- (__v16bf) _mm256_setzero_si256 (),
- __U);
+ __builtin_ia32_minbf16256_mask (__A, __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
}
extern __inline__ __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pbh (__m128bh __A, __m128bh __B)
{
- return (__m128bh) __builtin_ia32_minpbf16128 (__A, __B);
+ return (__m128bh) __builtin_ia32_minbf16128 (__A, __B);
}
extern __inline__ __m128bh
__m128bh __A, __m128bh __B)
{
return (__m128bh)
- __builtin_ia32_minpbf16128_mask (__A, __B, __W, __U);
+ __builtin_ia32_minbf16128_mask (__A, __B, __W, __U);
}
extern __inline__ __m128bh
_mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
{
return (__m128bh)
- __builtin_ia32_minpbf16128_mask (__A, __B,
- (__v8bf) _mm_setzero_si128 (),
- __U);
+ __builtin_ia32_minbf16128_mask (__A, __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
}
extern __inline__ __m256bh
#endif /* __OPIMTIZE__ */
-/* Intrinsics vcmppbf16. */
+/* Intrinsics vcmpbf16. */
#ifdef __OPTIMIZE__
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_pbh_mask (__mmask16 __U, __m256bh __A,
- __m256bh __B, const int __imm)
+ __m256bh __B, const int __imm)
{
return (__mmask16)
- __builtin_ia32_cmppbf16256_mask (__A, __B, __imm, __U);
+ __builtin_ia32_cmpbf16256_mask (__A, __B, __imm, __U);
}
extern __inline __mmask16
_mm256_cmp_pbh_mask (__m256bh __A, __m256bh __B, const int __imm)
{
return (__mmask16)
- __builtin_ia32_cmppbf16256_mask (__A, __B, __imm, (__mmask16) -1);
+ __builtin_ia32_cmpbf16256_mask (__A, __B, __imm, (__mmask16) -1);
}
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_pbh_mask (__mmask8 __U, __m128bh __A,
- __m128bh __B, const int __imm)
+ __m128bh __B, const int __imm)
{
return (__mmask8)
- __builtin_ia32_cmppbf16128_mask (__A, __B, __imm, __U);
+ __builtin_ia32_cmpbf16128_mask (__A, __B, __imm, __U);
}
extern __inline __mmask8
_mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm)
{
return (__mmask8)
- __builtin_ia32_cmppbf16128_mask (__A, __B, __imm, (__mmask8) -1);
+ __builtin_ia32_cmpbf16128_mask (__A, __B, __imm, (__mmask8) -1);
}
#else
#define _mm256_mask_cmp_pbh_mask(A, B, C, D) \
- ((__mmask16) __builtin_ia32_cmppbf16256_mask ((B), (C), (D), (A)))
+ ((__mmask16) __builtin_ia32_cmpbf16256_mask ((B), (C), (D), (A)))
#define _mm256_cmp_pbh_mask(A, B, C) \
- ((__mmask16) __builtin_ia32_cmppbf16256_mask ((A), (B), (C), \
- (__mmask16) (-1)))
+ ((__mmask16) __builtin_ia32_cmpbf16256_mask ((A), (B), (C), \
+ (__mmask16) (-1)))
#define _mm_mask_cmp_pbh_mask(A, B, C, D) \
- ((__mmask8) __builtin_ia32_cmppbf16128_mask ((B), (C), (D), (A)))
+ ((__mmask8) __builtin_ia32_cmpbf16128_mask ((B), (C), (D), (A)))
#define _mm_cmp_pbh_mask(A, B, C) \
- ((__mmask8) __builtin_ia32_cmppbf16128_mask ((A), (B), (C), \
- (__mmask8) (-1)))
+ ((__mmask8) __builtin_ia32_cmpbf16128_mask ((A), (B), (C), \
+ (__mmask8) (-1)))
#endif /* __OPIMTIZE__ */
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divbf16_v16bf_mask, "__builtin_ia32_divbf16256_mask", IX86_BUILTIN_DIVBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divbf16_v8bf, "__builtin_ia32_divbf16128", IX86_BUILTIN_DIVBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divbf16_v8bf_mask, "__builtin_ia32_divbf16128_mask", IX86_BUILTIN_DIVBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxpbf16_v32bf, "__builtin_ia32_maxpbf16512", IX86_BUILTIN_MAXPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxpbf16_v32bf_mask, "__builtin_ia32_maxpbf16512_mask", IX86_BUILTIN_MAXPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v16bf, "__builtin_ia32_maxpbf16256", IX86_BUILTIN_MAXPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v16bf_mask, "__builtin_ia32_maxpbf16256_mask", IX86_BUILTIN_MAXPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v8bf, "__builtin_ia32_maxpbf16128", IX86_BUILTIN_MAXPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v8bf_mask, "__builtin_ia32_maxpbf16128_mask", IX86_BUILTIN_MAXPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminpbf16_v32bf, "__builtin_ia32_minpbf16512", IX86_BUILTIN_MINPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminpbf16_v32bf_mask, "__builtin_ia32_minpbf16512_mask", IX86_BUILTIN_MINPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v16bf, "__builtin_ia32_minpbf16256", IX86_BUILTIN_MINPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v16bf_mask, "__builtin_ia32_minpbf16256_mask", IX86_BUILTIN_MINPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v8bf, "__builtin_ia32_minpbf16128", IX86_BUILTIN_MINPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v8bf_mask, "__builtin_ia32_minpbf16128_mask", IX86_BUILTIN_MINPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxbf16_v32bf, "__builtin_ia32_maxbf16512", IX86_BUILTIN_MAXBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxbf16_v32bf_mask, "__builtin_ia32_maxbf16512_mask", IX86_BUILTIN_MAXBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxbf16_v16bf, "__builtin_ia32_maxbf16256", IX86_BUILTIN_MAXBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxbf16_v16bf_mask, "__builtin_ia32_maxbf16256_mask", IX86_BUILTIN_MAXBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxbf16_v8bf, "__builtin_ia32_maxbf16128", IX86_BUILTIN_MAXBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxbf16_v8bf_mask, "__builtin_ia32_maxbf16128_mask", IX86_BUILTIN_MAXBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminbf16_v32bf, "__builtin_ia32_minbf16512", IX86_BUILTIN_MINBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminbf16_v32bf_mask, "__builtin_ia32_minbf16512_mask", IX86_BUILTIN_MINBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v16bf, "__builtin_ia32_minbf16256", IX86_BUILTIN_MINBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v16bf_mask, "__builtin_ia32_minbf16256_mask", IX86_BUILTIN_MINBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v8bf, "__builtin_ia32_minbf16128", IX86_BUILTIN_MINBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v8bf_mask, "__builtin_ia32_minbf16128_mask", IX86_BUILTIN_MINBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf, "__builtin_ia32_scalefpbf16512", IX86_BUILTIN_SCALEFPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf_mask, "__builtin_ia32_scalefpbf16512_mask", IX86_BUILTIN_SCALEFPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf, "__builtin_ia32_scalefpbf16256", IX86_BUILTIN_SCALEFPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fpclasspbf16_v32bf_mask, "__builtin_ia32_fpclasspbf16512_mask", IX86_BUILTIN_FPCLASSPBF16512_MASK, UNKNOWN, (int) SI_FTYPE_V32BF_INT_USI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fpclasspbf16_v16bf_mask, "__builtin_ia32_fpclasspbf16256_mask", IX86_BUILTIN_FPCLASSPBF16256_MASK, UNKNOWN, (int) HI_FTYPE_V16BF_INT_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fpclasspbf16_v8bf_mask, "__builtin_ia32_fpclasspbf16128_mask", IX86_BUILTIN_FPCLASSPBF16128_MASK, UNKNOWN, (int) QI_FTYPE_V8BF_INT_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cmppbf16_v32bf_mask, "__builtin_ia32_cmppbf16512_mask", IX86_BUILTIN_CMPPBF16512_MASK, UNKNOWN, (int) USI_FTYPE_V32BF_V32BF_INT_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmppbf16_v16bf_mask, "__builtin_ia32_cmppbf16256_mask", IX86_BUILTIN_CMPPBF16256_MASK, UNKNOWN, (int) UHI_FTYPE_V16BF_V16BF_INT_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmppbf16_v8bf_mask, "__builtin_ia32_cmppbf16128_mask", IX86_BUILTIN_CMPPBF16128_MASK, UNKNOWN, (int) UQI_FTYPE_V8BF_V8BF_INT_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cmpbf16_v32bf_mask, "__builtin_ia32_cmpbf16512_mask", IX86_BUILTIN_CMPBF16512_MASK, UNKNOWN, (int) USI_FTYPE_V32BF_V32BF_INT_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmpbf16_v16bf_mask, "__builtin_ia32_cmpbf16256_mask", IX86_BUILTIN_CMPBF16256_MASK, UNKNOWN, (int) UHI_FTYPE_V16BF_V16BF_INT_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmpbf16_v8bf_mask, "__builtin_ia32_cmpbf16128_mask", IX86_BUILTIN_CMPBF16128_MASK, UNKNOWN, (int) UQI_FTYPE_V8BF_V8BF_INT_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16eq", IX86_BUILTIN_VCOMSBF16EQ, EQ, (int) INT_FTYPE_V8BF_V8BF)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16gt", IX86_BUILTIN_VCOMSBF16GT, GT, (int) INT_FTYPE_V8BF_V8BF)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16ge", IX86_BUILTIN_VCOMSBF16GE, GE, (int) INT_FTYPE_V8BF_V8BF)
(match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))]
"TARGET_AVX10_2_256")
-(define_insn "avx10_2_<code>pbf16_<mode><mask_name>"
+(define_insn "avx10_2_<code>bf16_<mode><mask_name>"
[(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
(smaxmin:VBF_AVX10_2
(match_operand:VBF_AVX10_2 1 "register_operand" "v")
(match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2_256"
- "v<maxmin_float>pbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ "v<maxmin_float>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
"vfpclasspbf16<vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "prefix" "evex")])
-(define_insn "avx10_2_cmppbf16_<mode><mask_scalar_merge_name>"
+(define_insn "avx10_2_cmpbf16_<mode><mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
[(match_operand:VBF_AVX10_2 1 "register_operand" "v")
(match_operand 3 "const_0_to_31_operand" "n")]
UNSPEC_PCMP))]
"TARGET_AVX10_2_256"
- "vcmppbf16\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
+ "vcmpbf16\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_comsbf16_v8bf"
#define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D)
#define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C)
-#define __builtin_ia32_cmppbf16512_mask(A, B, C, D) __builtin_ia32_cmppbf16512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D)
/* avx10_2bf16intrin.h */
#define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D)
#define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C)
#define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C)
-#define __builtin_ia32_cmppbf16256_mask(A, B, C, D) __builtin_ia32_cmppbf16256_mask(A, B, 1, D)
-#define __builtin_ia32_cmppbf16128_mask(A, B, C, D) __builtin_ia32_cmppbf16128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpbf16128_mask(A, B, C, D) __builtin_ia32_cmpbf16128_mask(A, B, 1, D)
/* avx10_2-512satcvtintrin.h */
#define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8)
/* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16z\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16z\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$1\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$2\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$1\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$2\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -O2 -mprefer-vector-width=512" } */
-/* { dg-final { scan-assembler-times "vcmppbf16" 5 } } */
+/* { dg-final { scan-assembler-times "vcmpbf16" 5 } } */
typedef __bf16 v32bf __attribute__ ((__vector_size__ (64)));
/* { dg-do compile } */
/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -mprefer-vector-width=512 -Ofast" } */
-/* /* { dg-final { scan-assembler-times "vmaxpbf16" 1 } } */
-/* /* { dg-final { scan-assembler-times "vminpbf16" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16" 1 } } */
void
-maxpbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2)
+maxbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2)
{
int i;
for (i = 0; i < 32; i++)
}
void
-minpbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2)
+minbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2)
{
int i;
for (i = 0; i < 32; i++)
/* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16y\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16x\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16x\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$1\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$2\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$1\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$2\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$1\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$2\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$1\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$2\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-march=x86-64-v3 -mavx10.2 -O2" } */
-/* { dg-final { scan-assembler-times "vcmppbf16" 10 } } */
+/* { dg-final { scan-assembler-times "vcmpbf16" 10 } } */
typedef __bf16 v16bf __attribute__ ((__vector_size__ (32)));
typedef __bf16 v8bf __attribute__ ((__vector_size__ (16)));
/* { dg-do compile } */
/* { dg-options "-march=x86-64-v3 -mavx10.2 -Ofast" } */
-/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
-/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vminbf16" 2 } } */
void
-maxpbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2)
+maxbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2)
{
int i;
for (i = 0; i < 16; i++)
}
void
-minpbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2)
+minbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2)
{
int i;
for (i = 0; i < 16; i++)
}
void
-maxpbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2)
+maxbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2)
{
int i;
for (i = 0; i < 16; i++)
}
void
-minpbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2)
+minbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2)
{
int i;
for (i = 0; i < 16; i++)
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-march=x86-64-v3 -mavx10.2 -Ofast" } */
-/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
-/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vminbf16" 2 } } */
void
maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vmaxpbf16-2.c"
+#include "avx10_2-512-vcmpbf16-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vmaxpbf16-2.c"
+#include "avx10_2-512-vcmpbf16-2.c"
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vminpbf16-2.c"
+#include "avx10_2-512-vmaxbf16-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vminpbf16-2.c"
+#include "avx10_2-512-vmaxbf16-2.c"
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vcmppbf16-2.c"
+#include "avx10_2-512-vminbf16-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vcmppbf16-2.c"
+#include "avx10_2-512-vminbf16-2.c"
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -mavx10.2" } */
-/* { dg-final { scan-assembler-times "vcmppbf16" 10 } } */
+/* { dg-final { scan-assembler-times "vcmpbf16" 10 } } */
typedef __bf16 __attribute__((__vector_size__ (4))) v2bf;
typedef __bf16 __attribute__((__vector_size__ (8))) v4bf;
#define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D)
#define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C)
-#define __builtin_ia32_cmppbf16512_mask(A, B, C, D) __builtin_ia32_cmppbf16512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D)
/* avx10_2bf16intrin.h */
#define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D)
#define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C)
#define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C)
-#define __builtin_ia32_cmppbf16256_mask(A, B, C, D) __builtin_ia32_cmppbf16256_mask(A, B, 1, D)
-#define __builtin_ia32_cmppbf16128_mask(A, B, C, D) __builtin_ia32_cmppbf16128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpbf16128_mask(A, B, C, D) __builtin_ia32_cmpbf16128_mask(A, B, 1, D)
/* avx10_2-512satcvtintrin.h */
#define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8)
#define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D)
#define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C)
-#define __builtin_ia32_cmppbf16512_mask(A, B, C, D) __builtin_ia32_cmppbf16512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D)
/* avx10_2bf16intrin.h */
#define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D)
#define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C)
#define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C)
-#define __builtin_ia32_cmppbf16256_mask(A, B, C, D) __builtin_ia32_cmppbf16256_mask(A, B, 1, D)
-#define __builtin_ia32_cmppbf16128_mask(A, B, C, D) __builtin_ia32_cmppbf16128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpbf16128_mask(A, B, C, D) __builtin_ia32_cmpbf16128_mask(A, B, 1, D)
/* avx10_2-512satcvtintrin.h */
#define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8)