avx10_2mediaintrin.h avx10_2-512mediaintrin.h
avx10_2convertintrin.h avx10_2-512convertintrin.h
avx10_2bf16intrin.h avx10_2-512bf16intrin.h
- avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h"
+ avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h
+ avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
--- /dev/null
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of GCC.
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2-512minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2_512MINMAXINTRIN_H_INCLUDED
+#define _AVX10_2_512MINMAXINTRIN_H_INCLUDED
+
+#if !defined (__AVX10_2_512__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2-512")
+#define __DISABLE_AVX10_2_512__
+#endif /* __AVX10_2_512__ */
+
+#ifdef __OPTIMIZE__
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_nepbh (__m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_nepbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_nepbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
+#else
+#define _mm512_minmax_nepbh(A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (-1)))
+
+#define _mm512_mask_minmax_nepbh(W, U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) (W), \
+ (__mmask32) (U)))
+
+#define _mm512_maskz_minmax_nepbh(U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (U)))
+
+#define _mm512_minmax_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm512_minmax_round_ph(A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ (int) (R)))
+
+#define _mm512_minmax_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm512_minmax_pd(A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_pd(U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_ph(A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ph(W, U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ph(U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_ps(A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ps(U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#endif
+
+#ifdef __DISABLE_AVX10_2_512__
+#undef __DISABLE_AVX10_2_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX10_2_512__ */
+
+#endif /* _AVX10_2_512MINMAXINTRIN_H_INCLUDED */
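A minimal usage sketch of the new 512-bit intrinsics (illustrative only, not
part of the patch): it assumes GCC with this patch applied and compilation
with -mavx10.2-512, the wrapper names are hypothetical, and the immediate
encoding follows the reference test helper added later in this patch
(imm8[1:0]: 0 min, 1 max, 2 min-magnitude, 3 max-magnitude).

#include <immintrin.h>

/* Lane-wise maximum of two double vectors (imm8 = 0x1 selects "max").  */
__m512d
max_pd (__m512d a, __m512d b)
{
  return _mm512_minmax_pd (a, b, 0x1);
}

/* Merge-masked form: lanes with a zero mask bit keep the value from src.  */
__m512d
max_pd_masked (__m512d src, __mmask8 m, __m512d a, __m512d b)
{
  return _mm512_mask_minmax_pd (src, m, a, b, 0x1);
}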
--- /dev/null
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of GCC.
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2MINMAXINTRIN_H_INCLUDED
+#define _AVX10_2MINMAXINTRIN_H_INCLUDED
+
+#if !defined(__AVX10_2_256__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2")
+#define __DISABLE_AVX10_2_256__
+#endif /* __AVX10_2_256__ */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_nepbh (__m128bh __A, __m128bh __B, const int __C)
+{
+ return (__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) __A,
+ (__v8bf) __B,
+ __C,
+ (__v8bf)(__m128bh)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_nepbh (__m128bh __W, __mmask8 __U, __m128bh __A,
+ __m128bh __B, const int __C)
+{
+ return (__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) __A,
+ (__v8bf) __B,
+ __C,
+ (__v8bf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_nepbh (__mmask8 __U, __m128bh __A, __m128bh __B, const int __C)
+{
+ return (__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) __A,
+ (__v8bf) __B,
+ __C,
+ (__v8bf)(__m128bh)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_nepbh (__m256bh __A, __m256bh __B, const int __C)
+{
+ return (__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) __A,
+ (__v16bf) __B,
+ __C,
+ (__v16bf)(__m256bh)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_nepbh (__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B,
+ const int __C)
+{
+ return (__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) __A,
+ (__v16bf) __B,
+ __C,
+ (__v16bf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_nepbh (__mmask16 __U, __m256bh __A, __m256bh __B, const int __C)
+{
+ return (__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) __A,
+ (__v16bf) __B,
+ __C,
+ (__v16bf)(__m256bh)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_pd (__m128d __A, __m128d __B, const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)(__m128d)
+ _mm_undefined_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_pd (__mmask8 __U, __m128d __A, __m128d __B, const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)(__m128d)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_pd (__m256d __A, __m256d __B, const int __C)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C,
+ (__v4df) (__m256d) _mm256_undefined_pd (),
+ (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B,
+ const int __C)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C, (__v4df) __W,
+ (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C,
+ (__v4df) (__m256d) _mm256_setzero_pd (),
+ (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_round_pd (__m256d __A, __m256d __B, const int __C, const int __R)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C,
+ (__v4df) (__m256d) _mm256_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B, const int __C, const int __R)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C, (__v4df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ const int __C, const int __R)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C,
+ (__v4df) (__m256d) _mm256_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_ph (__m128h __A, __m128h __B, const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)(__m128h)
+ _mm_undefined_ph (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_ph (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_ph (__mmask8 __U, __m128h __A, __m128h __B, const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)(__m128h)
+ _mm_setzero_ph (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_ph (__m256h __A, __m256h __B, const int __C)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C,
+ (__v16hf) (__m256h) _mm256_undefined_ph (),
+ (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B,
+ const int __C)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C, (__v16hf) __W,
+ (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __C)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C,
+ (__v16hf) (__m256h) _mm256_setzero_ph (),
+ (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_round_ph (__m256h __A, __m256h __B, const int __C, const int __R)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C,
+ (__v16hf) (__m256h) _mm256_undefined_ph (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
+ __m256h __B, const int __C, const int __R)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C, (__v16hf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
+ const int __C, const int __R)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C,
+ (__v16hf) (__m256h) _mm256_setzero_ph (),
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_ps (__m128 __A, __m128 __B, const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)(__m128)
+ _mm_undefined_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_ps (__mmask8 __U, __m128 __A, __m128 __B, const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)(__m128)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_ps (__m256 __A, __m256 __B, const int __C)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C,
+ (__v8sf) (__m256) _mm256_undefined_ps (),
+ (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
+ const int __C)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C, (__v8sf) __W,
+ (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C,
+ (__v8sf) (__m256) _mm256_setzero_ps (),
+ (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_round_ps (__m256 __A, __m256 __B, const int __C, const int __R)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C,
+ (__v8sf) (__m256) _mm256_undefined_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
+ const int __C, const int __R)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C, (__v8sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ const int __C, const int __R)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C,
+ (__v8sf) (__m256) _mm256_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_sd (__m128d __A, __m128d __B, const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)
+ _mm_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_round_sd (__m128d __A, __m128d __B, const int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)
+ _mm_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_sh (__m128h __A, __m128h __B, const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)
+ _mm_undefined_ph (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_sh (__mmask8 __U, __m128h __A, __m128h __B,
+ const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)
+ _mm_setzero_ph (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_round_sh (__m128h __A, __m128h __B, const int __C, const int __R)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)
+ _mm_undefined_ph (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __C, const int __R)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
+ const int __C, const int __R)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)
+ _mm_setzero_ph (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_ss (__m128 __A, __m128 __B, const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)
+ _mm_undefined_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_round_ss (__m128 __A, __m128 __B, const int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)
+ _mm_undefined_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_minmax_nepbh(A, B, C) \
+ ((__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) (A), \
+ (__v8bf) (B), \
+ (int) (C), \
+ (__v8bf) (__m128bh) \
+ _mm_setzero_si128 (), \
+ (__mmask8) (-1)))
+
+#define _mm_mask_minmax_nepbh(W, U, A, B, C) \
+ ((__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) (A), \
+ (__v8bf) (B), \
+ (int) (C), \
+ (__v8bf) (__m128bh) (W), \
+ (__mmask8) (U)))
+
+#define _mm_maskz_minmax_nepbh(U, A, B, C) \
+ ((__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) (A), \
+ (__v8bf) (B), \
+ (int) (C), \
+ (__v8bf) (__m128bh) \
+ _mm_setzero_si128 (), \
+ (__mmask8) (U)))
+
+#define _mm256_minmax_nepbh(A, B, C) \
+ ((__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) (A), \
+ (__v16bf) (B), \
+ (int) (C), \
+ (__v16bf) (__m256bh) \
+ _mm256_setzero_si256 (), \
+ (__mmask16) (-1)))
+
+#define _mm256_mask_minmax_nepbh(W, U, A, B, C) \
+ ((__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) (A), \
+ (__v16bf) (B), \
+ (int) (C), \
+ (__v16bf) (__m256bh) (W), \
+ (__mmask16) (U)))
+
+#define _mm256_maskz_minmax_nepbh(U, A, B, C) \
+ ((__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) (A), \
+ (__v16bf) (B), \
+ (int) (C), \
+ (__v16bf) (__m256bh) \
+ _mm256_setzero_si256 (), \
+ (__mmask16) (U)))
+
+#define _mm_minmax_pd(A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_undefined_pd (), \
+ (__mmask8) (-1)))
+
+#define _mm_mask_minmax_pd(W, U, A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) (W), \
+ (__mmask8) (U)))
+
+#define _mm_maskz_minmax_pd(U, A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_setzero_pd (), \
+ (__mmask8) (U)))
+
+#define _mm256_minmax_pd(A, B, C) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_undefined_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_mask_minmax_pd(W, U, A, B, C) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_maskz_minmax_pd(U, A, B, C) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_minmax_round_pd(A, B, C, R) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_undefined_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm256_mask_minmax_round_pd(W, U, A, B, C, R) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm256_maskz_minmax_round_pd(U, A, B, C, R) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_ph(A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_undefined_ph (), \
+ (__mmask8) (-1)))
+
+#define _mm_mask_minmax_ph(W, U, A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) (W), \
+ (__mmask8) (U)))
+
+#define _mm_maskz_minmax_ph(U, A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_setzero_ph (), \
+ (__mmask8) (U)))
+
+#define _mm256_minmax_ph(A, B, C) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_undefined_ph (), \
+ (__mmask16) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_mask_minmax_ph(W, U, A, B, C) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) (W), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_maskz_minmax_ph(U, A, B, C) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_setzero_ph (), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_minmax_round_ph(A, B, C, R) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_undefined_ph (), \
+ (__mmask16) (-1), \
+ (int) (R)))
+
+#define _mm256_mask_minmax_round_ph(W, U, A, B, C, R) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) (W), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm256_maskz_minmax_round_ph(U, A, B, C, R) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_setzero_ph (), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm_minmax_ps(A, B, C) \
+ ((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_undefined_ps (), \
+ (__mmask8) (-1)))
+
+#define _mm_mask_minmax_ps(W, U, A, B, C) \
+ ((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) (W), \
+ (__mmask8) (U)))
+
+#define _mm_maskz_minmax_ps(U, A, B, C) \
+ ((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_setzero_ps (), \
+ (__mmask8) (U)))
+
+#define _mm256_minmax_ps(A, B, C) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_undefined_ps (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_mask_minmax_ps(W, U, A, B, C) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_maskz_minmax_ps(U, A, B, C) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_setzero_ps (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_minmax_round_ps(A, B, C, R) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_undefined_ps (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm256_mask_minmax_round_ps(W, U, A, B, C, R) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm256_maskz_minmax_round_ps(U, A, B, C, R) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_setzero_ps (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_round_sd(A, B, C, R) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_undefined_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm_mask_minmax_round_sd(W, U, A, B, C, R) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_maskz_minmax_round_sd(U, A, B, C, R) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df)(B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_round_sh(A, B, C, R) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_undefined_ph (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm_mask_minmax_round_sh(W, U, A, B, C, R) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_maskz_minmax_round_sh(U, A, B, C, R) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_setzero_ph (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_round_ss(A, B, C, R) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_undefined_ps (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm_mask_minmax_round_ss(W, U, A, B, C, R) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_maskz_minmax_round_ss(U, A, B, C, R) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf)(__m128) \
+ _mm_setzero_ps (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_sd(A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_undefined_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sd(W, U, A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sd(U, A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_sh(A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_undefined_ph (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sh(W, U, A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sh(U, A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_setzero_ph (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_ss(A, B, C) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_undefined_ps (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_ss(W, U, A, B, C) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_ss(U, A, B, C) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_setzero_ps (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#endif
+
+#ifdef __DISABLE_AVX10_2_256__
+#undef __DISABLE_AVX10_2_256__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX10_2_256__ */
+
+#endif /* _AVX10_2MINMAXINTRIN_H_INCLUDED */
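A similar sketch for the 128-bit and scalar forms above (illustrative only,
not part of the patch; wrapper names hypothetical, requires -mavx10.2).  The
scalar and 256-bit *_round_* variants map to round_saeonly patterns, so the
rounding argument is limited to SAE values such as _MM_FROUND_NO_EXC or
_MM_FROUND_CUR_DIRECTION.

#include <immintrin.h>

/* Minimum of the low double elements (imm8 = 0x0), exceptions suppressed.  */
__m128d
min_sd_sae (__m128d a, __m128d b)
{
  return _mm_minmax_round_sd (a, b, 0x0, _MM_FROUND_NO_EXC);
}

/* Zero-masked, lane-wise minimum-magnitude (imm8 = 0x2) on packed float.  */
__m128
minmag_ps_z (__mmask8 m, __m128 a, __m128 b)
{
  return _mm_maskz_minmax_ps (m, a, b, 0x2);
}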
DEF_FUNCTION_TYPE (V16SI, V16SF, V16SI, UHI, INT)
DEF_FUNCTION_TYPE (V16HI, V16BF, V16HI, UHI, INT)
DEF_FUNCTION_TYPE (V32HI, V32BF, V32HI, USI, INT)
+DEF_FUNCTION_TYPE (V8BF, V8BF, V8BF, INT, V8BF, UQI)
+DEF_FUNCTION_TYPE (V16BF, V16BF, V16BF, INT, V16BF, UHI)
+DEF_FUNCTION_TYPE (V32BF, V32BF, V32BF, INT, V32BF, USI)
+DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI, INT)
+DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT, V32HF, USI, INT)
+DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2qqsv2di_mask, "__builtin_ia32_cvttps2qqs128_mask", IX86_BUILTIN_VCVTTPS2QQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2udqsv4sf_mask, "__builtin_ia32_cvttps2udqs128_mask", IX86_BUILTIN_VCVTTPS2UDQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2uqqsv2di_mask, "__builtin_ia32_cvttps2uqqs128_mask", IX86_BUILTIN_VCVTTPS2UQQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxnepbf16_v8bf_mask, "__builtin_ia32_minmaxnepbf16128_mask", IX86_BUILTIN_MINMAXNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_INT_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxnepbf16_v16bf_mask, "__builtin_ia32_minmaxnepbf16256_mask", IX86_BUILTIN_MINMAXNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_INT_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxnepbf16_v32bf_mask, "__builtin_ia32_minmaxnepbf16512_mask", IX86_BUILTIN_MINMAXNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_INT_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv2df_mask, "__builtin_ia32_minmaxpd128_mask", IX86_BUILTIN_MINMAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv8hf_mask, "__builtin_ia32_minmaxph128_mask", IX86_BUILTIN_MINMAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv4sf_mask, "__builtin_ia32_minmaxps128_mask", IX86_BUILTIN_MINMAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2sisdi_round, "__builtin_ia32_cvttss2sis64_round", IX86_BUILTIN_VCVTTSS2SIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usissi_round, "__builtin_ia32_cvttss2usis32_round", IX86_BUILTIN_VCVTTSS2USIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V4SF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usisdi_round, "__builtin_ia32_cvttss2usis64_round", IX86_BUILTIN_VCVTTSS2USIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxpv8df_mask_round, "__builtin_ia32_minmaxpd512_mask_round", IX86_BUILTIN_MINMAXPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxpv32hf_mask_round, "__builtin_ia32_minmaxph512_mask_round", IX86_BUILTIN_MINMAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxpv16sf_mask_round, "__builtin_ia32_minmaxps512_mask_round", IX86_BUILTIN_MINMAXPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv4df_mask_round, "__builtin_ia32_minmaxpd256_mask_round", IX86_BUILTIN_MINMAXPD256_MASK_ROUND, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv16hf_mask_round, "__builtin_ia32_minmaxph256_mask_round", IX86_BUILTIN_MINMAXPH256_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_INT_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv8sf_mask_round, "__builtin_ia32_minmaxps256_mask_round", IX86_BUILTIN_MINMAXPS256_MASK_ROUND, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxsv2df_mask_round, "__builtin_ia32_minmaxsd_mask_round", IX86_BUILTIN_MINMAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxsv8hf_mask_round, "__builtin_ia32_minmaxsh_mask_round", IX86_BUILTIN_MINMAXSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxsv4sf_mask_round, "__builtin_ia32_minmaxss_mask_round", IX86_BUILTIN_MINMAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
BDESC_END (ROUND_ARGS, MULTI_ARG)
case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT:
case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT:
case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT:
+ case V8BF_FTYPE_V8BF_V8BF_INT_V8BF_UQI:
+ case V16BF_FTYPE_V16BF_V16BF_INT_V16BF_UHI:
+ case V32BF_FTYPE_V32BF_V32BF_INT_V32BF_USI:
+ case V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI:
nargs = 5;
mask_pos = 1;
nargs_constant = 2;
case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
case V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT:
+ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI_INT:
+ case V32HF_FTYPE_V32HF_V32HF_INT_V32HF_USI_INT:
+ case V16HF_FTYPE_V16HF_V16HF_INT_V16HF_UHI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI_INT:
nargs = 6;
nargs_constant = 4;
break;
#include <avx10_2satcvtintrin.h>
#include <avx10_2-512satcvtintrin.h>
+
+#include <avx10_2minmaxintrin.h>
+
+#include <avx10_2-512minmaxintrin.h>
+
#endif /* _IMMINTRIN_H_INCLUDED */
UNSPEC_VCVTTPS2IUBS
UNSPEC_SFIX_SATURATION
UNSPEC_UFIX_SATURATION
+ UNSPEC_MINMAXNEPBF16
+ UNSPEC_MINMAX
])
(define_c_enum "unspecv" [
(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+(define_mode_iterator VFH_AVX10_2
+ [(V32HF "TARGET_AVX10_2_512") V16HF V8HF
+ (V16SF "TARGET_AVX10_2_512") V8SF V4SF
+ (V8DF "TARGET_AVX10_2_512") V4DF V2DF])
+
(define_mode_iterator VF2_AVX512VL
[(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
+
+(define_insn "avx10_2_minmaxnepbf16_<mode><mask_name>"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
+ (unspec:VBF_AVX10_2
+ [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
+ (match_operand:VBF_AVX10_2 2 "bcst_vector_operand" "vmBr")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_MINMAXNEPBF16))]
+ "TARGET_AVX10_2_256"
+ "vminmaxnepbf16\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_minmaxp<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VFH_AVX10_2 0 "register_operand" "=v")
+ (unspec:VFH_AVX10_2
+ [(match_operand:VFH_AVX10_2 1 "register_operand" "v")
+ (match_operand:VFH_AVX10_2 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_MINMAX))]
+ "TARGET_AVX10_2_256"
+ "vminmax<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx10_2_minmaxs<mode><mask_scalar_name><round_saeonly_scalar_name>"
+ [(set (match_operand:VFH_128 0 "register_operand" "=v")
+ (vec_merge:VFH_128
+ (unspec:VFH_128
+ [(match_operand:VFH_128 1 "register_operand" "v")
+ (match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_MINMAX)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX10_2_256"
+ "vminmax<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
#endif
+/* avx10_2-512minmaxintrin.h */
+#define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxnepbf16512_mask (A, B, 4, W, U)
+
+/* avx10_2minmaxintrin.h */
+#define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxnepbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16256_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxph256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 4, D, E, 4)
+
#include <wmmintrin.h>
#include <immintrin.h>
#include <mm3dnow.h>
--- /dev/null
+#ifndef AVX10MINMAX_HELPERFUNC_INCLUDED
+#define AVX10MINMAX_HELPERFUNC_INCLUDED
+
+#include <math.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <float.h>
+#include "avx512f-helper.h"
+#define SNAN_float __builtin_nansf ("")
+#define SNAN_flag_float 0x7fa00000
+#define QNAN_float __builtin_nanf ("")
+#define QNAN_flag_float 0x7fc00000
+#define SNAN_double ((double)__builtin_nans (""))
+#define SNAN_flag_double 0x7ff4000000000000
+#define QNAN_double ((double)__builtin_nan (""))
+#define QNAN_flag_double 0x7ff8000000000000
+#define SNAN__Float16 ((_Float16)__builtin_nansf16 (""))
+#define SNAN_flag__Float16 0x7d00
+#define QNAN__Float16 ((_Float16)__builtin_nanf16 (""))
+#define QNAN_flag__Float16 0x7e00
+#define SNAN___bf16 ((__bf16)__builtin_nansf16b (""))
+#define SNAN_flag___bf16 0x7fa0
+#define QNAN___bf16 ((__bf16)__builtin_nanf (""))
+#define QNAN_flag___bf16 0x7fc0
+#define ISNAN(x) ((x) != (x))
+#define ABS_float(x) fabsf (x)
+#define ABS_double(x) fabs (x)
+#define ABS__Float16(x) __builtin_fabsf16 (x)
+#define ABS___bf16(x) __builtin_fabsf (x)
+
+#define Union_Data(typef, typei) \
+typedef union \
+{ \
+ typef f; \
+ typei i; \
+} union_##typef;
+
+Union_Data(float, int)
+Union_Data(double, long long)
+Union_Data(__bf16, short)
+Union_Data(_Float16, short)
+
+#define IS_SNAN(union_x, type) ((union_x.i & SNAN_flag_##type) == union_snan.i)
+
+#define IS_QNAN(union_x, type) ((union_x.i & QNAN_flag_##type) == union_qnan.i)
+
+#define CHECK_EXP_MINMAX(UNION_TYPE, VALUE_TYPE, INT_TYPE) \
+static int \
+__attribute__((noinline, unused)) \
+check_minmax_##UNION_TYPE (UNION_TYPE u, const VALUE_TYPE *v) \
+{ \
+ int i; \
+ int err = 0; \
+ for (i = 0; i < ARRAY_SIZE (u.a); i++) \
+ { \
+ union_##VALUE_TYPE union_x, union_y; \
+ union_x.f = u.a[i]; \
+ union_y.f = v[i]; \
+ if (union_x.i != union_y.i) \
+ { \
+ err++; \
+ PRINTF ("%i: " "%f" " != " "%f" "\n", \
+ i, v[i], u.a[i]); \
+ } \
+ } \
+ return err; \
+}
+
+#if defined (AVX10_512BIT)
+CHECK_EXP_MINMAX (union512, float, int)
+CHECK_EXP_MINMAX (union512d, double, long int)
+CHECK_EXP_MINMAX (union512bf16_bf, __bf16, short int)
+CHECK_EXP_MINMAX (union512h, _Float16, short int)
+#endif
+CHECK_EXP_MINMAX (union256, float, int)
+CHECK_EXP_MINMAX (union256d, double, long int)
+CHECK_EXP_MINMAX (union128, float, int)
+CHECK_EXP_MINMAX (union128d, double, long int)
+CHECK_EXP_MINMAX (union256bf16_bf, __bf16, short int)
+CHECK_EXP_MINMAX (union128bf16_bf, __bf16, short int)
+CHECK_EXP_MINMAX (union256h, _Float16, short int)
+CHECK_EXP_MINMAX (union128h, _Float16, short int)
+
+#define UNION_CHECK_MINMAX(SIZE, NAME) EVAL(check_minmax_union, SIZE, NAME)
+
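+/* Reference comparison for one element pair.  OP1 and OP2 are the
+   ordering predicates, NP the sign applied when both operands are
+   zeros of opposite sign, NUM selects the behaviour that returns the
+   non-NaN operand when only one input is NaN, and MAG selects
+   magnitude (absolute-value) comparison.  A NaN result is quieted by
+   ORing in VALUE, the quiet-NaN bit pattern of the type.  */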
+#define CMP(res, x, y, type, value, op1, np, op2, zero, num, mag) \
+{ \
+ union_##type union_a, union_b; \
+ union_a.f = x; \
+ union_b.f = y; \
+ union_##type union_snan, union_qnan; \
+ union_snan.f = SNAN_##type; \
+ union_qnan.f = QNAN_##type; \
+ bool flag = false; \
+ if(num) \
+ { \
+ if(ISNAN(x) && ISNAN(y)) \
+ { \
+ if(IS_SNAN(union_a,type) || (IS_QNAN(union_a,type) && IS_QNAN(union_b,type))) \
+ { \
+ union_a.i |= value; \
+ res = union_a.f; \
+ flag = true; \
+ } \
+ else \
+ { \
+ union_b.i |= value; \
+ res = union_b.f; \
+ flag = true; \
+ } \
+ } \
+ else if(ISNAN(x)) \
+ { \
+ res = y; \
+ flag = true; \
+ } \
+ else if(ISNAN(y)) \
+ { \
+ res = x; \
+ flag = true; \
+ } \
+ } \
+ else \
+ { \
+ if(IS_SNAN(union_a,type) || (IS_QNAN(union_a,type) && !IS_SNAN(union_b,type))) \
+ { \
+ union_a.i |= value; \
+ res = union_a.f; \
+ flag = true; \
+ } \
+ else if(ISNAN(y)) \
+ { \
+ union_b.i |= value; \
+ res = union_b.f; \
+ flag = true; \
+ } \
+ } \
+ if(!flag) \
+ { \
+ if(!mag) \
+ { \
+ if((x == zero && y == - zero) || (x == - zero && y == zero)) \
+ res = np zero; \
+ else if(x op1 y) \
+ res = x; \
+ else \
+ res = y; \
+ } \
+ else \
+ { \
+ if(ABS_##type(x) op2 ABS_##type(y)) \
+ res = x; \
+ else if(ABS_##type(y) op2 ABS_##type(x)) \
+ res = y; \
+ else \
+ { \
+ if((x == zero && y == - zero) || (x == - zero && y == zero)) \
+ res = np zero; \
+ else if(x op1 y) \
+ res = x; \
+ else \
+ res = y; \
+ } \
+ } \
+ } \
+}
+
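+/* Scalar reference for the MINMAX operation.  The immediate is
+   decoded as below: bits [1:0] select the operation (0 min, 1 max,
+   2 min-magnitude, 3 max-magnitude), bits [3:2] the sign control
+   (0 sign of the first source, 2 force positive, 3 force negative)
+   and bit 4 whether a single NaN operand is ignored.  VALUE is the
+   type's quiet-NaN pattern and ZERO its zero constant.  */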
+#define MINMAX(type, value, zero) \
+type \
+minmax_##type (type * a, type * b, int imm) \
+{ \
+ int op_select = imm & 0x03; \
+ int sign_control = (imm & 0x0C) >> 2; \
+ int nan_prop_select = (imm & 0x10) >> 4; \
+ type tmp; \
+ if(nan_prop_select == 0) \
+ if(op_select == 0) \
+ CMP(tmp, *a, *b, type, value, <=, -, <, zero, false, false) \
+ else if(op_select == 1) \
+ CMP(tmp, *a, *b, type, value, >=, +, >, zero, false, false) \
+ else if(op_select == 2) \
+ CMP(tmp, *a, *b, type, value, <=, -, <, zero, false, true) \
+ else \
+ CMP(tmp, *a, *b, type, value, >=, +, >, zero, false, true) \
+ else \
+ if(op_select == 0) \
+ CMP(tmp, *a, *b, type, value, <=, -, <, zero, true, false) \
+ else if(op_select == 1) \
+ CMP(tmp, *a, *b, type, value, >=, +, >, zero, true, false) \
+ else if(op_select == 2) \
+ CMP(tmp, *a, *b, type, value, <=, -, <, zero, true, true) \
+ else \
+ CMP(tmp, *a, *b, type, value, >=, +, >, zero, true, true) \
+ if(!ISNAN(tmp)) \
+ if(sign_control == 0 && !ISNAN(*a)) \
+ { \
+ if((tmp < 0 && *a > 0) || (tmp > 0 && *a < 0)) \
+ tmp = -tmp; \
+ } \
+ else if(sign_control == 2) \
+ { \
+ if(tmp < 0) tmp = -tmp; \
+ } \
+ else if(sign_control == 3) \
+ { \
+ if(tmp > 0) tmp = -tmp; \
+ } \
+ return tmp; \
+}
+
+
+MINMAX(double, 0x7ff8000000000000, 0.0)
+MINMAX(float, 0x7fc00000, 0.0f)
+MINMAX(_Float16, 0x7e00, 0.0f16)
+MINMAX(__bf16, 0x7fc0, 0.0bf16)
+
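+/* Run the unmasked, merge-masked and zero-masked forms of the vector
+   intrinsic with immediate R on inputs laced with SNaN/QNaN values
+   and check each result bitwise against the CALC reference.  */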
+#define UNIT_TEST(R, InsnSuffix, MaskType, type) \
+ sign = -1; \
+ for (i = 0; i < SIZE; i++) \
+ { \
+ src1.a[i] = i % 2 ? SNAN_##type : 1.5 + 34.67 * i * sign; \
+ src2.a[i] = i % 3 ? QNAN_##type : -22.17 * i * sign; \
+ sign = sign * -1; \
+ } \
+ for (i = 0; i < SIZE; i++) \
+ res2.a[i] = DEFAULT_VALUE; \
+ res1.x = INTRINSIC(_minmax_##InsnSuffix) (src1.x, src2.x, R); \
+ res2.x = INTRINSIC(_mask_minmax_##InsnSuffix) (res2.x, mask, src1.x, src2.x, R); \
+ res3.x = INTRINSIC(_maskz_minmax_##InsnSuffix) (mask, src1.x, src2.x, R); \
+ CALC (res_ref, src1.a, src2.a, R); \
+ if (UNION_CHECK_MINMAX (AVX512F_LEN, MaskType) (res1, res_ref)) \
+ abort(); \
+ MASK_MERGE (MaskType) (res_ref, mask, SIZE); \
+ if (UNION_CHECK_MINMAX (AVX512F_LEN, MaskType) (res2, res_ref)) \
+ abort(); \
+ MASK_ZERO (MaskType) (res_ref, mask, SIZE); \
+ if (UNION_CHECK_MINMAX (AVX512F_LEN, MaskType) (res3, res_ref)) \
+ abort();
+
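+/* As UNIT_TEST, but for the 128-bit scalar forms: only element 0
+   carries the MINMAX result, so masking is checked for that element
+   alone.  */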
+#define SCALAR_UNIT_TEST(R, InsnSuffix, MaskType, type) \
+ sign = -1; \
+ for (i = 0; i < SIZE; i++) \
+ { \
+ src1.a[i] = i % 2 ? SNAN_##type : 1.5 + 34.67 * i * sign; \
+ src2.a[i] = i % 3 ? QNAN_##type : -22.17 * i * sign; \
+ sign = sign * -1; \
+ } \
+ for (i = 0; i < SIZE; i++) \
+ res2.a[i] = DEFAULT_VALUE; \
+ res1.x = _mm_minmax_##InsnSuffix (src1.x, src2.x, R); \
+ res2.x = _mm_mask_minmax_##InsnSuffix (res2.x, mask, src1.x, src2.x, R); \
+ res3.x = _mm_maskz_minmax_##InsnSuffix (mask, src1.x, src2.x, R); \
+ CALC (res_ref, src1.a, src2.a, R); \
+ if (UNION_CHECK_MINMAX (128, MaskType) (res1, res_ref)) \
+ abort(); \
+ MASK_MERGE (MaskType) (res_ref, mask, 1); \
+ if (UNION_CHECK_MINMAX (128, MaskType) (res2, res_ref)) \
+ abort(); \
+ MASK_ZERO (MaskType) (res_ref, mask, 1); \
+ if (UNION_CHECK_MINMAX (128, MaskType) (res3, res_ref)) \
+ abort();
+
+#endif
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 2 } } */
+
+
+#include <immintrin.h>
+
+volatile __m512bh x1;
+volatile __m512h x2;
+volatile __m512 x3;
+volatile __m512d x4;
+volatile __mmask32 m32;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx10_2_512_test (void)
+{
+ x1 = _mm512_minmax_nepbh (x1, x1, 100);
+ x1 = _mm512_mask_minmax_nepbh (x1, m32, x1, x1, 100);
+ x1 = _mm512_maskz_minmax_nepbh (m32, x1, x1, 100);
+ x2 = _mm512_minmax_ph (x2, x2, 1);
+ x2 = _mm512_mask_minmax_ph (x2, m32, x2, x2, 1);
+ x2 = _mm512_maskz_minmax_ph (m32, x2, x2, 1);
+ x2 = _mm512_minmax_round_ph (x2, x2, 1, 4);
+ x2 = _mm512_mask_minmax_round_ph (x2, m32, x2, x2, 1, 4);
+ x2 = _mm512_maskz_minmax_round_ph (m32, x2, x2, 1, 4);
+ x3 = _mm512_minmax_ps (x3, x3, 1);
+ x3 = _mm512_mask_minmax_ps (x3, m16, x3, x3, 1);
+ x3 = _mm512_maskz_minmax_ps (m16, x3, x3, 1);
+ x3 = _mm512_minmax_round_ps (x3, x3, 1, 4);
+ x3 = _mm512_mask_minmax_round_ps (x3, m16, x3, x3, 1, 4);
+ x3 = _mm512_maskz_minmax_round_ps (m16, x3, x3, 1, 4);
+ x4 = _mm512_minmax_pd (x4, x4, 100);
+ x4 = _mm512_mask_minmax_pd (x4, m8, x4, x4, 100);
+ x4 = _mm512_maskz_minmax_pd (m8, x4, x4, 100);
+ x4 = _mm512_minmax_round_pd (x4, x4, 100, 4);
+ x4 = _mm512_mask_minmax_round_pd (x4, m8, x4, x4, 100, 4);
+ x4 = _mm512_maskz_minmax_round_pd (m8, x4, x4, 100, 4);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_512BIT
+#endif
+#define SIZE (AVX512F_LEN / 16)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (__bf16 *r, __bf16 *s1, __bf16 *s2, int R)
+{
+ for(int i = 0; i < SIZE; i++)
+ r[i] = minmax___bf16(&s1[i], &s2[i], R);
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, bf16_bf) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ __bf16 res_ref[SIZE];
+
+ UNIT_TEST(0, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(1, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(4, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(5, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(16, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(17, nepbh, bf16_bf, __bf16);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_512BIT
+#endif
+#define SIZE (AVX512F_LEN / 64)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (double *r, double *s1, double *s2, int R)
+{
+ for(int i = 0; i < SIZE; i++)
+ r[i] = minmax_double(&s1[i], &s2[i], R);
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ UNIT_TEST(0, pd, d, double);
+ UNIT_TEST(1, pd, d, double);
+ UNIT_TEST(4, pd, d, double);
+ UNIT_TEST(5, pd, d, double);
+ UNIT_TEST(16, pd, d, double);
+ UNIT_TEST(17, pd, d, double);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_512BIT
+#endif
+#define SIZE (AVX512F_LEN / 16)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (_Float16 *r, _Float16 *s1, _Float16 *s2, int R)
+{
+ for(int i = 0; i < SIZE; i++)
+ r[i] = minmax__Float16(&s1[i], &s2[i], R);
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, h) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ _Float16 res_ref[SIZE];
+
+ UNIT_TEST(0, ph, h, _Float16);
+ UNIT_TEST(1, ph, h, _Float16);
+ UNIT_TEST(4, ph, h, _Float16);
+ UNIT_TEST(5, ph, h, _Float16);
+ UNIT_TEST(16, ph, h, _Float16);
+ UNIT_TEST(17, ph, h, _Float16);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_512BIT
+#endif
+#define SIZE (AVX512F_LEN / 32)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (float *r, float *s1, float *s2, int R)
+{
+ for(int i = 0; i < SIZE; i++)
+ r[i] = minmax_float(&s1[i], &s2[i], R);
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ UNIT_TEST(0, ps, , float);
+ UNIT_TEST(1, ps, , float);
+ UNIT_TEST(4, ps, , float);
+ UNIT_TEST(5, ps, , float);
+ UNIT_TEST(16, ps, , float);
+ UNIT_TEST(17, ps, , float);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256bh y1_;
+volatile __m256h y2;
+volatile __m256 y3;
+volatile __m256d y4;
+volatile __m128bh x1;
+volatile __m128h x2;
+volatile __m128 x3;
+volatile __m128d x4;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx10_2_test (void)
+{
+ x1 = _mm_minmax_nepbh (x1, x1, 100);
+ x1 = _mm_mask_minmax_nepbh (x1, m8, x1, x1, 100);
+ x1 = _mm_maskz_minmax_nepbh (m8, x1, x1, 100);
+ y1_ = _mm256_minmax_nepbh (y1_, y1_, 100);
+ y1_ = _mm256_mask_minmax_nepbh (y1_, m16, y1_, y1_, 100);
+ y1_ = _mm256_maskz_minmax_nepbh (m16, y1_, y1_, 100);
+ x2 = _mm_minmax_ph (x2, x2, 100);
+ x2 = _mm_mask_minmax_ph (x2, m8, x2, x2, 100);
+ x2 = _mm_maskz_minmax_ph (m8, x2, x2, 100);
+ y2 = _mm256_minmax_ph (y2, y2, 100);
+ y2 = _mm256_mask_minmax_ph (y2, m16, y2, y2, 100);
+ y2 = _mm256_maskz_minmax_ph (m16, y2, y2, 100);
+ y2 = _mm256_minmax_round_ph (y2, y2, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y2 = _mm256_mask_minmax_round_ph (y2, m16, y2, y2, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y2 = _mm256_maskz_minmax_round_ph (m16, y2, y2, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x3 = _mm_minmax_ps (x3, x3, 100);
+ x3 = _mm_mask_minmax_ps (x3, m8, x3, x3, 100);
+ x3 = _mm_maskz_minmax_ps (m8, x3, x3, 100);
+ y3 = _mm256_minmax_ps (y3, y3, 100);
+ y3 = _mm256_mask_minmax_ps (y3, m8, y3, y3, 100);
+ y3 = _mm256_maskz_minmax_ps (m8, y3, y3, 100);
+ y3 = _mm256_minmax_round_ps (y3, y3, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y3 = _mm256_mask_minmax_round_ps (y3, m8, y3, y3, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y3 = _mm256_maskz_minmax_round_ps (m8, y3, y3, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x4 = _mm_minmax_pd (x4, x4, 100);
+ x4 = _mm_mask_minmax_pd (x4, m8, x4, x4, 100);
+ x4 = _mm_maskz_minmax_pd (m8, x4, x4, 100);
+ y4 = _mm256_minmax_pd (y4, y4, 100);
+ y4 = _mm256_mask_minmax_pd (y4, m8, y4, y4, 100);
+ y4 = _mm256_maskz_minmax_pd (m8, y4, y4, 100);
+ y4 = _mm256_minmax_round_pd (y4, y4, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y4 = _mm256_mask_minmax_round_pd (y4, m8, y4, y4, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y4 = _mm256_maskz_minmax_round_pd (m8, y4, y4, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x2 = _mm_minmax_sh (x2, x2, 1);
+ x2 = _mm_mask_minmax_sh (x2, m8, x2, x2, 1);
+ x2 = _mm_maskz_minmax_sh (m8, x2, x2, 1);
+ x2 = _mm_minmax_round_sh (x2, x2, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x2 = _mm_mask_minmax_round_sh (x2, m8, x2, x2, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x2 = _mm_maskz_minmax_round_sh (m8, x2, x2, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x3 = _mm_minmax_ss (x3, x3, 1);
+ x3 = _mm_mask_minmax_ss (x3, m8, x3, x3, 1);
+ x3 = _mm_maskz_minmax_ss (m8, x3, x3, 1);
+ x3 = _mm_minmax_round_ss (x3, x3, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x3 = _mm_mask_minmax_round_ss (x3, m8, x3, x3, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x3 = _mm_maskz_minmax_round_ss (m8, x3, x3, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x4 = _mm_minmax_sd (x4, x4, 1);
+ x4 = _mm_mask_minmax_sd (x4, m8, x4, x4, 1);
+ x4 = _mm_maskz_minmax_sd (m8, x4, x4, 1);
+ x4 = _mm_minmax_round_sd (x4, x4, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x4 = _mm_mask_minmax_round_sd (x4, m8, x4, x4, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x4 = _mm_maskz_minmax_round_sd (m8, x4, x4, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#include "avx10_2-512-vminmaxnepbf16-2.c"
+
+#undef AVX512F_LEN
+
+#define AVX512F_LEN 128
+#include "avx10_2-512-vminmaxnepbf16-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#include "avx10_2-512-vminmaxpd-2.c"
+
+#undef AVX512F_LEN
+
+#define AVX512F_LEN 128
+#include "avx10_2-512-vminmaxpd-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
+#include "avx10_2-512-vminmaxph-2.c"
+
+#undef AVX512F_LEN
+
+#define AVX512F_LEN 128
+typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
+#include "avx10_2-512-vminmaxph-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#include "avx10_2-512-vminmaxps-2.c"
+
+#undef AVX512F_LEN
+
+#define AVX512F_LEN 128
+#include "avx10_2-512-vminmaxps-2.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#define SIZE (128 / 64)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (double *r, double *s1, double *s2, int R)
+{
+ r[0] = minmax_double(&s1[0], &s2[0], R);
+ for(int i = 1; i < SIZE; i++)
+ r[i] = s1[i];
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ SCALAR_UNIT_TEST(0, sd, d, double);
+ SCALAR_UNIT_TEST(1, sd, d, double);
+ SCALAR_UNIT_TEST(4, sd, d, double);
+ SCALAR_UNIT_TEST(5, sd, d, double);
+ SCALAR_UNIT_TEST(16, sd, d, double);
+ SCALAR_UNIT_TEST(17, sd, d, double);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#define SIZE (128 / 16)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (_Float16 *r, _Float16 *s1, _Float16 *s2, int R)
+{
+ r[0] = minmax__Float16(&s1[0], &s2[0], R);
+ for(int i = 1; i < SIZE; i++)
+ r[i] = s1[i];
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, h) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ _Float16 res_ref[SIZE];
+
+ SCALAR_UNIT_TEST(0, sh, h, _Float16);
+ SCALAR_UNIT_TEST(1, sh, h, _Float16);
+ SCALAR_UNIT_TEST(4, sh, h, _Float16);
+ SCALAR_UNIT_TEST(5, sh, h, _Float16);
+ SCALAR_UNIT_TEST(16, sh, h, _Float16);
+ SCALAR_UNIT_TEST(17, sh, h, _Float16);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#define SIZE (128 / 32)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (float *r, float *s1, float *s2, int R)
+{
+ r[0] = minmax_float(&s1[0], &s2[0], R);
+ for(int i = 1; i < SIZE; i++)
+ r[i] = s1[i];
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, ) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ SCALAR_UNIT_TEST(0, ss, , float);
+ SCALAR_UNIT_TEST(1, ss, , float);
+ SCALAR_UNIT_TEST(4, ss, , float);
+ SCALAR_UNIT_TEST(5, ss, , float);
+ SCALAR_UNIT_TEST(16, ss, , float);
+ SCALAR_UNIT_TEST(17, ss, , float);
+}
MAKE_MASK_MERGE(i_w, short)
MAKE_MASK_MERGE(i_d, int)
MAKE_MASK_MERGE(i_q, long long)
+MAKE_MASK_MERGE(h, _Float16)
MAKE_MASK_MERGE(, float)
MAKE_MASK_MERGE(d, double)
MAKE_MASK_MERGE(i_ub, unsigned char)
MAKE_MASK_ZERO(i_w, short)
MAKE_MASK_ZERO(i_d, int)
MAKE_MASK_ZERO(i_q, long long)
+MAKE_MASK_ZERO(h, _Float16)
MAKE_MASK_ZERO(, float)
MAKE_MASK_ZERO(d, double)
MAKE_MASK_ZERO(i_ub, unsigned char)
#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
#endif
+/* avx10_2-512minmaxintrin.h */
+#define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxnepbf16512_mask (A, B, 4, W, U)
+
+/* avx10_2minmaxintrin.h */
+#define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxnepbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16256_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxph256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 4, D, E, 4)
+
#include <x86intrin.h>
test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8)
test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8)
#endif
+
+/* avx10_2-512minmaxintrin.h */
+test_2 (_mm512_minmax_nepbh, __m512bh, __m512bh, __m512bh, 100)
+test_3 (_mm512_maskz_minmax_nepbh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_4 (_mm512_mask_minmax_nepbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_2x (_mm512_minmax_round_pd, __m512d, __m512d, __m512d, 100, 4)
+test_3x (_mm512_maskz_minmax_round_pd, __m512d, __mmask8, __m512d, __m512d, 100, 4)
+test_4x (_mm512_mask_minmax_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100, 4)
+test_2x (_mm512_minmax_round_ps, __m512, __m512, __m512, 100, 4)
+test_3x (_mm512_maskz_minmax_round_ps, __m512, __mmask16, __m512, __m512, 100, 4)
+test_4x (_mm512_mask_minmax_round_ps, __m512, __m512, __mmask16, __m512, __m512, 100, 4)
+test_2x (_mm512_minmax_round_ph, __m512h, __m512h, __m512h, 100, 4)
+test_3x (_mm512_maskz_minmax_round_ph, __m512h, __mmask32, __m512h, __m512h, 100, 4)
+test_4x (_mm512_mask_minmax_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100, 4)
+test_2 (_mm512_minmax_pd, __m512d, __m512d, __m512d, 100)
+test_3 (_mm512_maskz_minmax_pd, __m512d, __mmask8, __m512d, __m512d, 100)
+test_4 (_mm512_mask_minmax_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100)
+test_2 (_mm512_minmax_ps, __m512, __m512, __m512, 100)
+test_3 (_mm512_maskz_minmax_ps, __m512, __mmask16, __m512, __m512, 100)
+test_4 (_mm512_mask_minmax_ps, __m512, __m512, __mmask16, __m512, __m512, 100)
+test_2 (_mm512_minmax_ph, __m512h, __m512h, __m512h, 100)
+test_3 (_mm512_maskz_minmax_ph, __m512h, __mmask32, __m512h, __m512h, 100)
+test_4 (_mm512_mask_minmax_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100)
+
+/* avx10_2minmaxintrin.h */
+test_2 (_mm256_minmax_nepbh, __m256bh, __m256bh, __m256bh, 100)
+test_3 (_mm256_maskz_minmax_nepbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_4 (_mm256_mask_minmax_nepbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_2x (_mm256_minmax_round_pd, __m256d, __m256d, __m256d, 100, 4)
+test_3x (_mm256_maskz_minmax_round_pd, __m256d, __mmask8, __m256d, __m256d, 100, 4)
+test_4x (_mm256_mask_minmax_round_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100, 4)
+test_2x (_mm256_minmax_round_ps, __m256, __m256, __m256, 100, 4)
+test_3x (_mm256_maskz_minmax_round_ps, __m256, __mmask8, __m256, __m256, 100, 4)
+test_4x (_mm256_mask_minmax_round_ps, __m256, __m256, __mmask8, __m256, __m256, 100, 4)
+test_2x (_mm256_minmax_round_ph, __m256h, __m256h, __m256h, 100, 4)
+test_3x (_mm256_maskz_minmax_round_ph, __m256h, __mmask16, __m256h, __m256h, 100, 4)
+test_4x (_mm256_mask_minmax_round_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100, 4)
+test_2 (_mm256_minmax_pd, __m256d, __m256d, __m256d, 100)
+test_3 (_mm256_maskz_minmax_pd, __m256d, __mmask8, __m256d, __m256d, 100)
+test_4 (_mm256_mask_minmax_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100)
+test_2 (_mm256_minmax_ps, __m256, __m256, __m256, 100)
+test_3 (_mm256_maskz_minmax_ps, __m256, __mmask8, __m256, __m256, 100)
+test_4 (_mm256_mask_minmax_ps, __m256, __m256, __mmask8, __m256, __m256, 100)
+test_2 (_mm256_minmax_ph, __m256h, __m256h, __m256h, 100)
+test_3 (_mm256_maskz_minmax_ph, __m256h, __mmask16, __m256h, __m256h, 100)
+test_4 (_mm256_mask_minmax_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100)
+test_2 (_mm_minmax_nepbh, __m128bh, __m128bh, __m128bh, 100)
+test_3 (_mm_maskz_minmax_nepbh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_4 (_mm_mask_minmax_nepbh, __m128bh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_2 (_mm_minmax_pd, __m128d, __m128d, __m128d, 100)
+test_3 (_mm_maskz_minmax_pd, __m128d, __mmask8, __m128d, __m128d, 100)
+test_4 (_mm_mask_minmax_pd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100)
+test_2 (_mm_minmax_ps, __m128, __m128, __m128, 100)
+test_3 (_mm_maskz_minmax_ps, __m128, __mmask8, __m128, __m128, 100)
+test_4 (_mm_mask_minmax_ps, __m128, __m128, __mmask8, __m128, __m128, 100)
+test_2 (_mm_minmax_ph, __m128h, __m128h, __m128h, 100)
+test_3 (_mm_maskz_minmax_ph, __m128h, __mmask8, __m128h, __m128h, 100)
+test_4 (_mm_mask_minmax_ph, __m128h, __m128h, __mmask8, __m128h, __m128h, 100)
+test_2x (_mm_minmax_round_sd, __m128d, __m128d, __m128d, 100, 4)
+test_3x (_mm_maskz_minmax_round_sd, __m128d, __mmask8, __m128d, __m128d, 100, 4)
+test_4x (_mm_mask_minmax_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100, 4)
+test_2x (_mm_minmax_round_ss, __m128, __m128, __m128, 100, 4)
+test_3x (_mm_maskz_minmax_round_ss, __m128, __mmask8, __m128, __m128, 100, 4)
+test_4x (_mm_mask_minmax_round_ss, __m128, __m128, __mmask8, __m128, __m128, 100, 4)
+test_2x (_mm_minmax_round_sh, __m128h, __m128h, __m128h, 100, 4)
+test_3x (_mm_maskz_minmax_round_sh, __m128h, __mmask8, __m128h, __m128h, 100, 4)
+test_4x (_mm_mask_minmax_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 100, 4)
test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8)
test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8)
#endif
+
+/* avx10_2-512minmaxintrin.h */
+test_2 (_mm512_minmax_nepbh, __m512bh, __m512bh, __m512bh, 100)
+test_3 (_mm512_maskz_minmax_nepbh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_4 (_mm512_mask_minmax_nepbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_2x (_mm512_minmax_round_pd, __m512d, __m512d, __m512d, 100, 4)
+test_3x (_mm512_maskz_minmax_round_pd, __m512d, __mmask8, __m512d, __m512d, 100, 4)
+test_4x (_mm512_mask_minmax_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100, 4)
+test_2x (_mm512_minmax_round_ps, __m512, __m512, __m512, 100, 4)
+test_3x (_mm512_maskz_minmax_round_ps, __m512, __mmask16, __m512, __m512, 100, 4)
+test_4x (_mm512_mask_minmax_round_ps, __m512, __m512, __mmask16, __m512, __m512, 100, 4)
+test_2x (_mm512_minmax_round_ph, __m512h, __m512h, __m512h, 100, 4)
+test_3x (_mm512_maskz_minmax_round_ph, __m512h, __mmask32, __m512h, __m512h, 100, 4)
+test_4x (_mm512_mask_minmax_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100, 4)
+test_2 (_mm512_minmax_pd, __m512d, __m512d, __m512d, 100)
+test_3 (_mm512_maskz_minmax_pd, __m512d, __mmask8, __m512d, __m512d, 100)
+test_4 (_mm512_mask_minmax_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100)
+test_2 (_mm512_minmax_ps, __m512, __m512, __m512, 100)
+test_3 (_mm512_maskz_minmax_ps, __m512, __mmask16, __m512, __m512, 100)
+test_4 (_mm512_mask_minmax_ps, __m512, __m512, __mmask16, __m512, __m512, 100)
+test_2 (_mm512_minmax_ph, __m512h, __m512h, __m512h, 100)
+test_3 (_mm512_maskz_minmax_ph, __m512h, __mmask32, __m512h, __m512h, 100)
+test_4 (_mm512_mask_minmax_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100)
+
+/* avx10_2minmaxintrin.h */
+test_2 (_mm256_minmax_nepbh, __m256bh, __m256bh, __m256bh, 100)
+test_3 (_mm256_maskz_minmax_nepbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_4 (_mm256_mask_minmax_nepbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_2x (_mm256_minmax_round_pd, __m256d, __m256d, __m256d, 100, 4)
+test_3x (_mm256_maskz_minmax_round_pd, __m256d, __mmask8, __m256d, __m256d, 100, 4)
+test_4x (_mm256_mask_minmax_round_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100, 4)
+test_2x (_mm256_minmax_round_ps, __m256, __m256, __m256, 100, 4)
+test_3x (_mm256_maskz_minmax_round_ps, __m256, __mmask8, __m256, __m256, 100, 4)
+test_4x (_mm256_mask_minmax_round_ps, __m256, __m256, __mmask8, __m256, __m256, 100, 4)
+test_2x (_mm256_minmax_round_ph, __m256h, __m256h, __m256h, 100, 4)
+test_3x (_mm256_maskz_minmax_round_ph, __m256h, __mmask16, __m256h, __m256h, 100, 4)
+test_4x (_mm256_mask_minmax_round_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100, 4)
+test_2 (_mm256_minmax_pd, __m256d, __m256d, __m256d, 100)
+test_3 (_mm256_maskz_minmax_pd, __m256d, __mmask8, __m256d, __m256d, 100)
+test_4 (_mm256_mask_minmax_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100)
+test_2 (_mm256_minmax_ps, __m256, __m256, __m256, 100)
+test_3 (_mm256_maskz_minmax_ps, __m256, __mmask8, __m256, __m256, 100)
+test_4 (_mm256_mask_minmax_ps, __m256, __m256, __mmask8, __m256, __m256, 100)
+test_2 (_mm256_minmax_ph, __m256h, __m256h, __m256h, 100)
+test_3 (_mm256_maskz_minmax_ph, __m256h, __mmask16, __m256h, __m256h, 100)
+test_4 (_mm256_mask_minmax_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100)
+test_2 (_mm_minmax_nepbh, __m128bh, __m128bh, __m128bh, 100)
+test_3 (_mm_maskz_minmax_nepbh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_4 (_mm_mask_minmax_nepbh, __m128bh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_2 (_mm_minmax_pd, __m128d, __m128d, __m128d, 100)
+test_3 (_mm_maskz_minmax_pd, __m128d, __mmask8, __m128d, __m128d, 100)
+test_4 (_mm_mask_minmax_pd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100)
+test_2 (_mm_minmax_ps, __m128, __m128, __m128, 100)
+test_3 (_mm_maskz_minmax_ps, __m128, __mmask8, __m128, __m128, 100)
+test_4 (_mm_mask_minmax_ps, __m128, __m128, __mmask8, __m128, __m128, 100)
+test_2 (_mm_minmax_ph, __m128h, __m128h, __m128h, 100)
+test_3 (_mm_maskz_minmax_ph, __m128h, __mmask8, __m128h, __m128h, 100)
+test_4 (_mm_mask_minmax_ph, __m128h, __m128h, __mmask8, __m128h, __m128h, 100)
+test_2x (_mm_minmax_round_sd, __m128d, __m128d, __m128d, 100, 4)
+test_3x (_mm_maskz_minmax_round_sd, __m128d, __mmask8, __m128d, __m128d, 100, 4)
+test_4x (_mm_mask_minmax_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100, 4)
+test_2x (_mm_minmax_round_ss, __m128, __m128, __m128, 100, 4)
+test_3x (_mm_maskz_minmax_round_ss, __m128, __mmask8, __m128, __m128, 100, 4)
+test_4x (_mm_mask_minmax_round_ss, __m128, __m128, __mmask8, __m128, __m128, 100, 4)
+test_2x (_mm_minmax_round_sh, __m128h, __m128h, __m128h, 100, 4)
+test_3x (_mm_maskz_minmax_round_sh, __m128h, __mmask8, __m128h, __m128h, 100, 4)
+test_4x (_mm_mask_minmax_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 100, 4)
#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
#endif
+/* avx10_2-512minmaxintrin.h */
+#define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxnepbf16512_mask (A, B, 100, W, U)
+
+/* avx10_2minmaxintrin.h */
+#define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16128_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxnepbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16256_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxph256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph256_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4)
+
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
#include <x86intrin.h>