From: Haochen Jiang Date: Fri, 14 Mar 2025 07:00:33 +0000 (+0800) Subject: i386: Combine AVX10.2 intrin files X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5817101a6d30a86a267bfda3f0a408490632536f;p=thirdparty%2Fgcc.git i386: Combine AVX10.2 intrin files Since we use a single avx10.2 to enable everything, there is no need to split them into two files. gcc/ChangeLog: * config.gcc: Remove 512 intrin file. * config/i386/avx10_2-512bf16intrin.h: Removed and combined to ... * config/i386/avx10_2bf16intrin.h: ... this. * config/i386/avx10_2-512convertintrin.h: Removed and combined to ... * config/i386/avx10_2convertintrin.h: ... this. * config/i386/avx10_2-512mediaintrin.h: Removed and combined to ... * config/i386/avx10_2mediaintrin.h: ... this. * config/i386/avx10_2-512minmaxintrin.h: Removed and combined to ... * config/i386/avx10_2minmaxintrin.h: ... this. * config/i386/avx10_2-512satcvtintrin.h: Removed and combined to ... * config/i386/avx10_2satcvtintrin.h: ... this. * config/i386/immintrin.h: Remove 512 intrin file. gcc/testsuite/ChangeLog: * gcc.target/i386/avx-1.c: Combine tests and change intrin file name. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. 
--- diff --git a/gcc/config.gcc b/gcc/config.gcc index e552f469dcc..1e386a469e0 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -452,14 +452,11 @@ i[34567]86-*-* | x86_64-*-*) cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h raointintrin.h amxcomplexintrin.h avxvnniint16intrin.h sm3intrin.h sha512intrin.h sm4intrin.h usermsrintrin.h - avx10_2mediaintrin.h avx10_2-512mediaintrin.h - avx10_2convertintrin.h avx10_2-512convertintrin.h - avx10_2bf16intrin.h avx10_2-512bf16intrin.h - avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h - avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h - avx10_2copyintrin.h amxavx512intrin.h amxtf32intrin.h - amxtransposeintrin.h amxfp8intrin.h movrsintrin.h - amxmovrsintrin.h" + avx10_2mediaintrin.h avx10_2convertintrin.h + avx10_2bf16intrin.h avx10_2satcvtintrin.h + avx10_2minmaxintrin.h avx10_2copyintrin.h + amxavx512intrin.h amxtf32intrin.h amxtransposeintrin.h + amxfp8intrin.h movrsintrin.h amxmovrsintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h deleted file mode 100644 index 21e4b369c9e..00000000000 --- a/gcc/config/i386/avx10_2-512bf16intrin.h +++ /dev/null @@ -1,681 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. 
- - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512bf16intrin.h> directly; include <immintrin.h> instead." -#endif - -#ifndef _AVX10_2_512BF16INTRIN_H_INCLUDED -#define _AVX10_2_512BF16INTRIN_H_INCLUDED - -#if !defined (__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_add_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_addbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_addbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_addbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_sub_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_subbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_subbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_subbf16512_mask (__A, __B, - 
(__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mul_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_mulbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_div_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_divbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_divbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_divbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_max_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_maxbf16512_mask (__A, __B, 
__W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_maxbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_min_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_minbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_minbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_minbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_scalef_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_scalefbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - 
__builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) -_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - 
__builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_rsqrt_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rsqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); - -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rsqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_sqrt_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_sqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_sqrtbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_sqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_rcp_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rcpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) 
-{ - return (__m512bh) - __builtin_ia32_rcpbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rcpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_getexp_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_getexpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_getexpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -/* Intrinsics vrndscalebf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_roundscale_pbh (__m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_rndscalebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_rndscalebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -#else -#define _mm512_roundscale_pbh(A, B) \ - (__builtin_ia32_rndscalebf16512_mask ((A), (B), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) - -#define _mm512_mask_roundscale_pbh(A, B, C, D) \ - (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B))) - -#define _mm512_maskz_roundscale_pbh(A, B, C) \ - (__builtin_ia32_rndscalebf16512_mask ((B), (C), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) - -#endif /* __OPTIMIZE__ */ - -/* Intrinsics vreducebf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_reduce_pbh (__m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_reducebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_reducebf16512_mask (__A, B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_reducebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -#else -#define _mm512_reduce_pbh(A, B) \ - (__builtin_ia32_reducebf16512_mask ((A), (B), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) - -#define _mm512_mask_reduce_pbh(A, B, C, D) \ - (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B))) - -#define _mm512_maskz_reduce_pbh(A, B, C) \ - (__builtin_ia32_reducebf16512_mask ((B), (C), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) - -#endif /* __OPTIMIZE__ */ - -/* Intrinsics vgetmantbf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) -{ - return (__m512bh) - __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) -{ - return (__m512bh) - __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, - __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) -{ - return (__m512bh) - __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -#else -#define _mm512_getmant_pbh(A, B, C) \ - (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) - -#define _mm512_mask_getmant_pbh(A, B, C, D, E) \ - (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) - -#define _mm512_maskz_getmant_pbh(A, B, C, D) \ - (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) - -#endif /* __OPTIMIZE__ */ - -/* Intrinsics vfpclassbf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A, - const int __imm) -{ - return (__mmask32) - __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U); -} - -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm) -{ - return (__mmask32) - __builtin_ia32_fpclassbf16512_mask (__A, __imm, - (__mmask32) -1); -} - -#else -#define _mm512_mask_fpclass_pbh_mask(U, X, C) \ - ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ - (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U))) - -#define _mm512_fpclass_pbh_mask(X, C) \ - ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ - (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1))) -#endif /* __OPIMTIZE__ */ - - -/* Intrinsics vcmpbf16. */ -#ifdef __OPTIMIZE__ -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B, - const int __imm) -{ - return (__mmask32) - __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U); -} - -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm) -{ - return (__mmask32) - __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, - (__mmask32) -1); -} - -#else -#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \ - ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A))) - -#define _mm512_cmp_pbh_mask(A, B, C) \ - ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1))) - -#endif /* __OPIMTIZE__ */ - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* _AVX10_2_512BF16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512convertintrin.h 
b/gcc/config/i386/avx10_2-512convertintrin.h deleted file mode 100644 index 611a40d83e2..00000000000 --- a/gcc/config/i386/avx10_2-512convertintrin.h +++ /dev/null @@ -1,572 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512convertintrin.h> directly; include <immintrin.h> instead." 
-#endif // _IMMINTRIN_H_INCLUDED - -#ifndef __AVX10_2_512CONVERTINTRIN_H_INCLUDED -#define __AVX10_2_512CONVERTINTRIN_H_INCLUDED - -#ifndef __AVX10_2__ -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtx2ps_ph (__m512 __A, __m512 __B) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, - __m512 __B) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) __W, - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -#ifdef __OPTIMIZE__ -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) -1, - __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, - __m512 __B, const int __R) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512h -__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A, - __m512 __B, const int __R) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, - __R); -} - -#else -#define _mm512_cvtx_round2ps_ph(A, B, R) \ - ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (__v32hf) \ - (_mm512_setzero_ph ()), \ - (__mmask32) (-1), \ - (R))) -#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \ - ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (__v32hf) (W), \ - (__mmask32) (U), \ - (R))) -#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \ - ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (__v32hf) \ - (_mm512_setzero_ph ()), \ - (__mmask32) (U), \ - (R))) -#endif /* __OPTIMIZE__ */ - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U, - __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) -_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U, - __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A, - __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U, - __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B) -{ - return (__m512i) 
__builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern 
__inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvthf8_ph (__m256i __A) -{ - return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, - (__v32hf) (__m512h) - _mm512_undefined_ph (), - (__mmask32) -1); -} - -extern __inline__ __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A) -{ - return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, - (__v32hf) (__m512h) __W, - (__mmask32) __U); -} - -extern __inline__ __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A) -{ - return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, - (__v32hf) (__m512h) - _mm512_setzero_ph (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtph_bf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, - (__v32qi) 
(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_ph_bf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtph_hf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, - (__v32qi) 
(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_ph_hf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtbf8_ph (__m256i __A) -{ - return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( - (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A) -{ - return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 ( - (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A) -{ - return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( - (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8)); -} - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* __AVX10_2_512CONVERTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512mediaintrin.h 
b/gcc/config/i386/avx10_2-512mediaintrin.h deleted file mode 100644 index 43271e740dd..00000000000 --- a/gcc/config/i386/avx10_2-512mediaintrin.h +++ /dev/null @@ -1,514 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512mediaintrin.h> directly; include <immintrin.h> instead." 
-#endif - -#ifndef _AVX10_2_512MEDIAINTRIN_H_INCLUDED -#define _AVX10_2_512MEDIAINTRIN_H_INCLUDED - -#if !defined(__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); 
-} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) 
__A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - 
__builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwusd_epi32 
(__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i 
-__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B) -{ - return (__m512) - __builtin_ia32_vdpphps512_mask ((__v16sf) __W, - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) -1); -} - -extern __inline __m512 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A, - __m512h __B) -{ - return (__m512) - __builtin_ia32_vdpphps512_mask ((__v16sf) __W, - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) __U); -} - -extern __inline __m512 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A, - __m512h __B) -{ - return (__m512) - __builtin_ia32_vdpphps512_maskz ((__v16sf) __W, - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) __U); -} - -#ifdef __OPTIMIZE__ -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M) -{ - return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X, - (__v64qi) __Y, - __M); 
-} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X, - __m512i __Y, const int __M) -{ - return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, - (__v64qi) __Y, - __M, - (__v32hi) __W, - __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X, - __m512i __Y, const int __M) -{ - return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, - (__v64qi) __Y, - __M, - (__v32hi) _mm512_setzero_epi32 (), - __U); -} -#else -#define _mm512_mpsadbw_epu8(X, Y, M) \ - (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \ - (__v64qi)(__m512i)(Y), (int)(M)) - -#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \ - (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ - (__v64qi)(__m512i)(Y), \ - (int)(M), \ - (__v32hi)(__m512i)(W), \ - (__mmask32)(U)) - -#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \ - (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ - (__v64qi)(__m512i)(Y), \ - (int)(M), \ - (__v32hi) _mm512_setzero_epi32 (), \ - (__mmask32)(U)) -#endif - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* __AVX10_2_512MEDIAINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512minmaxintrin.h b/gcc/config/i386/avx10_2-512minmaxintrin.h deleted file mode 100644 index a743346054b..00000000000 --- a/gcc/config/i386/avx10_2-512minmaxintrin.h +++ /dev/null @@ -1,489 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - This file is part of GCC. - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. 
- GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use directly; include instead." -#endif - -#ifndef _AVX10_2_512MINMAXINTRIN_H_INCLUDED -#define _AVX10_2_512MINMAXINTRIN_H_INCLUDED - -#if !defined (__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -#ifdef __OPTIMIZE__ -extern __inline __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C) -{ - return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, - (__v32bf) __B, - __C, - (__v32bf)(__m512bh) - _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B, const int __C) -{ - return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, - (__v32bf) __B, - __C, - (__v32bf) __W, - (__mmask32) __U); -} - -extern __inline __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, const int __C) -{ - return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, - (__v32bf) __B, - __C, - (__v32bf)(__m512bh) - _mm512_setzero_si512 (), - (__mmask32) __U); -} 
- -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A, - __m512d __B, const int __C) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B, - const int __C) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C, - const int __R) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, __R); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A, - __m512d __B, const int __C, const int __R) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) __W, - (__mmask8) __U, __R); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B, - const int __C, const int __R) -{ - return (__m512d) 
__builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_undefined_ph (), - (__mmask32) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A, - __m512h __B, const int __C) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) __W, - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B, - const int __C) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_undefined_ph (), - (__mmask32) -1, __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A, - __m512h __B, const int __C, const int __R) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) __W, - (__mmask32) __U, __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) -_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B, - const int __C, const int __R) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, __R); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A, - __m512 __B, const int __C) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) __W, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B, - const int __C) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, __R); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A, - __m512 __B, const int __C, const int __R) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) 
__W, - (__mmask16) __U, __R); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B, - const int __C, const int __R) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U, __R); -} - -#else -#define _mm512_minmax_pbh(A, B, C) \ - ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ - (__v32bf) (B), \ - (int) (C), \ - (__v32bf) (__m512bh) \ - _mm512_setzero_si512 (), \ - (__mmask32) (-1))) - -#define _mm512_mask_minmax_pbh(W, U, A, B, C) \ - ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ - (__v32bf) (B), \ - (int) (C), \ - (__v32bf) (__m512bh) (W), \ - (__mmask32) (U))) - -#define _mm512_maskz_minmax_pbh(U, A, B, C) \ - ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ - (__v32bf) (B), \ - (int) (C), \ - (__v32bf) (__m512bh) \ - _mm512_setzero_si512 (), \ - (__mmask32) (U))) - -#define _mm512_minmax_round_pd(A, B, C, R) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_undefined_pd (), \ - (__mmask8) (-1), \ - (int) (R))) - -#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) (W), \ - (__mmask8) (U), \ - (int) (R))) - -#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_setzero_pd (), \ - (__mmask8) (U), \ - (int) (R))) - -#define _mm512_minmax_round_ph(A, B, C, R) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_undefined_ph (), \ - (__mmask32) (-1), \ - (int) (R))) - -#define 
_mm512_mask_minmax_round_ph(W, U, A, B, C, R) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) (W), \ - (__mmask32) (U), \ - (int) (R))) - -#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_setzero_ph (), \ - (__mmask32) (U), \ - (int) (R))) - -#define _mm512_minmax_round_ps(A, B, C, R) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_undefined_ps (), \ - (__mmask16) (-1), \ - (int) (R))) - -#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) (W), \ - (__mmask16) (U), \ - (int) (R))) - -#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_setzero_ps (), \ - (__mmask16) (U), \ - (int) (R))) - -#define _mm512_minmax_pd(A, B, C) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_undefined_pd (), \ - (__mmask8) (-1), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_minmax_pd(W, U, A, B, C) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) (W), \ - (__mmask8) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_maskz_minmax_pd(U, A, B, C) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_setzero_pd (), \ - (__mmask8) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_minmax_ph(A, B, C) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) 
(B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_undefined_ph (), \ - (__mmask32) (-1), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_minmax_ph(W, U, A, B, C) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) (W), \ - (__mmask32) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_maskz_minmax_ph(U, A, B, C) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_setzero_ph (), \ - (__mmask32) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_minmax_ps(A, B, C) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_undefined_ps (), \ - (__mmask16) (-1), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_minmax_ps(W, U, A, B, C) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) (W), \ - (__mmask16) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_maskz_minmax_ps(U, A, B, C) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_setzero_ps (), \ - (__mmask16) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#endif - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* _AVX10_2_512MINMAXINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h deleted file mode 100644 index 215b7fdadf3..00000000000 --- a/gcc/config/i386/avx10_2-512satcvtintrin.h +++ /dev/null @@ -1,1575 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. 
- - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use directly; include instead." -#endif - -#ifndef _AVX10_2_512SATCVTINTRIN_H_INCLUDED -#define _AVX10_2_512SATCVTINTRIN_H_INCLUDED - -#if !defined (__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_bf16_epi8 (__m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - 
_mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_bf16_epu8 (__m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_bf16_epi8 (__m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_bf16_epu8 (__m512bh __A) -{ - return (__m512i) - __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, - (__v32hi) 
_mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A) -{ - return (__m512i) - __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ph_epi8 (__m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ph_epu8 (__m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, - (__v32hi) __W, - 
(__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ps_epi8 (__m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ps_epu8 (__m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline 
__m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ph_epi8 (__m512h __A) -{ - return (__m512i) - __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A) -{ - return - (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ph_epu8 (__m512h __A) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ps_epi8 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ps_epu8 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epi32 (__m512d __A) -{ - return (__m256i) - __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) -{ - return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A) -{ - return - (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epi64 (__m512d __A) -{ - return (__m512i) - __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) -{ - return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A) -{ - return - (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epu32 (__m512d __A) -{ - return (__m256i) - __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) -{ - return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A) -{ - return - (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epu64 (__m512d __A) -{ - return 
(__m512i) - __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) -{ - return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epi32 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A) -{ - return - (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epi64 (__m256 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) -{ - return (__m512i) 
__builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A) -{ - return - (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epu32 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epu64 (__m256 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) -{ - return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A) -{ - return - (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, - (__v8di) - 
_mm512_setzero_si512 (), - (__mmask8) __U); -} - -#ifdef __OPTIMIZE__ -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R) -{ - return 
(__m512i) - __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - 
(__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R) -{ - return (__m256i) - __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) -_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) -{ - return - (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R) -{ - return (__m256i) - __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m256i) 
__builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) -{ - return - (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i 
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const 
int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} -#else -#define _mm512_ipcvts_roundph_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_ipcvts_roundph_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ - 
(__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_ipcvts_roundps_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_ipcvts_roundps_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_ipcvtts_roundph_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define 
_mm512_ipcvtts_roundph_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_ipcvtts_roundps_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_ipcvtts_roundps_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epi32(A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_undefined_si256 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \ - 
((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ - (__v8si) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_setzero_si256 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epi64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epu32(A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_undefined_si256 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \ - ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ - (__v8si) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_setzero_si256 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epu64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ 
- (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epi32(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epi64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epu32(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epu64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epu64(W, 
U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) -#endif - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* _AVX10_2_512SATCVTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h index e6890fc2cfd..9560480d3a6 100644 --- a/gcc/config/i386/avx10_2bf16intrin.h +++ b/gcc/config/i386/avx10_2bf16intrin.h @@ -34,6 +34,32 @@ #define __DISABLE_AVX10_2__ #endif /* __AVX10_2__ */ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_addbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_addbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_addbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_pbh (__m256bh __A, __m256bh __B) @@ -86,6 +112,32 @@ _mm_maskz_add_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_subbf16512 (__A, __B); +} + +extern __inline__ 
__m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_subbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_subbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_pbh (__m256bh __A, __m256bh __B) @@ -138,6 +190,32 @@ _mm_maskz_sub_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_mulbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_pbh (__m256bh __A, __m256bh __B) @@ -190,6 +268,32 @@ _mm_maskz_mul_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_div_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_divbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_divbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_divbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_div_pbh (__m256bh __A, __m256bh __B) @@ -242,6 +346,32 @@ _mm_maskz_div_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_maxbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_pbh (__m256bh __A, __m256bh __B) @@ -294,6 +424,32 @@ _mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_minbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_minbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_minbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_pbh (__m256bh __A, __m256bh __B) @@ -346,6 +502,32 @@ _mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_scalefbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_scalef_pbh (__m256bh __A, __m256bh __B) @@ -398,6 +580,41 @@ _mm_maskz_scalef_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -468,6 +685,41 @@ _mm_maskz_fmadd_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fmaddbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return 
(__m512bh) + __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -537,6 +789,41 @@ _mm_maskz_fmsub_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fmsubbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -607,6 +894,41 @@ _mm_maskz_fnmadd_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fnmaddbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -677,6 +999,35 @@ _mm_maskz_fnmsub_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fnmsubbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rsqrt_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rsqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); + +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rsqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_rsqrt_pbh (__m256bh __A) @@ -733,6 +1084,34 @@ _mm_maskz_rsqrt_pbh 
(__mmask8 __U, __m128bh __A) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sqrt_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_sqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_sqrtbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_sqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sqrt_pbh (__m256bh __A) @@ -789,6 +1168,34 @@ _mm_maskz_sqrt_pbh (__mmask8 __U, __m128bh __A) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rcp_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rcpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rcpbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rcpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_rcp_pbh (__m256bh __A) @@ -845,6 +1252,33 @@ _mm_maskz_rcp_pbh (__mmask8 __U, __m128bh __A) __U); } +extern __inline__ __m512bh +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_getexpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_getexpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getexp_pbh (__m256bh __A) @@ -903,6 +1337,34 @@ _mm_maskz_getexp_pbh (__mmask8 __U, __m128bh __A) /* Intrinsics vrndscalebf16. */ #ifdef __OPTIMIZE__ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_pbh (__m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_rndscalebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_rndscalebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_roundscale_pbh (__m256bh __A, int B) @@ -962,6 +1424,19 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, 
__m128bh __A, int B) } #else +#define _mm512_roundscale_pbh(A, B) \ + (__builtin_ia32_rndscalebf16512_mask ((A), (B), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) + +#define _mm512_mask_roundscale_pbh(A, B, C, D) \ + (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B))) + +#define _mm512_maskz_roundscale_pbh(A, B, C) \ + (__builtin_ia32_rndscalebf16512_mask ((B), (C), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) + #define _mm256_roundscale_pbh(A, B) \ (__builtin_ia32_rndscalebf16256_mask ((A), (B), \ (__v16bf) _mm256_setzero_si256 (), \ @@ -992,6 +1467,35 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B) /* Intrinsics vreducebf16. */ #ifdef __OPTIMIZE__ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_pbh (__m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_reducebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_reducebf16512_mask (__A, B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_reducebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_reduce_pbh (__m256bh __A, int B) @@ -1051,6 +1555,19 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B) } #else +#define _mm512_reduce_pbh(A, B) \ + (__builtin_ia32_reducebf16512_mask ((A), (B), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) + +#define _mm512_mask_reduce_pbh(A, B, C, D) \ + (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B))) + +#define 
_mm512_maskz_reduce_pbh(A, B, C) \ + (__builtin_ia32_reducebf16512_mask ((B), (C), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) + #define _mm256_reduce_pbh(A, B) \ (__builtin_ia32_reducebf16256_mask ((A), (B), \ (__v16bf) _mm256_setzero_si256 (), \ @@ -1082,6 +1599,40 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B) /* Intrinsics vgetmantbf16. */ #ifdef __OPTIMIZE__ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512bh) + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512bh) + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512bh) + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getmant_pbh (__m256bh __A, _MM_MANTISSA_NORM_ENUM __B, @@ -1151,6 +1702,19 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A, } #else +#define _mm512_getmant_pbh(A, B, C) \ + (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) + +#define _mm512_mask_getmant_pbh(A, B, C, D, E) \ + (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) + +#define _mm512_maskz_getmant_pbh(A, B, C, D) \ + 
(__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) + #define _mm256_getmant_pbh(A, B, C) \ (__builtin_ia32_getmantbf16256_mask ((A), (int)(((C)<<2) | (B)), \ (__v16bf) _mm256_setzero_si256 (), \ @@ -1180,6 +1744,24 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A, /* Intrinsics vfpclassbf16. */ #ifdef __OPTIMIZE__ +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A, + const int __imm) +{ + return (__mmask32) + __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U); +} + +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm) +{ + return (__mmask32) + __builtin_ia32_fpclassbf16512_mask (__A, __imm, + (__mmask32) -1); +} + extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fpclass_pbh_mask (__mmask16 __U, __m256bh __A, @@ -1214,6 +1796,14 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm) } #else +#define _mm512_mask_fpclass_pbh_mask(U, X, C) \ + ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ + (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U))) + +#define _mm512_fpclass_pbh_mask(X, C) \ + ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ + (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1))) + #define _mm256_mask_fpclass_pbh_mask(U, A, B) \ ((__mmask16) __builtin_ia32_fpclassbf16256_mask ((A), (B), (U))) @@ -1233,6 +1823,24 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm) /* Intrinsics vcmpbf16. 
*/ #ifdef __OPTIMIZE__ +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B, + const int __imm) +{ + return (__mmask32) + __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U); +} + +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm) +{ + return (__mmask32) + __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, + (__mmask32) -1); +} + extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cmp_pbh_mask (__mmask16 __U, __m256bh __A, @@ -1268,6 +1876,12 @@ _mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm) } #else +#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \ + ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A))) + +#define _mm512_cmp_pbh_mask(A, B, C) \ + ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1))) + #define _mm256_mask_cmp_pbh_mask(A, B, C, D) \ ((__mmask16) __builtin_ia32_cmpbf16256_mask ((B), (C), (D), (A))) diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h index 8cbdc667159..f2fb98f8454 100644 --- a/gcc/config/i386/avx10_2convertintrin.h +++ b/gcc/config/i386/avx10_2convertintrin.h @@ -98,6 +98,103 @@ _mm256_maskz_cvtx2ps_ph ( __mmask16 __U, __m256 __A, __m256 __B) (__mmask16) __U); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx2ps_ph (__m512 __A, __m512 __B) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, + __m512 __B) +{ + return (__m512h) 
__builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) __W, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) -1, + __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, + __R); +} + +#else +#define _mm512_cvtx_round2ps_ph(A, B, R) \ + ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (__v32hf) \ + (_mm512_setzero_ph ()), \ + (__mmask32) (-1), \ + (R))) +#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \ + ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (__v32hf) (W), \ + (__mmask32) (U), \ + (R))) +#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \ + ((__m512h) 
__builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (__v32hf) \ + (_mm512_setzero_ph ()), \ + (__mmask32) (U), \ + (R))) +#endif /* __OPTIMIZE__ */ + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtbiasph_bf8 (__m128i __A, __m128h __B) @@ -161,6 +258,39 @@ _mm256_maskz_cvtbiasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U, + __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_biasph_bf8 (__m128i __A, __m128h __B) @@ -224,6 +354,39 @@ _mm256_maskz_cvts_biasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U, + __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtbiasph_hf8 (__m128i __A, __m128h __B) @@ -287,6 +450,39 @@ _mm256_maskz_cvtbiasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A, + __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_biasph_hf8 (__m128i __A, __m128h __B) @@ 
-350,6 +546,39 @@ _mm256_maskz_cvts_biasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U, + __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt2ph_bf8 (__m128h __A, __m128h __B) @@ -416,6 +645,39 @@ _mm256_maskz_cvt2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_2ph_bf8 (__m128h __A, __m128h __B) @@ -482,6 +744,39 @@ _mm256_maskz_cvts_2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt2ph_hf8 (__m128h __A, __m128h __B) @@ -548,6 +843,39 @@ _mm256_maskz_cvt2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_2ph_hf8 (__m128h __A, __m128h __B) @@ -614,6 +942,39 @@ _mm256_maskz_cvts_2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvthf8_ph (__m128i __A) @@ -672,6 +1033,35 @@ _mm256_maskz_cvthf8_ph (__mmask16 __U, __m128i __A) (__mmask16) __U); } +extern 
__inline__ __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvthf8_ph (__m256i __A) +{ + return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, + (__v32hf) (__m512h) + _mm512_undefined_ph (), + (__mmask32) -1); +} + +extern __inline__ __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A) +{ + return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, + (__v32hf) (__m512h) __W, + (__mmask32) __U); +} + +extern __inline__ __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A) +{ + return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, + (__v32hf) (__m512h) + _mm512_setzero_ph (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtph_bf8 (__m128h __A) @@ -730,6 +1120,35 @@ _mm256_maskz_cvtph_bf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_bf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) _mm_cvts_ph_bf8 (__m128h __A) @@ -788,6 +1207,35 @@ _mm256_maskz_cvts_ph_bf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_ph_bf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtph_hf8 (__m128h __A) @@ -846,6 +1294,35 @@ _mm256_maskz_cvtph_hf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_hf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) 
__A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_ph_hf8 (__m128h __A) @@ -904,6 +1381,35 @@ _mm256_maskz_cvts_ph_hf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_ph_hf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtbf8_ph (__m128i __A) @@ -952,6 +1458,30 @@ _mm256_maskz_cvtbf8_ph (__mmask16 __U, __m128i __A) (__m256i) _mm256_maskz_cvtepi8_epi16 (__U, __A), 8)); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtbf8_ph (__m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( + (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 ( + (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m512h 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( + (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8)); +} + #ifdef __DISABLE_AVX10_2__ #undef __DISABLE_AVX10_2__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx10_2mediaintrin.h b/gcc/config/i386/avx10_2mediaintrin.h index 0993e8e4f56..7d30502f6a8 100644 --- a/gcc/config/i386/avx10_2mediaintrin.h +++ b/gcc/config/i386/avx10_2mediaintrin.h @@ -394,6 +394,198 @@ _mm256_maskz_dpbuuds_epi32 (__mmask8 __U, __m256i __W, (__mmask8) __U); } +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + 
__builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbsuds_epi32 
(__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_dpwsud_epi32 (__m128i __W, __mmask8 __U, @@ -682,6 +874,233 @@ _mm256_maskz_dpwuuds_epi32 (__mmask8 __U, __m256i __W, (__mmask8) __U); } +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline 
__m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); 
+} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B) +{ + return (__m512) + __builtin_ia32_vdpphps512_mask ((__v16sf) __W, + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A, + __m512h __B) +{ + return (__m512) + 
__builtin_ia32_vdpphps512_mask ((__v16sf) __W, + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A, + __m512h __B) +{ + return (__m512) + __builtin_ia32_vdpphps512_maskz ((__v16sf) __W, + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) __U); +} + extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_dpph_ps (__m256 __W, __m256h __A, __m256h __B) @@ -800,6 +1219,39 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X, (__v16hi) _mm256_setzero_si256 (), __U); } + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M) +{ + return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X, + (__v64qi) __Y, + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X, + __m512i __Y, const int __M) +{ + return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, + (__v64qi) __Y, + __M, + (__v32hi) __W, + __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X, + __m512i __Y, const int __M) +{ + return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, + (__v64qi) __Y, + __M, + (__v32hi) _mm512_setzero_epi32 (), + __U); +} #else #define _mm_mask_mpsadbw_epu8(W, U, X, Y, M) \ (__m128i) __builtin_ia32_mpsadbw128_mask ((__v16qi)(__m128i)(X), \ @@ -829,6 +1281,23 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X, (__v16hi) _mm256_setzero_si256 (), \ (__mmask16)(U)) +#define _mm512_mpsadbw_epu8(X, Y, M) /* Whole expansion parenthesized. */ \ + ((__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \ + (__v64qi)(__m512i)(Y), (int)(M))) + +#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M)
\ + ((__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ + (__v64qi)(__m512i)(Y), \ + (int)(M), \ + (__v32hi)(__m512i)(W), \ + (__mmask32)(U))) + +#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \ + ((__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ + (__v64qi)(__m512i)(Y), \ + (int)(M), \ + (__v32hi) _mm512_setzero_epi32 (), \ + (__mmask32)(U))) #endif #ifdef __DISABLE_AVX10_2__ diff --git a/gcc/config/i386/avx10_2minmaxintrin.h b/gcc/config/i386/avx10_2minmaxintrin.h index 0a4a253aa8c..f9fe14ecee8 100644 --- a/gcc/config/i386/avx10_2minmaxintrin.h +++ b/gcc/config/i386/avx10_2minmaxintrin.h @@ -103,6 +103,43 @@ _mm256_maskz_minmax_pbh (__mmask16 __U, __m256bh __A, (__mmask16) __U); } +extern __inline __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C) +{ + return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, + (__v32bf) __B, + __C, + (__v32bf)(__m512bh) + _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B, const int __C) +{ + return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, + (__v32bf) __B, + __C, + (__v32bf) __W, + (__mmask32) __U); +} + +extern __inline __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, const int __C) +{ + return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, + (__v32bf) __B, + __C, + (__v32bf)(__m512bh) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_pd (__m128d __A, __m128d __B, const int __C) @@ -169,6 +206,84 @@ _mm256_maskz_minmax_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C) (__mmask8) __U);
} +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_undefined_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __C) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __C) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C, + const int __R) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_undefined_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __C, const int __R) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __C, const int __R) +{ + return (__m512d) 
__builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_ph (__m128h __A, __m128h __B, const int __C) @@ -235,6 +350,83 @@ _mm256_maskz_minmax_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __C) (__mmask16) __U); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_undefined_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A, + __m512h __B, const int __C) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) __W, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B, + const int __C) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_undefined_ph (), + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A, + __m512h __B, 
const int __C, const int __R) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) __W, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + const int __C, const int __R) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_ps (__m128 __A, __m128 __B, const int __C) @@ -301,6 +493,83 @@ _mm256_maskz_minmax_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C) (__mmask8) __U); } +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __C) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B, + const int __C) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_round_ps (__m512 __A, __m512 
__B, const int __C, const int __R) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __C, const int __R) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + const int __C, const int __R) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_sd (__m128d __A, __m128d __B, const int __C) @@ -580,6 +849,29 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_si256 (), \ (__mmask16) (U))) +#define _mm512_minmax_pbh(A, B, C) \ + ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ + (__v32bf) (B), \ + (int) (C), \ + (__v32bf) (__m512bh) \ + _mm512_setzero_si512 (), \ + (__mmask32) (-1))) + +#define _mm512_mask_minmax_pbh(W, U, A, B, C) \ + ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ + (__v32bf) (B), \ + (int) (C), \ + (__v32bf) (__m512bh) (W), \ + (__mmask32) (U))) + +#define _mm512_maskz_minmax_pbh(U, A, B, C) \ + ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ + (__v32bf) (B), \ + (int) (C), \ + (__v32bf) (__m512bh) \ + _mm512_setzero_si512 (), \ + (__mmask32) (U))) + #define _mm_minmax_pd(A, B, C) \ ((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \ (__v2df) (B), \ @@ -626,6 +918,58 @@ _mm_maskz_minmax_round_ss (__mmask8 
__U, __m128 __A, __m128 __B, _mm256_setzero_pd (), \ (__mmask8) (U))) +#define _mm512_minmax_pd(A, B, C) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_undefined_pd (), \ + (__mmask8) (-1), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_pd(W, U, A, B, C) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) (W), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_pd(U, A, B, C) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_setzero_pd (), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_pd(A, B, C, R) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_undefined_pd (), \ + (__mmask8) (-1), \ + (int) (R))) + +#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) (W), \ + (__mmask8) (U), \ + (int) (R))) + +#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_setzero_pd (), \ + (__mmask8) (U), \ + (int) (R))) + #define _mm_minmax_ph(A, B, C) \ ((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \ (__v8hf) (B), \ @@ -672,6 +1016,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_ph (), \ (__mmask16) (U))) +#define _mm512_minmax_ph(A, B, C) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_undefined_ph (), \ + (__mmask32) (-1), \ + _MM_FROUND_CUR_DIRECTION)) + +#define 
_mm512_mask_minmax_ph(W, U, A, B, C) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) (W), \ + (__mmask32) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_ph(U, A, B, C) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_setzero_ph (), \ + (__mmask32) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_ph(A, B, C, R) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_undefined_ph (), \ + (__mmask32) (-1), \ + (int) (R))) + +#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) (W), \ + (__mmask32) (U), \ + (int) (R))) + +#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_setzero_ph (), \ + (__mmask32) (U), \ + (int) (R))) + #define _mm_minmax_ps(A, B, C) \ ((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \ (__v4sf) (B), \ @@ -718,6 +1114,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_ps (), \ (__mmask8) (U))) +#define _mm512_minmax_ps(A, B, C) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_undefined_ps (), \ + (__mmask16) (-1), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_ps(W, U, A, B, C) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) (W), \ + (__mmask16) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_ps(U, A, B, C) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) 
(A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_setzero_ps (), \ + (__mmask16) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_ps(A, B, C, R) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_undefined_ps (), \ + (__mmask16) (-1), \ + (int) (R))) + +#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) (W), \ + (__mmask16) (U), \ + (int) (R))) + +#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_setzero_ps (), \ + (__mmask16) (U), \ + (int) (R))) + #define _mm_minmax_round_sd(A, B, C, R) \ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \ (__v2df) (B), \ diff --git a/gcc/config/i386/avx10_2satcvtintrin.h b/gcc/config/i386/avx10_2satcvtintrin.h index 78bcd729ff8..c4fa19bdbe3 100644 --- a/gcc/config/i386/avx10_2satcvtintrin.h +++ b/gcc/config/i386/avx10_2satcvtintrin.h @@ -63,37 +63,6 @@ _mm_maskz_ipcvts_bf16_epi8 (__mmask8 __U, __m128bh __A) (__mmask8) __U); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_ipcvts_bf16_epi8 (__m256bh __A) -{ - return - (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, - (__v16hi) - _mm256_undefined_si256 (), - (__mmask16) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A) -{ - return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, - (__v16hi) __W, - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A) -{ - 
return - (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, - (__v16hi) - _mm256_setzero_si256 (), - (__mmask16) __U); -} - extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ipcvts_bf16_epu8 (__m128bh __A) @@ -125,6 +94,37 @@ _mm_maskz_ipcvts_bf16_epu8 (__mmask8 __U, __m128bh __A) (__mmask8) __U); } +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvts_bf16_epi8 (__m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A) +{ + return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_ipcvts_bf16_epu8 (__m256bh __A) @@ -156,120 +156,66 @@ _mm256_maskz_ipcvts_bf16_epu8 (__mmask16 __U, __m256bh __A) (__mmask16) __U); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ph_epi8 (__m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, - (__v8hi) - _mm_undefined_si128 (), - (__mmask8) -1); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, - (__v8hi) __W, - (__mmask8) __U); -} - -extern __inline __m128i 
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ph_epu8 (__m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, - (__v8hi) - _mm_undefined_si128 (), - (__mmask8) -1); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, - (__v8hi) __W, - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); -} - -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ps_epi8 (__m128 __A) +_mm512_ipcvts_bf16_epi8 (__m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, - (__v4si) - _mm_undefined_si128 (), - (__mmask8) -1); + return + (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A) +_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); } -extern __inline 
__m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A) +_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return + (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ps_epu8 (__m128 __A) +_mm512_ipcvts_bf16_epu8 (__m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, - (__v4si) - _mm_undefined_si128 (), - (__mmask8) -1); + return + (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A) +_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A) +_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return + (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); } extern __inline __m128i @@ -390,117 +336,294 @@ _mm256_maskz_ipcvtts_bf16_epu8 (__mmask16 __U, __m256bh __A) 
(__mmask16) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvtts_ph_epi8 (__m128h __A) +_mm512_ipcvtts_bf16_epi8 (__m512bh __A) { - return (__m128i) __builtin_ia32_cvttph2ibs128_mask ((__v8hf) __A, - (__v8hi) - _mm_undefined_si128 (), - (__mmask8) -1); + return + (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvtts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A) +_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvttph2ibs128_mask ((__v8hf) __A, - (__v8hi) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvtts_ph_epi8 (__mmask8 __U, __m128h __A) +_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvttph2ibs128_mask ((__v8hf) __A, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); + return + (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_bf16_epu8 (__m512bh __A) +{ + return (__m512i) + __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, + (__v32hi) _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) +{ + return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); 
+} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A) +{ + return (__m512i) + __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvtts_ph_epu8 (__m128h __A) +_mm_ipcvts_ph_epi8 (__m128h __A) { - return (__m128i) __builtin_ia32_cvttph2iubs128_mask ((__v8hf) __A, - (__v8hi) - _mm_undefined_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvtts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A) +_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_cvttph2iubs128_mask ((__v8hf) __A, - (__v8hi) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvtts_ph_epu8 (__mmask8 __U, __m128h __A) +_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_cvttph2iubs128_mask ((__v8hf) __A, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvtts_ps_epi8 (__m128 __A) +_mm_ipcvts_ph_epu8 (__m128h __A) { - return (__m128i) __builtin_ia32_cvttps2ibs128_mask ((__v4sf) __A, - (__v4si) + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) _mm_undefined_si128 (), (__mmask8) -1); } extern __inline __m128i __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvtts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A) +_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_cvttps2ibs128_mask ((__v4sf) __A, - (__v4si) __W, + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) __W, (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvtts_ps_epi8 (__mmask8 __U, __m128 __A) +_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_cvttps2ibs128_mask ((__v4sf) __A, - (__v4si) + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) _mm_setzero_si128 (), (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvtts_ps_epu8 (__m128 __A) +_mm_ipcvts_ps_epi8 (__m128 __A) { - return (__m128i) __builtin_ia32_cvttps2iubs128_mask ((__v4sf) __A, - (__v4si) - _mm_undefined_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvtts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A) +_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A) { - return (__m128i) __builtin_ia32_cvttps2iubs128_mask ((__v4sf) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvtts_ps_epu8 (__mmask8 __U, __m128 __A) +_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A) { - return (__m128i) __builtin_ia32_cvttps2iubs128_mask ((__v4sf) __A, + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + 
(__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvts_ps_epu8 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvtts_ph_epi8 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvtts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2ibs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtts_ph_epi8 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvtts_ph_epu8 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm_mask_ipcvtts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2iubs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtts_ph_epu8 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvtts_ps_epi8 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvtts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2ibs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtts_ps_epi8 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvtts_ps_epu8 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvtts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2iubs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtts_ps_epu8 (__mmask8 __U, __m128 __A) +{ + return (__m128i) 
__builtin_ia32_cvttps2iubs128_mask ((__v4sf) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); @@ -1234,6 +1357,1416 @@ _mm256_maskz_cvtts_ps_epu64 (__mmask8 __U, __m128 __A) (__mmask8) __U); } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ph_epi8 (__m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ph_epu8 (__m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ps_epi8 (__m512 __A) 
+{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ps_epu8 (__m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ph_epi8 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) 
__builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A) +{ + return + (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ph_epu8 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ps_epi8 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask ((__v16sf) 
__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ps_epu8 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epi32 (__m512d __A) +{ + return (__m256i) + __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A) +{ + return + (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epi64 (__m512d __A) +{ + return (__m512i) + __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); 
+} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) +{ + return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A) +{ + return + (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epu32 (__m512d __A) +{ + return (__m256i) + __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A) +{ + return + (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epu64 (__m512d __A) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) +{ + return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epi32 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A) +{ + return + (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epi64 (__m256 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) +{ + return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A) +{ + return + (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_cvtts_ps_epu32 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epu64 (__m256 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) +{ + return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A) +{ + return + (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) 
__builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline 
__m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundps_epi8 
(__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + 
(__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R) +{ + return (__m512i) + 
__builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern 
__inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} +#else +#define _mm512_ipcvts_roundph_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvts_roundph_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvts_roundps_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define 
_mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvts_roundps_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvtts_roundph_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvtts_roundph_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \ + ((__m512i) \ + 
__builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvtts_roundps_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvtts_roundps_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epi32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epi64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) \ 
+ (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epu32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epu64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epi32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define 
_mm512_maskz_cvtts_roundps_epi32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epi64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epu32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epu64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) +#endif + extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) _mm_cvtts_sd_epi32 (__m128d __A) diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index c30a4e036d6..b195fe58d08 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -148,24 +148,14 @@ #include -#include - #include -#include - #include -#include - #include -#include - #include -#include - #include #include diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 444a25e197a..5da06b539f1 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -842,37 +842,33 @@ /* sm3intrin.h */ #define __builtin_ia32_vsm3rnds2(A, B, C, D) __builtin_ia32_vsm3rnds2 (A, B, C, 1) -/* avx10_2-512mediaintrin.h */ +/* avx10_2mediaintrin.h */ #define __builtin_ia32_mpsadbw512(A, B, C) __builtin_ia32_mpsadbw512 (A, B, 1) #define __builtin_ia32_mpsadbw512_mask(A, B, C, D, E) __builtin_ia32_mpsadbw512_mask (A, B, 1, D, E) - -/* avx10_2mediaintrin.h */ -#define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E) #define __builtin_ia32_mpsadbw256_mask(A, B, C, D, E) __builtin_ia32_mpsadbw256_mask (A, B, 1, D, E) +#define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E) -/* avx10_2-512convertintrin.h */ +/* avx10_2convertintrin.h */ #define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8) -/* avx10_2-512bf16intrin.h */ -#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D) -#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D) -#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D) -#define __builtin_ia32_fpclassbf16512_mask(A, B, C) __builtin_ia32_fpclassbf16512_mask(A, 1, C) -#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, 
B, 1, D) - /* avx10_2bf16intrin.h */ +#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D) #define __builtin_ia32_rndscalebf16256_mask(A, B, C, D) __builtin_ia32_rndscalebf16256_mask(A, 123, C, D) #define __builtin_ia32_rndscalebf16128_mask(A, B, C, D) __builtin_ia32_rndscalebf16128_mask(A, 123, C, D) +#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D) #define __builtin_ia32_reducebf16256_mask(A, B, C, D) __builtin_ia32_reducebf16256_mask(A, 123, C, D) #define __builtin_ia32_reducebf16128_mask(A, B, C, D) __builtin_ia32_reducebf16128_mask(A, 123, C, D) +#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D) #define __builtin_ia32_getmantbf16256_mask(A, B, C, D) __builtin_ia32_getmantbf16256_mask(A, 1, C, D) #define __builtin_ia32_getmantbf16128_mask(A, B, C, D) __builtin_ia32_getmantbf16128_mask(A, 1, C, D) +#define __builtin_ia32_fpclassbf16512_mask(A, B, C) __builtin_ia32_fpclassbf16512_mask(A, 1, C) #define __builtin_ia32_fpclassbf16256_mask(A, B, C) __builtin_ia32_fpclassbf16256_mask(A, 1, C) #define __builtin_ia32_fpclassbf16128_mask(A, B, C) __builtin_ia32_fpclassbf16128_mask(A, 1, C) +#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D) #define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D) #define __builtin_ia32_cmpbf16128_mask(A, B, C, D) __builtin_ia32_cmpbf16128_mask(A, B, 1, D) -/* avx10_2-512satcvtintrin.h */ +/* avx10_2satcvtintrin.h */ #define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, 8) @@ -889,8 +885,6 @@ #define 
__builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8) - -/* avx10_2satcvtintrin.h */ #define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8) #define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8) #define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8) @@ -902,24 +896,22 @@ #define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8) #endif -/* avx10_2-512minmaxintrin.h */ +/* avx10_2minmaxintrin.h */ +#define __builtin_ia32_minmaxbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxbf16512_mask (A, B, 4, W, U) +#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 4, D, E) +#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 4, D, E) #define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 4, D, E, 4) +#define __builtin_ia32_minmaxpd256_mask(A, B, C, D, E) __builtin_ia32_minmaxpd256_mask (A, B, 4, D, E) +#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E) #define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 4, D, E, 4) +#define __builtin_ia32_minmaxph256_mask(A, B, C, D, E) __builtin_ia32_minmaxph256_mask (A, B, 4, D, E) +#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E) #define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 4, D, E, 4) -#define __builtin_ia32_minmaxbf16512_mask(A, B, C, W, U) 
__builtin_ia32_minmaxbf16512_mask (A, B, 4, W, U) - -/* avx10_2minmaxintrin.h */ +#define __builtin_ia32_minmaxps256_mask(A, B, C, D, E) __builtin_ia32_minmaxps256_mask (A, B, 4, D, E) +#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E) #define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 4, D, E, 4) #define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 4, D, E, 4) #define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 4, D, E, 4) -#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxpd256_mask(A, B, C, D, E) __builtin_ia32_minmaxpd256_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxph256_mask(A, B, C, D, E) __builtin_ia32_minmaxph256_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxps256_mask(A, B, C, D, E) __builtin_ia32_minmaxps256_mask (A, B, 4, D, E) #include #include diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 3132ecafc0d..189e19e7780 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -849,37 +849,33 @@ /* sm3intrin.h */ #define __builtin_ia32_vsm3rnds2(A, B, C, D) __builtin_ia32_vsm3rnds2 (A, B, C, 1) -/* avx10_2-512mediaintrin.h */ +/* avx10_2mediaintrin.h */ #define __builtin_ia32_mpsadbw512(A, B, C) __builtin_ia32_mpsadbw512 (A, B, 1) #define 
__builtin_ia32_mpsadbw512_mask(A, B, C, D, E) __builtin_ia32_mpsadbw512_mask (A, B, 1, D, E) - -/* avx10_2mediaintrin.h */ -#define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E) #define __builtin_ia32_mpsadbw256_mask(A, B, C, D, E) __builtin_ia32_mpsadbw256_mask (A, B, 1, D, E) +#define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E) -/* avx10_2-512convertintrin.h */ +/* avx10_2convertintrin.h */ #define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8) -/* avx10_2-512bf16intrin.h */ -#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D) -#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D) -#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D) -#define __builtin_ia32_fpclassbf16512_mask(A, B, C) __builtin_ia32_fpclassbf16512_mask(A, 1, C) -#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D) - /* avx10_2bf16intrin.h */ +#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D) #define __builtin_ia32_rndscalebf16256_mask(A, B, C, D) __builtin_ia32_rndscalebf16256_mask(A, 123, C, D) #define __builtin_ia32_rndscalebf16128_mask(A, B, C, D) __builtin_ia32_rndscalebf16128_mask(A, 123, C, D) +#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D) #define __builtin_ia32_reducebf16256_mask(A, B, C, D) __builtin_ia32_reducebf16256_mask(A, 123, C, D) #define __builtin_ia32_reducebf16128_mask(A, B, C, D) __builtin_ia32_reducebf16128_mask(A, 123, C, D) +#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D) #define __builtin_ia32_getmantbf16256_mask(A, B, C, D) __builtin_ia32_getmantbf16256_mask(A, 
1, C, D) #define __builtin_ia32_getmantbf16128_mask(A, B, C, D) __builtin_ia32_getmantbf16128_mask(A, 1, C, D) +#define __builtin_ia32_fpclassbf16512_mask(A, B, C) __builtin_ia32_fpclassbf16512_mask(A, 1, C) #define __builtin_ia32_fpclassbf16256_mask(A, B, C) __builtin_ia32_fpclassbf16256_mask(A, 1, C) #define __builtin_ia32_fpclassbf16128_mask(A, B, C) __builtin_ia32_fpclassbf16128_mask(A, 1, C) +#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D) #define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D) #define __builtin_ia32_cmpbf16128_mask(A, B, C, D) __builtin_ia32_cmpbf16128_mask(A, B, 1, D) -/* avx10_2-512satcvtintrin.h */ +/* avx10_2satcvtintrin.h */ #define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, 8) @@ -896,8 +892,6 @@ #define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8) - -/* avx10_2satcvtintrin.h */ #define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8) #define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8) #define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8) @@ -909,23 +903,21 @@ #define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8) #endif -/* avx10_2-512minmaxintrin.h */ +/* avx10_2minmaxintrin.h */ +#define __builtin_ia32_minmaxbf16512_mask(A, B, C, W, U) 
__builtin_ia32_minmaxbf16512_mask (A, B, 4, W, U) +#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 4, D, E) +#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 4, D, E) #define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 4, D, E, 4) +#define __builtin_ia32_minmaxpd256_mask(A, B, C, D, E) __builtin_ia32_minmaxpd256_mask (A, B, 4, D, E) +#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E) #define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 4, D, E, 4) +#define __builtin_ia32_minmaxph256_mask(A, B, C, D, E) __builtin_ia32_minmaxph256_mask (A, B, 4, D, E) +#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E) #define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 4, D, E, 4) -#define __builtin_ia32_minmaxbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxbf16512_mask (A, B, 4, W, U) - -/* avx10_2minmaxintrin.h */ +#define __builtin_ia32_minmaxps256_mask(A, B, C, D, E) __builtin_ia32_minmaxps256_mask (A, B, 4, D, E) +#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E) #define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 4, D, E, 4) #define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 4, D, E, 4) #define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 4, D, E, 4) -#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxpd128_mask(A, B, 
C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxpd256_mask(A, B, C, D, E) __builtin_ia32_minmaxpd256_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxph256_mask(A, B, C, D, E) __builtin_ia32_minmaxph256_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E) -#define __builtin_ia32_minmaxps256_mask(A, B, C, D, E) __builtin_ia32_minmaxps256_mask (A, B, 4, D, E) #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 8ae41c1944c..f3b7c112b2d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1020,64 +1020,60 @@ test_2 (_mm512_gf2p8affine_epi64_epi8, __m512i, __m512i, __m512i, 1) /* sm3intrin.h */ test_3 (_mm_sm3rnds2_epi32, __m128i, __m128i, __m128i, __m128i, 1) -/* avx10_2-512mediaintrin.h */ +/* avx10_2mediaintrin.h */ test_2 (_mm512_mpsadbw_epu8, __m512i, __m512i, __m512i, 1) test_3 (_mm512_maskz_mpsadbw_epu8, __m512i, __mmask32, __m512i, __m512i, 1) -test_4 (_mm512_mask_mpsadbw_epu8, __m512i, __m512i, __mmask32, __m512i, __m512i, 1) - -/* avx10_2mediaintrin.h */ -test_3 (_mm_maskz_mpsadbw_epu8, __m128i, __mmask8, __m128i, __m128i, 1) test_3 (_mm256_maskz_mpsadbw_epu8, __m256i, __mmask16, __m256i, __m256i, 1) -test_4 (_mm_mask_mpsadbw_epu8, __m128i, __m128i, __mmask8, __m128i, __m128i, 1) +test_3 (_mm_maskz_mpsadbw_epu8, __m128i, __mmask8, __m128i, __m128i, 1) +test_4 (_mm512_mask_mpsadbw_epu8, __m512i, __m512i, __mmask32, __m512i, __m512i, 1) test_4 (_mm256_mask_mpsadbw_epu8, __m256i, __m256i, __mmask16, __m256i, __m256i, 1) +test_4 (_mm_mask_mpsadbw_epu8, __m128i, __m128i, __mmask8, __m128i, __m128i, 1) -/* avx10_2-512convertintrin.h */ +/* avx10_2convertintrin.h */ test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4) -/* 
avx10_2-512bf16intrin.h */ -test_1 (_mm512_roundscale_pbh, __m512bh, __m512bh, 123) -test_2 (_mm512_maskz_roundscale_pbh, __m512bh, __mmask32, __m512bh, 123) -test_3 (_mm512_mask_roundscale_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123) -test_1 (_mm512_reduce_pbh, __m512bh, __m512bh, 123) -test_2 (_mm512_maskz_reduce_pbh, __m512bh, __mmask32, __m512bh, 123) -test_3 (_mm512_mask_reduce_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123) -test_1x (_mm512_getmant_pbh, __m512bh, __m512bh, 1, 1) -test_2x (_mm512_maskz_getmant_pbh, __m512bh, __mmask32,__m512bh, 1, 1) -test_3x (_mm512_mask_getmant_pbh, __m512bh, __m512bh, __mmask32,__m512bh, 1, 1) -test_1 (_mm512_fpclass_pbh_mask, __mmask32, __m512bh, 13) -test_2 (_mm512_mask_fpclass_pbh_mask, __mmask32, __mmask32, __m512bh, 13) -test_2 (_mm512_cmp_pbh_mask, __mmask32, __m512bh, __m512bh, 1) -test_3 (_mm512_mask_cmp_pbh_mask, __mmask32, __mmask32,__m512bh, __m512bh, 1) - /* avx10_2bf16intrin.h */ +test_1 (_mm512_roundscale_pbh, __m512bh, __m512bh, 123) test_1 (_mm256_roundscale_pbh, __m256bh, __m256bh, 123) test_1 (_mm_roundscale_pbh, __m128bh, __m128bh, 123) +test_2 (_mm512_maskz_roundscale_pbh, __m512bh, __mmask32, __m512bh, 123) test_2 (_mm256_maskz_roundscale_pbh, __m256bh, __mmask16, __m256bh, 123) test_2 (_mm_maskz_roundscale_pbh, __m128bh, __mmask8, __m128bh, 123) +test_3 (_mm512_mask_roundscale_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123) test_3 (_mm256_mask_roundscale_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123) test_3 (_mm_mask_roundscale_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123) +test_1 (_mm512_reduce_pbh, __m512bh, __m512bh, 123) test_1 (_mm256_reduce_pbh, __m256bh, __m256bh, 123) test_1 (_mm_reduce_pbh, __m128bh, __m128bh, 123) +test_2 (_mm512_maskz_reduce_pbh, __m512bh, __mmask32, __m512bh, 123) test_2 (_mm256_maskz_reduce_pbh, __m256bh, __mmask16, __m256bh, 123) test_2 (_mm_maskz_reduce_pbh, __m128bh, __mmask8, __m128bh, 123) +test_3 (_mm512_mask_reduce_pbh, __m512bh, __m512bh, 
__mmask32, __m512bh, 123) test_3 (_mm256_mask_reduce_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123) test_3 (_mm_mask_reduce_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123) +test_1x (_mm512_getmant_pbh, __m512bh, __m512bh, 1, 1) test_1x (_mm256_getmant_pbh, __m256bh, __m256bh, 1, 1) test_1x (_mm_getmant_pbh, __m128bh, __m128bh, 1, 1) +test_2x (_mm512_maskz_getmant_pbh, __m512bh, __mmask32,__m512bh, 1, 1) test_2x (_mm256_maskz_getmant_pbh, __m256bh, __mmask16,__m256bh, 1, 1) test_2x (_mm_maskz_getmant_pbh, __m128bh, __mmask8, __m128bh, 1, 1) +test_3x (_mm512_mask_getmant_pbh, __m512bh, __m512bh, __mmask32,__m512bh, 1, 1) test_3x (_mm256_mask_getmant_pbh, __m256bh, __m256bh, __mmask16,__m256bh, 1, 1) test_3x (_mm_mask_getmant_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 1, 1) +test_1 (_mm512_fpclass_pbh_mask, __mmask32, __m512bh, 13) test_1 (_mm256_fpclass_pbh_mask, __mmask16, __m256bh, 13) test_1 (_mm_fpclass_pbh_mask, __mmask8, __m128bh, 13) +test_2 (_mm512_mask_fpclass_pbh_mask, __mmask32, __mmask32, __m512bh, 13) test_2 (_mm256_mask_fpclass_pbh_mask, __mmask16, __mmask16, __m256bh, 13) test_2 (_mm_mask_fpclass_pbh_mask, __mmask8, __mmask8, __m128bh, 13) +test_2 (_mm512_cmp_pbh_mask, __mmask32, __m512bh, __m512bh, 1) test_2 (_mm256_cmp_pbh_mask, __mmask16, __m256bh, __m256bh, 1) test_2 (_mm_cmp_pbh_mask, __mmask8, __m128bh, __m128bh, 1) +test_3 (_mm512_mask_cmp_pbh_mask, __mmask32, __mmask32,__m512bh, __m512bh, 1) test_3 (_mm256_mask_cmp_pbh_mask, __mmask16, __mmask16, __m256bh, __m256bh, 1) test_3 (_mm_mask_cmp_pbh_mask, __mmask8, __mmask8, __m128bh, __m128bh, 1) -/* avx10_2-512satcvtintrin.h */ +/* avx10_2satcvtintrin.h */ test_1 (_mm512_ipcvts_roundph_epi8, __m512i, __m512h, 8) test_1 (_mm512_ipcvts_roundph_epu8, __m512i, __m512h, 8) test_1 (_mm512_ipcvts_roundps_epi8, __m512i, __m512, 8) @@ -1126,8 +1122,6 @@ test_3 (_mm512_mask_cvtts_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8) test_1 (_mm512_cvtts_roundps_epu64, __m512i, __m256, 8) test_2 
(_mm512_maskz_cvtts_roundps_epu64, __m512i, __mmask8, __m256, 8) test_3 (_mm512_mask_cvtts_roundps_epu64, __m512i, __m512i, __mmask8, __m256, 8) - -/* avx10_2satcvtintrin.h */ test_1 (_mm_cvtts_roundsd_epi32, int, __m128d, 8) test_1 (_mm_cvtts_roundsd_epu32, unsigned int, __m128d, 8) test_1 (_mm_cvtts_roundss_epi32, int, __m128, 8) @@ -1139,7 +1133,7 @@ test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8) test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8) #endif -/* avx10_2-512minmaxintrin.h */ +/* avx10_2minmaxintrin.h */ test_2 (_mm512_minmax_pbh, __m512bh, __m512bh, __m512bh, 100) test_3 (_mm512_maskz_minmax_pbh, __m512bh, __mmask32, __m512bh, __m512bh, 100) test_4 (_mm512_mask_minmax_pbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100) @@ -1161,8 +1155,6 @@ test_4 (_mm512_mask_minmax_ps, __m512, __m512, __mmask16, __m512, __m512, 100) test_2 (_mm512_minmax_ph, __m512h, __m512h, __m512h, 100) test_3 (_mm512_maskz_minmax_ph, __m512h, __mmask32, __m512h, __m512h, 100) test_4 (_mm512_mask_minmax_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100) - -/* avx10_2minmaxintrin.h */ test_2 (_mm256_minmax_pbh, __m256bh, __m256bh, __m256bh, 100) test_3 (_mm256_maskz_minmax_pbh, __m256bh, __mmask16, __m256bh, __m256bh, 100) test_4 (_mm256_mask_minmax_pbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 16b059edeaa..0cb0368cf4f 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -1061,64 +1061,60 @@ test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1) /* sm3intrin.h */ test_3 (_mm_sm3rnds2_epi32, __m128i, __m128i, __m128i, __m128i, 1) -/* avx10_2-512mediaintrin.h */ +/* avx10_2mediaintrin.h */ test_2 (_mm512_mpsadbw_epu8, __m512i, __m512i, __m512i, 1) test_3 (_mm512_maskz_mpsadbw_epu8, __m512i, __mmask32, __m512i, __m512i, 1) -test_4 (_mm512_mask_mpsadbw_epu8, __m512i, __m512i, 
__mmask32, __m512i, __m512i, 1) - -/* avx10_2mediaintrin.h */ -test_3 (_mm_maskz_mpsadbw_epu8, __m128i, __mmask8, __m128i, __m128i, 1) test_3 (_mm256_maskz_mpsadbw_epu8, __m256i, __mmask16, __m256i, __m256i, 1) -test_4 (_mm_mask_mpsadbw_epu8, __m128i, __m128i, __mmask8, __m128i, __m128i, 1) +test_3 (_mm_maskz_mpsadbw_epu8, __m128i, __mmask8, __m128i, __m128i, 1) +test_4 (_mm512_mask_mpsadbw_epu8, __m512i, __m512i, __mmask32, __m512i, __m512i, 1) test_4 (_mm256_mask_mpsadbw_epu8, __m256i, __m256i, __mmask16, __m256i, __m256i, 1) +test_4 (_mm_mask_mpsadbw_epu8, __m128i, __m128i, __mmask8, __m128i, __m128i, 1) -/* avx10_2-512convertintrin.h */ +/* avx10_2convertintrin.h */ test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4) -/* avx10_2-512bf16intrin.h */ -test_1 (_mm512_roundscale_pbh, __m512bh, __m512bh, 123) -test_2 (_mm512_maskz_roundscale_pbh, __m512bh, __mmask32, __m512bh, 123) -test_3 (_mm512_mask_roundscale_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123) -test_1 (_mm512_reduce_pbh, __m512bh, __m512bh, 123) -test_2 (_mm512_maskz_reduce_pbh, __m512bh, __mmask32, __m512bh, 123) -test_3 (_mm512_mask_reduce_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123) -test_1x (_mm512_getmant_pbh, __m512bh, __m512bh, 1, 1) -test_2x (_mm512_maskz_getmant_pbh, __m512bh, __mmask32,__m512bh, 1, 1) -test_3x (_mm512_mask_getmant_pbh, __m512bh, __m512bh, __mmask32,__m512bh, 1, 1) -test_1 (_mm512_fpclass_pbh_mask, __mmask32, __m512bh, 13) -test_2 (_mm512_mask_fpclass_pbh_mask, __mmask32, __mmask32, __m512bh, 13) -test_2 (_mm512_cmp_pbh_mask, __mmask32, __m512bh, __m512bh, 1) -test_3 (_mm512_mask_cmp_pbh_mask, __mmask32, __mmask32,__m512bh, __m512bh, 1) - /* avx10_2bf16intrin.h */ +test_1 (_mm512_roundscale_pbh, __m512bh, __m512bh, 123) test_1 (_mm256_roundscale_pbh, __m256bh, __m256bh, 123) test_1 (_mm_roundscale_pbh, __m128bh, __m128bh, 123) +test_2 (_mm512_maskz_roundscale_pbh, __m512bh, __mmask32, __m512bh, 123) test_2 (_mm256_maskz_roundscale_pbh, __m256bh, __mmask16, 
__m256bh, 123) test_2 (_mm_maskz_roundscale_pbh, __m128bh, __mmask8, __m128bh, 123) +test_3 (_mm512_mask_roundscale_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123) test_3 (_mm256_mask_roundscale_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123) test_3 (_mm_mask_roundscale_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123) +test_1 (_mm512_reduce_pbh, __m512bh, __m512bh, 123) test_1 (_mm256_reduce_pbh, __m256bh, __m256bh, 123) test_1 (_mm_reduce_pbh, __m128bh, __m128bh, 123) +test_2 (_mm512_maskz_reduce_pbh, __m512bh, __mmask32, __m512bh, 123) test_2 (_mm256_maskz_reduce_pbh, __m256bh, __mmask16, __m256bh, 123) test_2 (_mm_maskz_reduce_pbh, __m128bh, __mmask8, __m128bh, 123) +test_3 (_mm512_mask_reduce_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123) test_3 (_mm256_mask_reduce_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123) test_3 (_mm_mask_reduce_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123) +test_1x (_mm512_getmant_pbh, __m512bh, __m512bh, 1, 1) test_1x (_mm256_getmant_pbh, __m256bh, __m256bh, 1, 1) test_1x (_mm_getmant_pbh, __m128bh, __m128bh, 1, 1) +test_2x (_mm512_maskz_getmant_pbh, __m512bh, __mmask32,__m512bh, 1, 1) test_2x (_mm256_maskz_getmant_pbh, __m256bh, __mmask16,__m256bh, 1, 1) test_2x (_mm_maskz_getmant_pbh, __m128bh, __mmask8, __m128bh, 1, 1) +test_3x (_mm512_mask_getmant_pbh, __m512bh, __m512bh, __mmask32,__m512bh, 1, 1) test_3x (_mm256_mask_getmant_pbh, __m256bh, __m256bh, __mmask16,__m256bh, 1, 1) test_3x (_mm_mask_getmant_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 1, 1) +test_1 (_mm512_fpclass_pbh_mask, __mmask32, __m512bh, 13) test_1 (_mm256_fpclass_pbh_mask, __mmask16, __m256bh, 13) test_1 (_mm_fpclass_pbh_mask, __mmask8, __m128bh, 13) +test_2 (_mm512_mask_fpclass_pbh_mask, __mmask32, __mmask32, __m512bh, 13) test_2 (_mm256_mask_fpclass_pbh_mask, __mmask16, __mmask16, __m256bh, 13) test_2 (_mm_mask_fpclass_pbh_mask, __mmask8, __mmask8, __m128bh, 13) +test_2 (_mm512_cmp_pbh_mask, __mmask32, __m512bh, __m512bh, 1) test_2 
(_mm256_cmp_pbh_mask, __mmask16, __m256bh, __m256bh, 1) test_2 (_mm_cmp_pbh_mask, __mmask8, __m128bh, __m128bh, 1) +test_3 (_mm512_mask_cmp_pbh_mask, __mmask32, __mmask32,__m512bh, __m512bh, 1) test_3 (_mm256_mask_cmp_pbh_mask, __mmask16, __mmask16, __m256bh, __m256bh, 1) test_3 (_mm_mask_cmp_pbh_mask, __mmask8, __mmask8, __m128bh, __m128bh, 1) -/* avx10_2-512satcvtintrin.h */ +/* avx10_2satcvtintrin.h */ test_1 (_mm512_ipcvts_roundph_epi8, __m512i, __m512h, 8) test_1 (_mm512_ipcvts_roundph_epu8, __m512i, __m512h, 8) test_1 (_mm512_ipcvts_roundps_epi8, __m512i, __m512, 8) @@ -1167,8 +1163,6 @@ test_3 (_mm512_mask_cvtts_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8) test_1 (_mm512_cvtts_roundps_epu64, __m512i, __m256, 8) test_2 (_mm512_maskz_cvtts_roundps_epu64, __m512i, __mmask8, __m256, 8) test_3 (_mm512_mask_cvtts_roundps_epu64, __m512i, __m512i, __mmask8, __m256, 8) - -/* avx10_2satcvtintrin.h */ test_1 (_mm_cvtts_roundsd_epi32, int, __m128d, 8) test_1 (_mm_cvtts_roundsd_epu32, unsigned int, __m128d, 8) test_1 (_mm_cvtts_roundss_epi32, int, __m128, 8) @@ -1180,7 +1174,7 @@ test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8) test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8) #endif -/* avx10_2-512minmaxintrin.h */ +/* avx10_2minmaxintrin.h */ test_2 (_mm512_minmax_pbh, __m512bh, __m512bh, __m512bh, 100) test_3 (_mm512_maskz_minmax_pbh, __m512bh, __mmask32, __m512bh, __m512bh, 100) test_4 (_mm512_mask_minmax_pbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100) @@ -1202,8 +1196,6 @@ test_4 (_mm512_mask_minmax_ps, __m512, __m512, __mmask16, __m512, __m512, 100) test_2 (_mm512_minmax_ph, __m512h, __m512h, __m512h, 100) test_3 (_mm512_maskz_minmax_ph, __m512h, __mmask32, __m512h, __m512h, 100) test_4 (_mm512_mask_minmax_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100) - -/* avx10_2minmaxintrin.h */ test_2 (_mm256_minmax_pbh, __m256bh, __m256bh, __m256bh, 100) test_3 (_mm256_maskz_minmax_pbh, __m256bh, __mmask16, __m256bh, 
__m256bh, 100) test_4 (_mm256_mask_minmax_pbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 2cfcf285188..95db1f78aac 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -824,37 +824,33 @@ /* sm3intrin.h */ #define __builtin_ia32_vsm3rnds2(A, B, C, D) __builtin_ia32_vsm3rnds2 (A, B, C, 1) -/* avx10_2-512mediaintrin.h */ +/* avx10_2mediaintrin.h */ #define __builtin_ia32_mpsadbw512(A, B, C) __builtin_ia32_mpsadbw512 (A, B, 1) #define __builtin_ia32_mpsadbw512_mask(A, B, C, D, E) __builtin_ia32_mpsadbw512_mask (A, B, 1, D, E) - -/* avx10_2-mediaintrin.h */ -#define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E) #define __builtin_ia32_mpsadbw256_mask(A, B, C, D, E) __builtin_ia32_mpsadbw256_mask (A, B, 1, D, E) +#define __builtin_ia32_mpsadbw128_mask(A, B, C, D, E) __builtin_ia32_mpsadbw128_mask (A, B, 1, D, E) -/* avx10_2-512convertintrin.h */ +/* avx10_2convertintrin.h */ #define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8) -/* avx10_2-512bf16intrin.h */ -#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D) -#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D) -#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D) -#define __builtin_ia32_fpclassbf16512_mask(A, B, C) __builtin_ia32_fpclassbf16512_mask(A, 1, C) -#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D) - /* avx10_2bf16intrin.h */ +#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D) #define __builtin_ia32_rndscalebf16256_mask(A, B, C, D) __builtin_ia32_rndscalebf16256_mask(A, 123, C, D) #define
__builtin_ia32_rndscalebf16128_mask(A, B, C, D) __builtin_ia32_rndscalebf16128_mask(A, 123, C, D) +#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D) #define __builtin_ia32_reducebf16256_mask(A, B, C, D) __builtin_ia32_reducebf16256_mask(A, 123, C, D) #define __builtin_ia32_reducebf16128_mask(A, B, C, D) __builtin_ia32_reducebf16128_mask(A, 123, C, D) +#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D) #define __builtin_ia32_getmantbf16256_mask(A, B, C, D) __builtin_ia32_getmantbf16256_mask(A, 1, C, D) #define __builtin_ia32_getmantbf16128_mask(A, B, C, D) __builtin_ia32_getmantbf16128_mask(A, 1, C, D) +#define __builtin_ia32_fpclassbf16512_mask(A, B, C) __builtin_ia32_fpclassbf16512_mask(A, 1, C) #define __builtin_ia32_fpclassbf16256_mask(A, B, C) __builtin_ia32_fpclassbf16256_mask(A, 1, C) #define __builtin_ia32_fpclassbf16128_mask(A, B, C) __builtin_ia32_fpclassbf16128_mask(A, 1, C) +#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D) #define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D) #define __builtin_ia32_cmpbf16128_mask(A, B, C, D) __builtin_ia32_cmpbf16128_mask(A, B, 1, D) -/* avx10_2-512satcvtintrin.h */ +/* avx10_2satcvtintrin.h */ #define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, 8) @@ -871,8 +867,6 @@ #define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) 
__builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8) - -/* avx10_2satcvtintrin.h */ #define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8) #define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8) #define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8) @@ -884,24 +878,22 @@ #define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8) #endif -/* avx10_2-512minmaxintrin.h */ +/* avx10_2minmaxintrin.h */ +#define __builtin_ia32_minmaxbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxbf16512_mask (A, B, 100, W, U) +#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 100, D, E) +#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 100, D, E) #define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 100, D, E, 4) +#define __builtin_ia32_minmaxpd256_mask(A, B, C, D, E) __builtin_ia32_minmaxpd256_mask (A, B, 100, D, E) +#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 100, D, E) #define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 100, D, E, 4) +#define __builtin_ia32_minmaxph256_mask(A, B, C, D, E) __builtin_ia32_minmaxph256_mask (A, B, 100, D, E) +#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 100, D, E) #define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 100, D, E, 4) -#define __builtin_ia32_minmaxbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxbf16512_mask (A, B, 100, W, U) - -/* avx10_2-minmaxintrin.h */ +#define __builtin_ia32_minmaxps256_mask(A, B, C, D, E) __builtin_ia32_minmaxps256_mask (A, B, 100, D, E) +#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 
100, D, E) #define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 100, D, E, 4) #define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 100, D, E, 4) #define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 100, D, E, 4) -#define __builtin_ia32_minmaxbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16128_mask (A, B, 100, D, E) -#define __builtin_ia32_minmaxbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxbf16256_mask (A, B, 100, D, E) -#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 100, D, E) -#define __builtin_ia32_minmaxpd256_mask(A, B, C, D, E) __builtin_ia32_minmaxpd256_mask (A, B, 100, D, E) -#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 100, D, E) -#define __builtin_ia32_minmaxph256_mask(A, B, C, D, E) __builtin_ia32_minmaxph256_mask (A, B, 100, D, E) -#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E) -#define __builtin_ia32_minmaxps256_mask(A, B, C, D, E) __builtin_ia32_minmaxps256_mask (A, B, 100, D, E) #pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs,amx-movrs")