From c027accb4243ceed83b13982bd08c59f6a3561d2 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Wed, 13 Mar 2019 15:16:03 +0800 Subject: [PATCH] AVX512FP16: Add vcvttph2w/vcvttph2uw/vcvttph2dq/vcvttph2qq/vcvttph2udq/vcvttph2uqq gcc/ChangeLog: * config/i386/avx512fp16intrin.h (_mm512_cvttph_epi32): New intrinsic. (_mm512_mask_cvttph_epi32): Likewise. (_mm512_maskz_cvttph_epi32): Likewise. (_mm512_cvtt_roundph_epi32): Likewise. (_mm512_mask_cvtt_roundph_epi32): Likewise. (_mm512_maskz_cvtt_roundph_epi32): Likewise. (_mm512_cvttph_epu32): Likewise. (_mm512_mask_cvttph_epu32): Likewise. (_mm512_maskz_cvttph_epu32): Likewise. (_mm512_cvtt_roundph_epu32): Likewise. (_mm512_mask_cvtt_roundph_epu32): Likewise. (_mm512_maskz_cvtt_roundph_epu32): Likewise. (_mm512_cvttph_epi64): Likewise. (_mm512_mask_cvttph_epi64): Likewise. (_mm512_maskz_cvttph_epi64): Likewise. (_mm512_cvtt_roundph_epi64): Likewise. (_mm512_mask_cvtt_roundph_epi64): Likewise. (_mm512_maskz_cvtt_roundph_epi64): Likewise. (_mm512_cvttph_epu64): Likewise. (_mm512_mask_cvttph_epu64): Likewise. (_mm512_maskz_cvttph_epu64): Likewise. (_mm512_cvtt_roundph_epu64): Likewise. (_mm512_mask_cvtt_roundph_epu64): Likewise. (_mm512_maskz_cvtt_roundph_epu64): Likewise. (_mm512_cvttph_epi16): Likewise. (_mm512_mask_cvttph_epi16): Likewise. (_mm512_maskz_cvttph_epi16): Likewise. (_mm512_cvtt_roundph_epi16): Likewise. (_mm512_mask_cvtt_roundph_epi16): Likewise. (_mm512_maskz_cvtt_roundph_epi16): Likewise. (_mm512_cvttph_epu16): Likewise. (_mm512_mask_cvttph_epu16): Likewise. (_mm512_maskz_cvttph_epu16): Likewise. (_mm512_cvtt_roundph_epu16): Likewise. (_mm512_mask_cvtt_roundph_epu16): Likewise. (_mm512_maskz_cvtt_roundph_epu16): Likewise. * config/i386/avx512fp16vlintrin.h (_mm_cvttph_epi32): New intirnsic. (_mm_mask_cvttph_epi32): Likewise. (_mm_maskz_cvttph_epi32): Likewise. (_mm256_cvttph_epi32): Likewise. (_mm256_mask_cvttph_epi32): Likewise. (_mm256_maskz_cvttph_epi32): Likewise. (_mm_cvttph_epu32): Likewise. (_mm_mask_cvttph_epu32): Likewise. (_mm_maskz_cvttph_epu32): Likewise. (_mm256_cvttph_epu32): Likewise. (_mm256_mask_cvttph_epu32): Likewise. (_mm256_maskz_cvttph_epu32): Likewise. (_mm_cvttph_epi64): Likewise. (_mm_mask_cvttph_epi64): Likewise. (_mm_maskz_cvttph_epi64): Likewise. (_mm256_cvttph_epi64): Likewise. (_mm256_mask_cvttph_epi64): Likewise. (_mm256_maskz_cvttph_epi64): Likewise. (_mm_cvttph_epu64): Likewise. (_mm_mask_cvttph_epu64): Likewise. (_mm_maskz_cvttph_epu64): Likewise. (_mm256_cvttph_epu64): Likewise. (_mm256_mask_cvttph_epu64): Likewise. (_mm256_maskz_cvttph_epu64): Likewise. (_mm_cvttph_epi16): Likewise. (_mm_mask_cvttph_epi16): Likewise. (_mm_maskz_cvttph_epi16): Likewise. (_mm256_cvttph_epi16): Likewise. (_mm256_mask_cvttph_epi16): Likewise. (_mm256_maskz_cvttph_epi16): Likewise. (_mm_cvttph_epu16): Likewise. (_mm_mask_cvttph_epu16): Likewise. (_mm_maskz_cvttph_epu16): Likewise. (_mm256_cvttph_epu16): Likewise. (_mm256_mask_cvttph_epu16): Likewise. (_mm256_maskz_cvttph_epu16): Likewise. * config/i386/i386-builtin.def: Add new builtins. * config/i386/sse.md (avx512fp16_fix_trunc2): New. (avx512fp16_fix_trunc2): Ditto. (*avx512fp16_fix_trunc2_load): Ditto. (avx512fp16_fix_truncv2di2): Ditto. (avx512fp16_fix_truncv2di2_load): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx-1.c: Add test for new builtins. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/sse-14.c: Add test for new intrinsics. * gcc.target/i386/sse-22.c: Ditto. --- gcc/config/i386/avx512fp16intrin.h | 545 +++++++++++++++++++++++++ gcc/config/i386/avx512fp16vlintrin.h | 365 +++++++++++++++++ gcc/config/i386/i386-builtin.def | 18 + gcc/config/i386/sse.md | 54 +++ gcc/testsuite/gcc.target/i386/avx-1.c | 6 + gcc/testsuite/gcc.target/i386/sse-13.c | 6 + gcc/testsuite/gcc.target/i386/sse-14.c | 18 + gcc/testsuite/gcc.target/i386/sse-22.c | 18 + gcc/testsuite/gcc.target/i386/sse-23.c | 6 + 9 files changed, 1036 insertions(+) diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index 1b8a9f340ade..762fc8d72166 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -2702,6 +2702,203 @@ _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C) #endif /* __OPTIMIZE__ */ +/* Intrinsics vcvttph2dq. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epi32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epi32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B, + __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epi32(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvttph2dq512_mask_round ((A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16)(-1), (B))) + +#define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvttph2dq512_mask_round ((C), \ + (__v16si)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvtt_roundph_epi32(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvttph2dq512_mask_round ((B), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvttph2udq. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epu32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epu32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B, + __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epu32(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvttph2udq512_mask_round ((A), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (__mmask16)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvttph2udq512_mask_round ((C), \ + (__v16si)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvtt_roundph_epu32(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvttph2udq512_mask_round ((B), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + /* Intrinsics vcvtdq2ph. */ extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -3019,6 +3216,156 @@ _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C) #endif /* __OPTIMIZE__ */ +/* Intrinsics vcvttph2qq. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epi64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epi64(A, B) \ + (__builtin_ia32_vcvttph2qq512_mask_round ((A), \ + _mm512_setzero_si512 (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) \ + __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D)) + +#define _mm512_maskz_cvtt_roundph_epi64(A, B, C) \ + (__builtin_ia32_vcvttph2qq512_mask_round ((B), \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvttph2uqq. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epu64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epu64(A, B) \ + (__builtin_ia32_vcvttph2uqq512_mask_round ((A), \ + _mm512_setzero_si512 (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) \ + __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D)) + +#define _mm512_maskz_cvtt_roundph_epu64(A, B, C) \ + (__builtin_ia32_vcvttph2uqq512_mask_round ((B), \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + /* Intrinsics vcvtqq2ph. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -3363,6 +3710,204 @@ _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C) #endif /* __OPTIMIZE__ */ +/* Intrinsics vcvttph2w. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epi16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__C, + (__v32hi) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epi16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B, + __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__C, + (__v32hi) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epi16(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvttph2w512_mask_round ((A), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (__mmask32)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvttph2w512_mask_round ((C), \ + (__v32hi)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvtt_roundph_epi16(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvttph2w512_mask_round ((B), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvttph2uw. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epu16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__C, + (__v32hi) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epu16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B, + __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__C, + (__v32hi) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epu16(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvttph2uw512_mask_round ((A), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (__mmask32)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvttph2uw512_mask_round ((C), \ + (__v32hi)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvtt_roundph_epu16(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvttph2uw512_mask_round ((B), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + /* Intrinsics vcvtw2ph. */ extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index 5898b626b5da..f54c32fd572b 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -1050,6 +1050,132 @@ _mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B) __A); } +/* Intrinsics vcvttph2dq. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epi32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2dq128_mask (__A, + (__v4si) _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epi32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i)__builtin_ia32_vcvttph2dq128_mask (__C, + ( __v4si) __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2dq128_mask (__B, + (__v4si) _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epi32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2dq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epi32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2dq256_mask (__C, + ( __v8si) __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2dq256_mask (__B, + (__v8si) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvttph2udq. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epu32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2udq128_mask (__A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epu32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvttph2udq128_mask (__C, + ( __v4si) __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2udq128_mask (__B, + (__v4si) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epu32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2udq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epu32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2udq256_mask (__C, + ( __v8si) __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2udq256_mask (__B, + (__v8si) + _mm256_setzero_si256 (), + __A); +} + /* Intrinsics vcvtdq2ph. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -1257,6 +1383,116 @@ _mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) __A); } +/* Intrinsics vcvttph2qq. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2qq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epi64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2qq128_mask (__C, + __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2qq128_mask (__B, + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2qq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epi64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2qq256_mask (__C, + __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2qq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvttph2uqq. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2uqq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epu64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2uqq128_mask (__C, + __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2uqq128_mask (__B, + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2uqq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epu64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2uqq256_mask (__C, + __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2uqq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} + /* Intrinsics vcvtqq2ph. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -1481,6 +1717,135 @@ _mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B) __A); } +/* Intrinsics vcvttph2w. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epi16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2w128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epi16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvttph2w128_mask (__C, + ( __v8hi) __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2w128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epi16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2w256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epi16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2w256_mask (__C, + ( __v16hi) __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2w256_mask (__B, + (__v16hi) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvttph2uw. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epu16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2uw128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epu16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvttph2uw128_mask (__C, + ( __v8hi) __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2uw128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epu16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2uw256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epu16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2uw256_mask (__C, + ( __v16hi) __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2uw256_mask (__B, + (__v16hi) _mm256_setzero_si256 (), + __A); +} + /* Intrinsics vcvtw2ph. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index cc3efa35b741..176a3783ad90 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2835,14 +2835,26 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v8si_mask, "__builtin_ia32_vcvtph2dq256_mask", IX86_BUILTIN_VCVTPH2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v4si_mask, "__builtin_ia32_vcvtph2udq128_mask", IX86_BUILTIN_VCVTPH2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v8si_mask, "__builtin_ia32_vcvtph2udq256_mask", IX86_BUILTIN_VCVTPH2UDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv4si2_mask, "__builtin_ia32_vcvttph2dq128_mask", IX86_BUILTIN_VCVTTPH2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv8si2_mask, "__builtin_ia32_vcvttph2dq256_mask", IX86_BUILTIN_VCVTTPH2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv4si2_mask, "__builtin_ia32_vcvttph2udq128_mask", IX86_BUILTIN_VCVTTPH2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv8si2_mask, "__builtin_ia32_vcvttph2udq256_mask", IX86_BUILTIN_VCVTTPH2UDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v2di_mask, "__builtin_ia32_vcvtph2qq128_mask", IX86_BUILTIN_VCVTPH2QQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v4di_mask, "__builtin_ia32_vcvtph2qq256_mask", IX86_BUILTIN_VCVTPH2QQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HF_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v2di_mask, "__builtin_ia32_vcvtph2uqq128_mask", IX86_BUILTIN_VCVTPH2UQQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v4di_mask, "__builtin_ia32_vcvtph2uqq256_mask", IX86_BUILTIN_VCVTPH2UQQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HF_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv2di2_mask, "__builtin_ia32_vcvttph2qq128_mask", IX86_BUILTIN_VCVTTPH2QQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv4di2_mask, "__builtin_ia32_vcvttph2qq256_mask", IX86_BUILTIN_VCVTTPH2QQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HF_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv2di2_mask, "__builtin_ia32_vcvttph2uqq128_mask", IX86_BUILTIN_VCVTTPH2UQQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv4di2_mask, "__builtin_ia32_vcvttph2uqq256_mask", IX86_BUILTIN_VCVTTPH2UQQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HF_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v8hi_mask, "__builtin_ia32_vcvtph2w128_mask", IX86_BUILTIN_VCVTPH2W128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v16hi_mask, "__builtin_ia32_vcvtph2w256_mask", IX86_BUILTIN_VCVTPH2W256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v8hi_mask, "__builtin_ia32_vcvtph2uw128_mask", IX86_BUILTIN_VCVTPH2UW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v16hi_mask, "__builtin_ia32_vcvtph2uw256_mask", IX86_BUILTIN_VCVTPH2UW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv8hi2_mask, "__builtin_ia32_vcvttph2w128_mask", IX86_BUILTIN_VCVTTPH2W128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv16hi2_mask, "__builtin_ia32_vcvttph2w256_mask", IX86_BUILTIN_VCVTTPH2W256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv8hi2_mask, "__builtin_ia32_vcvttph2uw128_mask", IX86_BUILTIN_VCVTTPH2UW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv16hi2_mask, "__builtin_ia32_vcvttph2uw256_mask", IX86_BUILTIN_VCVTTPH2UW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v8hi_mask, "__builtin_ia32_vcvtw2ph128_mask", IX86_BUILTIN_VCVTW2PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HI_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v16hi_mask, "__builtin_ia32_vcvtw2ph256_mask", IX86_BUILTIN_VCVTW2PH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HI_V16HF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v8hi_mask, "__builtin_ia32_vcvtuw2ph128_mask", IX86_BUILTIN_VCVTUW2PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HI_V8HF_UQI) @@ -3084,10 +3096,16 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vgetmantv8hf_mask_round, "__builtin_ia32_getmantsh_mask_round", IX86_BUILTIN_GETMANTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e78cc8420571..60a52b236114 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5934,6 +5934,60 @@ (set_attr "prefix" "evex") (set_attr "mode" "HF")]) +(define_insn "avx512fp16_fix_trunc2" + [(set (match_operand:VI2H_AVX512VL 0 "register_operand" "=v") + (any_fix:VI2H_AVX512VL + (match_operand: 1 "" "")))] + "TARGET_AVX512FP16" + "vcvttph2\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +(define_insn "avx512fp16_fix_trunc2" + [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v") + (any_fix:VI4_128_8_256 + (vec_select:V4HF + (match_operand:V8HF 1 "register_operand" "v") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvttph2\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +(define_insn "*avx512fp16_fix_trunc2_load" + [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v") + (any_fix:VI4_128_8_256 + (match_operand:V4HF 1 "memory_operand" "m")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvttph2\t{%1, %0|%0, %q1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +(define_insn "avx512fp16_fix_truncv2di2" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (any_fix:V2DI + (vec_select:V2HF + (match_operand:V8HF 1 "nonimmediate_operand" "v") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvttph2qq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*avx512fp16_fix_truncv2di2_load" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (any_fix:V2DI + (match_operand:V2HF 1 "memory_operand" "m")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvttph2qq\t{%1, %0|%0, %k1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel single-precision floating point conversion operations diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index fbefac793a02..bd6b9df5842c 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -723,8 +723,14 @@ #define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 5db9e355ae21..c58e8acd57f7 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -740,8 +740,14 @@ #define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 89a589e0d80a..98e38fb025a9 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -680,8 +680,14 @@ test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123) test_1 (_mm512_getexp_round_ph, __m512h, __m512h, 8) test_1 (_mm512_cvt_roundph_epi16, __m512i, __m512h, 8) test_1 (_mm512_cvt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_cvtt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_cvtt_roundph_epu16, __m512i, __m512h, 8) test_1 (_mm512_cvt_roundph_epi32, __m512i, __m256h, 8) test_1 (_mm512_cvt_roundph_epu32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epi32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epu32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvtt_roundph_epu64, __m512i, __m128h, 8) test_1 (_mm512_cvt_roundph_epi64, __m512i, __m128h, 8) test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8) test_1 (_mm512_cvt_roundepi16_ph, __m512h, __m512i, 8) @@ -732,10 +738,16 @@ test_2 (_mm512_maskz_getexp_round_ph, __m512h, __mmask32, __m512h, 8) test_2 (_mm_getexp_round_sh, __m128h, __m128h, __m128h, 8) test_2 (_mm512_maskz_cvt_roundph_epi16, __m512i, __mmask32, __m512h, 8) test_2 (_mm512_maskz_cvt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu16, __m512i, __mmask32, __m512h, 8) test_2 (_mm512_maskz_cvt_roundph_epi32, __m512i, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvt_roundph_epu32, __m512i, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvt_roundph_epi64, __m512i, __mmask8, __m128h, 8) test_2 (_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu64, __m512i, __mmask8, __m128h, 8) test_2 (_mm512_maskz_cvt_roundepi16_ph, __m512h, __mmask32, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu16_ph, __m512h, __mmask32, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8) @@ -784,10 +796,16 @@ test_3 (_mm_maskz_getexp_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) test_3 (_mm512_mask_getexp_round_ph, __m512h, __m512h, __mmask32, __m512h, 8) test_3 (_mm512_mask_cvt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) test_3 (_mm512_mask_cvt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) test_3 (_mm512_mask_cvt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) test_3 (_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) test_3 (_mm512_mask_cvt_roundepi16_ph, __m512h, __m512h, __mmask32, __m512i, 8) test_3 (_mm512_mask_cvt_roundepu16_ph, __m512h, __m512h, __mmask32, __m512i, 8) test_3 (_mm512_mask_cvt_roundepi32_ph, __m256h, __m256h, __mmask16, __m512i, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index fed12744c6c1..3ad10908d497 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -785,10 +785,16 @@ test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123) test_1 (_mm512_getexp_round_ph, __m512h, __m512h, 8) test_1 (_mm512_cvt_roundph_epi16, __m512i, __m512h, 8) test_1 (_mm512_cvt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_cvtt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_cvtt_roundph_epu16, __m512i, __m512h, 8) test_1 (_mm512_cvt_roundph_epi32, __m512i, __m256h, 8) test_1 (_mm512_cvt_roundph_epu32, __m512i, __m256h, 8) test_1 (_mm512_cvt_roundph_epi64, __m512i, __m128h, 8) test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8) +test_1 (_mm512_cvtt_roundph_epi32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epu32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvtt_roundph_epu64, __m512i, __m128h, 8) test_1 (_mm512_cvt_roundepi16_ph, __m512h, __m512i, 8) test_1 (_mm512_cvt_roundepu16_ph, __m512h, __m512i, 8) test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8) @@ -836,10 +842,16 @@ test_2 (_mm512_maskz_getexp_round_ph, __m512h, __mmask32, __m512h, 8) test_2 (_mm_getexp_round_sh, __m128h, __m128h, __m128h, 8) test_2 (_mm512_maskz_cvt_roundph_epi16, __m512i, __mmask32, __m512h, 8) test_2 (_mm512_maskz_cvt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu16, __m512i, __mmask32, __m512h, 8) test_2 (_mm512_maskz_cvt_roundph_epi32, __m512i, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvt_roundph_epu32, __m512i, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvt_roundph_epi64, __m512i, __mmask8, __m128h, 8) test_2 (_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu64, __m512i, __mmask8, __m128h, 8) test_2 (_mm512_maskz_cvt_roundepi16_ph, __m512h, __mmask32, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu16_ph, __m512h, __mmask32, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8) @@ -887,10 +899,16 @@ test_3 (_mm_maskz_getexp_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) test_3 (_mm512_mask_getexp_round_ph, __m512h, __m512h, __mmask32, __m512h, 8) test_3 (_mm512_mask_cvt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) test_3 (_mm512_mask_cvt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) test_3 (_mm512_mask_cvt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) test_3 (_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) test_3 (_mm512_mask_cvt_roundepi16_ph, __m512h, __m512h, __mmask32, __m512i, 8) test_3 (_mm512_mask_cvt_roundepu16_ph, __m512h, __m512h, __mmask32, __m512i, 8) test_3 (_mm512_mask_cvt_roundepi32_ph, __m256h, __m256h, __mmask16, __m512i, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 354178a7c01a..7dcaf216043c 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -741,8 +741,14 @@ #define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2dq_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvttph2dq_v16si_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2udq_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvttph2udq_v16si_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2qq_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvttph2qq_v8di_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uqq_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uqq_v8di_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2w_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvttph2w_v32hi_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uw_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uw_v32hi_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, 8) -- 2.47.2