#endif /* __OPTIMIZE__ */
+/* Intrinsics vmovw. */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi16_si128 (short __A)
+{
+  /* Place __A in element 0 and zero the upper seven 16-bit elements.  */
+  return __extension__ (__m128i)(__v8hi) { __A, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/* Extract the low 16-bit element of __A as a short (vmovw).  */
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si16 (__m128i __A)
+{
+  /* vec_ext_v8hi with index 0 selects element 0 of the 8x16-bit view.  */
+  return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
+}
+
+/* Intrinsics vmovsh. */
+/* Masked scalar half-float load: combine the _Float16 at *__C with the
+   fallback vector __A under mask __B via the loadsh builtin.
+   NOTE(review): per vmovsh masked-load semantics the upper elements of
+   the result are presumably zeroed, not taken from __A — confirm against
+   the instruction reference.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
+{
+  return __builtin_ia32_loadsh_mask (__C, __A, __B);
+}
+
+/* Zero-masked scalar half-float load: same builtin as the merge form,
+   but the fallback vector is all zeros, so a clear mask bit 0 yields a
+   zero low element.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
+{
+  return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
+}
+
+/* Masked scalar half-float store: write the low _Float16 element of __C
+   to *__A when bit 0 of mask __B is set.  The destination is written
+   through, so the pointer must not be const-qualified (the original
+   `_Float16 const*' declared a store destination as const).  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_sh (_Float16 *__A, __mmask8 __B, __m128h __C)
+{
+  __builtin_ia32_storesh_mask (__A, __C, __B);
+}
+
+/* Return a vector whose low half-float element comes from __B and
+   whose remaining elements come from __A.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_sh (__m128h __A, __m128h __B)
+{
+  __m128h __res = __A;
+  __res[0] = __B[0];
+  return __res;
+}
+
+/* Masked scalar half-float move via the vmovsh builtin; __A supplies
+   the merge source, __B the mask, and __C/__D the two input vectors.
+   NOTE(review): the builtin's argument order (op1, op2, merge, mask)
+   is inferred from this call site — verify against the pattern's
+   operand order in sse.md.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
+}
+
+/* Zero-masked scalar half-float move: identical to the merge form but
+   with an all-zero merge source, so a clear mask bit 0 zeroes the low
+   element.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
+}
+
#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
DEF_POINTER_TYPE (PVOID, VOID)
DEF_POINTER_TYPE (PDOUBLE, DOUBLE)
DEF_POINTER_TYPE (PFLOAT, FLOAT)
+DEF_POINTER_TYPE (PCFLOAT16, FLOAT16, CONST)
DEF_POINTER_TYPE (PSHORT, SHORT)
DEF_POINTER_TYPE (PUSHORT, USHORT)
DEF_POINTER_TYPE (PINT, INT)
DEF_FUNCTION_TYPE (HI, V16HF, INT, UHI)
DEF_FUNCTION_TYPE (SI, V32HF, INT, USI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF)
+DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, UQI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, INT, V8HF, UQI)
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+/* AVX512FP16 */
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_loadhf_mask, "__builtin_ia32_loadsh_mask", IX86_BUILTIN_LOADSH_MASK, UNKNOWN, (int) V8HF_FTYPE_PCFLOAT16_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_storehf_mask, "__builtin_ia32_storesh_mask", IX86_BUILTIN_STORESH_MASK, UNKNOWN, (int) VOID_FTYPE_PCFLOAT16_V8HF_UQI)
+
/* RDPKRU and WRPKRU. */
BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID)
BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getexpv8hf_mask, "__builtin_ia32_getexpph128_mask", IX86_BUILTIN_GETEXPPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getmantv16hf_mask, "__builtin_ia32_getmantph256_mask", IX86_BUILTIN_GETMANTPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getmantv8hf_mask, "__builtin_ia32_getmantph128_mask", IX86_BUILTIN_GETMANTPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_movhf_mask, "__builtin_ia32_vmovsh_mask", IX86_BUILTIN_VMOVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
case VOID_FTYPE_PFLOAT_V16SF_UHI:
case VOID_FTYPE_PFLOAT_V8SF_UQI:
case VOID_FTYPE_PFLOAT_V4SF_UQI:
+ case VOID_FTYPE_PCFLOAT16_V8HF_UQI:
case VOID_FTYPE_PV32QI_V32HI_USI:
case VOID_FTYPE_PV16QI_V16HI_UHI:
case VOID_FTYPE_PUDI_V8HI_UQI:
case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
+ case V8HF_FTYPE_PCFLOAT16_V8HF_UQI:
nargs = 3;
klass = load;
memory = 0;
break;
case E_V8HImode:
use_vector_set = TARGET_SSE2;
+ gen_vec_set_0 = TARGET_AVX512FP16 && one_var == 0
+ ? gen_vec_setv8hi_0 : NULL;
break;
case E_V8QImode:
use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1;
use_vector_set = TARGET_SSE4_1;
break;
case E_V32QImode:
+ use_vector_set = TARGET_AVX;
+ break;
case E_V16HImode:
use_vector_set = TARGET_AVX;
+ gen_vec_set_0 = TARGET_AVX512FP16 && one_var == 0
+ ? gen_vec_setv16hi_0 : NULL;
break;
case E_V8SImode:
use_vector_set = TARGET_AVX;
use_vector_set = TARGET_AVX512FP16 && one_var == 0;
gen_vec_set_0 = gen_vec_setv32hf_0;
break;
+ case E_V32HImode:
+ use_vector_set = TARGET_AVX512FP16 && one_var == 0;
+ gen_vec_set_0 = gen_vec_setv32hi_0;
default:
break;
}
(V32HF "TARGET_AVX512BW")])
;; Int-float size matches
+(define_mode_iterator VI2F [V8HI V16HI V32HI V8HF V16HF V32HF])
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
[(set (match_dup 0) (match_dup 1))])
(define_insn "avx512f_mov<ssescalarmodelower>_mask"
- [(set (match_operand:VF_128 0 "register_operand" "=v")
- (vec_merge:VF_128
- (vec_merge:VF_128
- (match_operand:VF_128 2 "register_operand" "v")
- (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
+ [(set (match_operand:VFH_128 0 "register_operand" "=v")
+ (vec_merge:VFH_128
+ (vec_merge:VFH_128
+ (match_operand:VFH_128 2 "register_operand" "v")
+ (match_operand:VFH_128 3 "nonimm_or_0_operand" "0C")
(match_operand:QI 4 "register_operand" "Yk"))
- (match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VFH_128 1 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512F"
"vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
(vec_merge:<ssevecmode>
(vec_merge:<ssevecmode>
(vec_duplicate:<ssevecmode>
- (match_operand:MODEF 1 "memory_operand"))
+ (match_operand:MODEFH 1 "memory_operand"))
(match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
(match_operand:QI 3 "register_operand"))
(match_dup 4)
(vec_merge:<ssevecmode>
(vec_merge:<ssevecmode>
(vec_duplicate:<ssevecmode>
- (match_operand:MODEF 1 "memory_operand" "m"))
+ (match_operand:MODEFH 1 "memory_operand" "m"))
(match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
(match_operand:QI 3 "register_operand" "Yk"))
(match_operand:<ssevecmode> 4 "const0_operand" "C")
(set_attr "mode" "<MODE>")])
(define_insn "avx512f_store<mode>_mask"
- [(set (match_operand:MODEF 0 "memory_operand" "=m")
- (if_then_else:MODEF
+ [(set (match_operand:MODEFH 0 "memory_operand" "=m")
+ (if_then_else:MODEFH
(and:QI (match_operand:QI 2 "register_operand" "Yk")
(const_int 1))
- (vec_select:MODEF
+ (vec_select:MODEFH
(match_operand:<ssevecmode> 1 "register_operand" "v")
(parallel [(const_int 0)]))
(match_dup 0)))]
;; vmovw also clears the higher bits
(define_insn "vec_set<mode>_0"
- [(set (match_operand:VF_AVX512FP16 0 "register_operand" "=v,v")
- (vec_merge:VF_AVX512FP16
- (vec_duplicate:VF_AVX512FP16
- (match_operand:HF 2 "nonimmediate_operand" "r,m"))
- (match_operand:VF_AVX512FP16 1 "const0_operand" "C,C")
+ [(set (match_operand:VI2F 0 "register_operand" "=v,v")
+ (vec_merge:VI2F
+ (vec_duplicate:VI2F
+ (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m"))
+ (match_operand:VI2F 1 "const0_operand" "C,C")
(const_int 1)))]
"TARGET_AVX512FP16"
"@