1 /* Copyright (C) 2019-2024 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
28 #ifndef _AVX512FP16INTRIN_H_INCLUDED
29 #define _AVX512FP16INTRIN_H_INCLUDED
31 #if !defined (__AVX512FP16__) || defined (__EVEX512__)
32 #pragma GCC push_options
33 #pragma GCC target("avx512fp16,no-evex512")
34 #define __DISABLE_AVX512FP16__
35 #endif /* __AVX512FP16__ */
37 /* Internal data types for implementing the intrinsics. */
38 typedef _Float16 __v8hf
__attribute__ ((__vector_size__ (16)));
39 typedef _Float16 __v16hf
__attribute__ ((__vector_size__ (32)));
41 /* The Intel API is flexible enough that we must allow aliasing with other
42 vector types, and their scalar components. */
43 typedef _Float16 __m128h
__attribute__ ((__vector_size__ (16), __may_alias__
));
44 typedef _Float16 __m256h
__attribute__ ((__vector_size__ (32), __may_alias__
));
46 /* Unaligned version of the same type. */
47 typedef _Float16 __m128h_u
__attribute__ ((__vector_size__ (16), \
48 __may_alias__
, __aligned__ (1)));
49 typedef _Float16 __m256h_u
__attribute__ ((__vector_size__ (32), \
50 __may_alias__
, __aligned__ (1)));
52 extern __inline __m128h
53 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
54 _mm_set_ph (_Float16 __A7
, _Float16 __A6
, _Float16 __A5
,
55 _Float16 __A4
, _Float16 __A3
, _Float16 __A2
,
56 _Float16 __A1
, _Float16 __A0
)
58 return __extension__ (__m128h
)(__v8hf
){ __A0
, __A1
, __A2
, __A3
,
59 __A4
, __A5
, __A6
, __A7
};
62 extern __inline __m256h
63 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
64 _mm256_set_ph (_Float16 __A15
, _Float16 __A14
, _Float16 __A13
,
65 _Float16 __A12
, _Float16 __A11
, _Float16 __A10
,
66 _Float16 __A9
, _Float16 __A8
, _Float16 __A7
,
67 _Float16 __A6
, _Float16 __A5
, _Float16 __A4
,
68 _Float16 __A3
, _Float16 __A2
, _Float16 __A1
,
71 return __extension__ (__m256h
)(__v16hf
){ __A0
, __A1
, __A2
, __A3
,
72 __A4
, __A5
, __A6
, __A7
,
73 __A8
, __A9
, __A10
, __A11
,
74 __A12
, __A13
, __A14
, __A15
};
77 /* Create vectors of elements in the reversed order from _mm_set_ph
78 and _mm256_set_ph functions. */
79 extern __inline __m128h
80 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
81 _mm_setr_ph (_Float16 __A0
, _Float16 __A1
, _Float16 __A2
,
82 _Float16 __A3
, _Float16 __A4
, _Float16 __A5
,
83 _Float16 __A6
, _Float16 __A7
)
85 return _mm_set_ph (__A7
, __A6
, __A5
, __A4
, __A3
, __A2
, __A1
, __A0
);
88 extern __inline __m256h
89 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
90 _mm256_setr_ph (_Float16 __A0
, _Float16 __A1
, _Float16 __A2
,
91 _Float16 __A3
, _Float16 __A4
, _Float16 __A5
,
92 _Float16 __A6
, _Float16 __A7
, _Float16 __A8
,
93 _Float16 __A9
, _Float16 __A10
, _Float16 __A11
,
94 _Float16 __A12
, _Float16 __A13
, _Float16 __A14
,
97 return _mm256_set_ph (__A15
, __A14
, __A13
, __A12
, __A11
, __A10
, __A9
,
98 __A8
, __A7
, __A6
, __A5
, __A4
, __A3
, __A2
, __A1
,
102 /* Broadcast _Float16 to vector. */
103 extern __inline __m128h
104 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
105 _mm_set1_ph (_Float16 __A
)
107 return _mm_set_ph (__A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
);
110 extern __inline __m256h
111 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
112 _mm256_set1_ph (_Float16 __A
)
114 return _mm256_set_ph (__A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
,
115 __A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
);
118 /* Create a vector with all zeros. */
119 extern __inline __m128h
120 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
121 _mm_setzero_ph (void)
123 return _mm_set1_ph (0.0f16
);
126 extern __inline __m256h
127 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
128 _mm256_setzero_ph (void)
130 return _mm256_set1_ph (0.0f16
);
133 extern __inline __m128h
134 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
135 _mm_undefined_ph (void)
137 #pragma GCC diagnostic push
138 #pragma GCC diagnostic ignored "-Winit-self"
140 #pragma GCC diagnostic pop
144 extern __inline __m256h
145 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
146 _mm256_undefined_ph (void)
148 #pragma GCC diagnostic push
149 #pragma GCC diagnostic ignored "-Winit-self"
151 #pragma GCC diagnostic pop
155 extern __inline _Float16
156 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
157 _mm256_cvtsh_h (__m256h __A
)
162 extern __inline __m256h
163 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
164 _mm256_load_ph (void const *__P
)
166 return *(const __m256h
*) __P
;
169 extern __inline __m128h
170 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
171 _mm_load_ph (void const *__P
)
173 return *(const __m128h
*) __P
;
176 extern __inline __m256h
177 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
178 _mm256_loadu_ph (void const *__P
)
180 return *(const __m256h_u
*) __P
;
183 extern __inline __m128h
184 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
185 _mm_loadu_ph (void const *__P
)
187 return *(const __m128h_u
*) __P
;
191 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
192 _mm256_store_ph (void *__P
, __m256h __A
)
194 *(__m256h
*) __P
= __A
;
198 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
199 _mm_store_ph (void *__P
, __m128h __A
)
201 *(__m128h
*) __P
= __A
;
205 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
206 _mm256_storeu_ph (void *__P
, __m256h __A
)
208 *(__m256h_u
*) __P
= __A
;
212 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
213 _mm_storeu_ph (void *__P
, __m128h __A
)
215 *(__m128h_u
*) __P
= __A
;
218 /* Create a vector with element 0 as F and the rest zero. */
219 extern __inline __m128h
220 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
221 _mm_set_sh (_Float16 __F
)
223 return _mm_set_ph (0.0f16
, 0.0f16
, 0.0f16
, 0.0f16
, 0.0f16
, 0.0f16
, 0.0f16
,
227 /* Create a vector with element 0 as *P and the rest zero. */
228 extern __inline __m128h
229 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
230 _mm_load_sh (void const *__P
)
232 return _mm_set_ph (0.0f16
, 0.0f16
, 0.0f16
, 0.0f16
, 0.0f16
, 0.0f16
, 0.0f16
,
233 *(_Float16
const *) __P
);
236 /* Stores the lower _Float16 value. */
238 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
239 _mm_store_sh (void *__P
, __m128h __A
)
241 *(_Float16
*) __P
= ((__v8hf
)__A
)[0];
244 /* Intrinsics of v[add,sub,mul,div]sh. */
245 extern __inline __m128h
246 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
247 _mm_add_sh (__m128h __A
, __m128h __B
)
253 extern __inline __m128h
254 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
255 _mm_mask_add_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
257 return __builtin_ia32_addsh_mask (__C
, __D
, __A
, __B
);
260 extern __inline __m128h
261 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
262 _mm_maskz_add_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
264 return __builtin_ia32_addsh_mask (__B
, __C
, _mm_setzero_ph (),
268 extern __inline __m128h
269 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
270 _mm_sub_sh (__m128h __A
, __m128h __B
)
276 extern __inline __m128h
277 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
278 _mm_mask_sub_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
280 return __builtin_ia32_subsh_mask (__C
, __D
, __A
, __B
);
283 extern __inline __m128h
284 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
285 _mm_maskz_sub_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
287 return __builtin_ia32_subsh_mask (__B
, __C
, _mm_setzero_ph (),
291 extern __inline __m128h
292 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
293 _mm_mul_sh (__m128h __A
, __m128h __B
)
299 extern __inline __m128h
300 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
301 _mm_mask_mul_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
303 return __builtin_ia32_mulsh_mask (__C
, __D
, __A
, __B
);
306 extern __inline __m128h
307 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
308 _mm_maskz_mul_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
310 return __builtin_ia32_mulsh_mask (__B
, __C
, _mm_setzero_ph (), __A
);
313 extern __inline __m128h
314 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
315 _mm_div_sh (__m128h __A
, __m128h __B
)
321 extern __inline __m128h
322 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
323 _mm_mask_div_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
325 return __builtin_ia32_divsh_mask (__C
, __D
, __A
, __B
);
328 extern __inline __m128h
329 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
330 _mm_maskz_div_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
332 return __builtin_ia32_divsh_mask (__B
, __C
, _mm_setzero_ph (),
/* Rounding-control variants of v[add,sub,mul,div]sh.  The trailing int
   argument selects the rounding mode and must be a compile-time
   constant, hence the macro fallback when !__OPTIMIZE__.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_addsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_addsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_subsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_subsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_mulsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_mulsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_divsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_divsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}
#else
#define _mm_add_round_sh(A, B, C)					\
  ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B),			\
					     _mm_setzero_ph (),		\
					     (__mmask8)-1, (C)))

#define _mm_mask_add_round_sh(A, B, C, D, E)				\
  ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_add_round_sh(A, B, C, D)				\
  ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C),			\
					     _mm_setzero_ph (),		\
					     (A), (D)))

#define _mm_sub_round_sh(A, B, C)					\
  ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B),			\
					     _mm_setzero_ph (),		\
					     (__mmask8)-1, (C)))

#define _mm_mask_sub_round_sh(A, B, C, D, E)				\
  ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_sub_round_sh(A, B, C, D)				\
  ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C),			\
					     _mm_setzero_ph (),		\
					     (A), (D)))

#define _mm_mul_round_sh(A, B, C)					\
  ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B),			\
					     _mm_setzero_ph (),		\
					     (__mmask8)-1, (C)))

#define _mm_mask_mul_round_sh(A, B, C, D, E)				\
  ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_mul_round_sh(A, B, C, D)				\
  ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C),			\
					     _mm_setzero_ph (),		\
					     (A), (D)))

#define _mm_div_round_sh(A, B, C)					\
  ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B),			\
					     _mm_setzero_ph (),		\
					     (__mmask8)-1, (C)))

#define _mm_mask_div_round_sh(A, B, C, D, E)				\
  ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_div_round_sh(A, B, C, D)				\
  ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C),			\
					     _mm_setzero_ph (),		\
					     (A), (D)))
#endif /* __OPTIMIZE__ */
498 /* Intrinsic vmaxsh vminsh. */
499 extern __inline __m128h
500 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
501 _mm_max_sh (__m128h __A
, __m128h __B
)
503 __A
[0] = __A
[0] > __B
[0] ? __A
[0] : __B
[0];
507 extern __inline __m128h
508 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
509 _mm_mask_max_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
511 return __builtin_ia32_maxsh_mask (__C
, __D
, __A
, __B
);
514 extern __inline __m128h
515 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
516 _mm_maskz_max_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
518 return __builtin_ia32_maxsh_mask (__B
, __C
, _mm_setzero_ph (),
522 extern __inline __m128h
523 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
524 _mm_min_sh (__m128h __A
, __m128h __B
)
526 __A
[0] = __A
[0] < __B
[0] ? __A
[0] : __B
[0];
530 extern __inline __m128h
531 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
532 _mm_mask_min_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
534 return __builtin_ia32_minsh_mask (__C
, __D
, __A
, __B
);
537 extern __inline __m128h
538 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
539 _mm_maskz_min_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
541 return __builtin_ia32_minsh_mask (__B
, __C
, _mm_setzero_ph (),
/* Rounding-control variants of vmaxsh/vminsh; macro fallback when the
   rounding argument cannot be proven constant.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_maxsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_maxsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_minsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_minsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}
#else
#define _mm_max_round_sh(A, B, C)				\
  (__builtin_ia32_maxsh_mask_round ((A), (B),			\
				    _mm_setzero_ph (),		\
				    (__mmask8)-1, (C)))

#define _mm_mask_max_round_sh(A, B, C, D, E)			\
  (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_max_round_sh(A, B, C, D)			\
  (__builtin_ia32_maxsh_mask_round ((B), (C),			\
				    _mm_setzero_ph (),		\
				    (A), (D)))

#define _mm_min_round_sh(A, B, C)				\
  (__builtin_ia32_minsh_mask_round ((A), (B),			\
				    _mm_setzero_ph (),		\
				    (__mmask8)-1, (C)))

#define _mm_mask_min_round_sh(A, B, C, D, E)			\
  (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_min_round_sh(A, B, C, D)			\
  (__builtin_ia32_minsh_mask_round ((B), (C),			\
				    _mm_setzero_ph (),		\
				    (A), (D)))
#endif /* __OPTIMIZE__ */
/* Intrinsics vcmpsh.  The predicate __C/__D must be a compile-time
   constant, hence the macro fallback when !__OPTIMIZE__.  */
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
{
  return (__mmask8)
    __builtin_ia32_cmpsh_mask_round (__A, __B,
				     __C, (__mmask8) -1,
				     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
		      const int __D)
{
  return (__mmask8)
    __builtin_ia32_cmpsh_mask_round (__B, __C,
				     __D, __A,
				     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
		       const int __D)
{
  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
						     __C, (__mmask8) -1,
						     __D);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
			    const int __D, const int __E)
{
  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
						     __D, __A,
						     __E);
}
#else
#define _mm_cmp_sh_mask(A, B, C)					\
  (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1),		\
				    (_MM_FROUND_CUR_DIRECTION)))

#define _mm_mask_cmp_sh_mask(A, B, C, D)				\
  (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A),			\
				    (_MM_FROUND_CUR_DIRECTION)))

#define _mm_cmp_round_sh_mask(A, B, C, D)				\
  (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))

#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E)			\
  (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
#endif /* __OPTIMIZE__ */
689 /* Intrinsics vcomish. */
691 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
692 _mm_comieq_sh (__m128h __A
, __m128h __B
)
694 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_EQ_OS
,
696 _MM_FROUND_CUR_DIRECTION
);
700 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
701 _mm_comilt_sh (__m128h __A
, __m128h __B
)
703 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_LT_OS
,
705 _MM_FROUND_CUR_DIRECTION
);
709 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
710 _mm_comile_sh (__m128h __A
, __m128h __B
)
712 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_LE_OS
,
714 _MM_FROUND_CUR_DIRECTION
);
718 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
719 _mm_comigt_sh (__m128h __A
, __m128h __B
)
721 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_GT_OS
,
723 _MM_FROUND_CUR_DIRECTION
);
727 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
728 _mm_comige_sh (__m128h __A
, __m128h __B
)
730 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_GE_OS
,
732 _MM_FROUND_CUR_DIRECTION
);
736 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
737 _mm_comineq_sh (__m128h __A
, __m128h __B
)
739 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_NEQ_US
,
741 _MM_FROUND_CUR_DIRECTION
);
745 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
746 _mm_ucomieq_sh (__m128h __A
, __m128h __B
)
748 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_EQ_OQ
,
750 _MM_FROUND_CUR_DIRECTION
);
754 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
755 _mm_ucomilt_sh (__m128h __A
, __m128h __B
)
757 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_LT_OQ
,
759 _MM_FROUND_CUR_DIRECTION
);
763 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
764 _mm_ucomile_sh (__m128h __A
, __m128h __B
)
766 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_LE_OQ
,
768 _MM_FROUND_CUR_DIRECTION
);
772 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
773 _mm_ucomigt_sh (__m128h __A
, __m128h __B
)
775 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_GT_OQ
,
777 _MM_FROUND_CUR_DIRECTION
);
781 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
782 _mm_ucomige_sh (__m128h __A
, __m128h __B
)
784 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_GE_OQ
,
786 _MM_FROUND_CUR_DIRECTION
);
790 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
791 _mm_ucomineq_sh (__m128h __A
, __m128h __B
)
793 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, _CMP_NEQ_UQ
,
795 _MM_FROUND_CUR_DIRECTION
);
800 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
801 _mm_comi_sh (__m128h __A
, __m128h __B
, const int __P
)
803 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, __P
,
805 _MM_FROUND_CUR_DIRECTION
);
809 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
810 _mm_comi_round_sh (__m128h __A
, __m128h __B
, const int __P
, const int __R
)
812 return __builtin_ia32_cmpsh_mask_round (__A
, __B
, __P
,
817 #define _mm_comi_round_sh(A, B, P, R) \
818 (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
819 #define _mm_comi_sh(A, B, P) \
820 (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), \
821 _MM_FROUND_CUR_DIRECTION))
823 #endif /* __OPTIMIZE__ */
825 /* Intrinsics vsqrtsh. */
826 extern __inline __m128h
827 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
828 _mm_sqrt_sh (__m128h __A
, __m128h __B
)
830 return __builtin_ia32_sqrtsh_mask_round (__B
, __A
,
833 _MM_FROUND_CUR_DIRECTION
);
836 extern __inline __m128h
837 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
838 _mm_mask_sqrt_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
840 return __builtin_ia32_sqrtsh_mask_round (__D
, __C
, __A
, __B
,
841 _MM_FROUND_CUR_DIRECTION
);
844 extern __inline __m128h
845 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
846 _mm_maskz_sqrt_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
848 return __builtin_ia32_sqrtsh_mask_round (__C
, __B
,
850 __A
, _MM_FROUND_CUR_DIRECTION
);
854 extern __inline __m128h
855 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
856 _mm_sqrt_round_sh (__m128h __A
, __m128h __B
, const int __C
)
858 return __builtin_ia32_sqrtsh_mask_round (__B
, __A
,
863 extern __inline __m128h
864 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
865 _mm_mask_sqrt_round_sh (__m128h __A
, __mmask8 __B
, __m128h __C
,
866 __m128h __D
, const int __E
)
868 return __builtin_ia32_sqrtsh_mask_round (__D
, __C
, __A
, __B
,
872 extern __inline __m128h
873 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
874 _mm_maskz_sqrt_round_sh (__mmask8 __A
, __m128h __B
, __m128h __C
,
877 return __builtin_ia32_sqrtsh_mask_round (__C
, __B
,
883 #define _mm_sqrt_round_sh(A, B, C) \
884 (__builtin_ia32_sqrtsh_mask_round ((B), (A), \
888 #define _mm_mask_sqrt_round_sh(A, B, C, D, E) \
889 (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
891 #define _mm_maskz_sqrt_round_sh(A, B, C, D) \
892 (__builtin_ia32_sqrtsh_mask_round ((C), (B), \
896 #endif /* __OPTIMIZE__ */
898 /* Intrinsics vrsqrtsh. */
899 extern __inline __m128h
900 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
901 _mm_rsqrt_sh (__m128h __A
, __m128h __B
)
903 return __builtin_ia32_rsqrtsh_mask (__B
, __A
, _mm_setzero_ph (),
907 extern __inline __m128h
908 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
909 _mm_mask_rsqrt_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
911 return __builtin_ia32_rsqrtsh_mask (__D
, __C
, __A
, __B
);
914 extern __inline __m128h
915 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
916 _mm_maskz_rsqrt_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
918 return __builtin_ia32_rsqrtsh_mask (__C
, __B
, _mm_setzero_ph (),
922 /* Intrinsics vrcpsh. */
923 extern __inline __m128h
924 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
925 _mm_rcp_sh (__m128h __A
, __m128h __B
)
927 return __builtin_ia32_rcpsh_mask (__B
, __A
, _mm_setzero_ph (),
931 extern __inline __m128h
932 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
933 _mm_mask_rcp_sh (__m128h __A
, __mmask32 __B
, __m128h __C
, __m128h __D
)
935 return __builtin_ia32_rcpsh_mask (__D
, __C
, __A
, __B
);
938 extern __inline __m128h
939 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
940 _mm_maskz_rcp_sh (__mmask32 __A
, __m128h __B
, __m128h __C
)
942 return __builtin_ia32_rcpsh_mask (__C
, __B
, _mm_setzero_ph (),
946 /* Intrinsics vscalefsh. */
947 extern __inline __m128h
948 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
949 _mm_scalef_sh (__m128h __A
, __m128h __B
)
951 return __builtin_ia32_scalefsh_mask_round (__A
, __B
,
954 _MM_FROUND_CUR_DIRECTION
);
957 extern __inline __m128h
958 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
959 _mm_mask_scalef_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
961 return __builtin_ia32_scalefsh_mask_round (__C
, __D
, __A
, __B
,
962 _MM_FROUND_CUR_DIRECTION
);
965 extern __inline __m128h
966 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
967 _mm_maskz_scalef_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
969 return __builtin_ia32_scalefsh_mask_round (__B
, __C
,
972 _MM_FROUND_CUR_DIRECTION
);
976 extern __inline __m128h
977 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
978 _mm_scalef_round_sh (__m128h __A
, __m128h __B
, const int __C
)
980 return __builtin_ia32_scalefsh_mask_round (__A
, __B
,
985 extern __inline __m128h
986 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
987 _mm_mask_scalef_round_sh (__m128h __A
, __mmask8 __B
, __m128h __C
,
988 __m128h __D
, const int __E
)
990 return __builtin_ia32_scalefsh_mask_round (__C
, __D
, __A
, __B
,
994 extern __inline __m128h
995 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
996 _mm_maskz_scalef_round_sh (__mmask8 __A
, __m128h __B
, __m128h __C
,
999 return __builtin_ia32_scalefsh_mask_round (__B
, __C
,
1005 #define _mm_scalef_round_sh(A, B, C) \
1006 (__builtin_ia32_scalefsh_mask_round ((A), (B), \
1007 _mm_setzero_ph (), \
1010 #define _mm_mask_scalef_round_sh(A, B, C, D, E) \
1011 (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
1013 #define _mm_maskz_scalef_round_sh(A, B, C, D) \
1014 (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), \
1017 #endif /* __OPTIMIZE__ */
/* Intrinsics vreducesh.  The immediate __C/__D selects the reduction
   granularity and must be a compile-time constant.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_sh (__m128h __A, __m128h __B, int __C)
{
  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
					     _mm_setzero_ph (),
					     (__mmask8) -1,
					     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
		    __m128h __D, int __E)
{
  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
					     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
{
  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
					     _mm_setzero_ph (), __A,
					     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
{
  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
					     _mm_setzero_ph (),
					     (__mmask8) -1, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
			  __m128h __D, int __E, const int __F)
{
  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
					     __B, __F);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			   int __D, const int __E)
{
  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
					     _mm_setzero_ph (),
					     __A, __E);
}
#else
#define _mm_reduce_sh(A, B, C)						\
  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),			\
				       _mm_setzero_ph (),		\
				       (__mmask8)-1,			\
				       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_sh(A, B, C, D, E)				\
  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B),		\
				       _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_sh(A, B, C, D)					\
  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),			\
				       _mm_setzero_ph (),		\
				       (A), _MM_FROUND_CUR_DIRECTION))

#define _mm_reduce_round_sh(A, B, C, D)					\
  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),			\
				       _mm_setzero_ph (),		\
				       (__mmask8)-1, (D)))

#define _mm_mask_reduce_round_sh(A, B, C, D, E, F)			\
  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))

#define _mm_maskz_reduce_round_sh(A, B, C, D, E)			\
  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),			\
				       _mm_setzero_ph (),		\
				       (A), (E)))
#endif /* __OPTIMIZE__ */
/* Intrinsics vrndscalesh.  Round element 0 to the scaled precision
   selected by the immediate __C/__D.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
{
  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
					       _mm_setzero_ph (),
					       (__mmask8) -1,
					       _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
			__m128h __D, int __E)
{
  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
					       _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
{
  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
					       _mm_setzero_ph (), __A,
					       _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
{
  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
					       _mm_setzero_ph (),
					       (__mmask8) -1, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
			      __m128h __D, int __E, const int __F)
{
  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
					       __A, __B, __F);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			       int __D, const int __E)
{
  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
					       _mm_setzero_ph (),
					       __A, __E);
}
#else
#define _mm_roundscale_sh(A, B, C)					\
  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),			\
					 _mm_setzero_ph (),		\
					 (__mmask8)-1,			\
					 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sh(A, B, C, D, E)				\
  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B),	\
					 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_sh(A, B, C, D)				\
  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),			\
					 _mm_setzero_ph (),		\
					 (A), _MM_FROUND_CUR_DIRECTION))

#define _mm_roundscale_round_sh(A, B, C, D)				\
  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),			\
					 _mm_setzero_ph (),		\
					 (__mmask8)-1, (D)))

#define _mm_mask_roundscale_round_sh(A, B, C, D, E, F)			\
  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))

#define _mm_maskz_roundscale_round_sh(A, B, C, D, E)			\
  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),			\
					 _mm_setzero_ph (),		\
					 (A), (E)))
#endif /* __OPTIMIZE__ */
/* Intrinsics vfpclasssh.  Test the low _Float16 element against the
   classes selected by the immediate; result is a 1-bit mask.
   Reconstructed: the extracted text had dropped the __OPTIMIZE__ guards,
   braces and closing arguments; a stray line-continuation backslash after
   the first macro was also removed.  */
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_sh_mask (__m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
						   (__mmask8) -1);
}

/* Masked form: __U pre-masks the classification result.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
						   __U);
}

#else
#define _mm_fpclass_sh_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X),	\
					     (int) (C), (__mmask8) (-1)))

#define _mm_mask_fpclass_sh_mask(U, X, C)				\
  ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X),	\
					     (int) (C), (__mmask8) (U)))

#endif /* __OPTIMIZE__ */
1226 /* Intrinsics vgetexpsh. */
1227 extern __inline __m128h
1228 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1229 _mm_getexp_sh (__m128h __A
, __m128h __B
)
1232 __builtin_ia32_getexpsh_mask_round ((__v8hf
) __A
, (__v8hf
) __B
,
1233 (__v8hf
) _mm_setzero_ph (),
1235 _MM_FROUND_CUR_DIRECTION
);
1238 extern __inline __m128h
1239 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1240 _mm_mask_getexp_sh (__m128h __W
, __mmask8 __U
, __m128h __A
, __m128h __B
)
1243 __builtin_ia32_getexpsh_mask_round ((__v8hf
) __A
, (__v8hf
) __B
,
1244 (__v8hf
) __W
, (__mmask8
) __U
,
1245 _MM_FROUND_CUR_DIRECTION
);
1248 extern __inline __m128h
1249 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1250 _mm_maskz_getexp_sh (__mmask8 __U
, __m128h __A
, __m128h __B
)
1253 __builtin_ia32_getexpsh_mask_round ((__v8hf
) __A
, (__v8hf
) __B
,
1254 (__v8hf
) _mm_setzero_ph (),
1256 _MM_FROUND_CUR_DIRECTION
);
1260 extern __inline __m128h
1261 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1262 _mm_getexp_round_sh (__m128h __A
, __m128h __B
, const int __R
)
1264 return (__m128h
) __builtin_ia32_getexpsh_mask_round ((__v8hf
) __A
,
1271 extern __inline __m128h
1272 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1273 _mm_mask_getexp_round_sh (__m128h __W
, __mmask8 __U
, __m128h __A
,
1274 __m128h __B
, const int __R
)
1276 return (__m128h
) __builtin_ia32_getexpsh_mask_round ((__v8hf
) __A
,
1279 (__mmask8
) __U
, __R
);
1282 extern __inline __m128h
1283 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1284 _mm_maskz_getexp_round_sh (__mmask8 __U
, __m128h __A
, __m128h __B
,
1287 return (__m128h
) __builtin_ia32_getexpsh_mask_round ((__v8hf
) __A
,
1291 (__mmask8
) __U
, __R
);
1295 #define _mm_getexp_round_sh(A, B, R) \
1296 ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \
1297 (__v8hf)(__m128h)(B), \
1298 (__v8hf)_mm_setzero_ph(), \
1301 #define _mm_mask_getexp_round_sh(W, U, A, B, C) \
1302 (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
1304 #define _mm_maskz_getexp_round_sh(U, A, B, C) \
1305 (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, \
1306 (__v8hf)_mm_setzero_ph(), \
1309 #endif /* __OPTIMIZE__ */
/* Intrinsics vgetmantsh.  Extract the normalized mantissa of the low
   _Float16 element; the interval/sign controls __C and __D are packed
   into one immediate as (__D << 2) | __C.  Reconstructed: the extracted
   text had dropped braces, immediate/mask argument lines and the
   __OPTIMIZE__ guards.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_sh (__m128h __A, __m128h __B,
		_MM_MANTISSA_NORM_ENUM __C,
		_MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128h)
    __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
					 (__D << 2) | __C, _mm_setzero_ph (),
					 (__mmask8) -1,
					 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A,
		     __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
		     _MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128h)
    __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
					 (__D << 2) | __C, (__v8hf) __W,
					 __U, _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
		      _MM_MANTISSA_NORM_ENUM __C,
		      _MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128h)
    __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
					 (__D << 2) | __C,
					 (__v8hf) _mm_setzero_ph(),
					 __U, _MM_FROUND_CUR_DIRECTION);
}

/* _round variants with explicit rounding control __R.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_round_sh (__m128h __A, __m128h __B,
		      _MM_MANTISSA_NORM_ENUM __C,
		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
{
  return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
							(__v8hf) __B,
							(__D << 2) | __C,
							(__v8hf)
							_mm_setzero_ph (),
							(__mmask8) -1,
							__R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
			   __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
			   _MM_MANTISSA_SIGN_ENUM __D, const int __R)
{
  return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
							(__v8hf) __B,
							(__D << 2) | __C,
							(__v8hf) __W,
							__U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
			    _MM_MANTISSA_NORM_ENUM __C,
			    _MM_MANTISSA_SIGN_ENUM __D, const int __R)
{
  return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
							(__v8hf) __B,
							(__D << 2) | __C,
							(__v8hf)
							_mm_setzero_ph (),
							__U, __R);
}

#else
/* Macro forms so the immediates stay literal without __OPTIMIZE__.  */
#define _mm_getmant_sh(X, Y, C, D)					\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)	\
						 _mm_setzero_ph (),	\
						 (__mmask8)-1,		\
						 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sh(W, U, X, Y, C, D)				\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)(W),	\
						 (__mmask8)(U),		\
						 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_sh(U, X, Y, C, D)				\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)	\
						 _mm_setzero_ph (),	\
						 (__mmask8)(U),		\
						 _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_round_sh(X, Y, C, D, R)				\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)	\
						 _mm_setzero_ph (),	\
						 (__mmask8)-1,		\
						 (R)))

#define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R)			\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)(W),	\
						 (__mmask8)(U), (R)))

#define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R)			\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)	\
						 _mm_setzero_ph (),	\
						 (__mmask8)(U),		\
						 (R)))

#endif /* __OPTIMIZE__ */
1447 /* Intrinsics vmovw. */
1448 extern __inline __m128i
1449 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1450 _mm_cvtsi16_si128 (short __A
)
1452 return _mm_avx512_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A
);
1455 extern __inline
short
1456 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1457 _mm_cvtsi128_si16 (__m128i __A
)
1459 return __builtin_ia32_vec_ext_v8hi ((__v8hi
)__A
, 0);
1462 /* Intrinsics vmovsh. */
1463 extern __inline __m128h
1464 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1465 _mm_mask_load_sh (__m128h __A
, __mmask8 __B
, _Float16
const* __C
)
1467 return __builtin_ia32_loadsh_mask (__C
, __A
, __B
);
1470 extern __inline __m128h
1471 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1472 _mm_maskz_load_sh (__mmask8 __A
, _Float16
const* __B
)
1474 return __builtin_ia32_loadsh_mask (__B
, _mm_setzero_ph (), __A
);
1477 extern __inline
void
1478 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1479 _mm_mask_store_sh (_Float16
const* __A
, __mmask8 __B
, __m128h __C
)
1481 __builtin_ia32_storesh_mask (__A
, __C
, __B
);
1484 extern __inline __m128h
1485 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1486 _mm_move_sh (__m128h __A
, __m128h __B
)
1492 extern __inline __m128h
1493 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1494 _mm_mask_move_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
1496 return __builtin_ia32_vmovsh_mask (__C
, __D
, __A
, __B
);
1499 extern __inline __m128h
1500 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1501 _mm_maskz_move_sh (__mmask8 __A
, __m128h __B
, __m128h __C
)
1503 return __builtin_ia32_vmovsh_mask (__B
, __C
, _mm_setzero_ph (), __A
);
1506 /* Intrinsics vcvtsh2si, vcvtsh2us. */
1508 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1509 _mm_cvtsh_i32 (__m128h __A
)
1511 return (int) __builtin_ia32_vcvtsh2si32_round (__A
, _MM_FROUND_CUR_DIRECTION
);
1514 extern __inline
unsigned
1515 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1516 _mm_cvtsh_u32 (__m128h __A
)
1518 return (int) __builtin_ia32_vcvtsh2usi32_round (__A
,
1519 _MM_FROUND_CUR_DIRECTION
);
1524 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1525 _mm_cvt_roundsh_i32 (__m128h __A
, const int __R
)
1527 return (int) __builtin_ia32_vcvtsh2si32_round (__A
, __R
);
1530 extern __inline
unsigned
1531 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1532 _mm_cvt_roundsh_u32 (__m128h __A
, const int __R
)
1534 return (int) __builtin_ia32_vcvtsh2usi32_round (__A
, __R
);
1538 #define _mm_cvt_roundsh_i32(A, B) \
1539 ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
1540 #define _mm_cvt_roundsh_u32(A, B) \
1541 ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
1543 #endif /* __OPTIMIZE__ */
1546 extern __inline
long long
1547 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1548 _mm_cvtsh_i64 (__m128h __A
)
1551 __builtin_ia32_vcvtsh2si64_round (__A
, _MM_FROUND_CUR_DIRECTION
);
1554 extern __inline
unsigned long long
1555 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1556 _mm_cvtsh_u64 (__m128h __A
)
1559 __builtin_ia32_vcvtsh2usi64_round (__A
, _MM_FROUND_CUR_DIRECTION
);
1563 extern __inline
long long
1564 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1565 _mm_cvt_roundsh_i64 (__m128h __A
, const int __R
)
1567 return (long long) __builtin_ia32_vcvtsh2si64_round (__A
, __R
);
1570 extern __inline
unsigned long long
1571 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1572 _mm_cvt_roundsh_u64 (__m128h __A
, const int __R
)
1574 return (long long) __builtin_ia32_vcvtsh2usi64_round (__A
, __R
);
1578 #define _mm_cvt_roundsh_i64(A, B) \
1579 ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
1580 #define _mm_cvt_roundsh_u64(A, B) \
1581 ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
1583 #endif /* __OPTIMIZE__ */
1584 #endif /* __x86_64__ */
1586 /* Intrinsics vcvtsi2sh, vcvtusi2sh. */
1587 extern __inline __m128h
1588 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1589 _mm_cvti32_sh (__m128h __A
, int __B
)
1591 return __builtin_ia32_vcvtsi2sh32_round (__A
, __B
, _MM_FROUND_CUR_DIRECTION
);
1594 extern __inline __m128h
1595 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1596 _mm_cvtu32_sh (__m128h __A
, unsigned int __B
)
1598 return __builtin_ia32_vcvtusi2sh32_round (__A
, __B
, _MM_FROUND_CUR_DIRECTION
);
1602 extern __inline __m128h
1603 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1604 _mm_cvt_roundi32_sh (__m128h __A
, int __B
, const int __R
)
1606 return __builtin_ia32_vcvtsi2sh32_round (__A
, __B
, __R
);
1609 extern __inline __m128h
1610 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1611 _mm_cvt_roundu32_sh (__m128h __A
, unsigned int __B
, const int __R
)
1613 return __builtin_ia32_vcvtusi2sh32_round (__A
, __B
, __R
);
1617 #define _mm_cvt_roundi32_sh(A, B, C) \
1618 (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
1619 #define _mm_cvt_roundu32_sh(A, B, C) \
1620 (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
1622 #endif /* __OPTIMIZE__ */
1625 extern __inline __m128h
1626 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1627 _mm_cvti64_sh (__m128h __A
, long long __B
)
1629 return __builtin_ia32_vcvtsi2sh64_round (__A
, __B
, _MM_FROUND_CUR_DIRECTION
);
1632 extern __inline __m128h
1633 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1634 _mm_cvtu64_sh (__m128h __A
, unsigned long long __B
)
1636 return __builtin_ia32_vcvtusi2sh64_round (__A
, __B
, _MM_FROUND_CUR_DIRECTION
);
1640 extern __inline __m128h
1641 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1642 _mm_cvt_roundi64_sh (__m128h __A
, long long __B
, const int __R
)
1644 return __builtin_ia32_vcvtsi2sh64_round (__A
, __B
, __R
);
1647 extern __inline __m128h
1648 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1649 _mm_cvt_roundu64_sh (__m128h __A
, unsigned long long __B
, const int __R
)
1651 return __builtin_ia32_vcvtusi2sh64_round (__A
, __B
, __R
);
1655 #define _mm_cvt_roundi64_sh(A, B, C) \
1656 (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
1657 #define _mm_cvt_roundu64_sh(A, B, C) \
1658 (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
1660 #endif /* __OPTIMIZE__ */
1661 #endif /* __x86_64__ */
1663 /* Intrinsics vcvttsh2si, vcvttsh2us. */
1665 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1666 _mm_cvttsh_i32 (__m128h __A
)
1669 __builtin_ia32_vcvttsh2si32_round (__A
, _MM_FROUND_CUR_DIRECTION
);
1672 extern __inline
unsigned
1673 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1674 _mm_cvttsh_u32 (__m128h __A
)
1677 __builtin_ia32_vcvttsh2usi32_round (__A
, _MM_FROUND_CUR_DIRECTION
);
1682 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1683 _mm_cvtt_roundsh_i32 (__m128h __A
, const int __R
)
1685 return (int) __builtin_ia32_vcvttsh2si32_round (__A
, __R
);
1688 extern __inline
unsigned
1689 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1690 _mm_cvtt_roundsh_u32 (__m128h __A
, const int __R
)
1692 return (int) __builtin_ia32_vcvttsh2usi32_round (__A
, __R
);
1696 #define _mm_cvtt_roundsh_i32(A, B) \
1697 ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
1698 #define _mm_cvtt_roundsh_u32(A, B) \
1699 ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
1701 #endif /* __OPTIMIZE__ */
1704 extern __inline
long long
1705 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1706 _mm_cvttsh_i64 (__m128h __A
)
1709 __builtin_ia32_vcvttsh2si64_round (__A
, _MM_FROUND_CUR_DIRECTION
);
1712 extern __inline
unsigned long long
1713 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1714 _mm_cvttsh_u64 (__m128h __A
)
1717 __builtin_ia32_vcvttsh2usi64_round (__A
, _MM_FROUND_CUR_DIRECTION
);
1721 extern __inline
long long
1722 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1723 _mm_cvtt_roundsh_i64 (__m128h __A
, const int __R
)
1725 return (long long) __builtin_ia32_vcvttsh2si64_round (__A
, __R
);
1728 extern __inline
unsigned long long
1729 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1730 _mm_cvtt_roundsh_u64 (__m128h __A
, const int __R
)
1732 return (long long) __builtin_ia32_vcvttsh2usi64_round (__A
, __R
);
1736 #define _mm_cvtt_roundsh_i64(A, B) \
1737 ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
1738 #define _mm_cvtt_roundsh_u64(A, B) \
1739 ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
1741 #endif /* __OPTIMIZE__ */
1742 #endif /* __x86_64__ */
1744 /* Intrinsics vcvtsh2ss, vcvtsh2sd. */
1745 extern __inline __m128
1746 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1747 _mm_cvtsh_ss (__m128 __A
, __m128h __B
)
1749 return __builtin_ia32_vcvtsh2ss_mask_round (__B
, __A
,
1750 _mm_avx512_setzero_ps (),
1752 _MM_FROUND_CUR_DIRECTION
);
1755 extern __inline __m128
1756 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1757 _mm_mask_cvtsh_ss (__m128 __A
, __mmask8 __B
, __m128 __C
,
1760 return __builtin_ia32_vcvtsh2ss_mask_round (__D
, __C
, __A
, __B
,
1761 _MM_FROUND_CUR_DIRECTION
);
1764 extern __inline __m128
1765 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1766 _mm_maskz_cvtsh_ss (__mmask8 __A
, __m128 __B
,
1769 return __builtin_ia32_vcvtsh2ss_mask_round (__C
, __B
,
1770 _mm_avx512_setzero_ps (),
1771 __A
, _MM_FROUND_CUR_DIRECTION
);
1774 extern __inline __m128d
1775 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1776 _mm_cvtsh_sd (__m128d __A
, __m128h __B
)
1778 return __builtin_ia32_vcvtsh2sd_mask_round (__B
, __A
,
1779 _mm_avx512_setzero_pd (),
1781 _MM_FROUND_CUR_DIRECTION
);
1784 extern __inline __m128d
1785 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1786 _mm_mask_cvtsh_sd (__m128d __A
, __mmask8 __B
, __m128d __C
,
1789 return __builtin_ia32_vcvtsh2sd_mask_round (__D
, __C
, __A
, __B
,
1790 _MM_FROUND_CUR_DIRECTION
);
1793 extern __inline __m128d
1794 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1795 _mm_maskz_cvtsh_sd (__mmask8 __A
, __m128d __B
, __m128h __C
)
1797 return __builtin_ia32_vcvtsh2sd_mask_round (__C
, __B
,
1798 _mm_avx512_setzero_pd (),
1799 __A
, _MM_FROUND_CUR_DIRECTION
);
1803 extern __inline __m128
1804 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1805 _mm_cvt_roundsh_ss (__m128 __A
, __m128h __B
, const int __R
)
1807 return __builtin_ia32_vcvtsh2ss_mask_round (__B
, __A
,
1808 _mm_avx512_setzero_ps (),
1809 (__mmask8
) -1, __R
);
1812 extern __inline __m128
1813 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1814 _mm_mask_cvt_roundsh_ss (__m128 __A
, __mmask8 __B
, __m128 __C
,
1815 __m128h __D
, const int __R
)
1817 return __builtin_ia32_vcvtsh2ss_mask_round (__D
, __C
, __A
, __B
, __R
);
1820 extern __inline __m128
1821 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1822 _mm_maskz_cvt_roundsh_ss (__mmask8 __A
, __m128 __B
,
1823 __m128h __C
, const int __R
)
1825 return __builtin_ia32_vcvtsh2ss_mask_round (__C
, __B
,
1826 _mm_avx512_setzero_ps (),
1830 extern __inline __m128d
1831 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1832 _mm_cvt_roundsh_sd (__m128d __A
, __m128h __B
, const int __R
)
1834 return __builtin_ia32_vcvtsh2sd_mask_round (__B
, __A
,
1835 _mm_avx512_setzero_pd (),
1836 (__mmask8
) -1, __R
);
1839 extern __inline __m128d
1840 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1841 _mm_mask_cvt_roundsh_sd (__m128d __A
, __mmask8 __B
, __m128d __C
,
1842 __m128h __D
, const int __R
)
1844 return __builtin_ia32_vcvtsh2sd_mask_round (__D
, __C
, __A
, __B
, __R
);
1847 extern __inline __m128d
1848 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1849 _mm_maskz_cvt_roundsh_sd (__mmask8 __A
, __m128d __B
, __m128h __C
, const int __R
)
1851 return __builtin_ia32_vcvtsh2sd_mask_round (__C
, __B
,
1852 _mm_avx512_setzero_pd (),
1857 #define _mm_cvt_roundsh_ss(A, B, R) \
1858 (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \
1859 _mm_avx512_setzero_ps (), \
1860 (__mmask8) -1, (R)))
1862 #define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \
1863 (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
1865 #define _mm_maskz_cvt_roundsh_ss(A, B, C, R) \
1866 (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \
1867 _mm_avx512_setzero_ps (), \
1870 #define _mm_cvt_roundsh_sd(A, B, R) \
1871 (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \
1872 _mm_avx512_setzero_pd (), \
1873 (__mmask8) -1, (R)))
1875 #define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \
1876 (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
1878 #define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \
1879 (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \
1880 _mm_avx512_setzero_pd (), \
1883 #endif /* __OPTIMIZE__ */
1885 /* Intrinsics vcvtss2sh, vcvtsd2sh. */
1886 extern __inline __m128h
1887 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1888 _mm_cvtss_sh (__m128h __A
, __m128 __B
)
1890 return __builtin_ia32_vcvtss2sh_mask_round (__B
, __A
,
1893 _MM_FROUND_CUR_DIRECTION
);
1896 extern __inline __m128h
1897 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1898 _mm_mask_cvtss_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128 __D
)
1900 return __builtin_ia32_vcvtss2sh_mask_round (__D
, __C
, __A
, __B
,
1901 _MM_FROUND_CUR_DIRECTION
);
1904 extern __inline __m128h
1905 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1906 _mm_maskz_cvtss_sh (__mmask8 __A
, __m128h __B
, __m128 __C
)
1908 return __builtin_ia32_vcvtss2sh_mask_round (__C
, __B
,
1910 __A
, _MM_FROUND_CUR_DIRECTION
);
1913 extern __inline __m128h
1914 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1915 _mm_cvtsd_sh (__m128h __A
, __m128d __B
)
1917 return __builtin_ia32_vcvtsd2sh_mask_round (__B
, __A
,
1920 _MM_FROUND_CUR_DIRECTION
);
1923 extern __inline __m128h
1924 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1925 _mm_mask_cvtsd_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128d __D
)
1927 return __builtin_ia32_vcvtsd2sh_mask_round (__D
, __C
, __A
, __B
,
1928 _MM_FROUND_CUR_DIRECTION
);
1931 extern __inline __m128h
1932 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1933 _mm_maskz_cvtsd_sh (__mmask8 __A
, __m128h __B
, __m128d __C
)
1935 return __builtin_ia32_vcvtsd2sh_mask_round (__C
, __B
,
1937 __A
, _MM_FROUND_CUR_DIRECTION
);
1941 extern __inline __m128h
1942 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1943 _mm_cvt_roundss_sh (__m128h __A
, __m128 __B
, const int __R
)
1945 return __builtin_ia32_vcvtss2sh_mask_round (__B
, __A
,
1947 (__mmask8
) -1, __R
);
1950 extern __inline __m128h
1951 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1952 _mm_mask_cvt_roundss_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128 __D
,
1955 return __builtin_ia32_vcvtss2sh_mask_round (__D
, __C
, __A
, __B
, __R
);
1958 extern __inline __m128h
1959 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1960 _mm_maskz_cvt_roundss_sh (__mmask8 __A
, __m128h __B
, __m128 __C
,
1963 return __builtin_ia32_vcvtss2sh_mask_round (__C
, __B
,
1968 extern __inline __m128h
1969 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1970 _mm_cvt_roundsd_sh (__m128h __A
, __m128d __B
, const int __R
)
1972 return __builtin_ia32_vcvtsd2sh_mask_round (__B
, __A
,
1974 (__mmask8
) -1, __R
);
1977 extern __inline __m128h
1978 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1979 _mm_mask_cvt_roundsd_sh (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128d __D
,
1982 return __builtin_ia32_vcvtsd2sh_mask_round (__D
, __C
, __A
, __B
, __R
);
1985 extern __inline __m128h
1986 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1987 _mm_maskz_cvt_roundsd_sh (__mmask8 __A
, __m128h __B
, __m128d __C
,
1990 return __builtin_ia32_vcvtsd2sh_mask_round (__C
, __B
,
1996 #define _mm_cvt_roundss_sh(A, B, R) \
1997 (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \
1998 _mm_setzero_ph (), \
2001 #define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \
2002 (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
2004 #define _mm_maskz_cvt_roundss_sh(A, B, C, R) \
2005 (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \
2006 _mm_setzero_ph (), \
2009 #define _mm_cvt_roundsd_sh(A, B, R) \
2010 (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \
2011 _mm_setzero_ph (), \
2014 #define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \
2015 (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
2017 #define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \
2018 (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \
2019 _mm_setzero_ph (), \
2022 #endif /* __OPTIMIZE__ */
2024 extern __inline _Float16
2025 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2026 _mm_cvtsh_h (__m128h __A
)
2031 /* Intrinsics vfmadd[132,213,231]sh. */
2032 extern __inline __m128h
2033 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2034 _mm_fmadd_sh (__m128h __W
, __m128h __A
, __m128h __B
)
2036 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2040 _MM_FROUND_CUR_DIRECTION
);
2043 extern __inline __m128h
2044 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2045 _mm_mask_fmadd_sh (__m128h __W
, __mmask8 __U
, __m128h __A
, __m128h __B
)
2047 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2051 _MM_FROUND_CUR_DIRECTION
);
2054 extern __inline __m128h
2055 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2056 _mm_mask3_fmadd_sh (__m128h __W
, __m128h __A
, __m128h __B
, __mmask8 __U
)
2058 return (__m128h
) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf
) __W
,
2062 _MM_FROUND_CUR_DIRECTION
);
2065 extern __inline __m128h
2066 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2067 _mm_maskz_fmadd_sh (__mmask8 __U
, __m128h __W
, __m128h __A
, __m128h __B
)
2069 return (__m128h
) __builtin_ia32_vfmaddsh3_maskz ((__v8hf
) __W
,
2073 _MM_FROUND_CUR_DIRECTION
);
2078 extern __inline __m128h
2079 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2080 _mm_fmadd_round_sh (__m128h __W
, __m128h __A
, __m128h __B
, const int __R
)
2082 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2089 extern __inline __m128h
2090 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2091 _mm_mask_fmadd_round_sh (__m128h __W
, __mmask8 __U
, __m128h __A
, __m128h __B
,
2094 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2097 (__mmask8
) __U
, __R
);
2100 extern __inline __m128h
2101 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2102 _mm_mask3_fmadd_round_sh (__m128h __W
, __m128h __A
, __m128h __B
, __mmask8 __U
,
2105 return (__m128h
) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf
) __W
,
2108 (__mmask8
) __U
, __R
);
2111 extern __inline __m128h
2112 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2113 _mm_maskz_fmadd_round_sh (__mmask8 __U
, __m128h __W
, __m128h __A
,
2114 __m128h __B
, const int __R
)
2116 return (__m128h
) __builtin_ia32_vfmaddsh3_maskz ((__v8hf
) __W
,
2119 (__mmask8
) __U
, __R
);
2123 #define _mm_fmadd_round_sh(A, B, C, R) \
2124 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
2125 #define _mm_mask_fmadd_round_sh(A, U, B, C, R) \
2126 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
2127 #define _mm_mask3_fmadd_round_sh(A, B, C, U, R) \
2128 ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
2129 #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
2130 ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
2132 #endif /* __OPTIMIZE__ */
2134 /* Intrinsics vfnmadd[132,213,231]sh. */
2135 extern __inline __m128h
2136 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2137 _mm_fnmadd_sh (__m128h __W
, __m128h __A
, __m128h __B
)
2139 return (__m128h
) __builtin_ia32_vfnmaddsh3_mask ((__v8hf
) __W
,
2143 _MM_FROUND_CUR_DIRECTION
);
2146 extern __inline __m128h
2147 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2148 _mm_mask_fnmadd_sh (__m128h __W
, __mmask8 __U
, __m128h __A
, __m128h __B
)
2150 return (__m128h
) __builtin_ia32_vfnmaddsh3_mask ((__v8hf
) __W
,
2154 _MM_FROUND_CUR_DIRECTION
);
2157 extern __inline __m128h
2158 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2159 _mm_mask3_fnmadd_sh (__m128h __W
, __m128h __A
, __m128h __B
, __mmask8 __U
)
2161 return (__m128h
) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf
) __W
,
2165 _MM_FROUND_CUR_DIRECTION
);
2168 extern __inline __m128h
2169 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2170 _mm_maskz_fnmadd_sh (__mmask8 __U
, __m128h __W
, __m128h __A
, __m128h __B
)
2172 return (__m128h
) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf
) __W
,
2176 _MM_FROUND_CUR_DIRECTION
);
2181 extern __inline __m128h
2182 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2183 _mm_fnmadd_round_sh (__m128h __W
, __m128h __A
, __m128h __B
, const int __R
)
2185 return (__m128h
) __builtin_ia32_vfnmaddsh3_mask ((__v8hf
) __W
,
2192 extern __inline __m128h
2193 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2194 _mm_mask_fnmadd_round_sh (__m128h __W
, __mmask8 __U
, __m128h __A
, __m128h __B
,
2197 return (__m128h
) __builtin_ia32_vfnmaddsh3_mask ((__v8hf
) __W
,
2200 (__mmask8
) __U
, __R
);
2203 extern __inline __m128h
2204 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2205 _mm_mask3_fnmadd_round_sh (__m128h __W
, __m128h __A
, __m128h __B
, __mmask8 __U
,
2208 return (__m128h
) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf
) __W
,
2211 (__mmask8
) __U
, __R
);
2214 extern __inline __m128h
2215 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2216 _mm_maskz_fnmadd_round_sh (__mmask8 __U
, __m128h __W
, __m128h __A
,
2217 __m128h __B
, const int __R
)
2219 return (__m128h
) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf
) __W
,
2222 (__mmask8
) __U
, __R
);
2226 #define _mm_fnmadd_round_sh(A, B, C, R) \
2227 ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
2228 #define _mm_mask_fnmadd_round_sh(A, U, B, C, R) \
2229 ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
2230 #define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) \
2231 ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
2232 #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
2233 ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
2235 #endif /* __OPTIMIZE__ */
2237 /* Intrinsics vfmsub[132,213,231]sh. */
2238 extern __inline __m128h
2239 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2240 _mm_fmsub_sh (__m128h __W
, __m128h __A
, __m128h __B
)
2242 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2246 _MM_FROUND_CUR_DIRECTION
);
2249 extern __inline __m128h
2250 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2251 _mm_mask_fmsub_sh (__m128h __W
, __mmask8 __U
, __m128h __A
, __m128h __B
)
2253 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2257 _MM_FROUND_CUR_DIRECTION
);
2260 extern __inline __m128h
2261 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2262 _mm_mask3_fmsub_sh (__m128h __W
, __m128h __A
, __m128h __B
, __mmask8 __U
)
2264 return (__m128h
) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf
) __W
,
2268 _MM_FROUND_CUR_DIRECTION
);
2271 extern __inline __m128h
2272 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2273 _mm_maskz_fmsub_sh (__mmask8 __U
, __m128h __W
, __m128h __A
, __m128h __B
)
2275 return (__m128h
) __builtin_ia32_vfmaddsh3_maskz ((__v8hf
) __W
,
2279 _MM_FROUND_CUR_DIRECTION
);
2284 extern __inline __m128h
2285 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2286 _mm_fmsub_round_sh (__m128h __W
, __m128h __A
, __m128h __B
, const int __R
)
2288 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2295 extern __inline __m128h
2296 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2297 _mm_mask_fmsub_round_sh (__m128h __W
, __mmask8 __U
, __m128h __A
, __m128h __B
,
2300 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2303 (__mmask8
) __U
, __R
);
2306 extern __inline __m128h
2307 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2308 _mm_mask3_fmsub_round_sh (__m128h __W
, __m128h __A
, __m128h __B
, __mmask8 __U
,
2311 return (__m128h
) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf
) __W
,
2314 (__mmask8
) __U
, __R
);
2317 extern __inline __m128h
2318 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2319 _mm_maskz_fmsub_round_sh (__mmask8 __U
, __m128h __W
, __m128h __A
,
2320 __m128h __B
, const int __R
)
2322 return (__m128h
) __builtin_ia32_vfmaddsh3_maskz ((__v8hf
) __W
,
2325 (__mmask8
) __U
, __R
);
2329 #define _mm_fmsub_round_sh(A, B, C, R) \
2330 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
2331 #define _mm_mask_fmsub_round_sh(A, U, B, C, R) \
2332 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
2333 #define _mm_mask3_fmsub_round_sh(A, B, C, U, R) \
2334 ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
2335 #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
2336 ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
2338 #endif /* __OPTIMIZE__ */
2340 /* Intrinsics vfnmsub[132,213,231]sh. */
2341 extern __inline __m128h
2342 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2343 _mm_fnmsub_sh (__m128h __W
, __m128h __A
, __m128h __B
)
2345 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2349 _MM_FROUND_CUR_DIRECTION
);
2352 extern __inline __m128h
2353 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2354 _mm_mask_fnmsub_sh (__m128h __W
, __mmask8 __U
, __m128h __A
, __m128h __B
)
2356 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2360 _MM_FROUND_CUR_DIRECTION
);
2363 extern __inline __m128h
2364 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2365 _mm_mask3_fnmsub_sh (__m128h __W
, __m128h __A
, __m128h __B
, __mmask8 __U
)
2367 return (__m128h
) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf
) __W
,
2371 _MM_FROUND_CUR_DIRECTION
);
2374 extern __inline __m128h
2375 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2376 _mm_maskz_fnmsub_sh (__mmask8 __U
, __m128h __W
, __m128h __A
, __m128h __B
)
2378 return (__m128h
) __builtin_ia32_vfmaddsh3_maskz ((__v8hf
) __W
,
2382 _MM_FROUND_CUR_DIRECTION
);
2387 extern __inline __m128h
2388 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2389 _mm_fnmsub_round_sh (__m128h __W
, __m128h __A
, __m128h __B
, const int __R
)
2391 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2398 extern __inline __m128h
2399 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2400 _mm_mask_fnmsub_round_sh (__m128h __W
, __mmask8 __U
, __m128h __A
, __m128h __B
,
2403 return (__m128h
) __builtin_ia32_vfmaddsh3_mask ((__v8hf
) __W
,
2406 (__mmask8
) __U
, __R
);
2409 extern __inline __m128h
2410 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2411 _mm_mask3_fnmsub_round_sh (__m128h __W
, __m128h __A
, __m128h __B
, __mmask8 __U
,
2414 return (__m128h
) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf
) __W
,
2417 (__mmask8
) __U
, __R
);
2420 extern __inline __m128h
2421 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2422 _mm_maskz_fnmsub_round_sh (__mmask8 __U
, __m128h __W
, __m128h __A
,
2423 __m128h __B
, const int __R
)
2425 return (__m128h
) __builtin_ia32_vfmaddsh3_maskz ((__v8hf
) __W
,
2428 (__mmask8
) __U
, __R
);
2432 #define _mm_fnmsub_round_sh(A, B, C, R) \
2433 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
2434 #define _mm_mask_fnmsub_round_sh(A, U, B, C, R) \
2435 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
2436 #define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) \
2437 ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
2438 #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
2439 ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
2441 #endif /* __OPTIMIZE__ */
2443 /* Intrinsics vf[,c]maddcsh. */
2444 extern __inline __m128h
2445 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2446 _mm_mask_fcmadd_sch (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
2449 __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf
) __A
,
2452 _MM_FROUND_CUR_DIRECTION
);
2455 extern __inline __m128h
2456 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2457 _mm_mask3_fcmadd_sch (__m128h __A
, __m128h __B
, __m128h __C
, __mmask8 __D
)
2460 __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf
) __A
,
2463 _MM_FROUND_CUR_DIRECTION
);
2466 extern __inline __m128h
2467 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2468 _mm_maskz_fcmadd_sch (__mmask8 __A
, __m128h __B
, __m128h __C
, __m128h __D
)
2471 __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf
) __B
,
2474 __A
, _MM_FROUND_CUR_DIRECTION
);
2477 extern __inline __m128h
2478 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2479 _mm_fcmadd_sch (__m128h __A
, __m128h __B
, __m128h __C
)
2482 __builtin_ia32_vfcmaddcsh_round ((__v8hf
) __A
,
2485 _MM_FROUND_CUR_DIRECTION
);
2488 extern __inline __m128h
2489 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2490 _mm_mask_fmadd_sch (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
2493 __builtin_ia32_vfmaddcsh_mask_round ((__v8hf
) __A
,
2496 _MM_FROUND_CUR_DIRECTION
);
2499 extern __inline __m128h
2500 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2501 _mm_mask3_fmadd_sch (__m128h __A
, __m128h __B
, __m128h __C
, __mmask8 __D
)
2504 __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf
) __A
,
2507 _MM_FROUND_CUR_DIRECTION
);
2510 extern __inline __m128h
2511 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2512 _mm_maskz_fmadd_sch (__mmask8 __A
, __m128h __B
, __m128h __C
, __m128h __D
)
2515 __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf
) __B
,
2518 __A
, _MM_FROUND_CUR_DIRECTION
);
2521 extern __inline __m128h
2522 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2523 _mm_fmadd_sch (__m128h __A
, __m128h __B
, __m128h __C
)
2526 __builtin_ia32_vfmaddcsh_round ((__v8hf
) __A
,
2529 _MM_FROUND_CUR_DIRECTION
);
/* Rounding variants of the scalar complex multiply-add intrinsics.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
                           __m128h __D, const int __E)
{
  return (__m128h)
    __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
                                          (__v8hf) __C,
                                          (__v8hf) __D,
                                          __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
                            __mmask8 __D, const int __E)
{
  return (__m128h)
    __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
                                           (__v8hf) __B,
                                           (__v8hf) __C,
                                           __D, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
                            __m128h __D, const int __E)
{
  return (__m128h)
    __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
                                           (__v8hf) __C,
                                           (__v8hf) __D,
                                           __A, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
{
  return (__m128h)
    __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
                                     (__v8hf) __B,
                                     (__v8hf) __C,
                                     __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
                          __m128h __D, const int __E)
{
  return (__m128h)
    __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
                                         (__v8hf) __C,
                                         (__v8hf) __D,
                                         __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
                           __mmask8 __D, const int __E)
{
  return (__m128h)
    __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
                                          (__v8hf) __B,
                                          (__v8hf) __C,
                                          __D, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
                           __m128h __D, const int __E)
{
  return (__m128h)
    __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
                                          (__v8hf) __C,
                                          (__v8hf) __D,
                                          __A, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
{
  return (__m128h)
    __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
                                    (__v8hf) __B,
                                    (__v8hf) __C,
                                    __D);
}

#else
#define _mm_mask_fcmadd_round_sch(A, B, C, D, E)			\
  ((__m128h)								\
   __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A),			\
					 (__v8hf) (C),			\
					 (__v8hf) (D),			\
					 (B), (E)))

#define _mm_mask3_fcmadd_round_sch(A, B, C, D, E)			\
  ((__m128h)								\
   __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A),			\
					  (__v8hf) (B),			\
					  (__v8hf) (C),			\
					  (D), (E)))

#define _mm_maskz_fcmadd_round_sch(A, B, C, D, E)			\
  __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E))

#define _mm_fcmadd_round_sch(A, B, C, D)				\
  __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D))

#define _mm_mask_fmadd_round_sch(A, B, C, D, E)				\
  ((__m128h)								\
   __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A),			\
					(__v8hf) (C),			\
					(__v8hf) (D),			\
					(B), (E)))

#define _mm_mask3_fmadd_round_sch(A, B, C, D, E)			\
  ((__m128h)								\
   __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A),			\
					 (__v8hf) (B),			\
					 (__v8hf) (C),			\
					 (D), (E)))

#define _mm_maskz_fmadd_round_sch(A, B, C, D, E)			\
  __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E))

#define _mm_fmadd_round_sch(A, B, C, D)					\
  __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D))

#endif /* __OPTIMIZE__ */
2670 /* Intrinsics vf[,c]mulcsh. */
2671 extern __inline __m128h
2672 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2673 _mm_fcmul_sch (__m128h __A
, __m128h __B
)
2676 __builtin_ia32_vfcmulcsh_round ((__v8hf
) __A
,
2678 _MM_FROUND_CUR_DIRECTION
);
2681 extern __inline __m128h
2682 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2683 _mm_mask_fcmul_sch (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
2686 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf
) __C
,
2689 __B
, _MM_FROUND_CUR_DIRECTION
);
2692 extern __inline __m128h
2693 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2694 _mm_maskz_fcmul_sch (__mmask8 __A
, __m128h __B
, __m128h __C
)
2697 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf
) __B
,
2700 __A
, _MM_FROUND_CUR_DIRECTION
);
2703 extern __inline __m128h
2704 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2705 _mm_fmul_sch (__m128h __A
, __m128h __B
)
2708 __builtin_ia32_vfmulcsh_round ((__v8hf
) __A
,
2710 _MM_FROUND_CUR_DIRECTION
);
2713 extern __inline __m128h
2714 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2715 _mm_mask_fmul_sch (__m128h __A
, __mmask8 __B
, __m128h __C
, __m128h __D
)
2718 __builtin_ia32_vfmulcsh_mask_round ((__v8hf
) __C
,
2721 __B
, _MM_FROUND_CUR_DIRECTION
);
2724 extern __inline __m128h
2725 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2726 _mm_maskz_fmul_sch (__mmask8 __A
, __m128h __B
, __m128h __C
)
2729 __builtin_ia32_vfmulcsh_mask_round ((__v8hf
) __B
,
2732 __A
, _MM_FROUND_CUR_DIRECTION
);
/* Rounding variants of the scalar complex multiply intrinsics.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
{
  return (__m128h)
    __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
                                    (__v8hf) __B,
                                    __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
                          __m128h __D, const int __E)
{
  return (__m128h)
    __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
                                         (__v8hf) __D,
                                         (__v8hf) __A,
                                         __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
                           const int __E)
{
  return (__m128h)
    __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
                                         (__v8hf) __C,
                                         (__v8hf) _mm_setzero_ph (),
                                         __A, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
{
  return (__m128h)
    __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
                                   (__v8hf) __B, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
                         __m128h __D, const int __E)
{
  return (__m128h)
    __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
                                        (__v8hf) __D,
                                        (__v8hf) __A,
                                        __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
{
  return (__m128h)
    __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
                                        (__v8hf) __C,
                                        (__v8hf) _mm_setzero_ph (),
                                        __A, __E);
}

#else
#define _mm_fcmul_round_sch(__A, __B, __D)				\
  (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,		\
					    (__v8hf) __B, __D)

#define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E)		\
  (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,		\
						 (__v8hf) __D,		\
						 (__v8hf) __A,		\
						 __B, __E)

#define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E)			\
  (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,		\
						 (__v8hf) __C,		\
						 (__v8hf)		\
						 _mm_setzero_ph (),	\
						 __A, __E)

#define _mm_fmul_round_sch(__A, __B, __D)				\
  (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A,		\
					   (__v8hf) __B, __D)

#define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E)		\
  (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,		\
						(__v8hf) __D,		\
						(__v8hf) __A,		\
						__B, __E)

#define _mm_maskz_fmul_round_sch(__A, __B, __C, __E)			\
  (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,		\
						(__v8hf) __C,		\
						(__v8hf)		\
						_mm_setzero_ph (),	\
						__A, __E)

#endif /* __OPTIMIZE__ */
/* Short aliases: mul/cmul map onto the fmul/fcmul intrinsics.  */
#define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
#define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
#define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
#define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
#define _mm_mask_mul_round_sch(W, U, A, B, R)				\
  _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
#define _mm_maskz_mul_round_sch(U, A, B, R)				\
  _mm_maskz_fmul_round_sch ((U), (A), (B), (R))

#define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
#define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
#define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
#define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
#define _mm_mask_cmul_round_sch(W, U, A, B, R)				\
  _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
#define _mm_maskz_cmul_round_sch(U, A, B, R)				\
  _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
2855 #ifdef __DISABLE_AVX512FP16__
2856 #undef __DISABLE_AVX512FP16__
2857 #pragma GCC pop_options
2858 #endif /* __DISABLE_AVX512FP16__ */
2860 #if !defined (__AVX512FP16__) || !defined (__EVEX512__)
2861 #pragma GCC push_options
2862 #pragma GCC target("avx512fp16,evex512")
2863 #define __DISABLE_AVX512FP16_512__
2864 #endif /* __AVX512FP16_512__ */
2866 typedef _Float16 __v32hf
__attribute__ ((__vector_size__ (64)));
2867 typedef _Float16 __m512h
__attribute__ ((__vector_size__ (64), __may_alias__
));
2868 typedef _Float16 __m512h_u
__attribute__ ((__vector_size__ (64), \
2869 __may_alias__
, __aligned__ (1)));
2871 extern __inline __m512h
2872 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2873 _mm512_set_ph (_Float16 __A31
, _Float16 __A30
, _Float16 __A29
,
2874 _Float16 __A28
, _Float16 __A27
, _Float16 __A26
,
2875 _Float16 __A25
, _Float16 __A24
, _Float16 __A23
,
2876 _Float16 __A22
, _Float16 __A21
, _Float16 __A20
,
2877 _Float16 __A19
, _Float16 __A18
, _Float16 __A17
,
2878 _Float16 __A16
, _Float16 __A15
, _Float16 __A14
,
2879 _Float16 __A13
, _Float16 __A12
, _Float16 __A11
,
2880 _Float16 __A10
, _Float16 __A9
, _Float16 __A8
,
2881 _Float16 __A7
, _Float16 __A6
, _Float16 __A5
,
2882 _Float16 __A4
, _Float16 __A3
, _Float16 __A2
,
2883 _Float16 __A1
, _Float16 __A0
)
2885 return __extension__ (__m512h
)(__v32hf
){ __A0
, __A1
, __A2
, __A3
,
2886 __A4
, __A5
, __A6
, __A7
,
2887 __A8
, __A9
, __A10
, __A11
,
2888 __A12
, __A13
, __A14
, __A15
,
2889 __A16
, __A17
, __A18
, __A19
,
2890 __A20
, __A21
, __A22
, __A23
,
2891 __A24
, __A25
, __A26
, __A27
,
2892 __A28
, __A29
, __A30
, __A31
};
2895 /* Create vectors of elements in the reversed order from
2896 _mm512_set_ph functions. */
2898 extern __inline __m512h
2899 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2900 _mm512_setr_ph (_Float16 __A0
, _Float16 __A1
, _Float16 __A2
,
2901 _Float16 __A3
, _Float16 __A4
, _Float16 __A5
,
2902 _Float16 __A6
, _Float16 __A7
, _Float16 __A8
,
2903 _Float16 __A9
, _Float16 __A10
, _Float16 __A11
,
2904 _Float16 __A12
, _Float16 __A13
, _Float16 __A14
,
2905 _Float16 __A15
, _Float16 __A16
, _Float16 __A17
,
2906 _Float16 __A18
, _Float16 __A19
, _Float16 __A20
,
2907 _Float16 __A21
, _Float16 __A22
, _Float16 __A23
,
2908 _Float16 __A24
, _Float16 __A25
, _Float16 __A26
,
2909 _Float16 __A27
, _Float16 __A28
, _Float16 __A29
,
2910 _Float16 __A30
, _Float16 __A31
)
2913 return _mm512_set_ph (__A31
, __A30
, __A29
, __A28
, __A27
, __A26
, __A25
,
2914 __A24
, __A23
, __A22
, __A21
, __A20
, __A19
, __A18
,
2915 __A17
, __A16
, __A15
, __A14
, __A13
, __A12
, __A11
,
2916 __A10
, __A9
, __A8
, __A7
, __A6
, __A5
, __A4
, __A3
,
2920 /* Broadcast _Float16 to vector. */
2921 extern __inline __m512h
2922 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2923 _mm512_set1_ph (_Float16 __A
)
2925 return _mm512_set_ph (__A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
,
2926 __A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
,
2927 __A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
,
2928 __A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
);
2931 /* Create a vector with all zeros. */
2932 extern __inline __m512h
2933 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2934 _mm512_setzero_ph (void)
2936 return _mm512_set1_ph (0.0f16
);
2939 extern __inline __m512h
2940 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2941 _mm512_undefined_ph (void)
2943 #pragma GCC diagnostic push
2944 #pragma GCC diagnostic ignored "-Winit-self"
2946 #pragma GCC diagnostic pop
2950 extern __inline _Float16
2951 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2952 _mm512_cvtsh_h (__m512h __A
)
2957 extern __inline __m512
2958 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2959 _mm512_castph_ps (__m512h __a
)
2961 return (__m512
) __a
;
2964 extern __inline __m512d
2965 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2966 _mm512_castph_pd (__m512h __a
)
2968 return (__m512d
) __a
;
2971 extern __inline __m512i
2972 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2973 _mm512_castph_si512 (__m512h __a
)
2975 return (__m512i
) __a
;
2978 extern __inline __m128h
2979 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2980 _mm512_castph512_ph128 (__m512h __A
)
2986 } __u
= { .__v
= __A
};
2990 extern __inline __m256h
2991 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2992 _mm512_castph512_ph256 (__m512h __A
)
2998 } __u
= { .__v
= __A
};
3002 extern __inline __m512h
3003 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3004 _mm512_castph128_ph512 (__m128h __A
)
3015 extern __inline __m512h
3016 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3017 _mm512_castph256_ph512 (__m256h __A
)
3028 extern __inline __m512h
3029 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3030 _mm512_zextph128_ph512 (__m128h __A
)
3032 return (__m512h
) _mm512_insertf32x4 (_mm512_setzero_ps (),
3036 extern __inline __m512h
3037 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3038 _mm512_zextph256_ph512 (__m256h __A
)
3040 return (__m512h
) _mm512_insertf64x4 (_mm512_setzero_pd (),
3044 extern __inline __m512h
3045 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3046 _mm512_castps_ph (__m512 __a
)
3048 return (__m512h
) __a
;
3051 extern __inline __m512h
3052 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3053 _mm512_castpd_ph (__m512d __a
)
3055 return (__m512h
) __a
;
3058 extern __inline __m512h
3059 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3060 _mm512_castsi512_ph (__m512i __a
)
3062 return (__m512h
) __a
;
3065 /* Create a vector with element 0 as *P and the rest zero. */
3066 extern __inline __m512h
3067 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3068 _mm512_load_ph (void const *__P
)
3070 return *(const __m512h
*) __P
;
3073 extern __inline __m512h
3074 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3075 _mm512_loadu_ph (void const *__P
)
3077 return *(const __m512h_u
*) __P
;
3080 /* Stores the lower _Float16 value. */
3081 extern __inline
void
3082 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3083 _mm512_store_ph (void *__P
, __m512h __A
)
3085 *(__m512h
*) __P
= __A
;
3088 extern __inline
void
3089 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3090 _mm512_storeu_ph (void *__P
, __m512h __A
)
3092 *(__m512h_u
*) __P
= __A
;
3095 extern __inline __m512h
3096 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3097 _mm512_abs_ph (__m512h __A
)
3099 return (__m512h
) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
3103 /* Intrinsics v[add,sub,mul,div]ph. */
3104 extern __inline __m512h
3105 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3106 _mm512_add_ph (__m512h __A
, __m512h __B
)
3108 return (__m512h
) ((__v32hf
) __A
+ (__v32hf
) __B
);
3111 extern __inline __m512h
3112 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3113 _mm512_mask_add_ph (__m512h __A
, __mmask32 __B
, __m512h __C
, __m512h __D
)
3115 return __builtin_ia32_addph512_mask (__C
, __D
, __A
, __B
);
3118 extern __inline __m512h
3119 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3120 _mm512_maskz_add_ph (__mmask32 __A
, __m512h __B
, __m512h __C
)
3122 return __builtin_ia32_addph512_mask (__B
, __C
,
3123 _mm512_setzero_ph (), __A
);
3126 extern __inline __m512h
3127 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3128 _mm512_sub_ph (__m512h __A
, __m512h __B
)
3130 return (__m512h
) ((__v32hf
) __A
- (__v32hf
) __B
);
3133 extern __inline __m512h
3134 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3135 _mm512_mask_sub_ph (__m512h __A
, __mmask32 __B
, __m512h __C
, __m512h __D
)
3137 return __builtin_ia32_subph512_mask (__C
, __D
, __A
, __B
);
3140 extern __inline __m512h
3141 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3142 _mm512_maskz_sub_ph (__mmask32 __A
, __m512h __B
, __m512h __C
)
3144 return __builtin_ia32_subph512_mask (__B
, __C
,
3145 _mm512_setzero_ph (), __A
);
3148 extern __inline __m512h
3149 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3150 _mm512_mul_ph (__m512h __A
, __m512h __B
)
3152 return (__m512h
) ((__v32hf
) __A
* (__v32hf
) __B
);
3155 extern __inline __m512h
3156 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3157 _mm512_mask_mul_ph (__m512h __A
, __mmask32 __B
, __m512h __C
, __m512h __D
)
3159 return __builtin_ia32_mulph512_mask (__C
, __D
, __A
, __B
);
3162 extern __inline __m512h
3163 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3164 _mm512_maskz_mul_ph (__mmask32 __A
, __m512h __B
, __m512h __C
)
3166 return __builtin_ia32_mulph512_mask (__B
, __C
,
3167 _mm512_setzero_ph (), __A
);
3170 extern __inline __m512h
3171 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3172 _mm512_div_ph (__m512h __A
, __m512h __B
)
3174 return (__m512h
) ((__v32hf
) __A
/ (__v32hf
) __B
);
3177 extern __inline __m512h
3178 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3179 _mm512_mask_div_ph (__m512h __A
, __mmask32 __B
, __m512h __C
, __m512h __D
)
3181 return __builtin_ia32_divph512_mask (__C
, __D
, __A
, __B
);
3184 extern __inline __m512h
3185 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3186 _mm512_maskz_div_ph (__mmask32 __A
, __m512h __B
, __m512h __C
)
3188 return __builtin_ia32_divph512_mask (__B
, __C
,
3189 _mm512_setzero_ph (), __A
);
/* Rounding variants of the 512-bit arithmetic intrinsics.  */
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_addph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_addph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_subph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_subph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_mulph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_mulph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_divph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_divph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

#else
#define _mm512_add_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_addph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_add_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_add_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_addph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))

#define _mm512_sub_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_subph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_sub_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_sub_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_subph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))

#define _mm512_mul_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_mul_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_mul_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))

#define _mm512_div_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_divph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_div_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_div_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_divph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))
#endif /* __OPTIMIZE__ */
3354 extern __inline __m512h
3355 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3356 _mm512_conj_pch (__m512h __A
)
3358 return (__m512h
) _mm512_xor_epi32 ((__m512i
) __A
, _mm512_set1_epi32 (1<<31));
3361 extern __inline __m512h
3362 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3363 _mm512_mask_conj_pch (__m512h __W
, __mmask16 __U
, __m512h __A
)
3366 __builtin_ia32_movaps512_mask ((__v16sf
) _mm512_conj_pch (__A
),
3371 extern __inline __m512h
3372 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3373 _mm512_maskz_conj_pch (__mmask16 __U
, __m512h __A
)
3376 __builtin_ia32_movaps512_mask ((__v16sf
) _mm512_conj_pch (__A
),
3377 (__v16sf
) _mm512_setzero_ps (),
3381 /* Intrinsic vmaxph vminph. */
3382 extern __inline __m512h
3383 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3384 _mm512_max_ph (__m512h __A
, __m512h __B
)
3386 return __builtin_ia32_maxph512_mask (__A
, __B
,
3387 _mm512_setzero_ph (),
3391 extern __inline __m512h
3392 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3393 _mm512_mask_max_ph (__m512h __A
, __mmask32 __B
, __m512h __C
, __m512h __D
)
3395 return __builtin_ia32_maxph512_mask (__C
, __D
, __A
, __B
);
3398 extern __inline __m512h
3399 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3400 _mm512_maskz_max_ph (__mmask32 __A
, __m512h __B
, __m512h __C
)
3402 return __builtin_ia32_maxph512_mask (__B
, __C
,
3403 _mm512_setzero_ph (), __A
);
3406 extern __inline __m512h
3407 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3408 _mm512_min_ph (__m512h __A
, __m512h __B
)
3410 return __builtin_ia32_minph512_mask (__A
, __B
,
3411 _mm512_setzero_ph (),
3415 extern __inline __m512h
3416 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3417 _mm512_mask_min_ph (__m512h __A
, __mmask32 __B
, __m512h __C
, __m512h __D
)
3419 return __builtin_ia32_minph512_mask (__C
, __D
, __A
, __B
);
3422 extern __inline __m512h
3423 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3424 _mm512_maskz_min_ph (__mmask32 __A
, __m512h __B
, __m512h __C
)
3426 return __builtin_ia32_minph512_mask (__B
, __C
,
3427 _mm512_setzero_ph (), __A
);
3431 extern __inline __m512h
3432 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3433 _mm512_max_round_ph (__m512h __A
, __m512h __B
, const int __C
)
3435 return __builtin_ia32_maxph512_mask_round (__A
, __B
,
3436 _mm512_setzero_ph (),
3437 (__mmask32
) -1, __C
);
3440 extern __inline __m512h
3441 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3442 _mm512_mask_max_round_ph (__m512h __A
, __mmask32 __B
, __m512h __C
,
3443 __m512h __D
, const int __E
)
3445 return __builtin_ia32_maxph512_mask_round (__C
, __D
, __A
, __B
, __E
);
3448 extern __inline __m512h
3449 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3450 _mm512_maskz_max_round_ph (__mmask32 __A
, __m512h __B
, __m512h __C
,
3453 return __builtin_ia32_maxph512_mask_round (__B
, __C
,
3454 _mm512_setzero_ph (),
3458 extern __inline __m512h
3459 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3460 _mm512_min_round_ph (__m512h __A
, __m512h __B
, const int __C
)
3462 return __builtin_ia32_minph512_mask_round (__A
, __B
,
3463 _mm512_setzero_ph (),
3464 (__mmask32
) -1, __C
);
3467 extern __inline __m512h
3468 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3469 _mm512_mask_min_round_ph (__m512h __A
, __mmask32 __B
, __m512h __C
,
3470 __m512h __D
, const int __E
)
3472 return __builtin_ia32_minph512_mask_round (__C
, __D
, __A
, __B
, __E
);
3475 extern __inline __m512h
3476 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3477 _mm512_maskz_min_round_ph (__mmask32 __A
, __m512h __B
, __m512h __C
,
3480 return __builtin_ia32_minph512_mask_round (__B
, __C
,
3481 _mm512_setzero_ph (),
/* Rounding-control variant of _mm512_max_ph: C supplies the rounding/SAE
   operand; the all-ones __mmask32 selects every lane (the setzero source
   is the ignored pass-through).  */
3486 #define _mm512_max_round_ph(A, B, C) \
3487 (__builtin_ia32_maxph512_mask_round ((A), (B), \
3488 _mm512_setzero_ph (), \
3489 (__mmask32)-1, (C)))
/* Masked max with rounding control: A is the pass-through source, B the
   write mask, E the rounding/SAE operand.  */
3491 #define _mm512_mask_max_round_ph(A, B, C, D, E) \
3492 (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
3494 #define _mm512_maskz_max_round_ph(A, B, C, D) \
3495 (__builtin_ia32_maxph512_mask_round ((B), (C), \
3496 _mm512_setzero_ph (), \
/* Rounding-control variant of _mm512_min_ph: C supplies the rounding/SAE
   operand; the all-ones __mmask32 selects every lane.  */
3499 #define _mm512_min_round_ph(A, B, C) \
3500 ((__m512h)__builtin_ia32_minph512_mask_round ((A), (B), \
3501 _mm512_setzero_ph (), \
3502 (__mmask32)-1, (C)))
/* Masked min with rounding control: A is the pass-through source, B the
   write mask, E the rounding/SAE operand.  */
3504 #define _mm512_mask_min_round_ph(A, B, C, D, E) \
3505 (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
3507 #define _mm512_maskz_min_round_ph(A, B, C, D) \
3508 (__builtin_ia32_minph512_mask_round ((B), (C), \
3509 _mm512_setzero_ph (), \
3511 #endif /* __OPTIMIZE__ */
3515 extern __inline __mmask32
3516 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3517 _mm512_cmp_ph_mask (__m512h __A
, __m512h __B
, const int __C
)
3519 return (__mmask32
) __builtin_ia32_cmpph512_mask (__A
, __B
, __C
,
3523 extern __inline __mmask32
3524 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3525 _mm512_mask_cmp_ph_mask (__mmask32 __A
, __m512h __B
, __m512h __C
,
3528 return (__mmask32
) __builtin_ia32_cmpph512_mask (__B
, __C
, __D
,
3532 extern __inline __mmask32
3533 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3534 _mm512_cmp_round_ph_mask (__m512h __A
, __m512h __B
, const int __C
,
3537 return (__mmask32
) __builtin_ia32_cmpph512_mask_round (__A
, __B
,
3538 __C
, (__mmask32
) -1,
3542 extern __inline __mmask32
3543 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3544 _mm512_mask_cmp_round_ph_mask (__mmask32 __A
, __m512h __B
, __m512h __C
,
3545 const int __D
, const int __E
)
3547 return (__mmask32
) __builtin_ia32_cmpph512_mask_round (__B
, __C
,
/* Compare the 32 _Float16 lanes of A and B with predicate C; -1 is the
   all-ones lane mask (no masking).  Returns a __mmask32 of the results.  */
3553 #define _mm512_cmp_ph_mask(A, B, C) \
3554 (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
/* Masked compare: only lanes selected by mask A are compared; the rest
   of the result mask is zero.  */
3556 #define _mm512_mask_cmp_ph_mask(A, B, C, D) \
3557 (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
/* Compare with explicit rounding/SAE operand D; all lanes selected.  */
3559 #define _mm512_cmp_round_ph_mask(A, B, C, D) \
3560 (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
/* Masked compare with rounding/SAE operand E; A is the write mask.  */
3562 #define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \
3563 (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
3565 #endif /* __OPTIMIZE__ */
3567 /* Intrinsics vsqrtph. */
3568 extern __inline __m512h
3569 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3570 _mm512_sqrt_ph (__m512h __A
)
3572 return __builtin_ia32_sqrtph512_mask_round (__A
,
3573 _mm512_setzero_ph(),
3575 _MM_FROUND_CUR_DIRECTION
);
3578 extern __inline __m512h
3579 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3580 _mm512_mask_sqrt_ph (__m512h __A
, __mmask32 __B
, __m512h __C
)
3582 return __builtin_ia32_sqrtph512_mask_round (__C
, __A
, __B
,
3583 _MM_FROUND_CUR_DIRECTION
);
3586 extern __inline __m512h
3587 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3588 _mm512_maskz_sqrt_ph (__mmask32 __A
, __m512h __B
)
3590 return __builtin_ia32_sqrtph512_mask_round (__B
,
3591 _mm512_setzero_ph (),
3593 _MM_FROUND_CUR_DIRECTION
);
3597 extern __inline __m512h
3598 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3599 _mm512_sqrt_round_ph (__m512h __A
, const int __B
)
3601 return __builtin_ia32_sqrtph512_mask_round (__A
,
3602 _mm512_setzero_ph(),
3603 (__mmask32
) -1, __B
);
3606 extern __inline __m512h
3607 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3608 _mm512_mask_sqrt_round_ph (__m512h __A
, __mmask32 __B
, __m512h __C
,
3611 return __builtin_ia32_sqrtph512_mask_round (__C
, __A
, __B
, __D
);
3614 extern __inline __m512h
3615 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3616 _mm512_maskz_sqrt_round_ph (__mmask32 __A
, __m512h __B
, const int __C
)
3618 return __builtin_ia32_sqrtph512_mask_round (__B
,
3619 _mm512_setzero_ph (),
/* Square root of 32 _Float16 lanes with rounding operand B; all-ones
   mask means no lane is masked off.  */
3624 #define _mm512_sqrt_round_ph(A, B) \
3625 (__builtin_ia32_sqrtph512_mask_round ((A), \
3626 _mm512_setzero_ph (), \
3627 (__mmask32)-1, (B)))
/* Masked sqrt with rounding operand D: A is the pass-through source,
   B the write mask, C the input vector.  */
3629 #define _mm512_mask_sqrt_round_ph(A, B, C, D) \
3630 (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
3632 #define _mm512_maskz_sqrt_round_ph(A, B, C) \
3633 (__builtin_ia32_sqrtph512_mask_round ((B), \
3634 _mm512_setzero_ph (), \
3637 #endif /* __OPTIMIZE__ */
3639 /* Intrinsics vrsqrtph. */
3640 extern __inline __m512h
3641 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3642 _mm512_rsqrt_ph (__m512h __A
)
3644 return __builtin_ia32_rsqrtph512_mask (__A
, _mm512_setzero_ph (),
3648 extern __inline __m512h
3649 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3650 _mm512_mask_rsqrt_ph (__m512h __A
, __mmask32 __B
, __m512h __C
)
3652 return __builtin_ia32_rsqrtph512_mask (__C
, __A
, __B
);
3655 extern __inline __m512h
3656 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3657 _mm512_maskz_rsqrt_ph (__mmask32 __A
, __m512h __B
)
3659 return __builtin_ia32_rsqrtph512_mask (__B
, _mm512_setzero_ph (),
3663 /* Intrinsics vrcpph. */
3664 extern __inline __m512h
3665 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3666 _mm512_rcp_ph (__m512h __A
)
3668 return __builtin_ia32_rcpph512_mask (__A
, _mm512_setzero_ph (),
3672 extern __inline __m512h
3673 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3674 _mm512_mask_rcp_ph (__m512h __A
, __mmask32 __B
, __m512h __C
)
3676 return __builtin_ia32_rcpph512_mask (__C
, __A
, __B
);
3679 extern __inline __m512h
3680 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3681 _mm512_maskz_rcp_ph (__mmask32 __A
, __m512h __B
)
3683 return __builtin_ia32_rcpph512_mask (__B
, _mm512_setzero_ph (),
3687 /* Intrinsics vscalefph. */
3688 extern __inline __m512h
3689 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3690 _mm512_scalef_ph (__m512h __A
, __m512h __B
)
3692 return __builtin_ia32_scalefph512_mask_round (__A
, __B
,
3693 _mm512_setzero_ph (),
3695 _MM_FROUND_CUR_DIRECTION
);
3698 extern __inline __m512h
3699 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3700 _mm512_mask_scalef_ph (__m512h __A
, __mmask32 __B
, __m512h __C
, __m512h __D
)
3702 return __builtin_ia32_scalefph512_mask_round (__C
, __D
, __A
, __B
,
3703 _MM_FROUND_CUR_DIRECTION
);
3706 extern __inline __m512h
3707 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3708 _mm512_maskz_scalef_ph (__mmask32 __A
, __m512h __B
, __m512h __C
)
3710 return __builtin_ia32_scalefph512_mask_round (__B
, __C
,
3711 _mm512_setzero_ph (),
3713 _MM_FROUND_CUR_DIRECTION
);
3717 extern __inline __m512h
3718 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3719 _mm512_scalef_round_ph (__m512h __A
, __m512h __B
, const int __C
)
3721 return __builtin_ia32_scalefph512_mask_round (__A
, __B
,
3722 _mm512_setzero_ph (),
3723 (__mmask32
) -1, __C
);
3726 extern __inline __m512h
3727 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3728 _mm512_mask_scalef_round_ph (__m512h __A
, __mmask32 __B
, __m512h __C
,
3729 __m512h __D
, const int __E
)
3731 return __builtin_ia32_scalefph512_mask_round (__C
, __D
, __A
, __B
,
3735 extern __inline __m512h
3736 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3737 _mm512_maskz_scalef_round_ph (__mmask32 __A
, __m512h __B
, __m512h __C
,
3740 return __builtin_ia32_scalefph512_mask_round (__B
, __C
,
3741 _mm512_setzero_ph (),
/* vscalefph with rounding operand C (scales A by 2^floor(B) per lane);
   all-ones mask, so every lane is written.  */
3746 #define _mm512_scalef_round_ph(A, B, C) \
3747 (__builtin_ia32_scalefph512_mask_round ((A), (B), \
3748 _mm512_setzero_ph (), \
3749 (__mmask32)-1, (C)))
/* Masked vscalefph with rounding operand E: A pass-through, B write mask.  */
3751 #define _mm512_mask_scalef_round_ph(A, B, C, D, E) \
3752 (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
3754 #define _mm512_maskz_scalef_round_ph(A, B, C, D) \
3755 (__builtin_ia32_scalefph512_mask_round ((B), (C), \
3756 _mm512_setzero_ph (), \
3759 #endif /* __OPTIMIZE__ */
3761 /* Intrinsics vreduceph. */
3763 extern __inline __m512h
3764 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3765 _mm512_reduce_ph (__m512h __A
, int __B
)
3767 return __builtin_ia32_reduceph512_mask_round (__A
, __B
,
3768 _mm512_setzero_ph (),
3770 _MM_FROUND_CUR_DIRECTION
);
3773 extern __inline __m512h
3774 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3775 _mm512_mask_reduce_ph (__m512h __A
, __mmask32 __B
, __m512h __C
, int __D
)
3777 return __builtin_ia32_reduceph512_mask_round (__C
, __D
, __A
, __B
,
3778 _MM_FROUND_CUR_DIRECTION
);
3781 extern __inline __m512h
3782 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3783 _mm512_maskz_reduce_ph (__mmask32 __A
, __m512h __B
, int __C
)
3785 return __builtin_ia32_reduceph512_mask_round (__B
, __C
,
3786 _mm512_setzero_ph (),
3788 _MM_FROUND_CUR_DIRECTION
);
3791 extern __inline __m512h
3792 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3793 _mm512_reduce_round_ph (__m512h __A
, int __B
, const int __C
)
3795 return __builtin_ia32_reduceph512_mask_round (__A
, __B
,
3796 _mm512_setzero_ph (),
3797 (__mmask32
) -1, __C
);
3800 extern __inline __m512h
3801 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3802 _mm512_mask_reduce_round_ph (__m512h __A
, __mmask32 __B
, __m512h __C
,
3803 int __D
, const int __E
)
3805 return __builtin_ia32_reduceph512_mask_round (__C
, __D
, __A
, __B
,
3809 extern __inline __m512h
3810 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3811 _mm512_maskz_reduce_round_ph (__mmask32 __A
, __m512h __B
, int __C
,
3814 return __builtin_ia32_reduceph512_mask_round (__B
, __C
,
3815 _mm512_setzero_ph (),
3820 #define _mm512_reduce_ph(A, B) \
3821 (__builtin_ia32_reduceph512_mask_round ((A), (B), \
3822 _mm512_setzero_ph (), \
3824 _MM_FROUND_CUR_DIRECTION))
/* Masked vreduceph with imm operand D, current rounding direction:
   A is the pass-through source, B the write mask, C the input.  */
3826 #define _mm512_mask_reduce_ph(A, B, C, D) \
3827 (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), \
3828 _MM_FROUND_CUR_DIRECTION))
/* Zero-masked vreduceph: lanes cleared by mask A become 0.0
   (setzero pass-through); current rounding direction.  */
3830 #define _mm512_maskz_reduce_ph(A, B, C) \
3831 (__builtin_ia32_reduceph512_mask_round ((B), (C), \
3832 _mm512_setzero_ph (), \
3833 (A), _MM_FROUND_CUR_DIRECTION))
/* vreduceph with explicit rounding/SAE operand C; all lanes written.  */
3835 #define _mm512_reduce_round_ph(A, B, C) \
3836 (__builtin_ia32_reduceph512_mask_round ((A), (B), \
3837 _mm512_setzero_ph (), \
3838 (__mmask32)-1, (C)))
/* Masked vreduceph with rounding/SAE operand E: A pass-through, B mask.  */
3840 #define _mm512_mask_reduce_round_ph(A, B, C, D, E) \
3841 (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
3843 #define _mm512_maskz_reduce_round_ph(A, B, C, D) \
3844 (__builtin_ia32_reduceph512_mask_round ((B), (C), \
3845 _mm512_setzero_ph (), \
3848 #endif /* __OPTIMIZE__ */
3850 /* Intrinsics vrndscaleph. */
3852 extern __inline __m512h
3853 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3854 _mm512_roundscale_ph (__m512h __A
, int __B
)
3856 return __builtin_ia32_rndscaleph512_mask_round (__A
, __B
,
3857 _mm512_setzero_ph (),
3859 _MM_FROUND_CUR_DIRECTION
);
3862 extern __inline __m512h
3863 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3864 _mm512_mask_roundscale_ph (__m512h __A
, __mmask32 __B
,
3865 __m512h __C
, int __D
)
3867 return __builtin_ia32_rndscaleph512_mask_round (__C
, __D
, __A
, __B
,
3868 _MM_FROUND_CUR_DIRECTION
);
3871 extern __inline __m512h
3872 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3873 _mm512_maskz_roundscale_ph (__mmask32 __A
, __m512h __B
, int __C
)
3875 return __builtin_ia32_rndscaleph512_mask_round (__B
, __C
,
3876 _mm512_setzero_ph (),
3878 _MM_FROUND_CUR_DIRECTION
);
3881 extern __inline __m512h
3882 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3883 _mm512_roundscale_round_ph (__m512h __A
, int __B
, const int __C
)
3885 return __builtin_ia32_rndscaleph512_mask_round (__A
, __B
,
3886 _mm512_setzero_ph (),
3891 extern __inline __m512h
3892 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3893 _mm512_mask_roundscale_round_ph (__m512h __A
, __mmask32 __B
,
3894 __m512h __C
, int __D
, const int __E
)
3896 return __builtin_ia32_rndscaleph512_mask_round (__C
, __D
, __A
,
3900 extern __inline __m512h
3901 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3902 _mm512_maskz_roundscale_round_ph (__mmask32 __A
, __m512h __B
, int __C
,
3905 return __builtin_ia32_rndscaleph512_mask_round (__B
, __C
,
3906 _mm512_setzero_ph (),
3911 #define _mm512_roundscale_ph(A, B) \
3912 (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
3913 _mm512_setzero_ph (), \
3915 _MM_FROUND_CUR_DIRECTION))
/* Masked vrndscaleph with imm operand D, current rounding direction:
   A is the pass-through source, B the write mask, C the input.  */
3917 #define _mm512_mask_roundscale_ph(A, B, C, D) \
3918 (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), \
3919 _MM_FROUND_CUR_DIRECTION))
3921 #define _mm512_maskz_roundscale_ph(A, B, C) \
3922 (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
3923 _mm512_setzero_ph (), \
3925 _MM_FROUND_CUR_DIRECTION))
/* vrndscaleph with explicit rounding/SAE operand C; all lanes written.  */
3926 #define _mm512_roundscale_round_ph(A, B, C) \
3927 (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
3928 _mm512_setzero_ph (), \
3929 (__mmask32)-1, (C)))
/* Masked vrndscaleph with rounding/SAE operand E: A pass-through, B mask.  */
3931 #define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \
3932 (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
3934 #define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
3935 (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
3936 _mm512_setzero_ph (), \
3939 #endif /* __OPTIMIZE__ */
3941 /* Intrinsics vfpclassph. */
3943 extern __inline __mmask32
3944 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3945 _mm512_mask_fpclass_ph_mask (__mmask32 __U
, __m512h __A
,
3948 return (__mmask32
) __builtin_ia32_fpclassph512_mask ((__v32hf
) __A
,
3952 extern __inline __mmask32
3953 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3954 _mm512_fpclass_ph_mask (__m512h __A
, const int __imm
)
3956 return (__mmask32
) __builtin_ia32_fpclassph512_mask ((__v32hf
) __A
,
/* Classify the 32 _Float16 lanes of X against property-set imm C,
   considering only lanes selected by write mask U; returns a __mmask32.
   Fix: cast U to __mmask32, not __mmask8 -- the inline version takes
   __mmask32 __U, and a __mmask8 cast would truncate the 32-lane write
   mask to its low 8 bits.  */
#define _mm512_mask_fpclass_ph_mask(u, x, c)                            \
  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
                                                 (int) (c), (__mmask32)(u)))
/* Classify all 32 _Float16 lanes of X against property-set imm C;
   returns a __mmask32.
   Fix: the all-ones mask must be (__mmask32)-1.  (__mmask8)-1 is 0xFF,
   which after implicit widening enables only the low 8 of 32 lanes.  */
#define _mm512_fpclass_ph_mask(x, c)                                    \
  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
                                                 (int) (c), (__mmask32)-1))
3969 #endif /* __OPTIMIZE__ */
3971 /* Intrinsics vgetexpph. */
3972 extern __inline __m512h
3973 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3974 _mm512_getexp_ph (__m512h __A
)
3977 __builtin_ia32_getexpph512_mask ((__v32hf
) __A
,
3978 (__v32hf
) _mm512_setzero_ph (),
3979 (__mmask32
) -1, _MM_FROUND_CUR_DIRECTION
);
3982 extern __inline __m512h
3983 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3984 _mm512_mask_getexp_ph (__m512h __W
, __mmask32 __U
, __m512h __A
)
3987 __builtin_ia32_getexpph512_mask ((__v32hf
) __A
, (__v32hf
) __W
,
3988 (__mmask32
) __U
, _MM_FROUND_CUR_DIRECTION
);
3991 extern __inline __m512h
3992 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3993 _mm512_maskz_getexp_ph (__mmask32 __U
, __m512h __A
)
3996 __builtin_ia32_getexpph512_mask ((__v32hf
) __A
,
3997 (__v32hf
) _mm512_setzero_ph (),
3998 (__mmask32
) __U
, _MM_FROUND_CUR_DIRECTION
);
4002 extern __inline __m512h
4003 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4004 _mm512_getexp_round_ph (__m512h __A
, const int __R
)
4006 return (__m512h
) __builtin_ia32_getexpph512_mask ((__v32hf
) __A
,
4008 _mm512_setzero_ph (),
4009 (__mmask32
) -1, __R
);
4012 extern __inline __m512h
4013 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4014 _mm512_mask_getexp_round_ph (__m512h __W
, __mmask32 __U
, __m512h __A
,
4017 return (__m512h
) __builtin_ia32_getexpph512_mask ((__v32hf
) __A
,
4019 (__mmask32
) __U
, __R
);
4022 extern __inline __m512h
4023 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4024 _mm512_maskz_getexp_round_ph (__mmask32 __U
, __m512h __A
, const int __R
)
4026 return (__m512h
) __builtin_ia32_getexpph512_mask ((__v32hf
) __A
,
4028 _mm512_setzero_ph (),
4029 (__mmask32
) __U
, __R
);
/* vgetexpph with explicit rounding/SAE operand R; all lanes written
   (all-ones mask, setzero pass-through ignored).  */
4033 #define _mm512_getexp_round_ph(A, R) \
4034 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
4035 (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
/* Masked vgetexpph with rounding/SAE operand R: W pass-through, U mask.  */
4037 #define _mm512_mask_getexp_round_ph(W, U, A, R) \
4038 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
4039 (__v32hf)(__m512h)(W), (__mmask32)(U), R))
/* Zero-masked vgetexpph with rounding/SAE operand R: lanes cleared by
   mask U become 0.0.  */
4041 #define _mm512_maskz_getexp_round_ph(U, A, R) \
4042 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
4043 (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
4045 #endif /* __OPTIMIZE__ */
4047 /* Intrinsics vgetmantph. */
4049 extern __inline __m512h
4050 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4051 _mm512_getmant_ph (__m512h __A
, _MM_MANTISSA_NORM_ENUM __B
,
4052 _MM_MANTISSA_SIGN_ENUM __C
)
4054 return (__m512h
) __builtin_ia32_getmantph512_mask ((__v32hf
) __A
,
4056 _mm512_setzero_ph (),
4058 _MM_FROUND_CUR_DIRECTION
);
4061 extern __inline __m512h
4062 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4063 _mm512_mask_getmant_ph (__m512h __W
, __mmask32 __U
, __m512h __A
,
4064 _MM_MANTISSA_NORM_ENUM __B
,
4065 _MM_MANTISSA_SIGN_ENUM __C
)
4067 return (__m512h
) __builtin_ia32_getmantph512_mask ((__v32hf
) __A
,
4070 _MM_FROUND_CUR_DIRECTION
);
4073 extern __inline __m512h
4074 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4075 _mm512_maskz_getmant_ph (__mmask32 __U
, __m512h __A
,
4076 _MM_MANTISSA_NORM_ENUM __B
,
4077 _MM_MANTISSA_SIGN_ENUM __C
)
4079 return (__m512h
) __builtin_ia32_getmantph512_mask ((__v32hf
) __A
,
4082 _mm512_setzero_ph (),
4084 _MM_FROUND_CUR_DIRECTION
);
4087 extern __inline __m512h
4088 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4089 _mm512_getmant_round_ph (__m512h __A
, _MM_MANTISSA_NORM_ENUM __B
,
4090 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
4092 return (__m512h
) __builtin_ia32_getmantph512_mask ((__v32hf
) __A
,
4094 _mm512_setzero_ph (),
4095 (__mmask32
) -1, __R
);
4098 extern __inline __m512h
4099 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4100 _mm512_mask_getmant_round_ph (__m512h __W
, __mmask32 __U
, __m512h __A
,
4101 _MM_MANTISSA_NORM_ENUM __B
,
4102 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
4104 return (__m512h
) __builtin_ia32_getmantph512_mask ((__v32hf
) __A
,
4110 extern __inline __m512h
4111 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4112 _mm512_maskz_getmant_round_ph (__mmask32 __U
, __m512h __A
,
4113 _MM_MANTISSA_NORM_ENUM __B
,
4114 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
4116 return (__m512h
) __builtin_ia32_getmantph512_mask ((__v32hf
) __A
,
4119 _mm512_setzero_ph (),
4124 #define _mm512_getmant_ph(X, B, C) \
4125 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4126 (int)(((C)<<2) | (B)), \
4127 (__v32hf)(__m512h) \
4128 _mm512_setzero_ph(), \
4130 _MM_FROUND_CUR_DIRECTION))
4132 #define _mm512_mask_getmant_ph(W, U, X, B, C) \
4133 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4134 (int)(((C)<<2) | (B)), \
4135 (__v32hf)(__m512h)(W), \
4137 _MM_FROUND_CUR_DIRECTION))
4140 #define _mm512_maskz_getmant_ph(U, X, B, C) \
4141 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4142 (int)(((C)<<2) | (B)), \
4143 (__v32hf)(__m512h) \
4144 _mm512_setzero_ph(), \
4146 _MM_FROUND_CUR_DIRECTION))
4148 #define _mm512_getmant_round_ph(X, B, C, R) \
4149 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4150 (int)(((C)<<2) | (B)), \
4151 (__v32hf)(__m512h) \
4152 _mm512_setzero_ph(), \
4156 #define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) \
4157 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4158 (int)(((C)<<2) | (B)), \
4159 (__v32hf)(__m512h)(W), \
4164 #define _mm512_maskz_getmant_round_ph(U, X, B, C, R) \
4165 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4166 (int)(((C)<<2) | (B)), \
4167 (__v32hf)(__m512h) \
4168 _mm512_setzero_ph(), \
4172 #endif /* __OPTIMIZE__ */
4174 /* Intrinsics vcvtph2dq. */
4175 extern __inline __m512i
4176 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4177 _mm512_cvtph_epi32 (__m256h __A
)
4180 __builtin_ia32_vcvtph2dq512_mask_round (__A
,
4182 _mm512_setzero_si512 (),
4184 _MM_FROUND_CUR_DIRECTION
);
4187 extern __inline __m512i
4188 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4189 _mm512_mask_cvtph_epi32 (__m512i __A
, __mmask16 __B
, __m256h __C
)
4192 __builtin_ia32_vcvtph2dq512_mask_round (__C
,
4195 _MM_FROUND_CUR_DIRECTION
);
4198 extern __inline __m512i
4199 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4200 _mm512_maskz_cvtph_epi32 (__mmask16 __A
, __m256h __B
)
4203 __builtin_ia32_vcvtph2dq512_mask_round (__B
,
4205 _mm512_setzero_si512 (),
4207 _MM_FROUND_CUR_DIRECTION
);
4211 extern __inline __m512i
4212 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4213 _mm512_cvt_roundph_epi32 (__m256h __A
, int __B
)
4216 __builtin_ia32_vcvtph2dq512_mask_round (__A
,
4218 _mm512_setzero_si512 (),
4223 extern __inline __m512i
4224 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4225 _mm512_mask_cvt_roundph_epi32 (__m512i __A
, __mmask16 __B
, __m256h __C
, int __D
)
4228 __builtin_ia32_vcvtph2dq512_mask_round (__C
,
4234 extern __inline __m512i
4235 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4236 _mm512_maskz_cvt_roundph_epi32 (__mmask16 __A
, __m256h __B
, int __C
)
4239 __builtin_ia32_vcvtph2dq512_mask_round (__B
,
4241 _mm512_setzero_si512 (),
4247 #define _mm512_cvt_roundph_epi32(A, B) \
4249 __builtin_ia32_vcvtph2dq512_mask_round ((A), \
4251 _mm512_setzero_si512 (), \
4255 #define _mm512_mask_cvt_roundph_epi32(A, B, C, D) \
4257 __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))
4259 #define _mm512_maskz_cvt_roundph_epi32(A, B, C) \
4261 __builtin_ia32_vcvtph2dq512_mask_round ((B), \
4263 _mm512_setzero_si512 (), \
4267 #endif /* __OPTIMIZE__ */
4269 /* Intrinsics vcvtph2udq. */
4270 extern __inline __m512i
4271 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4272 _mm512_cvtph_epu32 (__m256h __A
)
4275 __builtin_ia32_vcvtph2udq512_mask_round (__A
,
4277 _mm512_setzero_si512 (),
4279 _MM_FROUND_CUR_DIRECTION
);
4282 extern __inline __m512i
4283 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4284 _mm512_mask_cvtph_epu32 (__m512i __A
, __mmask16 __B
, __m256h __C
)
4287 __builtin_ia32_vcvtph2udq512_mask_round (__C
,
4290 _MM_FROUND_CUR_DIRECTION
);
4293 extern __inline __m512i
4294 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4295 _mm512_maskz_cvtph_epu32 (__mmask16 __A
, __m256h __B
)
4298 __builtin_ia32_vcvtph2udq512_mask_round (__B
,
4300 _mm512_setzero_si512 (),
4302 _MM_FROUND_CUR_DIRECTION
);
4306 extern __inline __m512i
4307 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4308 _mm512_cvt_roundph_epu32 (__m256h __A
, int __B
)
4311 __builtin_ia32_vcvtph2udq512_mask_round (__A
,
4313 _mm512_setzero_si512 (),
4318 extern __inline __m512i
4319 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4320 _mm512_mask_cvt_roundph_epu32 (__m512i __A
, __mmask16 __B
, __m256h __C
, int __D
)
4323 __builtin_ia32_vcvtph2udq512_mask_round (__C
,
4329 extern __inline __m512i
4330 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4331 _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A
, __m256h __B
, int __C
)
4334 __builtin_ia32_vcvtph2udq512_mask_round (__B
,
4336 _mm512_setzero_si512 (),
4342 #define _mm512_cvt_roundph_epu32(A, B) \
4344 __builtin_ia32_vcvtph2udq512_mask_round ((A), \
4346 _mm512_setzero_si512 (), \
4350 #define _mm512_mask_cvt_roundph_epu32(A, B, C, D) \
4352 __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))
4354 #define _mm512_maskz_cvt_roundph_epu32(A, B, C) \
4356 __builtin_ia32_vcvtph2udq512_mask_round ((B), \
4358 _mm512_setzero_si512 (), \
4362 #endif /* __OPTIMIZE__ */
4364 /* Intrinsics vcvttph2dq. */
4365 extern __inline __m512i
4366 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4367 _mm512_cvttph_epi32 (__m256h __A
)
4370 __builtin_ia32_vcvttph2dq512_mask_round (__A
,
4372 _mm512_setzero_si512 (),
4374 _MM_FROUND_CUR_DIRECTION
);
4377 extern __inline __m512i
4378 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4379 _mm512_mask_cvttph_epi32 (__m512i __A
, __mmask16 __B
, __m256h __C
)
4382 __builtin_ia32_vcvttph2dq512_mask_round (__C
,
4385 _MM_FROUND_CUR_DIRECTION
);
4388 extern __inline __m512i
4389 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4390 _mm512_maskz_cvttph_epi32 (__mmask16 __A
, __m256h __B
)
4393 __builtin_ia32_vcvttph2dq512_mask_round (__B
,
4395 _mm512_setzero_si512 (),
4397 _MM_FROUND_CUR_DIRECTION
);
4401 extern __inline __m512i
4402 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4403 _mm512_cvtt_roundph_epi32 (__m256h __A
, int __B
)
4406 __builtin_ia32_vcvttph2dq512_mask_round (__A
,
4408 _mm512_setzero_si512 (),
4413 extern __inline __m512i
4414 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4415 _mm512_mask_cvtt_roundph_epi32 (__m512i __A
, __mmask16 __B
,
4416 __m256h __C
, int __D
)
4419 __builtin_ia32_vcvttph2dq512_mask_round (__C
,
4425 extern __inline __m512i
4426 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4427 _mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A
, __m256h __B
, int __C
)
4430 __builtin_ia32_vcvttph2dq512_mask_round (__B
,
4432 _mm512_setzero_si512 (),
4438 #define _mm512_cvtt_roundph_epi32(A, B) \
4440 __builtin_ia32_vcvttph2dq512_mask_round ((A), \
4442 (_mm512_setzero_si512 ()), \
4443 (__mmask16)(-1), (B)))
4445 #define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) \
4447 __builtin_ia32_vcvttph2dq512_mask_round ((C), \
4452 #define _mm512_maskz_cvtt_roundph_epi32(A, B, C) \
4454 __builtin_ia32_vcvttph2dq512_mask_round ((B), \
4456 _mm512_setzero_si512 (), \
4460 #endif /* __OPTIMIZE__ */
4462 /* Intrinsics vcvttph2udq. */
4463 extern __inline __m512i
4464 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4465 _mm512_cvttph_epu32 (__m256h __A
)
4468 __builtin_ia32_vcvttph2udq512_mask_round (__A
,
4470 _mm512_setzero_si512 (),
4472 _MM_FROUND_CUR_DIRECTION
);
4475 extern __inline __m512i
4476 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4477 _mm512_mask_cvttph_epu32 (__m512i __A
, __mmask16 __B
, __m256h __C
)
4480 __builtin_ia32_vcvttph2udq512_mask_round (__C
,
4483 _MM_FROUND_CUR_DIRECTION
);
4486 extern __inline __m512i
4487 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4488 _mm512_maskz_cvttph_epu32 (__mmask16 __A
, __m256h __B
)
4491 __builtin_ia32_vcvttph2udq512_mask_round (__B
,
4493 _mm512_setzero_si512 (),
4495 _MM_FROUND_CUR_DIRECTION
);
4499 extern __inline __m512i
4500 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4501 _mm512_cvtt_roundph_epu32 (__m256h __A
, int __B
)
4504 __builtin_ia32_vcvttph2udq512_mask_round (__A
,
4506 _mm512_setzero_si512 (),
4511 extern __inline __m512i
4512 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4513 _mm512_mask_cvtt_roundph_epu32 (__m512i __A
, __mmask16 __B
,
4514 __m256h __C
, int __D
)
4517 __builtin_ia32_vcvttph2udq512_mask_round (__C
,
4523 extern __inline __m512i
4524 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4525 _mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A
, __m256h __B
, int __C
)
4528 __builtin_ia32_vcvttph2udq512_mask_round (__B
,
4530 _mm512_setzero_si512 (),
4536 #define _mm512_cvtt_roundph_epu32(A, B) \
4538 __builtin_ia32_vcvttph2udq512_mask_round ((A), \
4540 _mm512_setzero_si512 (), \
4544 #define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) \
4546 __builtin_ia32_vcvttph2udq512_mask_round ((C), \
4551 #define _mm512_maskz_cvtt_roundph_epu32(A, B, C) \
4553 __builtin_ia32_vcvttph2udq512_mask_round ((B), \
4555 _mm512_setzero_si512 (), \
4559 #endif /* __OPTIMIZE__ */
4561 /* Intrinsics vcvtdq2ph. */
4562 extern __inline __m256h
4563 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4564 _mm512_cvtepi32_ph (__m512i __A
)
4566 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si
) __A
,
4567 _mm256_setzero_ph (),
4569 _MM_FROUND_CUR_DIRECTION
);
4572 extern __inline __m256h
4573 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4574 _mm512_mask_cvtepi32_ph (__m256h __A
, __mmask16 __B
, __m512i __C
)
4576 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si
) __C
,
4579 _MM_FROUND_CUR_DIRECTION
);
4582 extern __inline __m256h
4583 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4584 _mm512_maskz_cvtepi32_ph (__mmask16 __A
, __m512i __B
)
4586 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si
) __B
,
4587 _mm256_setzero_ph (),
4589 _MM_FROUND_CUR_DIRECTION
);
4593 extern __inline __m256h
4594 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4595 _mm512_cvt_roundepi32_ph (__m512i __A
, int __B
)
4597 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si
) __A
,
4598 _mm256_setzero_ph (),
4603 extern __inline __m256h
4604 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4605 _mm512_mask_cvt_roundepi32_ph (__m256h __A
, __mmask16 __B
, __m512i __C
, int __D
)
4607 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si
) __C
,
4613 extern __inline __m256h
4614 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4615 _mm512_maskz_cvt_roundepi32_ph (__mmask16 __A
, __m512i __B
, int __C
)
4617 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si
) __B
,
4618 _mm256_setzero_ph (),
4624 #define _mm512_cvt_roundepi32_ph(A, B) \
4625 (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), \
4626 _mm256_setzero_ph (), \
4630 #define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) \
4631 (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), \
4636 #define _mm512_maskz_cvt_roundepi32_ph(A, B, C) \
4637 (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), \
4638 _mm256_setzero_ph (), \
4642 #endif /* __OPTIMIZE__ */
4644 /* Intrinsics vcvtudq2ph. */
4645 extern __inline __m256h
4646 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4647 _mm512_cvtepu32_ph (__m512i __A
)
4649 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si
) __A
,
4650 _mm256_setzero_ph (),
4652 _MM_FROUND_CUR_DIRECTION
);
4655 extern __inline __m256h
4656 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4657 _mm512_mask_cvtepu32_ph (__m256h __A
, __mmask16 __B
, __m512i __C
)
4659 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si
) __C
,
4662 _MM_FROUND_CUR_DIRECTION
);
4665 extern __inline __m256h
4666 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4667 _mm512_maskz_cvtepu32_ph (__mmask16 __A
, __m512i __B
)
4669 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si
) __B
,
4670 _mm256_setzero_ph (),
4672 _MM_FROUND_CUR_DIRECTION
);
4676 extern __inline __m256h
4677 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4678 _mm512_cvt_roundepu32_ph (__m512i __A
, int __B
)
4680 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si
) __A
,
4681 _mm256_setzero_ph (),
4686 extern __inline __m256h
4687 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4688 _mm512_mask_cvt_roundepu32_ph (__m256h __A
, __mmask16 __B
, __m512i __C
, int __D
)
4690 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si
) __C
,
4696 extern __inline __m256h
4697 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4698 _mm512_maskz_cvt_roundepu32_ph (__mmask16 __A
, __m512i __B
, int __C
)
4700 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si
) __B
,
4701 _mm256_setzero_ph (),
4707 #define _mm512_cvt_roundepu32_ph(A, B) \
4708 (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), \
4709 _mm256_setzero_ph (), \
4713 #define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) \
4714 (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)C, \
4719 #define _mm512_maskz_cvt_roundepu32_ph(A, B, C) \
4720 (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)B, \
4721 _mm256_setzero_ph (), \
4725 #endif /* __OPTIMIZE__ */
4727 /* Intrinsics vcvtph2qq. */
4728 extern __inline __m512i
4729 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4730 _mm512_cvtph_epi64 (__m128h __A
)
4732 return __builtin_ia32_vcvtph2qq512_mask_round (__A
,
4733 _mm512_setzero_si512 (),
4735 _MM_FROUND_CUR_DIRECTION
);
4738 extern __inline __m512i
4739 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4740 _mm512_mask_cvtph_epi64 (__m512i __A
, __mmask8 __B
, __m128h __C
)
4742 return __builtin_ia32_vcvtph2qq512_mask_round (__C
, __A
, __B
,
4743 _MM_FROUND_CUR_DIRECTION
);
4746 extern __inline __m512i
4747 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4748 _mm512_maskz_cvtph_epi64 (__mmask8 __A
, __m128h __B
)
4750 return __builtin_ia32_vcvtph2qq512_mask_round (__B
,
4751 _mm512_setzero_si512 (),
4753 _MM_FROUND_CUR_DIRECTION
);
4757 extern __inline __m512i
4758 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4759 _mm512_cvt_roundph_epi64 (__m128h __A
, int __B
)
4761 return __builtin_ia32_vcvtph2qq512_mask_round (__A
,
4762 _mm512_setzero_si512 (),
4767 extern __inline __m512i
4768 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4769 _mm512_mask_cvt_roundph_epi64 (__m512i __A
, __mmask8 __B
, __m128h __C
, int __D
)
4771 return __builtin_ia32_vcvtph2qq512_mask_round (__C
, __A
, __B
, __D
);
4774 extern __inline __m512i
4775 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4776 _mm512_maskz_cvt_roundph_epi64 (__mmask8 __A
, __m128h __B
, int __C
)
4778 return __builtin_ia32_vcvtph2qq512_mask_round (__B
,
4779 _mm512_setzero_si512 (),
4785 #define _mm512_cvt_roundph_epi64(A, B) \
4786 (__builtin_ia32_vcvtph2qq512_mask_round ((A), \
4787 _mm512_setzero_si512 (), \
4791 #define _mm512_mask_cvt_roundph_epi64(A, B, C, D) \
4792 (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D)))
4794 #define _mm512_maskz_cvt_roundph_epi64(A, B, C) \
4795 (__builtin_ia32_vcvtph2qq512_mask_round ((B), \
4796 _mm512_setzero_si512 (), \
4800 #endif /* __OPTIMIZE__ */
4802 /* Intrinsics vcvtph2uqq. */
4803 extern __inline __m512i
4804 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4805 _mm512_cvtph_epu64 (__m128h __A
)
4807 return __builtin_ia32_vcvtph2uqq512_mask_round (__A
,
4808 _mm512_setzero_si512 (),
4810 _MM_FROUND_CUR_DIRECTION
);
4813 extern __inline __m512i
4814 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4815 _mm512_mask_cvtph_epu64 (__m512i __A
, __mmask8 __B
, __m128h __C
)
4817 return __builtin_ia32_vcvtph2uqq512_mask_round (__C
, __A
, __B
,
4818 _MM_FROUND_CUR_DIRECTION
);
4821 extern __inline __m512i
4822 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4823 _mm512_maskz_cvtph_epu64 (__mmask8 __A
, __m128h __B
)
4825 return __builtin_ia32_vcvtph2uqq512_mask_round (__B
,
4826 _mm512_setzero_si512 (),
4828 _MM_FROUND_CUR_DIRECTION
);
4833 extern __inline __m512i
4834 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4835 _mm512_cvt_roundph_epu64 (__m128h __A
, int __B
)
4837 return __builtin_ia32_vcvtph2uqq512_mask_round (__A
,
4838 _mm512_setzero_si512 (),
4843 extern __inline __m512i
4844 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4845 _mm512_mask_cvt_roundph_epu64 (__m512i __A
, __mmask8 __B
, __m128h __C
, int __D
)
4847 return __builtin_ia32_vcvtph2uqq512_mask_round (__C
, __A
, __B
, __D
);
4850 extern __inline __m512i
4851 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4852 _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A
, __m128h __B
, int __C
)
4854 return __builtin_ia32_vcvtph2uqq512_mask_round (__B
,
4855 _mm512_setzero_si512 (),
4861 #define _mm512_cvt_roundph_epu64(A, B) \
4862 (__builtin_ia32_vcvtph2uqq512_mask_round ((A), \
4863 _mm512_setzero_si512 (), \
4867 #define _mm512_mask_cvt_roundph_epu64(A, B, C, D) \
4868 (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D)))
4870 #define _mm512_maskz_cvt_roundph_epu64(A, B, C) \
4871 (__builtin_ia32_vcvtph2uqq512_mask_round ((B), \
4872 _mm512_setzero_si512 (), \
4876 #endif /* __OPTIMIZE__ */
4878 /* Intrinsics vcvttph2qq. */
4879 extern __inline __m512i
4880 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4881 _mm512_cvttph_epi64 (__m128h __A
)
4883 return __builtin_ia32_vcvttph2qq512_mask_round (__A
,
4884 _mm512_setzero_si512 (),
4886 _MM_FROUND_CUR_DIRECTION
);
4889 extern __inline __m512i
4890 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4891 _mm512_mask_cvttph_epi64 (__m512i __A
, __mmask8 __B
, __m128h __C
)
4893 return __builtin_ia32_vcvttph2qq512_mask_round (__C
, __A
, __B
,
4894 _MM_FROUND_CUR_DIRECTION
);
4897 extern __inline __m512i
4898 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4899 _mm512_maskz_cvttph_epi64 (__mmask8 __A
, __m128h __B
)
4901 return __builtin_ia32_vcvttph2qq512_mask_round (__B
,
4902 _mm512_setzero_si512 (),
4904 _MM_FROUND_CUR_DIRECTION
);
4908 extern __inline __m512i
4909 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4910 _mm512_cvtt_roundph_epi64 (__m128h __A
, int __B
)
4912 return __builtin_ia32_vcvttph2qq512_mask_round (__A
,
4913 _mm512_setzero_si512 (),
4918 extern __inline __m512i
4919 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4920 _mm512_mask_cvtt_roundph_epi64 (__m512i __A
, __mmask8 __B
, __m128h __C
, int __D
)
4922 return __builtin_ia32_vcvttph2qq512_mask_round (__C
, __A
, __B
, __D
);
4925 extern __inline __m512i
4926 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4927 _mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A
, __m128h __B
, int __C
)
4929 return __builtin_ia32_vcvttph2qq512_mask_round (__B
,
4930 _mm512_setzero_si512 (),
4936 #define _mm512_cvtt_roundph_epi64(A, B) \
4937 (__builtin_ia32_vcvttph2qq512_mask_round ((A), \
4938 _mm512_setzero_si512 (), \
4942 #define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) \
4943 __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D))
4945 #define _mm512_maskz_cvtt_roundph_epi64(A, B, C) \
4946 (__builtin_ia32_vcvttph2qq512_mask_round ((B), \
4947 _mm512_setzero_si512 (), \
4951 #endif /* __OPTIMIZE__ */
4953 /* Intrinsics vcvttph2uqq. */
4954 extern __inline __m512i
4955 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4956 _mm512_cvttph_epu64 (__m128h __A
)
4958 return __builtin_ia32_vcvttph2uqq512_mask_round (__A
,
4959 _mm512_setzero_si512 (),
4961 _MM_FROUND_CUR_DIRECTION
);
4964 extern __inline __m512i
4965 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4966 _mm512_mask_cvttph_epu64 (__m512i __A
, __mmask8 __B
, __m128h __C
)
4968 return __builtin_ia32_vcvttph2uqq512_mask_round (__C
, __A
, __B
,
4969 _MM_FROUND_CUR_DIRECTION
);
4972 extern __inline __m512i
4973 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4974 _mm512_maskz_cvttph_epu64 (__mmask8 __A
, __m128h __B
)
4976 return __builtin_ia32_vcvttph2uqq512_mask_round (__B
,
4977 _mm512_setzero_si512 (),
4979 _MM_FROUND_CUR_DIRECTION
);
4983 extern __inline __m512i
4984 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4985 _mm512_cvtt_roundph_epu64 (__m128h __A
, int __B
)
4987 return __builtin_ia32_vcvttph2uqq512_mask_round (__A
,
4988 _mm512_setzero_si512 (),
4993 extern __inline __m512i
4994 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4995 _mm512_mask_cvtt_roundph_epu64 (__m512i __A
, __mmask8 __B
, __m128h __C
, int __D
)
4997 return __builtin_ia32_vcvttph2uqq512_mask_round (__C
, __A
, __B
, __D
);
5000 extern __inline __m512i
5001 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5002 _mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A
, __m128h __B
, int __C
)
5004 return __builtin_ia32_vcvttph2uqq512_mask_round (__B
,
5005 _mm512_setzero_si512 (),
5011 #define _mm512_cvtt_roundph_epu64(A, B) \
5012 (__builtin_ia32_vcvttph2uqq512_mask_round ((A), \
5013 _mm512_setzero_si512 (), \
5017 #define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) \
5018 __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D))
5020 #define _mm512_maskz_cvtt_roundph_epu64(A, B, C) \
5021 (__builtin_ia32_vcvttph2uqq512_mask_round ((B), \
5022 _mm512_setzero_si512 (), \
5026 #endif /* __OPTIMIZE__ */
5028 /* Intrinsics vcvtqq2ph. */
5029 extern __inline __m128h
5030 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5031 _mm512_cvtepi64_ph (__m512i __A
)
5033 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di
) __A
,
5036 _MM_FROUND_CUR_DIRECTION
);
5039 extern __inline __m128h
5040 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5041 _mm512_mask_cvtepi64_ph (__m128h __A
, __mmask8 __B
, __m512i __C
)
5043 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di
) __C
,
5046 _MM_FROUND_CUR_DIRECTION
);
5049 extern __inline __m128h
5050 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5051 _mm512_maskz_cvtepi64_ph (__mmask8 __A
, __m512i __B
)
5053 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di
) __B
,
5056 _MM_FROUND_CUR_DIRECTION
);
5060 extern __inline __m128h
5061 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5062 _mm512_cvt_roundepi64_ph (__m512i __A
, int __B
)
5064 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di
) __A
,
5070 extern __inline __m128h
5071 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5072 _mm512_mask_cvt_roundepi64_ph (__m128h __A
, __mmask8 __B
, __m512i __C
, int __D
)
5074 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di
) __C
,
5080 extern __inline __m128h
5081 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5082 _mm512_maskz_cvt_roundepi64_ph (__mmask8 __A
, __m512i __B
, int __C
)
5084 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di
) __B
,
5091 #define _mm512_cvt_roundepi64_ph(A, B) \
5092 (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), \
5093 _mm_setzero_ph (), \
5097 #define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) \
5098 (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
5100 #define _mm512_maskz_cvt_roundepi64_ph(A, B, C) \
5101 (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), \
5102 _mm_setzero_ph (), \
5106 #endif /* __OPTIMIZE__ */
5108 /* Intrinsics vcvtuqq2ph. */
5109 extern __inline __m128h
5110 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5111 _mm512_cvtepu64_ph (__m512i __A
)
5113 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di
) __A
,
5116 _MM_FROUND_CUR_DIRECTION
);
5119 extern __inline __m128h
5120 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5121 _mm512_mask_cvtepu64_ph (__m128h __A
, __mmask8 __B
, __m512i __C
)
5123 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di
) __C
,
5126 _MM_FROUND_CUR_DIRECTION
);
5129 extern __inline __m128h
5130 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5131 _mm512_maskz_cvtepu64_ph (__mmask8 __A
, __m512i __B
)
5133 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di
) __B
,
5136 _MM_FROUND_CUR_DIRECTION
);
5140 extern __inline __m128h
5141 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5142 _mm512_cvt_roundepu64_ph (__m512i __A
, int __B
)
5144 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di
) __A
,
5150 extern __inline __m128h
5151 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5152 _mm512_mask_cvt_roundepu64_ph (__m128h __A
, __mmask8 __B
, __m512i __C
, int __D
)
5154 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di
) __C
,
5160 extern __inline __m128h
5161 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5162 _mm512_maskz_cvt_roundepu64_ph (__mmask8 __A
, __m512i __B
, int __C
)
5164 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di
) __B
,
5171 #define _mm512_cvt_roundepu64_ph(A, B) \
5172 (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), \
5173 _mm_setzero_ph (), \
5177 #define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) \
5178 (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
5180 #define _mm512_maskz_cvt_roundepu64_ph(A, B, C) \
5181 (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), \
5182 _mm_setzero_ph (), \
5186 #endif /* __OPTIMIZE__ */
5188 /* Intrinsics vcvtph2w. */
5189 extern __inline __m512i
5190 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5191 _mm512_cvtph_epi16 (__m512h __A
)
5194 __builtin_ia32_vcvtph2w512_mask_round (__A
,
5196 _mm512_setzero_si512 (),
5198 _MM_FROUND_CUR_DIRECTION
);
5201 extern __inline __m512i
5202 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5203 _mm512_mask_cvtph_epi16 (__m512i __A
, __mmask32 __B
, __m512h __C
)
5206 __builtin_ia32_vcvtph2w512_mask_round (__C
,
5209 _MM_FROUND_CUR_DIRECTION
);
5212 extern __inline __m512i
5213 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5214 _mm512_maskz_cvtph_epi16 (__mmask32 __A
, __m512h __B
)
5217 __builtin_ia32_vcvtph2w512_mask_round (__B
,
5219 _mm512_setzero_si512 (),
5221 _MM_FROUND_CUR_DIRECTION
);
5225 extern __inline __m512i
5226 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5227 _mm512_cvt_roundph_epi16 (__m512h __A
, int __B
)
5230 __builtin_ia32_vcvtph2w512_mask_round (__A
,
5232 _mm512_setzero_si512 (),
5237 extern __inline __m512i
5238 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5239 _mm512_mask_cvt_roundph_epi16 (__m512i __A
, __mmask32 __B
, __m512h __C
, int __D
)
5242 __builtin_ia32_vcvtph2w512_mask_round (__C
,
5248 extern __inline __m512i
5249 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5250 _mm512_maskz_cvt_roundph_epi16 (__mmask32 __A
, __m512h __B
, int __C
)
5253 __builtin_ia32_vcvtph2w512_mask_round (__B
,
5255 _mm512_setzero_si512 (),
5261 #define _mm512_cvt_roundph_epi16(A, B) \
5262 ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), \
5264 _mm512_setzero_si512 (), \
5268 #define _mm512_mask_cvt_roundph_epi16(A, B, C, D) \
5269 ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), \
5274 #define _mm512_maskz_cvt_roundph_epi16(A, B, C) \
5275 ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), \
5277 _mm512_setzero_si512 (), \
5281 #endif /* __OPTIMIZE__ */
5283 /* Intrinsics vcvtph2uw. */
5284 extern __inline __m512i
5285 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5286 _mm512_cvtph_epu16 (__m512h __A
)
5289 __builtin_ia32_vcvtph2uw512_mask_round (__A
,
5291 _mm512_setzero_si512 (),
5293 _MM_FROUND_CUR_DIRECTION
);
5296 extern __inline __m512i
5297 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5298 _mm512_mask_cvtph_epu16 (__m512i __A
, __mmask32 __B
, __m512h __C
)
5301 __builtin_ia32_vcvtph2uw512_mask_round (__C
, (__v32hi
) __A
, __B
,
5302 _MM_FROUND_CUR_DIRECTION
);
5305 extern __inline __m512i
5306 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5307 _mm512_maskz_cvtph_epu16 (__mmask32 __A
, __m512h __B
)
5310 __builtin_ia32_vcvtph2uw512_mask_round (__B
,
5312 _mm512_setzero_si512 (),
5314 _MM_FROUND_CUR_DIRECTION
);
5318 extern __inline __m512i
5319 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5320 _mm512_cvt_roundph_epu16 (__m512h __A
, int __B
)
5323 __builtin_ia32_vcvtph2uw512_mask_round (__A
,
5325 _mm512_setzero_si512 (),
5330 extern __inline __m512i
5331 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5332 _mm512_mask_cvt_roundph_epu16 (__m512i __A
, __mmask32 __B
, __m512h __C
, int __D
)
5335 __builtin_ia32_vcvtph2uw512_mask_round (__C
, (__v32hi
) __A
, __B
, __D
);
5338 extern __inline __m512i
5339 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5340 _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A
, __m512h __B
, int __C
)
5343 __builtin_ia32_vcvtph2uw512_mask_round (__B
,
5345 _mm512_setzero_si512 (),
5351 #define _mm512_cvt_roundph_epu16(A, B) \
5353 __builtin_ia32_vcvtph2uw512_mask_round ((A), \
5355 _mm512_setzero_si512 (), \
5356 (__mmask32)-1, (B)))
5358 #define _mm512_mask_cvt_roundph_epu16(A, B, C, D) \
5360 __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
5362 #define _mm512_maskz_cvt_roundph_epu16(A, B, C) \
5364 __builtin_ia32_vcvtph2uw512_mask_round ((B), \
5366 _mm512_setzero_si512 (), \
5370 #endif /* __OPTIMIZE__ */
5372 /* Intrinsics vcvttph2w. */
5373 extern __inline __m512i
5374 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5375 _mm512_cvttph_epi16 (__m512h __A
)
5378 __builtin_ia32_vcvttph2w512_mask_round (__A
,
5380 _mm512_setzero_si512 (),
5382 _MM_FROUND_CUR_DIRECTION
);
5385 extern __inline __m512i
5386 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5387 _mm512_mask_cvttph_epi16 (__m512i __A
, __mmask32 __B
, __m512h __C
)
5390 __builtin_ia32_vcvttph2w512_mask_round (__C
,
5393 _MM_FROUND_CUR_DIRECTION
);
5396 extern __inline __m512i
5397 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5398 _mm512_maskz_cvttph_epi16 (__mmask32 __A
, __m512h __B
)
5401 __builtin_ia32_vcvttph2w512_mask_round (__B
,
5403 _mm512_setzero_si512 (),
5405 _MM_FROUND_CUR_DIRECTION
);
5409 extern __inline __m512i
5410 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5411 _mm512_cvtt_roundph_epi16 (__m512h __A
, int __B
)
5414 __builtin_ia32_vcvttph2w512_mask_round (__A
,
5416 _mm512_setzero_si512 (),
5421 extern __inline __m512i
5422 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5423 _mm512_mask_cvtt_roundph_epi16 (__m512i __A
, __mmask32 __B
,
5424 __m512h __C
, int __D
)
5427 __builtin_ia32_vcvttph2w512_mask_round (__C
,
5433 extern __inline __m512i
5434 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5435 _mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A
, __m512h __B
, int __C
)
5438 __builtin_ia32_vcvttph2w512_mask_round (__B
,
5440 _mm512_setzero_si512 (),
5446 #define _mm512_cvtt_roundph_epi16(A, B) \
5448 __builtin_ia32_vcvttph2w512_mask_round ((A), \
5450 _mm512_setzero_si512 (), \
5454 #define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) \
5456 __builtin_ia32_vcvttph2w512_mask_round ((C), \
5461 #define _mm512_maskz_cvtt_roundph_epi16(A, B, C) \
5463 __builtin_ia32_vcvttph2w512_mask_round ((B), \
5465 _mm512_setzero_si512 (), \
5469 #endif /* __OPTIMIZE__ */
5471 /* Intrinsics vcvttph2uw. */
5472 extern __inline __m512i
5473 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5474 _mm512_cvttph_epu16 (__m512h __A
)
5477 __builtin_ia32_vcvttph2uw512_mask_round (__A
,
5479 _mm512_setzero_si512 (),
5481 _MM_FROUND_CUR_DIRECTION
);
5484 extern __inline __m512i
5485 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5486 _mm512_mask_cvttph_epu16 (__m512i __A
, __mmask32 __B
, __m512h __C
)
5489 __builtin_ia32_vcvttph2uw512_mask_round (__C
,
5492 _MM_FROUND_CUR_DIRECTION
);
5495 extern __inline __m512i
5496 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5497 _mm512_maskz_cvttph_epu16 (__mmask32 __A
, __m512h __B
)
5500 __builtin_ia32_vcvttph2uw512_mask_round (__B
,
5502 _mm512_setzero_si512 (),
5504 _MM_FROUND_CUR_DIRECTION
);
5508 extern __inline __m512i
5509 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5510 _mm512_cvtt_roundph_epu16 (__m512h __A
, int __B
)
5513 __builtin_ia32_vcvttph2uw512_mask_round (__A
,
5515 _mm512_setzero_si512 (),
5520 extern __inline __m512i
5521 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5522 _mm512_mask_cvtt_roundph_epu16 (__m512i __A
, __mmask32 __B
,
5523 __m512h __C
, int __D
)
5526 __builtin_ia32_vcvttph2uw512_mask_round (__C
,
5532 extern __inline __m512i
5533 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5534 _mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A
, __m512h __B
, int __C
)
5537 __builtin_ia32_vcvttph2uw512_mask_round (__B
,
5539 _mm512_setzero_si512 (),
5545 #define _mm512_cvtt_roundph_epu16(A, B) \
5547 __builtin_ia32_vcvttph2uw512_mask_round ((A), \
5549 _mm512_setzero_si512 (), \
5553 #define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) \
5555 __builtin_ia32_vcvttph2uw512_mask_round ((C), \
5560 #define _mm512_maskz_cvtt_roundph_epu16(A, B, C) \
5562 __builtin_ia32_vcvttph2uw512_mask_round ((B), \
5564 _mm512_setzero_si512 (), \
5568 #endif /* __OPTIMIZE__ */
5570 /* Intrinsics vcvtw2ph. */
5571 extern __inline __m512h
5572 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5573 _mm512_cvtepi16_ph (__m512i __A
)
5575 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi
) __A
,
5576 _mm512_setzero_ph (),
5578 _MM_FROUND_CUR_DIRECTION
);
5581 extern __inline __m512h
5582 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5583 _mm512_mask_cvtepi16_ph (__m512h __A
, __mmask32 __B
, __m512i __C
)
5585 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi
) __C
,
5588 _MM_FROUND_CUR_DIRECTION
);
5591 extern __inline __m512h
5592 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5593 _mm512_maskz_cvtepi16_ph (__mmask32 __A
, __m512i __B
)
5595 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi
) __B
,
5596 _mm512_setzero_ph (),
5598 _MM_FROUND_CUR_DIRECTION
);
5602 extern __inline __m512h
5603 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5604 _mm512_cvt_roundepi16_ph (__m512i __A
, int __B
)
5606 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi
) __A
,
5607 _mm512_setzero_ph (),
5612 extern __inline __m512h
5613 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5614 _mm512_mask_cvt_roundepi16_ph (__m512h __A
, __mmask32 __B
, __m512i __C
, int __D
)
5616 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi
) __C
,
5622 extern __inline __m512h
5623 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5624 _mm512_maskz_cvt_roundepi16_ph (__mmask32 __A
, __m512i __B
, int __C
)
5626 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi
) __B
,
5627 _mm512_setzero_ph (),
5633 #define _mm512_cvt_roundepi16_ph(A, B) \
5634 (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A), \
5635 _mm512_setzero_ph (), \
5639 #define _mm512_mask_cvt_roundepi16_ph(A, B, C, D) \
5640 (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C), \
5645 #define _mm512_maskz_cvt_roundepi16_ph(A, B, C) \
5646 (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B), \
5647 _mm512_setzero_ph (), \
5651 #endif /* __OPTIMIZE__ */
5653 /* Intrinsics vcvtuw2ph. */
5654 extern __inline __m512h
5655 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5656 _mm512_cvtepu16_ph (__m512i __A
)
5658 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi
) __A
,
5659 _mm512_setzero_ph (),
5661 _MM_FROUND_CUR_DIRECTION
);
5664 extern __inline __m512h
5665 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5666 _mm512_mask_cvtepu16_ph (__m512h __A
, __mmask32 __B
, __m512i __C
)
5668 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi
) __C
,
5671 _MM_FROUND_CUR_DIRECTION
);
5674 extern __inline __m512h
5675 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5676 _mm512_maskz_cvtepu16_ph (__mmask32 __A
, __m512i __B
)
5678 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi
) __B
,
5679 _mm512_setzero_ph (),
5681 _MM_FROUND_CUR_DIRECTION
);
5685 extern __inline __m512h
5686 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5687 _mm512_cvt_roundepu16_ph (__m512i __A
, int __B
)
5689 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi
) __A
,
5690 _mm512_setzero_ph (),
5695 extern __inline __m512h
5696 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5697 _mm512_mask_cvt_roundepu16_ph (__m512h __A
, __mmask32 __B
, __m512i __C
, int __D
)
5699 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi
) __C
,
5705 extern __inline __m512h
5706 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5707 _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A
, __m512i __B
, int __C
)
5709 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi
) __B
,
5710 _mm512_setzero_ph (),
5716 #define _mm512_cvt_roundepu16_ph(A, B) \
5717 (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A), \
5718 _mm512_setzero_ph (), \
5722 #define _mm512_mask_cvt_roundepu16_ph(A, B, C, D) \
5723 (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C), \
5728 #define _mm512_maskz_cvt_roundepu16_ph(A, B, C) \
5729 (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B), \
5730 _mm512_setzero_ph (), \
5734 #endif /* __OPTIMIZE__ */
5736 /* Intrinsics vcvtph2pd. */
5737 extern __inline __m512d
5738 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5739 _mm512_cvtph_pd (__m128h __A
)
5741 return __builtin_ia32_vcvtph2pd512_mask_round (__A
,
5742 _mm512_setzero_pd (),
5744 _MM_FROUND_CUR_DIRECTION
);
5747 extern __inline __m512d
5748 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5749 _mm512_mask_cvtph_pd (__m512d __A
, __mmask8 __B
, __m128h __C
)
5751 return __builtin_ia32_vcvtph2pd512_mask_round (__C
, __A
, __B
,
5752 _MM_FROUND_CUR_DIRECTION
);
5755 extern __inline __m512d
5756 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5757 _mm512_maskz_cvtph_pd (__mmask8 __A
, __m128h __B
)
5759 return __builtin_ia32_vcvtph2pd512_mask_round (__B
,
5760 _mm512_setzero_pd (),
5762 _MM_FROUND_CUR_DIRECTION
);
5766 extern __inline __m512d
5767 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5768 _mm512_cvt_roundph_pd (__m128h __A
, int __B
)
5770 return __builtin_ia32_vcvtph2pd512_mask_round (__A
,
5771 _mm512_setzero_pd (),
5776 extern __inline __m512d
5777 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5778 _mm512_mask_cvt_roundph_pd (__m512d __A
, __mmask8 __B
, __m128h __C
, int __D
)
5780 return __builtin_ia32_vcvtph2pd512_mask_round (__C
, __A
, __B
, __D
);
5783 extern __inline __m512d
5784 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5785 _mm512_maskz_cvt_roundph_pd (__mmask8 __A
, __m128h __B
, int __C
)
5787 return __builtin_ia32_vcvtph2pd512_mask_round (__B
,
5788 _mm512_setzero_pd (),
5794 #define _mm512_cvt_roundph_pd(A, B) \
5795 (__builtin_ia32_vcvtph2pd512_mask_round ((A), \
5796 _mm512_setzero_pd (), \
5800 #define _mm512_mask_cvt_roundph_pd(A, B, C, D) \
5801 (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))
5803 #define _mm512_maskz_cvt_roundph_pd(A, B, C) \
5804 (__builtin_ia32_vcvtph2pd512_mask_round ((B), \
5805 _mm512_setzero_pd (), \
5809 #endif /* __OPTIMIZE__ */
5811 /* Intrinsics vcvtph2psx. */
5812 extern __inline __m512
5813 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5814 _mm512_cvtxph_ps (__m256h __A
)
5816 return __builtin_ia32_vcvtph2psx512_mask_round (__A
,
5817 _mm512_setzero_ps (),
5819 _MM_FROUND_CUR_DIRECTION
);
5822 extern __inline __m512
5823 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5824 _mm512_mask_cvtxph_ps (__m512 __A
, __mmask16 __B
, __m256h __C
)
5826 return __builtin_ia32_vcvtph2psx512_mask_round (__C
, __A
, __B
,
5827 _MM_FROUND_CUR_DIRECTION
);
5830 extern __inline __m512
5831 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5832 _mm512_maskz_cvtxph_ps (__mmask16 __A
, __m256h __B
)
5834 return __builtin_ia32_vcvtph2psx512_mask_round (__B
,
5835 _mm512_setzero_ps (),
5837 _MM_FROUND_CUR_DIRECTION
);
5841 extern __inline __m512
5842 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5843 _mm512_cvtx_roundph_ps (__m256h __A
, int __B
)
5845 return __builtin_ia32_vcvtph2psx512_mask_round (__A
,
5846 _mm512_setzero_ps (),
5851 extern __inline __m512
5852 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5853 _mm512_mask_cvtx_roundph_ps (__m512 __A
, __mmask16 __B
, __m256h __C
, int __D
)
5855 return __builtin_ia32_vcvtph2psx512_mask_round (__C
, __A
, __B
, __D
);
5858 extern __inline __m512
5859 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5860 _mm512_maskz_cvtx_roundph_ps (__mmask16 __A
, __m256h __B
, int __C
)
5862 return __builtin_ia32_vcvtph2psx512_mask_round (__B
,
5863 _mm512_setzero_ps (),
5869 #define _mm512_cvtx_roundph_ps(A, B) \
5870 (__builtin_ia32_vcvtph2psx512_mask_round ((A), \
5871 _mm512_setzero_ps (), \
5875 #define _mm512_mask_cvtx_roundph_ps(A, B, C, D) \
5876 (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))
5878 #define _mm512_maskz_cvtx_roundph_ps(A, B, C) \
5879 (__builtin_ia32_vcvtph2psx512_mask_round ((B), \
5880 _mm512_setzero_ps (), \
5883 #endif /* __OPTIMIZE__ */
5885 /* Intrinsics vcvtps2ph. */
5886 extern __inline __m256h
5887 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5888 _mm512_cvtxps_ph (__m512 __A
)
5890 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf
) __A
,
5891 _mm256_setzero_ph (),
5893 _MM_FROUND_CUR_DIRECTION
);
5896 extern __inline __m256h
5897 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5898 _mm512_mask_cvtxps_ph (__m256h __A
, __mmask16 __B
, __m512 __C
)
5900 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf
) __C
,
5902 _MM_FROUND_CUR_DIRECTION
);
5905 extern __inline __m256h
5906 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5907 _mm512_maskz_cvtxps_ph (__mmask16 __A
, __m512 __B
)
5909 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf
) __B
,
5910 _mm256_setzero_ph (),
5912 _MM_FROUND_CUR_DIRECTION
);
5916 extern __inline __m256h
5917 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5918 _mm512_cvtx_roundps_ph (__m512 __A
, int __B
)
5920 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf
) __A
,
5921 _mm256_setzero_ph (),
5926 extern __inline __m256h
5927 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5928 _mm512_mask_cvtx_roundps_ph (__m256h __A
, __mmask16 __B
, __m512 __C
, int __D
)
5930 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf
) __C
,
5934 extern __inline __m256h
5935 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5936 _mm512_maskz_cvtx_roundps_ph (__mmask16 __A
, __m512 __B
, int __C
)
5938 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf
) __B
,
5939 _mm256_setzero_ph (),
5944 #define _mm512_cvtx_roundps_ph(A, B) \
5945 (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A), \
5946 _mm256_setzero_ph (),\
5947 (__mmask16)-1, (B)))
5949 #define _mm512_mask_cvtx_roundps_ph(A, B, C, D) \
5950 (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C), \
5953 #define _mm512_maskz_cvtx_roundps_ph(A, B, C) \
5954 (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B), \
5955 _mm256_setzero_ph (),\
5957 #endif /* __OPTIMIZE__ */
5959 /* Intrinsics vcvtpd2ph. */
5960 extern __inline __m128h
5961 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5962 _mm512_cvtpd_ph (__m512d __A
)
5964 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df
) __A
,
5967 _MM_FROUND_CUR_DIRECTION
);
5970 extern __inline __m128h
5971 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5972 _mm512_mask_cvtpd_ph (__m128h __A
, __mmask8 __B
, __m512d __C
)
5974 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df
) __C
,
5976 _MM_FROUND_CUR_DIRECTION
);
5979 extern __inline __m128h
5980 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5981 _mm512_maskz_cvtpd_ph (__mmask8 __A
, __m512d __B
)
5983 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df
) __B
,
5986 _MM_FROUND_CUR_DIRECTION
);
5990 extern __inline __m128h
5991 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
5992 _mm512_cvt_roundpd_ph (__m512d __A
, int __B
)
5994 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df
) __A
,
6000 extern __inline __m128h
6001 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6002 _mm512_mask_cvt_roundpd_ph (__m128h __A
, __mmask8 __B
, __m512d __C
, int __D
)
6004 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df
) __C
,
6008 extern __inline __m128h
6009 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6010 _mm512_maskz_cvt_roundpd_ph (__mmask8 __A
, __m512d __B
, int __C
)
6012 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df
) __B
,
6018 #define _mm512_cvt_roundpd_ph(A, B) \
6019 (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A), \
6020 _mm_setzero_ph (), \
6023 #define _mm512_mask_cvt_roundpd_ph(A, B, C, D) \
6024 (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C), \
6027 #define _mm512_maskz_cvt_roundpd_ph(A, B, C) \
6028 (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B), \
6029 _mm_setzero_ph (), \
6032 #endif /* __OPTIMIZE__ */
6034 /* Intrinsics vfmaddsub[132,213,231]ph. */
6035 extern __inline __m512h
6036 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6037 _mm512_fmaddsub_ph (__m512h __A
, __m512h __B
, __m512h __C
)
6040 __builtin_ia32_vfmaddsubph512_mask ((__v32hf
) __A
,
6044 _MM_FROUND_CUR_DIRECTION
);
6047 extern __inline __m512h
6048 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6049 _mm512_mask_fmaddsub_ph (__m512h __A
, __mmask32 __U
, __m512h __B
, __m512h __C
)
6052 __builtin_ia32_vfmaddsubph512_mask ((__v32hf
) __A
,
6056 _MM_FROUND_CUR_DIRECTION
);
6059 extern __inline __m512h
6060 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6061 _mm512_mask3_fmaddsub_ph (__m512h __A
, __m512h __B
, __m512h __C
, __mmask32 __U
)
6064 __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf
) __A
,
6068 _MM_FROUND_CUR_DIRECTION
);
6071 extern __inline __m512h
6072 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6073 _mm512_maskz_fmaddsub_ph (__mmask32 __U
, __m512h __A
, __m512h __B
, __m512h __C
)
6076 __builtin_ia32_vfmaddsubph512_maskz ((__v32hf
) __A
,
6080 _MM_FROUND_CUR_DIRECTION
);
6084 extern __inline __m512h
6085 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6086 _mm512_fmaddsub_round_ph (__m512h __A
, __m512h __B
, __m512h __C
, const int __R
)
6089 __builtin_ia32_vfmaddsubph512_mask ((__v32hf
) __A
,
6092 (__mmask32
) -1, __R
);
6095 extern __inline __m512h
6096 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6097 _mm512_mask_fmaddsub_round_ph (__m512h __A
, __mmask32 __U
, __m512h __B
,
6098 __m512h __C
, const int __R
)
6101 __builtin_ia32_vfmaddsubph512_mask ((__v32hf
) __A
,
6104 (__mmask32
) __U
, __R
);
6107 extern __inline __m512h
6108 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6109 _mm512_mask3_fmaddsub_round_ph (__m512h __A
, __m512h __B
, __m512h __C
,
6110 __mmask32 __U
, const int __R
)
6113 __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf
) __A
,
6116 (__mmask32
) __U
, __R
);
6119 extern __inline __m512h
6120 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6121 _mm512_maskz_fmaddsub_round_ph (__mmask32 __U
, __m512h __A
, __m512h __B
,
6122 __m512h __C
, const int __R
)
6125 __builtin_ia32_vfmaddsubph512_maskz ((__v32hf
) __A
,
6128 (__mmask32
) __U
, __R
);
6132 #define _mm512_fmaddsub_round_ph(A, B, C, R) \
6133 ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))
6135 #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
6136 ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))
6138 #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
6139 ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))
6141 #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
6142 ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))
6144 #endif /* __OPTIMIZE__ */
6146 /* Intrinsics vfmsubadd[132,213,231]ph. */
6147 extern __inline __m512h
6148 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6149 _mm512_fmsubadd_ph (__m512h __A
, __m512h __B
, __m512h __C
)
6152 __builtin_ia32_vfmsubaddph512_mask ((__v32hf
) __A
,
6156 _MM_FROUND_CUR_DIRECTION
);
6159 extern __inline __m512h
6160 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6161 _mm512_mask_fmsubadd_ph (__m512h __A
, __mmask32 __U
,
6162 __m512h __B
, __m512h __C
)
6165 __builtin_ia32_vfmsubaddph512_mask ((__v32hf
) __A
,
6169 _MM_FROUND_CUR_DIRECTION
);
6172 extern __inline __m512h
6173 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6174 _mm512_mask3_fmsubadd_ph (__m512h __A
, __m512h __B
,
6175 __m512h __C
, __mmask32 __U
)
6178 __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf
) __A
,
6182 _MM_FROUND_CUR_DIRECTION
);
6185 extern __inline __m512h
6186 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6187 _mm512_maskz_fmsubadd_ph (__mmask32 __U
, __m512h __A
,
6188 __m512h __B
, __m512h __C
)
6191 __builtin_ia32_vfmsubaddph512_maskz ((__v32hf
) __A
,
6195 _MM_FROUND_CUR_DIRECTION
);
6199 extern __inline __m512h
6200 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6201 _mm512_fmsubadd_round_ph (__m512h __A
, __m512h __B
,
6202 __m512h __C
, const int __R
)
6205 __builtin_ia32_vfmsubaddph512_mask ((__v32hf
) __A
,
6208 (__mmask32
) -1, __R
);
6211 extern __inline __m512h
6212 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6213 _mm512_mask_fmsubadd_round_ph (__m512h __A
, __mmask32 __U
, __m512h __B
,
6214 __m512h __C
, const int __R
)
6217 __builtin_ia32_vfmsubaddph512_mask ((__v32hf
) __A
,
6220 (__mmask32
) __U
, __R
);
6223 extern __inline __m512h
6224 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6225 _mm512_mask3_fmsubadd_round_ph (__m512h __A
, __m512h __B
, __m512h __C
,
6226 __mmask32 __U
, const int __R
)
6229 __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf
) __A
,
6232 (__mmask32
) __U
, __R
);
6235 extern __inline __m512h
6236 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6237 _mm512_maskz_fmsubadd_round_ph (__mmask32 __U
, __m512h __A
, __m512h __B
,
6238 __m512h __C
, const int __R
)
6241 __builtin_ia32_vfmsubaddph512_maskz ((__v32hf
) __A
,
6244 (__mmask32
) __U
, __R
);
6248 #define _mm512_fmsubadd_round_ph(A, B, C, R) \
6249 ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))
6251 #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
6252 ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))
6254 #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
6255 ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))
6257 #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
6258 ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))
6260 #endif /* __OPTIMIZE__ */
6262 /* Intrinsics vfmadd[132,213,231]ph. */
6263 extern __inline __m512h
6264 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6265 _mm512_fmadd_ph (__m512h __A
, __m512h __B
, __m512h __C
)
6268 __builtin_ia32_vfmaddph512_mask ((__v32hf
) __A
,
6272 _MM_FROUND_CUR_DIRECTION
);
6275 extern __inline __m512h
6276 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6277 _mm512_mask_fmadd_ph (__m512h __A
, __mmask32 __U
, __m512h __B
, __m512h __C
)
6280 __builtin_ia32_vfmaddph512_mask ((__v32hf
) __A
,
6284 _MM_FROUND_CUR_DIRECTION
);
6287 extern __inline __m512h
6288 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6289 _mm512_mask3_fmadd_ph (__m512h __A
, __m512h __B
, __m512h __C
, __mmask32 __U
)
6292 __builtin_ia32_vfmaddph512_mask3 ((__v32hf
) __A
,
6296 _MM_FROUND_CUR_DIRECTION
);
6299 extern __inline __m512h
6300 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6301 _mm512_maskz_fmadd_ph (__mmask32 __U
, __m512h __A
, __m512h __B
, __m512h __C
)
6304 __builtin_ia32_vfmaddph512_maskz ((__v32hf
) __A
,
6308 _MM_FROUND_CUR_DIRECTION
);
6312 extern __inline __m512h
6313 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6314 _mm512_fmadd_round_ph (__m512h __A
, __m512h __B
, __m512h __C
, const int __R
)
6316 return (__m512h
) __builtin_ia32_vfmaddph512_mask ((__v32hf
) __A
,
6319 (__mmask32
) -1, __R
);
6322 extern __inline __m512h
6323 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6324 _mm512_mask_fmadd_round_ph (__m512h __A
, __mmask32 __U
, __m512h __B
,
6325 __m512h __C
, const int __R
)
6327 return (__m512h
) __builtin_ia32_vfmaddph512_mask ((__v32hf
) __A
,
6330 (__mmask32
) __U
, __R
);
6333 extern __inline __m512h
6334 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6335 _mm512_mask3_fmadd_round_ph (__m512h __A
, __m512h __B
, __m512h __C
,
6336 __mmask32 __U
, const int __R
)
6338 return (__m512h
) __builtin_ia32_vfmaddph512_mask3 ((__v32hf
) __A
,
6341 (__mmask32
) __U
, __R
);
6344 extern __inline __m512h
6345 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6346 _mm512_maskz_fmadd_round_ph (__mmask32 __U
, __m512h __A
, __m512h __B
,
6347 __m512h __C
, const int __R
)
6349 return (__m512h
) __builtin_ia32_vfmaddph512_maskz ((__v32hf
) __A
,
6352 (__mmask32
) __U
, __R
);
6356 #define _mm512_fmadd_round_ph(A, B, C, R) \
6357 ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))
6359 #define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
6360 ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))
6362 #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
6363 ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))
6365 #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
6366 ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))
6368 #endif /* __OPTIMIZE__ */
6370 /* Intrinsics vfnmadd[132,213,231]ph. */
6371 extern __inline __m512h
6372 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6373 _mm512_fnmadd_ph (__m512h __A
, __m512h __B
, __m512h __C
)
6376 __builtin_ia32_vfnmaddph512_mask ((__v32hf
) __A
,
6380 _MM_FROUND_CUR_DIRECTION
);
6383 extern __inline __m512h
6384 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6385 _mm512_mask_fnmadd_ph (__m512h __A
, __mmask32 __U
, __m512h __B
, __m512h __C
)
6388 __builtin_ia32_vfnmaddph512_mask ((__v32hf
) __A
,
6392 _MM_FROUND_CUR_DIRECTION
);
6395 extern __inline __m512h
6396 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6397 _mm512_mask3_fnmadd_ph (__m512h __A
, __m512h __B
, __m512h __C
, __mmask32 __U
)
6400 __builtin_ia32_vfnmaddph512_mask3 ((__v32hf
) __A
,
6404 _MM_FROUND_CUR_DIRECTION
);
6407 extern __inline __m512h
6408 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6409 _mm512_maskz_fnmadd_ph (__mmask32 __U
, __m512h __A
, __m512h __B
, __m512h __C
)
6412 __builtin_ia32_vfnmaddph512_maskz ((__v32hf
) __A
,
6416 _MM_FROUND_CUR_DIRECTION
);
6420 extern __inline __m512h
6421 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6422 _mm512_fnmadd_round_ph (__m512h __A
, __m512h __B
, __m512h __C
, const int __R
)
6424 return (__m512h
) __builtin_ia32_vfnmaddph512_mask ((__v32hf
) __A
,
6427 (__mmask32
) -1, __R
);
6430 extern __inline __m512h
6431 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6432 _mm512_mask_fnmadd_round_ph (__m512h __A
, __mmask32 __U
, __m512h __B
,
6433 __m512h __C
, const int __R
)
6435 return (__m512h
) __builtin_ia32_vfnmaddph512_mask ((__v32hf
) __A
,
6438 (__mmask32
) __U
, __R
);
6441 extern __inline __m512h
6442 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6443 _mm512_mask3_fnmadd_round_ph (__m512h __A
, __m512h __B
, __m512h __C
,
6444 __mmask32 __U
, const int __R
)
6446 return (__m512h
) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf
) __A
,
6449 (__mmask32
) __U
, __R
);
6452 extern __inline __m512h
6453 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6454 _mm512_maskz_fnmadd_round_ph (__mmask32 __U
, __m512h __A
, __m512h __B
,
6455 __m512h __C
, const int __R
)
6457 return (__m512h
) __builtin_ia32_vfnmaddph512_maskz ((__v32hf
) __A
,
6460 (__mmask32
) __U
, __R
);
6464 #define _mm512_fnmadd_round_ph(A, B, C, R) \
6465 ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))
6467 #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
6468 ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))
6470 #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
6471 ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))
6473 #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
6474 ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))
6476 #endif /* __OPTIMIZE__ */
6478 /* Intrinsics vfmsub[132,213,231]ph. */
6479 extern __inline __m512h
6480 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6481 _mm512_fmsub_ph (__m512h __A
, __m512h __B
, __m512h __C
)
6484 __builtin_ia32_vfmsubph512_mask ((__v32hf
) __A
,
6488 _MM_FROUND_CUR_DIRECTION
);
6491 extern __inline __m512h
6492 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6493 _mm512_mask_fmsub_ph (__m512h __A
, __mmask32 __U
, __m512h __B
, __m512h __C
)
6496 __builtin_ia32_vfmsubph512_mask ((__v32hf
) __A
,
6500 _MM_FROUND_CUR_DIRECTION
);
6503 extern __inline __m512h
6504 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6505 _mm512_mask3_fmsub_ph (__m512h __A
, __m512h __B
, __m512h __C
, __mmask32 __U
)
6508 __builtin_ia32_vfmsubph512_mask3 ((__v32hf
) __A
,
6512 _MM_FROUND_CUR_DIRECTION
);
6515 extern __inline __m512h
6516 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6517 _mm512_maskz_fmsub_ph (__mmask32 __U
, __m512h __A
, __m512h __B
, __m512h __C
)
6520 __builtin_ia32_vfmsubph512_maskz ((__v32hf
) __A
,
6524 _MM_FROUND_CUR_DIRECTION
);
6528 extern __inline __m512h
6529 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6530 _mm512_fmsub_round_ph (__m512h __A
, __m512h __B
, __m512h __C
, const int __R
)
6532 return (__m512h
) __builtin_ia32_vfmsubph512_mask ((__v32hf
) __A
,
6535 (__mmask32
) -1, __R
);
6538 extern __inline __m512h
6539 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6540 _mm512_mask_fmsub_round_ph (__m512h __A
, __mmask32 __U
, __m512h __B
,
6541 __m512h __C
, const int __R
)
6543 return (__m512h
) __builtin_ia32_vfmsubph512_mask ((__v32hf
) __A
,
6546 (__mmask32
) __U
, __R
);
6549 extern __inline __m512h
6550 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6551 _mm512_mask3_fmsub_round_ph (__m512h __A
, __m512h __B
, __m512h __C
,
6552 __mmask32 __U
, const int __R
)
6554 return (__m512h
) __builtin_ia32_vfmsubph512_mask3 ((__v32hf
) __A
,
6557 (__mmask32
) __U
, __R
);
6560 extern __inline __m512h
6561 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6562 _mm512_maskz_fmsub_round_ph (__mmask32 __U
, __m512h __A
, __m512h __B
,
6563 __m512h __C
, const int __R
)
6565 return (__m512h
) __builtin_ia32_vfmsubph512_maskz ((__v32hf
) __A
,
6568 (__mmask32
) __U
, __R
);
6572 #define _mm512_fmsub_round_ph(A, B, C, R) \
6573 ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))
6575 #define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
6576 ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))
6578 #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
6579 ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))
6581 #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
6582 ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))
6584 #endif /* __OPTIMIZE__ */
6586 /* Intrinsics vfnmsub[132,213,231]ph. */
6587 extern __inline __m512h
6588 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6589 _mm512_fnmsub_ph (__m512h __A
, __m512h __B
, __m512h __C
)
6592 __builtin_ia32_vfnmsubph512_mask ((__v32hf
) __A
,
6596 _MM_FROUND_CUR_DIRECTION
);
6599 extern __inline __m512h
6600 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6601 _mm512_mask_fnmsub_ph (__m512h __A
, __mmask32 __U
, __m512h __B
, __m512h __C
)
6604 __builtin_ia32_vfnmsubph512_mask ((__v32hf
) __A
,
6608 _MM_FROUND_CUR_DIRECTION
);
6611 extern __inline __m512h
6612 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6613 _mm512_mask3_fnmsub_ph (__m512h __A
, __m512h __B
, __m512h __C
, __mmask32 __U
)
6616 __builtin_ia32_vfnmsubph512_mask3 ((__v32hf
) __A
,
6620 _MM_FROUND_CUR_DIRECTION
);
6623 extern __inline __m512h
6624 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6625 _mm512_maskz_fnmsub_ph (__mmask32 __U
, __m512h __A
, __m512h __B
, __m512h __C
)
6628 __builtin_ia32_vfnmsubph512_maskz ((__v32hf
) __A
,
6632 _MM_FROUND_CUR_DIRECTION
);
6636 extern __inline __m512h
6637 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6638 _mm512_fnmsub_round_ph (__m512h __A
, __m512h __B
, __m512h __C
, const int __R
)
6640 return (__m512h
) __builtin_ia32_vfnmsubph512_mask ((__v32hf
) __A
,
6643 (__mmask32
) -1, __R
);
6646 extern __inline __m512h
6647 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6648 _mm512_mask_fnmsub_round_ph (__m512h __A
, __mmask32 __U
, __m512h __B
,
6649 __m512h __C
, const int __R
)
6651 return (__m512h
) __builtin_ia32_vfnmsubph512_mask ((__v32hf
) __A
,
6654 (__mmask32
) __U
, __R
);
6657 extern __inline __m512h
6658 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6659 _mm512_mask3_fnmsub_round_ph (__m512h __A
, __m512h __B
, __m512h __C
,
6660 __mmask32 __U
, const int __R
)
6662 return (__m512h
) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf
) __A
,
6665 (__mmask32
) __U
, __R
);
6668 extern __inline __m512h
6669 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6670 _mm512_maskz_fnmsub_round_ph (__mmask32 __U
, __m512h __A
, __m512h __B
,
6671 __m512h __C
, const int __R
)
6673 return (__m512h
) __builtin_ia32_vfnmsubph512_maskz ((__v32hf
) __A
,
6676 (__mmask32
) __U
, __R
);
6680 #define _mm512_fnmsub_round_ph(A, B, C, R) \
6681 ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))
6683 #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
6684 ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))
6686 #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
6687 ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))
6689 #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
6690 ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))
6692 #endif /* __OPTIMIZE__ */
6694 /* Intrinsics vf[,c]maddcph. */
6695 extern __inline __m512h
6696 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6697 _mm512_fcmadd_pch (__m512h __A
, __m512h __B
, __m512h __C
)
6700 __builtin_ia32_vfcmaddcph512_round ((__v32hf
) __A
,
6703 _MM_FROUND_CUR_DIRECTION
);
6706 extern __inline __m512h
6707 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6708 _mm512_mask_fcmadd_pch (__m512h __A
, __mmask16 __B
, __m512h __C
, __m512h __D
)
6711 __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf
) __A
,
6714 _MM_FROUND_CUR_DIRECTION
);
6717 extern __inline __m512h
6718 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6719 _mm512_mask3_fcmadd_pch (__m512h __A
, __m512h __B
, __m512h __C
, __mmask16 __D
)
6722 __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf
) __A
,
6725 __D
, _MM_FROUND_CUR_DIRECTION
);
6728 extern __inline __m512h
6729 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6730 _mm512_maskz_fcmadd_pch (__mmask16 __A
, __m512h __B
, __m512h __C
, __m512h __D
)
6733 __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf
) __B
,
6736 __A
, _MM_FROUND_CUR_DIRECTION
);
6739 extern __inline __m512h
6740 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6741 _mm512_fmadd_pch (__m512h __A
, __m512h __B
, __m512h __C
)
6744 __builtin_ia32_vfmaddcph512_round ((__v32hf
) __A
,
6747 _MM_FROUND_CUR_DIRECTION
);
6750 extern __inline __m512h
6751 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6752 _mm512_mask_fmadd_pch (__m512h __A
, __mmask16 __B
, __m512h __C
, __m512h __D
)
6755 __builtin_ia32_vfmaddcph512_mask_round ((__v32hf
) __A
,
6758 _MM_FROUND_CUR_DIRECTION
);
6761 extern __inline __m512h
6762 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6763 _mm512_mask3_fmadd_pch (__m512h __A
, __m512h __B
, __m512h __C
, __mmask16 __D
)
6766 __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf
) __A
,
6769 __D
, _MM_FROUND_CUR_DIRECTION
);
6772 extern __inline __m512h
6773 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6774 _mm512_maskz_fmadd_pch (__mmask16 __A
, __m512h __B
, __m512h __C
, __m512h __D
)
6777 __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf
) __B
,
6780 __A
, _MM_FROUND_CUR_DIRECTION
);
6784 extern __inline __m512h
6785 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6786 _mm512_fcmadd_round_pch (__m512h __A
, __m512h __B
, __m512h __C
, const int __D
)
6789 __builtin_ia32_vfcmaddcph512_round ((__v32hf
) __A
,
6795 extern __inline __m512h
6796 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6797 _mm512_mask_fcmadd_round_pch (__m512h __A
, __mmask16 __B
, __m512h __C
,
6798 __m512h __D
, const int __E
)
6801 __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf
) __A
,
6807 extern __inline __m512h
6808 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6809 _mm512_mask3_fcmadd_round_pch (__m512h __A
, __m512h __B
, __m512h __C
,
6810 __mmask16 __D
, const int __E
)
6813 __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf
) __A
,
6819 extern __inline __m512h
6820 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6821 _mm512_maskz_fcmadd_round_pch (__mmask16 __A
, __m512h __B
, __m512h __C
,
6822 __m512h __D
, const int __E
)
6825 __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf
) __B
,
6831 extern __inline __m512h
6832 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6833 _mm512_fmadd_round_pch (__m512h __A
, __m512h __B
, __m512h __C
, const int __D
)
6836 __builtin_ia32_vfmaddcph512_round ((__v32hf
) __A
,
6842 extern __inline __m512h
6843 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6844 _mm512_mask_fmadd_round_pch (__m512h __A
, __mmask16 __B
, __m512h __C
,
6845 __m512h __D
, const int __E
)
6848 __builtin_ia32_vfmaddcph512_mask_round ((__v32hf
) __A
,
6854 extern __inline __m512h
6855 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6856 _mm512_mask3_fmadd_round_pch (__m512h __A
, __m512h __B
, __m512h __C
,
6857 __mmask16 __D
, const int __E
)
6860 __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf
) __A
,
6866 extern __inline __m512h
6867 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6868 _mm512_maskz_fmadd_round_pch (__mmask16 __A
, __m512h __B
, __m512h __C
,
6869 __m512h __D
, const int __E
)
6872 __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf
) __B
,
6879 #define _mm512_fcmadd_round_pch(A, B, C, D) \
6880 (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D))
6882 #define _mm512_mask_fcmadd_round_pch(A, B, C, D, E) \
6884 __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A), \
6890 #define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E) \
6892 __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
6894 #define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E) \
6896 __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E))
6898 #define _mm512_fmadd_round_pch(A, B, C, D) \
6899 (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D))
6901 #define _mm512_mask_fmadd_round_pch(A, B, C, D, E) \
6903 __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A), \
6908 #define _mm512_mask3_fmadd_round_pch(A, B, C, D, E) \
6910 __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E))
6912 #define _mm512_maskz_fmadd_round_pch(A, B, C, D, E) \
6914 __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E))
6916 #endif /* __OPTIMIZE__ */
6918 /* Intrinsics vf[,c]mulcph. */
6919 extern __inline __m512h
6920 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6921 _mm512_fcmul_pch (__m512h __A
, __m512h __B
)
6924 __builtin_ia32_vfcmulcph512_round ((__v32hf
) __A
,
6926 _MM_FROUND_CUR_DIRECTION
);
6929 extern __inline __m512h
6930 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6931 _mm512_mask_fcmul_pch (__m512h __A
, __mmask16 __B
, __m512h __C
, __m512h __D
)
6934 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf
) __C
,
6937 __B
, _MM_FROUND_CUR_DIRECTION
);
6940 extern __inline __m512h
6941 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6942 _mm512_maskz_fcmul_pch (__mmask16 __A
, __m512h __B
, __m512h __C
)
6945 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf
) __B
,
6947 _mm512_setzero_ph (),
6948 __A
, _MM_FROUND_CUR_DIRECTION
);
6951 extern __inline __m512h
6952 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6953 _mm512_fmul_pch (__m512h __A
, __m512h __B
)
6956 __builtin_ia32_vfmulcph512_round ((__v32hf
) __A
,
6958 _MM_FROUND_CUR_DIRECTION
);
6961 extern __inline __m512h
6962 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6963 _mm512_mask_fmul_pch (__m512h __A
, __mmask16 __B
, __m512h __C
, __m512h __D
)
6966 __builtin_ia32_vfmulcph512_mask_round ((__v32hf
) __C
,
6969 __B
, _MM_FROUND_CUR_DIRECTION
);
6972 extern __inline __m512h
6973 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6974 _mm512_maskz_fmul_pch (__mmask16 __A
, __m512h __B
, __m512h __C
)
6977 __builtin_ia32_vfmulcph512_mask_round ((__v32hf
) __B
,
6979 _mm512_setzero_ph (),
6980 __A
, _MM_FROUND_CUR_DIRECTION
);
6984 extern __inline __m512h
6985 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6986 _mm512_fcmul_round_pch (__m512h __A
, __m512h __B
, const int __D
)
6989 __builtin_ia32_vfcmulcph512_round ((__v32hf
) __A
,
6990 (__v32hf
) __B
, __D
);
6993 extern __inline __m512h
6994 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
6995 _mm512_mask_fcmul_round_pch (__m512h __A
, __mmask16 __B
, __m512h __C
,
6996 __m512h __D
, const int __E
)
6999 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf
) __C
,
7005 extern __inline __m512h
7006 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7007 _mm512_maskz_fcmul_round_pch (__mmask16 __A
, __m512h __B
,
7008 __m512h __C
, const int __E
)
7011 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf
) __B
,
7013 _mm512_setzero_ph (),
7017 extern __inline __m512h
7018 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7019 _mm512_fmul_round_pch (__m512h __A
, __m512h __B
, const int __D
)
7022 __builtin_ia32_vfmulcph512_round ((__v32hf
) __A
,
7027 extern __inline __m512h
7028 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7029 _mm512_mask_fmul_round_pch (__m512h __A
, __mmask16 __B
, __m512h __C
,
7030 __m512h __D
, const int __E
)
7033 __builtin_ia32_vfmulcph512_mask_round ((__v32hf
) __C
,
7039 extern __inline __m512h
7040 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7041 _mm512_maskz_fmul_round_pch (__mmask16 __A
, __m512h __B
,
7042 __m512h __C
, const int __E
)
7045 __builtin_ia32_vfmulcph512_mask_round ((__v32hf
) __B
,
7047 _mm512_setzero_ph (),
7052 #define _mm512_fcmul_round_pch(A, B, D) \
7053 (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D))
7055 #define _mm512_mask_fcmul_round_pch(A, B, C, D, E) \
7056 (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E))
7058 #define _mm512_maskz_fcmul_round_pch(A, B, C, E) \
7059 (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C), \
7061 _mm512_setzero_ph (), \
7064 #define _mm512_fmul_round_pch(A, B, D) \
7065 (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D))
7067 #define _mm512_mask_fmul_round_pch(A, B, C, D, E) \
7068 (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E))
7070 #define _mm512_maskz_fmul_round_pch(A, B, C, E) \
7071 (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C), \
7073 _mm512_setzero_ph (), \
7076 #endif /* __OPTIMIZE__ */
/* Tree reduction of the 32 half floats in __A with OP (+ or *):
   fold 512 -> 256 -> 128 bits, then shuffle-and-combine inside the
   remaining 128 bits until two elements are left.  */
#define _MM512_REDUCE_OP(op) \
  __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256h __T3 = (__T1 op __T2); \
  __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
  __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
  __m128h __T6 = (__T4 op __T5); \
  __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
	       (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
  __m128h __T8 = (__T6 op __T7); \
  __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
	       (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \
  __m128h __T10 = __T8 op __T9; \
  return __T10[0] op __T10[1]
7094 extern __inline _Float16
7095 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7096 _mm512_reduce_add_ph (__m512h __A
)
7098 _MM512_REDUCE_OP (+);
7101 extern __inline _Float16
7102 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7103 _mm512_reduce_mul_ph (__m512h __A
)
7105 _MM512_REDUCE_OP (*);
7108 #undef _MM512_REDUCE_OP
/* Min/max reduction must use the IEEE min/max builtins rather than C
   comparison operators.  With AVX512VL the narrower 256/128-bit
   builtins are available; otherwise stay in 512 bits and shuffle.  */
#ifdef __AVX512VL__

#define _MM512_REDUCE_OP(op) \
  __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2, \
		 _mm256_setzero_ph (), (__mmask16) -1); \
  __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
  __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
  __m128h __T6 = __builtin_ia32_##op##ph128_mask \
		 (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1); \
  __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
		 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
  __m128h __T8 = (__m128h) __builtin_ia32_##op##ph128_mask \
		 (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1); \
  __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
		 (__v8hi) { 4, 5 }); \
  __m128h __T10 = __builtin_ia32_##op##ph128_mask \
		  (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1); \
  __m128h __T11 = (__m128h) __builtin_shuffle (__T10, \
		  (__v8hi) { 1, 0 }); \
  __m128h __T12 = __builtin_ia32_##op##ph128_mask \
		  (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1); \
  return __T12[0]

#else

#define _MM512_REDUCE_OP(op) \
  __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A, \
		 (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 }); \
  __m512h __T2 = _mm512_##op##_ph (__A, __T1); \
  __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2, \
		 (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 }); \
  __m512h __T4 = _mm512_##op##_ph (__T2, __T3); \
  __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4, \
		 (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 }); \
  __m512h __T6 = _mm512_##op##_ph (__T4, __T5); \
  __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6, \
		 (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0, \
			     0, 0, 0, 0, 0, 0, 0, 0 }); \
  __m512h __T8 = _mm512_##op##_ph (__T6, __T7); \
  __m512h __T9 = (__m512h) __builtin_shuffle (__T8, \
		 (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0, \
			     0, 0, 0, 0, 0, 0, 0, 0, \
			     0, 0, 0, 0, 0, 0, 0, 0, \
			     0, 0, 0, 0, 0, 0, 0, 0 }); \
  __m512h __T10 = _mm512_##op##_ph (__T8, __T9); \
  return __T10[0]

#endif
7160 extern __inline _Float16
7161 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7162 _mm512_reduce_min_ph (__m512h __A
)
7164 _MM512_REDUCE_OP (min
);
7167 extern __inline _Float16
7168 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7169 _mm512_reduce_max_ph (__m512h __A
)
7171 _MM512_REDUCE_OP (max
);
7174 #undef _MM512_REDUCE_OP
7176 extern __inline __m512h
7177 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7178 _mm512_mask_blend_ph (__mmask32 __U
, __m512h __A
, __m512h __W
)
7180 return (__m512h
) __builtin_ia32_movdquhi512_mask ((__v32hi
) __W
,
7186 extern __inline __m512h
7187 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7188 _mm512_permutex2var_ph (__m512h __A
, __m512i __I
, __m512h __B
)
7190 return (__m512h
) __builtin_ia32_vpermi2varhi512_mask ((__v32hi
) __A
,
7196 extern __inline __m512h
7197 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7198 _mm512_permutexvar_ph (__m512i __A
, __m512h __B
)
7200 return (__m512h
) __builtin_ia32_permvarhi512_mask ((__v32hi
) __B
,
7203 (_mm512_setzero_ph ()),
7207 extern __inline __m512h
7208 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
7209 _mm512_set1_pch (_Float16 _Complex __A
)
7213 _Float16 _Complex __a
;
7215 } __u
= { .__a
= __A
};
7217 return (__m512h
) _mm512_set1_ps (__u
.__b
);
// intrinsics below are alias for f*mul_*ch
#define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B))
#define _mm512_mask_mul_pch(W, U, A, B) \
  _mm512_mask_fmul_pch ((W), (U), (A), (B))
#define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
#define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
#define _mm512_mask_mul_round_pch(W, U, A, B, R) \
  _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
#define _mm512_maskz_mul_round_pch(U, A, B, R) \
  _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))

#define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
#define _mm512_mask_cmul_pch(W, U, A, B) \
  _mm512_mask_fcmul_pch ((W), (U), (A), (B))
#define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
#define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R))
#define _mm512_mask_cmul_round_pch(W, U, A, B, R) \
  _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
#define _mm512_maskz_cmul_round_pch(U, A, B, R) \
  _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
7241 #ifdef __DISABLE_AVX512FP16_512__
7242 #undef __DISABLE_AVX512FP16_512__
7243 #pragma GCC pop_options
7244 #endif /* __DISABLE_AVX512FP16_512__ */
7246 #endif /* _AVX512FP16INTRIN_H_INCLUDED */