1 /* Copyright (C) 2019-2024 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FP16INTRIN_H_INCLUDED
29 #define _AVX512FP16INTRIN_H_INCLUDED
30
31 #if !defined (__AVX512FP16__) || defined (__EVEX512__)
32 #pragma GCC push_options
33 #pragma GCC target("avx512fp16,no-evex512")
34 #define __DISABLE_AVX512FP16__
35 #endif /* __AVX512FP16__ */
36
37 /* Internal data types for implementing the intrinsics. */
38 typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
39 typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
40
41 /* The Intel API is flexible enough that we must allow aliasing with other
42 vector types, and their scalar components. */
43 typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
44 typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
45
46 /* Unaligned version of the same type. */
47 typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), \
48 __may_alias__, __aligned__ (1)));
49 typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), \
50 __may_alias__, __aligned__ (1)));
51
52 extern __inline __m128h
53 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
54 _mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5,
55 _Float16 __A4, _Float16 __A3, _Float16 __A2,
56 _Float16 __A1, _Float16 __A0)
57 {
58 return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3,
59 __A4, __A5, __A6, __A7 };
60 }
61
62 extern __inline __m256h
63 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
64 _mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13,
65 _Float16 __A12, _Float16 __A11, _Float16 __A10,
66 _Float16 __A9, _Float16 __A8, _Float16 __A7,
67 _Float16 __A6, _Float16 __A5, _Float16 __A4,
68 _Float16 __A3, _Float16 __A2, _Float16 __A1,
69 _Float16 __A0)
70 {
71 return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3,
72 __A4, __A5, __A6, __A7,
73 __A8, __A9, __A10, __A11,
74 __A12, __A13, __A14, __A15 };
75 }
76
77 /* Create vectors of elements in the reversed order from _mm_set_ph
78 and _mm256_set_ph functions. */
79 extern __inline __m128h
80 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
81 _mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
82 _Float16 __A3, _Float16 __A4, _Float16 __A5,
83 _Float16 __A6, _Float16 __A7)
84 {
85 return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0);
86 }
87
88 extern __inline __m256h
89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90 _mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
91 _Float16 __A3, _Float16 __A4, _Float16 __A5,
92 _Float16 __A6, _Float16 __A7, _Float16 __A8,
93 _Float16 __A9, _Float16 __A10, _Float16 __A11,
94 _Float16 __A12, _Float16 __A13, _Float16 __A14,
95 _Float16 __A15)
96 {
97 return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9,
98 __A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1,
99 __A0);
100 }
101
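/* Usage sketch (illustrative only): _mm_set_ph takes its arguments from
   the highest element down to element 0, while _mm_setr_ph takes them in
   memory order, so the two calls below construct the same vector:

     __m128h a = _mm_set_ph  (7.0f16, 6.0f16, 5.0f16, 4.0f16,
                              3.0f16, 2.0f16, 1.0f16, 0.0f16);
     __m128h b = _mm_setr_ph (0.0f16, 1.0f16, 2.0f16, 3.0f16,
                              4.0f16, 5.0f16, 6.0f16, 7.0f16);

   Both leave 0.0 in element 0 and 7.0 in element 7.  */
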
102 /* Broadcast _Float16 to vector. */
103 extern __inline __m128h
104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
105 _mm_set1_ph (_Float16 __A)
106 {
107 return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A);
108 }
109
110 extern __inline __m256h
111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
112 _mm256_set1_ph (_Float16 __A)
113 {
114 return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
115 __A, __A, __A, __A, __A, __A, __A, __A);
116 }
117
118 /* Create a vector with all zeros. */
119 extern __inline __m128h
120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
121 _mm_setzero_ph (void)
122 {
123 return _mm_set1_ph (0.0f16);
124 }
125
126 extern __inline __m256h
127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
128 _mm256_setzero_ph (void)
129 {
130 return _mm256_set1_ph (0.0f16);
131 }
132
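/* The _mm_undefined_ph and _mm256_undefined_ph helpers below return a
   vector with unspecified contents.  The self-initialization "__Y = __Y"
   is deliberate: it lets the compiler emit no code for the value, and the
   surrounding pragmas silence the -Winit-self warning this pattern would
   otherwise produce.  */
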
133 extern __inline __m128h
134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
135 _mm_undefined_ph (void)
136 {
137 #pragma GCC diagnostic push
138 #pragma GCC diagnostic ignored "-Winit-self"
139 __m128h __Y = __Y;
140 #pragma GCC diagnostic pop
141 return __Y;
142 }
143
144 extern __inline __m256h
145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146 _mm256_undefined_ph (void)
147 {
148 #pragma GCC diagnostic push
149 #pragma GCC diagnostic ignored "-Winit-self"
150 __m256h __Y = __Y;
151 #pragma GCC diagnostic pop
152 return __Y;
153 }
154
155 extern __inline _Float16
156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
157 _mm256_cvtsh_h (__m256h __A)
158 {
159 return __A[0];
160 }
161
162 extern __inline __m256h
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm256_load_ph (void const *__P)
165 {
166 return *(const __m256h *) __P;
167 }
168
169 extern __inline __m128h
170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
171 _mm_load_ph (void const *__P)
172 {
173 return *(const __m128h *) __P;
174 }
175
176 extern __inline __m256h
177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
178 _mm256_loadu_ph (void const *__P)
179 {
180 return *(const __m256h_u *) __P;
181 }
182
183 extern __inline __m128h
184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
185 _mm_loadu_ph (void const *__P)
186 {
187 return *(const __m128h_u *) __P;
188 }
189
190 extern __inline void
191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192 _mm256_store_ph (void *__P, __m256h __A)
193 {
194 *(__m256h *) __P = __A;
195 }
196
197 extern __inline void
198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199 _mm_store_ph (void *__P, __m128h __A)
200 {
201 *(__m128h *) __P = __A;
202 }
203
204 extern __inline void
205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
206 _mm256_storeu_ph (void *__P, __m256h __A)
207 {
208 *(__m256h_u *) __P = __A;
209 }
210
211 extern __inline void
212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 _mm_storeu_ph (void *__P, __m128h __A)
214 {
215 *(__m128h_u *) __P = __A;
216 }
217
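/* Usage sketch (illustrative only, assuming code built with
   -mavx512fp16): the _load_/_store_ forms above expect a 16- or 32-byte
   aligned address, while the _loadu_/_storeu_ forms accept any alignment.
   Buffer and variable names are arbitrary:

     _Float16 buf[16];
     __m256h v = _mm256_loadu_ph (buf);   // unaligned load of 16 halves
     _mm256_storeu_ph (buf, v);           // unaligned store back

   Passing a misaligned pointer to the aligned variants may fault.  */
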
218 /* Create a vector with element 0 as F and the rest zero. */
219 extern __inline __m128h
220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221 _mm_set_sh (_Float16 __F)
222 {
223 return _mm_set_ph (0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16,
224 __F);
225 }
226
227 /* Create a vector with element 0 as *P and the rest zero. */
228 extern __inline __m128h
229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230 _mm_load_sh (void const *__P)
231 {
232 return _mm_set_ph (0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16,
233 *(_Float16 const *) __P);
234 }
235
236 /* Stores the lower _Float16 value. */
237 extern __inline void
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm_store_sh (void *__P, __m128h __A)
240 {
241 *(_Float16 *) __P = ((__v8hf)__A)[0];
242 }
243
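/* Usage sketch (illustrative only) of the scalar helpers above; variable
   names are arbitrary:

     _Float16 x = 1.5f16, y;
     __m128h v = _mm_load_sh (&x);   // element 0 = 1.5, elements 1..7 = 0
     _mm_store_sh (&y, v);           // y == 1.5
*/
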
244 /* Intrinsics of v[add,sub,mul,div]sh. */
245 extern __inline __m128h
246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247 _mm_add_sh (__m128h __A, __m128h __B)
248 {
249 __A[0] += __B[0];
250 return __A;
251 }
252
253 extern __inline __m128h
254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
255 _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
256 {
257 return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
258 }
259
260 extern __inline __m128h
261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
262 _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
263 {
264 return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
265 __A);
266 }
267
268 extern __inline __m128h
269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
270 _mm_sub_sh (__m128h __A, __m128h __B)
271 {
272 __A[0] -= __B[0];
273 return __A;
274 }
275
276 extern __inline __m128h
277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
278 _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
279 {
280 return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
281 }
282
283 extern __inline __m128h
284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
285 _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
286 {
287 return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
288 __A);
289 }
290
291 extern __inline __m128h
292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
293 _mm_mul_sh (__m128h __A, __m128h __B)
294 {
295 __A[0] *= __B[0];
296 return __A;
297 }
298
299 extern __inline __m128h
300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301 _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
302 {
303 return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
304 }
305
306 extern __inline __m128h
307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308 _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
309 {
310 return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
311 }
312
313 extern __inline __m128h
314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
315 _mm_div_sh (__m128h __A, __m128h __B)
316 {
317 __A[0] /= __B[0];
318 return __A;
319 }
320
321 extern __inline __m128h
322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
323 _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
324 {
325 return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
326 }
327
328 extern __inline __m128h
329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330 _mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
331 {
332 return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
333 __A);
334 }
335
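/* Usage sketch (illustrative only) of the merge- and zero-masked scalar
   forms above.  Bit 0 of the mask chooses between the arithmetic result
   and the fallback value for element 0; elements 1..7 are copied from
   the first source operand.  Variable names are arbitrary:

     __m128h a = _mm_set_sh (1.0f16), b = _mm_set_sh (2.0f16);
     __m128h src = _mm_set_sh (9.0f16);
     __m128h r0 = _mm_mask_add_sh (src, 0x0, a, b);   // r0[0] == 9.0
     __m128h r1 = _mm_mask_add_sh (src, 0x1, a, b);   // r1[0] == 3.0
     __m128h r2 = _mm_maskz_add_sh (0x0, a, b);       // r2[0] == 0.0
*/
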
336 #ifdef __OPTIMIZE__
337 extern __inline __m128h
338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339 _mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
340 {
341 return __builtin_ia32_addsh_mask_round (__A, __B,
342 _mm_setzero_ph (),
343 (__mmask8) -1, __C);
344 }
345
346 extern __inline __m128h
347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
348 _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
349 __m128h __D, const int __E)
350 {
351 return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
352 }
353
354 extern __inline __m128h
355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356 _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
357 const int __D)
358 {
359 return __builtin_ia32_addsh_mask_round (__B, __C,
360 _mm_setzero_ph (),
361 __A, __D);
362 }
363
364 extern __inline __m128h
365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
366 _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
367 {
368 return __builtin_ia32_subsh_mask_round (__A, __B,
369 _mm_setzero_ph (),
370 (__mmask8) -1, __C);
371 }
372
373 extern __inline __m128h
374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375 _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
376 __m128h __D, const int __E)
377 {
378 return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
379 }
380
381 extern __inline __m128h
382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
383 _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
384 const int __D)
385 {
386 return __builtin_ia32_subsh_mask_round (__B, __C,
387 _mm_setzero_ph (),
388 __A, __D);
389 }
390
391 extern __inline __m128h
392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
393 _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
394 {
395 return __builtin_ia32_mulsh_mask_round (__A, __B,
396 _mm_setzero_ph (),
397 (__mmask8) -1, __C);
398 }
399
400 extern __inline __m128h
401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402 _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
403 __m128h __D, const int __E)
404 {
405 return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
406 }
407
408 extern __inline __m128h
409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
410 _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
411 const int __D)
412 {
413 return __builtin_ia32_mulsh_mask_round (__B, __C,
414 _mm_setzero_ph (),
415 __A, __D);
416 }
417
418 extern __inline __m128h
419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
420 _mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
421 {
422 return __builtin_ia32_divsh_mask_round (__A, __B,
423 _mm_setzero_ph (),
424 (__mmask8) -1, __C);
425 }
426
427 extern __inline __m128h
428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
429 _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
430 __m128h __D, const int __E)
431 {
432 return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
433 }
434
435 extern __inline __m128h
436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
437 _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
438 const int __D)
439 {
440 return __builtin_ia32_divsh_mask_round (__B, __C,
441 _mm_setzero_ph (),
442 __A, __D);
443 }
444 #else
445 #define _mm_add_round_sh(A, B, C) \
446 ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B), \
447 _mm_setzero_ph (), \
448 (__mmask8)-1, (C)))
449
450 #define _mm_mask_add_round_sh(A, B, C, D, E) \
451 ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
452
453 #define _mm_maskz_add_round_sh(A, B, C, D) \
454 ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C), \
455 _mm_setzero_ph (), \
456 (A), (D)))
457
458 #define _mm_sub_round_sh(A, B, C) \
459 ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B), \
460 _mm_setzero_ph (), \
461 (__mmask8)-1, (C)))
462
463 #define _mm_mask_sub_round_sh(A, B, C, D, E) \
464 ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
465
466 #define _mm_maskz_sub_round_sh(A, B, C, D) \
467 ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C), \
468 _mm_setzero_ph (), \
469 (A), (D)))
470
471 #define _mm_mul_round_sh(A, B, C) \
472 ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B), \
473 _mm_setzero_ph (), \
474 (__mmask8)-1, (C)))
475
476 #define _mm_mask_mul_round_sh(A, B, C, D, E) \
477 ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
478
479 #define _mm_maskz_mul_round_sh(A, B, C, D) \
480 ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C), \
481 _mm_setzero_ph (), \
482 (A), (D)))
483
484 #define _mm_div_round_sh(A, B, C) \
485 ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B), \
486 _mm_setzero_ph (), \
487 (__mmask8)-1, (C)))
488
489 #define _mm_mask_div_round_sh(A, B, C, D, E) \
490 ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
491
492 #define _mm_maskz_div_round_sh(A, B, C, D) \
493 ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C), \
494 _mm_setzero_ph (), \
495 (A), (D)))
496 #endif /* __OPTIMIZE__ */
497
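/* Usage sketch (illustrative only): the *_round_* forms above take a
   rounding control that must be a compile-time constant, either
   _MM_FROUND_CUR_DIRECTION (use the current MXCSR mode) or a rounding
   mode combined with _MM_FROUND_NO_EXC, e.g. with a and b arbitrary
   __m128h values:

     __m128h r = _mm_add_round_sh (a, b,
                                   _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
*/
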
498 /* Intrinsic vmaxsh vminsh. */
499 extern __inline __m128h
500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501 _mm_max_sh (__m128h __A, __m128h __B)
502 {
503 __A[0] = __A[0] > __B[0] ? __A[0] : __B[0];
504 return __A;
505 }
506
507 extern __inline __m128h
508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
509 _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
510 {
511 return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
512 }
513
514 extern __inline __m128h
515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
516 _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
517 {
518 return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
519 __A);
520 }
521
522 extern __inline __m128h
523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
524 _mm_min_sh (__m128h __A, __m128h __B)
525 {
526 __A[0] = __A[0] < __B[0] ? __A[0] : __B[0];
527 return __A;
528 }
529
530 extern __inline __m128h
531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
532 _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
533 {
534 return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
535 }
536
537 extern __inline __m128h
538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539 _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
540 {
541 return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
542 __A);
543 }
544
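/* Note on the unmasked forms above: as with the vmaxsh/vminsh
   instructions, the ?: expression yields the second operand whenever the
   comparison is false, so if either input is a NaN the result is taken
   from __B rather than being canonicalized.  */
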
545 #ifdef __OPTIMIZE__
546 extern __inline __m128h
547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
548 _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
549 {
550 return __builtin_ia32_maxsh_mask_round (__A, __B,
551 _mm_setzero_ph (),
552 (__mmask8) -1, __C);
553 }
554
555 extern __inline __m128h
556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
557 _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
558 __m128h __D, const int __E)
559 {
560 return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
561 }
562
563 extern __inline __m128h
564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
565 _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
566 const int __D)
567 {
568 return __builtin_ia32_maxsh_mask_round (__B, __C,
569 _mm_setzero_ph (),
570 __A, __D);
571 }
572
573 extern __inline __m128h
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
576 {
577 return __builtin_ia32_minsh_mask_round (__A, __B,
578 _mm_setzero_ph (),
579 (__mmask8) -1, __C);
580 }
581
582 extern __inline __m128h
583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
584 _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
585 __m128h __D, const int __E)
586 {
587 return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
588 }
589
590 extern __inline __m128h
591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592 _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
593 const int __D)
594 {
595 return __builtin_ia32_minsh_mask_round (__B, __C,
596 _mm_setzero_ph (),
597 __A, __D);
598 }
599
600 #else
601 #define _mm_max_round_sh(A, B, C) \
602 (__builtin_ia32_maxsh_mask_round ((A), (B), \
603 _mm_setzero_ph (), \
604 (__mmask8)-1, (C)))
605
606 #define _mm_mask_max_round_sh(A, B, C, D, E) \
607 (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
608
609 #define _mm_maskz_max_round_sh(A, B, C, D) \
610 (__builtin_ia32_maxsh_mask_round ((B), (C), \
611 _mm_setzero_ph (), \
612 (A), (D)))
613
614 #define _mm_min_round_sh(A, B, C) \
615 (__builtin_ia32_minsh_mask_round ((A), (B), \
616 _mm_setzero_ph (), \
617 (__mmask8)-1, (C)))
618
619 #define _mm_mask_min_round_sh(A, B, C, D, E) \
620 (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
621
622 #define _mm_maskz_min_round_sh(A, B, C, D) \
623 (__builtin_ia32_minsh_mask_round ((B), (C), \
624 _mm_setzero_ph (), \
625 (A), (D)))
626
627 #endif /* __OPTIMIZE__ */
628
629 /* Intrinsics vcmpsh. */
630 #ifdef __OPTIMIZE__
631 extern __inline __mmask8
632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
633 _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
634 {
635 return (__mmask8)
636 __builtin_ia32_cmpsh_mask_round (__A, __B,
637 __C, (__mmask8) -1,
638 _MM_FROUND_CUR_DIRECTION);
639 }
640
641 extern __inline __mmask8
642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
643 _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
644 const int __D)
645 {
646 return (__mmask8)
647 __builtin_ia32_cmpsh_mask_round (__B, __C,
648 __D, __A,
649 _MM_FROUND_CUR_DIRECTION);
650 }
651
652 extern __inline __mmask8
653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
654 _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
655 const int __D)
656 {
657 return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
658 __C, (__mmask8) -1,
659 __D);
660 }
661
662 extern __inline __mmask8
663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
664 _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
665 const int __D, const int __E)
666 {
667 return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
668 __D, __A,
669 __E);
670 }
671
672 #else
673 #define _mm_cmp_sh_mask(A, B, C) \
674 (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), \
675 (_MM_FROUND_CUR_DIRECTION)))
676
677 #define _mm_mask_cmp_sh_mask(A, B, C, D) \
678 (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), \
679 (_MM_FROUND_CUR_DIRECTION)))
680
681 #define _mm_cmp_round_sh_mask(A, B, C, D) \
682 (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
683
684 #define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) \
685 (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
686
687 #endif /* __OPTIMIZE__ */
688
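/* Usage sketch (illustrative only): _mm_cmp_sh_mask compares element 0
   of its operands with a _CMP_* predicate from <immintrin.h> and reports
   the outcome in bit 0 of the returned mask.  With a and b arbitrary
   __m128h values:

     __mmask8 k = _mm_cmp_sh_mask (a, b, _CMP_LT_OS);
     int lt = k & 1;   // 1 if a[0] < b[0] under the ordered predicate
*/
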
689 /* Intrinsics vcomish. */
690 extern __inline int
691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
692 _mm_comieq_sh (__m128h __A, __m128h __B)
693 {
694 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
695 (__mmask8) -1,
696 _MM_FROUND_CUR_DIRECTION);
697 }
698
699 extern __inline int
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm_comilt_sh (__m128h __A, __m128h __B)
702 {
703 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
704 (__mmask8) -1,
705 _MM_FROUND_CUR_DIRECTION);
706 }
707
708 extern __inline int
709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
710 _mm_comile_sh (__m128h __A, __m128h __B)
711 {
712 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
713 (__mmask8) -1,
714 _MM_FROUND_CUR_DIRECTION);
715 }
716
717 extern __inline int
718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
719 _mm_comigt_sh (__m128h __A, __m128h __B)
720 {
721 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
722 (__mmask8) -1,
723 _MM_FROUND_CUR_DIRECTION);
724 }
725
726 extern __inline int
727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
728 _mm_comige_sh (__m128h __A, __m128h __B)
729 {
730 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
731 (__mmask8) -1,
732 _MM_FROUND_CUR_DIRECTION);
733 }
734
735 extern __inline int
736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
737 _mm_comineq_sh (__m128h __A, __m128h __B)
738 {
739 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
740 (__mmask8) -1,
741 _MM_FROUND_CUR_DIRECTION);
742 }
743
744 extern __inline int
745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
746 _mm_ucomieq_sh (__m128h __A, __m128h __B)
747 {
748 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
749 (__mmask8) -1,
750 _MM_FROUND_CUR_DIRECTION);
751 }
752
753 extern __inline int
754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
755 _mm_ucomilt_sh (__m128h __A, __m128h __B)
756 {
757 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
758 (__mmask8) -1,
759 _MM_FROUND_CUR_DIRECTION);
760 }
761
762 extern __inline int
763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
764 _mm_ucomile_sh (__m128h __A, __m128h __B)
765 {
766 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
767 (__mmask8) -1,
768 _MM_FROUND_CUR_DIRECTION);
769 }
770
771 extern __inline int
772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
773 _mm_ucomigt_sh (__m128h __A, __m128h __B)
774 {
775 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
776 (__mmask8) -1,
777 _MM_FROUND_CUR_DIRECTION);
778 }
779
780 extern __inline int
781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
782 _mm_ucomige_sh (__m128h __A, __m128h __B)
783 {
784 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
785 (__mmask8) -1,
786 _MM_FROUND_CUR_DIRECTION);
787 }
788
789 extern __inline int
790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
791 _mm_ucomineq_sh (__m128h __A, __m128h __B)
792 {
793 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
794 (__mmask8) -1,
795 _MM_FROUND_CUR_DIRECTION);
796 }
797
798 #ifdef __OPTIMIZE__
799 extern __inline int
800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
801 _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
802 {
803 return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
804 (__mmask8) -1,
805 _MM_FROUND_CUR_DIRECTION);
806 }
807
808 extern __inline int
809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
810 _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
811 {
812 return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
813                                            (__mmask8) -1, __R);
814 }
815
816 #else
817 #define _mm_comi_round_sh(A, B, P, R) \
818 (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
819 #define _mm_comi_sh(A, B, P) \
820 (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), \
821 _MM_FROUND_CUR_DIRECTION))
822
823 #endif /* __OPTIMIZE__ */
824
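/* The _mm_comi*_sh helpers above use signaling predicates (ordered
   _CMP_EQ_OS, _CMP_LT_OS, ... and _CMP_NEQ_US), so a NaN operand raises
   an invalid-operation exception; the _mm_ucomi*_sh helpers use the
   quiet _OQ/_UQ predicates and compare silently.  A minimal sketch with
   a and b arbitrary __m128h values:

     int eq  = _mm_comieq_sh (a, b);    // signaling compare of element 0
     int ueq = _mm_ucomieq_sh (a, b);   // quiet compare of element 0
*/
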
825 /* Intrinsics vsqrtsh. */
826 extern __inline __m128h
827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
828 _mm_sqrt_sh (__m128h __A, __m128h __B)
829 {
830 return __builtin_ia32_sqrtsh_mask_round (__B, __A,
831 _mm_setzero_ph (),
832 (__mmask8) -1,
833 _MM_FROUND_CUR_DIRECTION);
834 }
835
836 extern __inline __m128h
837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
838 _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
839 {
840 return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
841 _MM_FROUND_CUR_DIRECTION);
842 }
843
844 extern __inline __m128h
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846 _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
847 {
848 return __builtin_ia32_sqrtsh_mask_round (__C, __B,
849 _mm_setzero_ph (),
850 __A, _MM_FROUND_CUR_DIRECTION);
851 }
852
853 #ifdef __OPTIMIZE__
854 extern __inline __m128h
855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
856 _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
857 {
858 return __builtin_ia32_sqrtsh_mask_round (__B, __A,
859 _mm_setzero_ph (),
860 (__mmask8) -1, __C);
861 }
862
863 extern __inline __m128h
864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
865 _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
866 __m128h __D, const int __E)
867 {
868 return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
869 __E);
870 }
871
872 extern __inline __m128h
873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
874 _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
875 const int __D)
876 {
877 return __builtin_ia32_sqrtsh_mask_round (__C, __B,
878 _mm_setzero_ph (),
879 __A, __D);
880 }
881
882 #else
883 #define _mm_sqrt_round_sh(A, B, C) \
884 (__builtin_ia32_sqrtsh_mask_round ((B), (A), \
885 _mm_setzero_ph (), \
886 (__mmask8)-1, (C)))
887
888 #define _mm_mask_sqrt_round_sh(A, B, C, D, E) \
889 (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
890
891 #define _mm_maskz_sqrt_round_sh(A, B, C, D) \
892 (__builtin_ia32_sqrtsh_mask_round ((C), (B), \
893 _mm_setzero_ph (), \
894 (A), (D)))
895
896 #endif /* __OPTIMIZE__ */
897
898 /* Intrinsics vrsqrtsh. */
899 extern __inline __m128h
900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
901 _mm_rsqrt_sh (__m128h __A, __m128h __B)
902 {
903 return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
904 (__mmask8) -1);
905 }
906
907 extern __inline __m128h
908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
909 _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
910 {
911 return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
912 }
913
914 extern __inline __m128h
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
917 {
918 return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
919 __A);
920 }
921
922 /* Intrinsics vrcpsh. */
923 extern __inline __m128h
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm_rcp_sh (__m128h __A, __m128h __B)
926 {
927 return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
928 (__mmask8) -1);
929 }
930
931 extern __inline __m128h
932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
933 _mm_mask_rcp_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
934 {
935 return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
936 }
937
938 extern __inline __m128h
939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940 _mm_maskz_rcp_sh (__mmask8 __A, __m128h __B, __m128h __C)
941 {
942 return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
943 __A);
944 }
945
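/* Usage sketch (illustrative only): the reciprocal and reciprocal
   square-root helpers above return fast approximations rather than
   correctly rounded results.  Element 0 of the result is derived from
   __B and the upper elements are copied from __A.  With a and b
   arbitrary __m128h values:

     __m128h r1 = _mm_rcp_sh (a, b);     // r1[0] ~ 1 / b[0]
     __m128h r2 = _mm_rsqrt_sh (a, b);   // r2[0] ~ 1 / sqrt (b[0])
*/
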
946 /* Intrinsics vscalefsh. */
947 extern __inline __m128h
948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
949 _mm_scalef_sh (__m128h __A, __m128h __B)
950 {
951 return __builtin_ia32_scalefsh_mask_round (__A, __B,
952 _mm_setzero_ph (),
953 (__mmask8) -1,
954 _MM_FROUND_CUR_DIRECTION);
955 }
956
957 extern __inline __m128h
958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
959 _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
960 {
961 return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
962 _MM_FROUND_CUR_DIRECTION);
963 }
964
965 extern __inline __m128h
966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
968 {
969 return __builtin_ia32_scalefsh_mask_round (__B, __C,
970 _mm_setzero_ph (),
971 __A,
972 _MM_FROUND_CUR_DIRECTION);
973 }
974
975 #ifdef __OPTIMIZE__
976 extern __inline __m128h
977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978 _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
979 {
980 return __builtin_ia32_scalefsh_mask_round (__A, __B,
981 _mm_setzero_ph (),
982 (__mmask8) -1, __C);
983 }
984
985 extern __inline __m128h
986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
987 _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
988 __m128h __D, const int __E)
989 {
990 return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
991 __E);
992 }
993
994 extern __inline __m128h
995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
996 _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
997 const int __D)
998 {
999 return __builtin_ia32_scalefsh_mask_round (__B, __C,
1000 _mm_setzero_ph (),
1001 __A, __D);
1002 }
1003
1004 #else
1005 #define _mm_scalef_round_sh(A, B, C) \
1006 (__builtin_ia32_scalefsh_mask_round ((A), (B), \
1007 _mm_setzero_ph (), \
1008 (__mmask8)-1, (C)))
1009
1010 #define _mm_mask_scalef_round_sh(A, B, C, D, E) \
1011 (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
1012
1013 #define _mm_maskz_scalef_round_sh(A, B, C, D) \
1014 (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), \
1015 (A), (D)))
1016
1017 #endif /* __OPTIMIZE__ */
1018
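/* Usage sketch (illustrative only): vscalefsh computes element 0 of the
   first operand times 2 raised to the floor of element 0 of the second
   operand, with the upper elements copied from the first operand:

     __m128h r = _mm_scalef_sh (_mm_set_sh (3.0f16), _mm_set_sh (2.0f16));
     // r[0] == 12.0, i.e. 3.0 * 2^2
*/
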
1019 /* Intrinsics vreducesh. */
1020 #ifdef __OPTIMIZE__
1021 extern __inline __m128h
1022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1023 _mm_reduce_sh (__m128h __A, __m128h __B, int __C)
1024 {
1025 return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
1026 _mm_setzero_ph (),
1027 (__mmask8) -1,
1028 _MM_FROUND_CUR_DIRECTION);
1029 }
1030
1031 extern __inline __m128h
1032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1033 _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
1034 __m128h __D, int __E)
1035 {
1036 return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
1037 _MM_FROUND_CUR_DIRECTION);
1038 }
1039
1040 extern __inline __m128h
1041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1042 _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
1043 {
1044 return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
1045 _mm_setzero_ph (), __A,
1046 _MM_FROUND_CUR_DIRECTION);
1047 }
1048
1049 extern __inline __m128h
1050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1051 _mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
1052 {
1053 return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
1054 _mm_setzero_ph (),
1055 (__mmask8) -1, __D);
1056 }
1057
1058 extern __inline __m128h
1059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1060 _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1061 __m128h __D, int __E, const int __F)
1062 {
1063 return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
1064 __B, __F);
1065 }
1066
1067 extern __inline __m128h
1068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1069 _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1070 int __D, const int __E)
1071 {
1072 return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
1073 _mm_setzero_ph (),
1074 __A, __E);
1075 }
1076
1077 #else
1078 #define _mm_reduce_sh(A, B, C) \
1079 (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \
1080 _mm_setzero_ph (), \
1081 (__mmask8)-1, \
1082 _MM_FROUND_CUR_DIRECTION))
1083
1084 #define _mm_mask_reduce_sh(A, B, C, D, E) \
1085 (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), \
1086 _MM_FROUND_CUR_DIRECTION))
1087
1088 #define _mm_maskz_reduce_sh(A, B, C, D) \
1089 (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \
1090 _mm_setzero_ph (), \
1091 (A), _MM_FROUND_CUR_DIRECTION))
1092
1093 #define _mm_reduce_round_sh(A, B, C, D) \
1094 (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \
1095 _mm_setzero_ph (), \
1096 (__mmask8)-1, (D)))
1097
1098 #define _mm_mask_reduce_round_sh(A, B, C, D, E, F) \
1099 (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
1100
1101 #define _mm_maskz_reduce_round_sh(A, B, C, D, E) \
1102 (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \
1103 _mm_setzero_ph (), \
1104 (A), (E)))
1105
1106 #endif /* __OPTIMIZE__ */
1107
1108 /* Intrinsics vrndscalesh. */
1109 #ifdef __OPTIMIZE__
1110 extern __inline __m128h
1111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112 _mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
1113 {
1114 return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
1115 _mm_setzero_ph (),
1116 (__mmask8) -1,
1117 _MM_FROUND_CUR_DIRECTION);
1118 }
1119
1120 extern __inline __m128h
1121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122 _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
1123 __m128h __D, int __E)
1124 {
1125 return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
1126 _MM_FROUND_CUR_DIRECTION);
1127 }
1128
1129 extern __inline __m128h
1130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1131 _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
1132 {
1133 return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
1134 _mm_setzero_ph (), __A,
1135 _MM_FROUND_CUR_DIRECTION);
1136 }
1137
1138 extern __inline __m128h
1139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1140 _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
1141 {
1142 return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
1143 _mm_setzero_ph (),
1144 (__mmask8) -1,
1145 __D);
1146 }
1147
1148 extern __inline __m128h
1149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1150 _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1151 __m128h __D, int __E, const int __F)
1152 {
1153 return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
1154 __A, __B, __F);
1155 }
1156
1157 extern __inline __m128h
1158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1159 _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1160 int __D, const int __E)
1161 {
1162 return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
1163 _mm_setzero_ph (),
1164 __A, __E);
1165 }
1166
1167 #else
1168 #define _mm_roundscale_sh(A, B, C) \
1169 (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \
1170 _mm_setzero_ph (), \
1171 (__mmask8)-1, \
1172 _MM_FROUND_CUR_DIRECTION))
1173
1174 #define _mm_mask_roundscale_sh(A, B, C, D, E) \
1175 (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), \
1176 _MM_FROUND_CUR_DIRECTION))
1177
1178 #define _mm_maskz_roundscale_sh(A, B, C, D) \
1179 (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \
1180 _mm_setzero_ph (), \
1181 (A), _MM_FROUND_CUR_DIRECTION))
1182
1183 #define _mm_roundscale_round_sh(A, B, C, D) \
1184 (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \
1185 _mm_setzero_ph (), \
1186 (__mmask8)-1, (D)))
1187
1188 #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) \
1189 (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
1190
1191 #define _mm_maskz_roundscale_round_sh(A, B, C, D, E) \
1192 (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \
1193 _mm_setzero_ph (), \
1194 (A), (E)))
1195
1196 #endif /* __OPTIMIZE__ */
1197
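/* Usage sketch (illustrative only): for vrndscalesh the low bits of the
   immediate select the rounding mode (0 nearest, 1 down, 2 up,
   3 truncate) and bits 7:4 give the number of fraction bits to keep, so
   an immediate of 0 rounds element 0 of the second operand to the
   nearest integer.  With a and b arbitrary __m128h values:

     __m128h r = _mm_roundscale_sh (a, b, 0);   // r[0] = b[0] rounded to an integer
*/
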
1198 /* Intrinsics vfpclasssh. */
1199 #ifdef __OPTIMIZE__
1200 extern __inline __mmask8
1201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1202 _mm_fpclass_sh_mask (__m128h __A, const int __imm)
1203 {
1204 return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
1205 (__mmask8) -1);
1206 }
1207
1208 extern __inline __mmask8
1209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1210 _mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
1211 {
1212 return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U);
1213 }
1214
1215 #else
1216 #define _mm_fpclass_sh_mask(X, C) \
1217 ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
1218                                              (int) (C), (__mmask8) (-1)))
1219
1220 #define _mm_mask_fpclass_sh_mask(U, X, C) \
1221 ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
1222 (int) (C), (__mmask8) (U)))
1223
1224 #endif /* __OPTIMIZE__ */
1225
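/* Usage sketch (illustrative only): _mm_fpclass_sh_mask tests element 0
   against the categories selected by the immediate (QNaN, +0, -0, +Inf,
   -Inf, denormal, negative finite, SNaN) and reports the result in bit 0
   of the mask.  A common idiom is a NaN test that combines the QNaN and
   SNaN category bits, assuming x is an arbitrary __m128h value:

     __mmask8 is_nan = _mm_fpclass_sh_mask (x, 0x81);   // 0x01 QNaN | 0x80 SNaN
*/
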
1226 /* Intrinsics vgetexpsh. */
1227 extern __inline __m128h
1228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1229 _mm_getexp_sh (__m128h __A, __m128h __B)
1230 {
1231 return (__m128h)
1232 __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
1233 (__v8hf) _mm_setzero_ph (),
1234 (__mmask8) -1,
1235 _MM_FROUND_CUR_DIRECTION);
1236 }
1237
1238 extern __inline __m128h
1239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1240 _mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
1241 {
1242 return (__m128h)
1243 __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
1244 (__v8hf) __W, (__mmask8) __U,
1245 _MM_FROUND_CUR_DIRECTION);
1246 }
1247
1248 extern __inline __m128h
1249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1250 _mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B)
1251 {
1252 return (__m128h)
1253 __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
1254 (__v8hf) _mm_setzero_ph (),
1255 (__mmask8) __U,
1256 _MM_FROUND_CUR_DIRECTION);
1257 }
1258
1259 #ifdef __OPTIMIZE__
1260 extern __inline __m128h
1261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1262 _mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R)
1263 {
1264 return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
1265 (__v8hf) __B,
1266 _mm_setzero_ph (),
1267 (__mmask8) -1,
1268 __R);
1269 }
1270
1271 extern __inline __m128h
1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
1274 __m128h __B, const int __R)
1275 {
1276 return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
1277 (__v8hf) __B,
1278 (__v8hf) __W,
1279 (__mmask8) __U, __R);
1280 }
1281
1282 extern __inline __m128h
1283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1284 _mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
1285 const int __R)
1286 {
1287 return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
1288 (__v8hf) __B,
1289 (__v8hf)
1290 _mm_setzero_ph (),
1291 (__mmask8) __U, __R);
1292 }
1293
1294 #else
1295 #define _mm_getexp_round_sh(A, B, R) \
1296 ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \
1297 (__v8hf)(__m128h)(B), \
1298 (__v8hf)_mm_setzero_ph(), \
1299 (__mmask8)-1, R))
1300
1301 #define _mm_mask_getexp_round_sh(W, U, A, B, C) \
1302 (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
1303
1304 #define _mm_maskz_getexp_round_sh(U, A, B, C) \
1305 (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, \
1306 (__v8hf)_mm_setzero_ph(), \
1307 U, C)
1308
1309 #endif /* __OPTIMIZE__ */
1310
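/* Usage sketch (illustrative only): vgetexpsh extracts the unbiased
   exponent of element 0 of the second operand as a half-precision value,
   roughly floor(log2(fabs(x))), copying the upper elements from the
   first operand.  With a an arbitrary __m128h value:

     __m128h e = _mm_getexp_sh (a, _mm_set_sh (8.0f16));   // e[0] == 3.0
*/
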
1311 /* Intrinsics vgetmantsh. */
1312 #ifdef __OPTIMIZE__
1313 extern __inline __m128h
1314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1315 _mm_getmant_sh (__m128h __A, __m128h __B,
1316 _MM_MANTISSA_NORM_ENUM __C,
1317 _MM_MANTISSA_SIGN_ENUM __D)
1318 {
1319 return (__m128h)
1320 __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
1321 (__D << 2) | __C, _mm_setzero_ph (),
1322 (__mmask8) -1,
1323 _MM_FROUND_CUR_DIRECTION);
1324 }
1325
1326 extern __inline __m128h
1327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1328 _mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A,
1329 __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
1330 _MM_MANTISSA_SIGN_ENUM __D)
1331 {
1332 return (__m128h)
1333 __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
1334 (__D << 2) | __C, (__v8hf) __W,
1335 __U, _MM_FROUND_CUR_DIRECTION);
1336 }
1337
1338 extern __inline __m128h
1339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340 _mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
1341 _MM_MANTISSA_NORM_ENUM __C,
1342 _MM_MANTISSA_SIGN_ENUM __D)
1343 {
1344 return (__m128h)
1345 __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
1346 (__D << 2) | __C,
1347 (__v8hf) _mm_setzero_ph(),
1348 __U, _MM_FROUND_CUR_DIRECTION);
1349 }
1350
1351 extern __inline __m128h
1352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1353 _mm_getmant_round_sh (__m128h __A, __m128h __B,
1354 _MM_MANTISSA_NORM_ENUM __C,
1355 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1356 {
1357 return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
1358 (__v8hf) __B,
1359 (__D << 2) | __C,
1360 _mm_setzero_ph (),
1361 (__mmask8) -1,
1362 __R);
1363 }
1364
1365 extern __inline __m128h
1366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1367 _mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
1368 __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
1369 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1370 {
1371 return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
1372 (__v8hf) __B,
1373 (__D << 2) | __C,
1374 (__v8hf) __W,
1375 __U, __R);
1376 }
1377
1378 extern __inline __m128h
1379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1380 _mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
1381 _MM_MANTISSA_NORM_ENUM __C,
1382 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1383 {
1384 return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
1385 (__v8hf) __B,
1386 (__D << 2) | __C,
1387 (__v8hf)
1388 _mm_setzero_ph(),
1389 __U, __R);
1390 }
1391
1392 #else
1393 #define _mm_getmant_sh(X, Y, C, D) \
1394 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
1395 (__v8hf)(__m128h)(Y), \
1396 (int)(((D)<<2) | (C)), \
1397 (__v8hf)(__m128h) \
1398 _mm_setzero_ph (), \
1399 (__mmask8)-1, \
1400 _MM_FROUND_CUR_DIRECTION))
1401
1402 #define _mm_mask_getmant_sh(W, U, X, Y, C, D) \
1403 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
1404 (__v8hf)(__m128h)(Y), \
1405 (int)(((D)<<2) | (C)), \
1406 (__v8hf)(__m128h)(W), \
1407 (__mmask8)(U), \
1408 _MM_FROUND_CUR_DIRECTION))
1409
1410 #define _mm_maskz_getmant_sh(U, X, Y, C, D) \
1411 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
1412 (__v8hf)(__m128h)(Y), \
1413 (int)(((D)<<2) | (C)), \
1414 (__v8hf)(__m128h) \
1415 _mm_setzero_ph(), \
1416 (__mmask8)(U), \
1417 _MM_FROUND_CUR_DIRECTION))
1418
1419 #define _mm_getmant_round_sh(X, Y, C, D, R) \
1420 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
1421 (__v8hf)(__m128h)(Y), \
1422 (int)(((D)<<2) | (C)), \
1423 (__v8hf)(__m128h) \
1424 _mm_setzero_ph (), \
1425 (__mmask8)-1, \
1426 (R)))
1427
1428 #define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R) \
1429 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
1430 (__v8hf)(__m128h)(Y), \
1431 (int)(((D)<<2) | (C)), \
1432 (__v8hf)(__m128h)(W), \
1433 (__mmask8)(U), \
1434 (R)))
1435
1436 #define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R) \
1437 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
1438 (__v8hf)(__m128h)(Y), \
1439 (int)(((D)<<2) | (C)), \
1440 (__v8hf)(__m128h) \
1441 _mm_setzero_ph(), \
1442 (__mmask8)(U), \
1443 (R)))
1444
1445 #endif /* __OPTIMIZE__ */
1446
1447 /* Intrinsics vmovw. */
1448 extern __inline __m128i
1449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1450 _mm_cvtsi16_si128 (short __A)
1451 {
1452 return _mm_avx512_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A);
1453 }
1454
1455 extern __inline short
1456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1457 _mm_cvtsi128_si16 (__m128i __A)
1458 {
1459 return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
1460 }
1461
1462 /* Intrinsics vmovsh. */
1463 extern __inline __m128h
1464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465 _mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
1466 {
1467 return __builtin_ia32_loadsh_mask (__C, __A, __B);
1468 }
1469
1470 extern __inline __m128h
1471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1472 _mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
1473 {
1474 return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
1475 }
1476
1477 extern __inline void
1478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1479 _mm_mask_store_sh (_Float16 *__A, __mmask8 __B, __m128h __C)
1480 {
1481 __builtin_ia32_storesh_mask (__A, __C, __B);
1482 }
1483
1484 extern __inline __m128h
1485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1486 _mm_move_sh (__m128h __A, __m128h __B)
1487 {
1488 __A[0] = __B[0];
1489 return __A;
1490 }
1491
1492 extern __inline __m128h
1493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1494 _mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1495 {
1496 return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
1497 }
1498
1499 extern __inline __m128h
1500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501 _mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
1502 {
1503 return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
1504 }
1505
1506 /* Intrinsics vcvtsh2si, vcvtsh2us. */
1507 extern __inline int
1508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1509 _mm_cvtsh_i32 (__m128h __A)
1510 {
1511 return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
1512 }
1513
1514 extern __inline unsigned
1515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516 _mm_cvtsh_u32 (__m128h __A)
1517 {
1518   return (unsigned) __builtin_ia32_vcvtsh2usi32_round (__A,
1519 _MM_FROUND_CUR_DIRECTION);
1520 }
1521
1522 #ifdef __OPTIMIZE__
1523 extern __inline int
1524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1525 _mm_cvt_roundsh_i32 (__m128h __A, const int __R)
1526 {
1527 return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
1528 }
1529
1530 extern __inline unsigned
1531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1532 _mm_cvt_roundsh_u32 (__m128h __A, const int __R)
1533 {
1534   return (unsigned) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
1535 }
1536
1537 #else
1538 #define _mm_cvt_roundsh_i32(A, B) \
1539 ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
1540 #define _mm_cvt_roundsh_u32(A, B) \
1541   ((unsigned)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
1542
1543 #endif /* __OPTIMIZE__ */
1544
1545 #ifdef __x86_64__
1546 extern __inline long long
1547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1548 _mm_cvtsh_i64 (__m128h __A)
1549 {
1550 return (long long)
1551 __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
1552 }
1553
1554 extern __inline unsigned long long
1555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1556 _mm_cvtsh_u64 (__m128h __A)
1557 {
1558   return (unsigned long long)
1559 __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
1560 }
1561
1562 #ifdef __OPTIMIZE__
1563 extern __inline long long
1564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1565 _mm_cvt_roundsh_i64 (__m128h __A, const int __R)
1566 {
1567 return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
1568 }
1569
1570 extern __inline unsigned long long
1571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1572 _mm_cvt_roundsh_u64 (__m128h __A, const int __R)
1573 {
1574   return (unsigned long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
1575 }
1576
1577 #else
1578 #define _mm_cvt_roundsh_i64(A, B) \
1579 ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
1580 #define _mm_cvt_roundsh_u64(A, B) \
1581   ((unsigned long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
1582
1583 #endif /* __OPTIMIZE__ */
1584 #endif /* __x86_64__ */
1585
1586 /* Intrinsics vcvtsi2sh, vcvtusi2sh. */
1587 extern __inline __m128h
1588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1589 _mm_cvti32_sh (__m128h __A, int __B)
1590 {
1591 return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
1592 }
1593
1594 extern __inline __m128h
1595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1596 _mm_cvtu32_sh (__m128h __A, unsigned int __B)
1597 {
1598 return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
1599 }
1600
1601 #ifdef __OPTIMIZE__
1602 extern __inline __m128h
1603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1604 _mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
1605 {
1606 return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
1607 }
1608
1609 extern __inline __m128h
1610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1611 _mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
1612 {
1613 return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
1614 }
1615
1616 #else
1617 #define _mm_cvt_roundi32_sh(A, B, C) \
1618 (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
1619 #define _mm_cvt_roundu32_sh(A, B, C) \
1620 (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
1621
1622 #endif /* __OPTIMIZE__ */
1623
1624 #ifdef __x86_64__
1625 extern __inline __m128h
1626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1627 _mm_cvti64_sh (__m128h __A, long long __B)
1628 {
1629 return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
1630 }
1631
1632 extern __inline __m128h
1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634 _mm_cvtu64_sh (__m128h __A, unsigned long long __B)
1635 {
1636 return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
1637 }
1638
1639 #ifdef __OPTIMIZE__
1640 extern __inline __m128h
1641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1642 _mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
1643 {
1644 return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
1645 }
1646
1647 extern __inline __m128h
1648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1649 _mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
1650 {
1651 return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
1652 }
1653
1654 #else
1655 #define _mm_cvt_roundi64_sh(A, B, C) \
1656 (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
1657 #define _mm_cvt_roundu64_sh(A, B, C) \
1658 (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
1659
1660 #endif /* __OPTIMIZE__ */
1661 #endif /* __x86_64__ */
1662
1663 /* Intrinsics vcvttsh2si, vcvttsh2us. */
1664 extern __inline int
1665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1666 _mm_cvttsh_i32 (__m128h __A)
1667 {
1668 return (int)
1669 __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
1670 }
1671
1672 extern __inline unsigned
1673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1674 _mm_cvttsh_u32 (__m128h __A)
1675 {
1676   return (unsigned)
1677 __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
1678 }
1679
1680 #ifdef __OPTIMIZE__
1681 extern __inline int
1682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1683 _mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
1684 {
1685 return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
1686 }
1687
1688 extern __inline unsigned
1689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1690 _mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
1691 {
1692   return (unsigned) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
1693 }
1694
1695 #else
1696 #define _mm_cvtt_roundsh_i32(A, B) \
1697 ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
1698 #define _mm_cvtt_roundsh_u32(A, B) \
1699   ((unsigned)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
1700
1701 #endif /* __OPTIMIZE__ */
1702
1703 #ifdef __x86_64__
1704 extern __inline long long
1705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1706 _mm_cvttsh_i64 (__m128h __A)
1707 {
1708 return (long long)
1709 __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
1710 }
1711
1712 extern __inline unsigned long long
1713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1714 _mm_cvttsh_u64 (__m128h __A)
1715 {
1716   return (unsigned long long)
1717 __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
1718 }
1719
1720 #ifdef __OPTIMIZE__
1721 extern __inline long long
1722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1723 _mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
1724 {
1725 return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
1726 }
1727
1728 extern __inline unsigned long long
1729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730 _mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
1731 {
1732   return (unsigned long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
1733 }
1734
1735 #else
1736 #define _mm_cvtt_roundsh_i64(A, B) \
1737 ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
1738 #define _mm_cvtt_roundsh_u64(A, B) \
1739   ((unsigned long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
1740
1741 #endif /* __OPTIMIZE__ */
1742 #endif /* __x86_64__ */
1743
1744 /* Intrinsics vcvtsh2ss, vcvtsh2sd. */
1745 extern __inline __m128
1746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1747 _mm_cvtsh_ss (__m128 __A, __m128h __B)
1748 {
1749 return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
1750 _mm_avx512_setzero_ps (),
1751 (__mmask8) -1,
1752 _MM_FROUND_CUR_DIRECTION);
1753 }
1754
1755 extern __inline __m128
1756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1757 _mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
1758 __m128h __D)
1759 {
1760 return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
1761 _MM_FROUND_CUR_DIRECTION);
1762 }
1763
1764 extern __inline __m128
1765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1766 _mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
1767 __m128h __C)
1768 {
1769 return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
1770 _mm_avx512_setzero_ps (),
1771 __A, _MM_FROUND_CUR_DIRECTION);
1772 }
1773
1774 extern __inline __m128d
1775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1776 _mm_cvtsh_sd (__m128d __A, __m128h __B)
1777 {
1778 return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
1779 _mm_avx512_setzero_pd (),
1780 (__mmask8) -1,
1781 _MM_FROUND_CUR_DIRECTION);
1782 }
1783
1784 extern __inline __m128d
1785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1786 _mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
1787 __m128h __D)
1788 {
1789 return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
1790 _MM_FROUND_CUR_DIRECTION);
1791 }
1792
1793 extern __inline __m128d
1794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1795 _mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
1796 {
1797 return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
1798 _mm_avx512_setzero_pd (),
1799 __A, _MM_FROUND_CUR_DIRECTION);
1800 }
1801
1802 #ifdef __OPTIMIZE__
1803 extern __inline __m128
1804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805 _mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
1806 {
1807 return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
1808 _mm_avx512_setzero_ps (),
1809 (__mmask8) -1, __R);
1810 }
1811
1812 extern __inline __m128
1813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1814 _mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
1815 __m128h __D, const int __R)
1816 {
1817 return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
1818 }
1819
1820 extern __inline __m128
1821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1822 _mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
1823 __m128h __C, const int __R)
1824 {
1825 return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
1826 _mm_avx512_setzero_ps (),
1827 __A, __R);
1828 }
1829
1830 extern __inline __m128d
1831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1832 _mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
1833 {
1834 return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
1835 _mm_avx512_setzero_pd (),
1836 (__mmask8) -1, __R);
1837 }
1838
1839 extern __inline __m128d
1840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1841 _mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
1842 __m128h __D, const int __R)
1843 {
1844 return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
1845 }
1846
1847 extern __inline __m128d
1848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1849 _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
1850 {
1851 return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
1852 _mm_avx512_setzero_pd (),
1853 __A, __R);
1854 }
1855
1856 #else
1857 #define _mm_cvt_roundsh_ss(A, B, R) \
1858 (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \
1859 _mm_avx512_setzero_ps (), \
1860 (__mmask8) -1, (R)))
1861
1862 #define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \
1863 (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
1864
1865 #define _mm_maskz_cvt_roundsh_ss(A, B, C, R) \
1866 (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \
1867 _mm_avx512_setzero_ps (), \
1868 (A), (R)))
1869
1870 #define _mm_cvt_roundsh_sd(A, B, R) \
1871 (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \
1872 _mm_avx512_setzero_pd (), \
1873 (__mmask8) -1, (R)))
1874
1875 #define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \
1876 (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
1877
1878 #define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \
1879 (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \
1880 _mm_avx512_setzero_pd (), \
1881 (A), (R)))
1882
1883 #endif /* __OPTIMIZE__ */
1884
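/* Usage sketch (illustrative only): widening the low _Float16 element of the
   second operand to single precision, with the upper three floats copied from
   the first operand.  The _mm_cvt_roundsh_ss variant additionally accepts
   _MM_FROUND_NO_EXC to suppress exceptions; the conversion itself is exact.

     __m128  __hi = _mm_set1_ps (1.0f);
     __m128h __lo = _mm_set_ph (0, 0, 0, 0, 0, 0, 0, (_Float16) 2.5f);
     __m128  __r  = _mm_cvtsh_ss (__hi, __lo);   // low element 2.5f, rest 1.0f
*/
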
1885 /* Intrinsics vcvtss2sh, vcvtsd2sh. */
1886 extern __inline __m128h
1887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1888 _mm_cvtss_sh (__m128h __A, __m128 __B)
1889 {
1890 return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
1891 _mm_setzero_ph (),
1892 (__mmask8) -1,
1893 _MM_FROUND_CUR_DIRECTION);
1894 }
1895
1896 extern __inline __m128h
1897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1898 _mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
1899 {
1900 return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
1901 _MM_FROUND_CUR_DIRECTION);
1902 }
1903
1904 extern __inline __m128h
1905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906 _mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
1907 {
1908 return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
1909 _mm_setzero_ph (),
1910 __A, _MM_FROUND_CUR_DIRECTION);
1911 }
1912
1913 extern __inline __m128h
1914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1915 _mm_cvtsd_sh (__m128h __A, __m128d __B)
1916 {
1917 return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
1918 _mm_setzero_ph (),
1919 (__mmask8) -1,
1920 _MM_FROUND_CUR_DIRECTION);
1921 }
1922
1923 extern __inline __m128h
1924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1925 _mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
1926 {
1927 return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
1928 _MM_FROUND_CUR_DIRECTION);
1929 }
1930
1931 extern __inline __m128h
1932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1933 _mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
1934 {
1935 return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
1936 _mm_setzero_ph (),
1937 __A, _MM_FROUND_CUR_DIRECTION);
1938 }
1939
1940 #ifdef __OPTIMIZE__
1941 extern __inline __m128h
1942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1943 _mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
1944 {
1945 return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
1946 _mm_setzero_ph (),
1947 (__mmask8) -1, __R);
1948 }
1949
1950 extern __inline __m128h
1951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1952 _mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
1953 const int __R)
1954 {
1955 return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
1956 }
1957
1958 extern __inline __m128h
1959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960 _mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
1961 const int __R)
1962 {
1963 return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
1964 _mm_setzero_ph (),
1965 __A, __R);
1966 }
1967
1968 extern __inline __m128h
1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970 _mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
1971 {
1972 return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
1973 _mm_setzero_ph (),
1974 (__mmask8) -1, __R);
1975 }
1976
1977 extern __inline __m128h
1978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1979 _mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
1980 const int __R)
1981 {
1982 return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
1983 }
1984
1985 extern __inline __m128h
1986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1987 _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
1988 const int __R)
1989 {
1990 return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
1991 _mm_setzero_ph (),
1992 __A, __R);
1993 }
1994
1995 #else
1996 #define _mm_cvt_roundss_sh(A, B, R) \
1997 (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \
1998 _mm_setzero_ph (), \
1999 (__mmask8) -1, (R)))
2000
2001 #define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \
2002 (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
2003
2004 #define _mm_maskz_cvt_roundss_sh(A, B, C, R) \
2005 (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \
2006 _mm_setzero_ph (), \
2007 (A), (R)))
2008
2009 #define _mm_cvt_roundsd_sh(A, B, R) \
2010 (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \
2011 _mm_setzero_ph (), \
2012 (__mmask8) -1, (R)))
2013
2014 #define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \
2015 (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
2016
2017 #define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \
2018 (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \
2019 _mm_setzero_ph (), \
2020 (A), (R)))
2021
2022 #endif /* __OPTIMIZE__ */
2023
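/* Usage sketch (illustrative only): narrowing the low float of the second
   operand to _Float16.  Unlike the widening direction this conversion can
   round, so the round variants take a full rounding-mode argument.

     __m128h __dst = _mm_setzero_ph ();
     __m128  __src = _mm_set1_ps (0.1f);
     __m128h __r   = _mm_cvt_roundss_sh (__dst, __src,
                                         _MM_FROUND_TO_NEAREST_INT
                                         | _MM_FROUND_NO_EXC);
*/
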
2024 extern __inline _Float16
2025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2026 _mm_cvtsh_h (__m128h __A)
2027 {
2028 return __A[0];
2029 }
2030
2031 /* Intrinsics vfmadd[132,213,231]sh. */
2032 extern __inline __m128h
2033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2034 _mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
2035 {
2036 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2037 (__v8hf) __A,
2038 (__v8hf) __B,
2039 (__mmask8) -1,
2040 _MM_FROUND_CUR_DIRECTION);
2041 }
2042
2043 extern __inline __m128h
2044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2045 _mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
2046 {
2047 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2048 (__v8hf) __A,
2049 (__v8hf) __B,
2050 (__mmask8) __U,
2051 _MM_FROUND_CUR_DIRECTION);
2052 }
2053
2054 extern __inline __m128h
2055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2056 _mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
2057 {
2058 return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
2059 (__v8hf) __A,
2060 (__v8hf) __B,
2061 (__mmask8) __U,
2062 _MM_FROUND_CUR_DIRECTION);
2063 }
2064
2065 extern __inline __m128h
2066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2067 _mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
2068 {
2069 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
2070 (__v8hf) __A,
2071 (__v8hf) __B,
2072 (__mmask8) __U,
2073 _MM_FROUND_CUR_DIRECTION);
2074 }
2075
2076
2077 #ifdef __OPTIMIZE__
2078 extern __inline __m128h
2079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2080 _mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
2081 {
2082 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2083 (__v8hf) __A,
2084 (__v8hf) __B,
2085 (__mmask8) -1,
2086 __R);
2087 }
2088
2089 extern __inline __m128h
2090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2091 _mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
2092 const int __R)
2093 {
2094 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2095 (__v8hf) __A,
2096 (__v8hf) __B,
2097 (__mmask8) __U, __R);
2098 }
2099
2100 extern __inline __m128h
2101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102 _mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
2103 const int __R)
2104 {
2105 return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
2106 (__v8hf) __A,
2107 (__v8hf) __B,
2108 (__mmask8) __U, __R);
2109 }
2110
2111 extern __inline __m128h
2112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2113 _mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
2114 __m128h __B, const int __R)
2115 {
2116 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
2117 (__v8hf) __A,
2118 (__v8hf) __B,
2119 (__mmask8) __U, __R);
2120 }
2121
2122 #else
2123 #define _mm_fmadd_round_sh(A, B, C, R) \
2124 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
2125 #define _mm_mask_fmadd_round_sh(A, U, B, C, R) \
2126 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
2127 #define _mm_mask3_fmadd_round_sh(A, B, C, U, R) \
2128 ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
2129 #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
2130 ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
2131
2132 #endif /* __OPTIMIZE__ */
2133
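/* Usage sketch (illustrative only): with the parameter names used above,
   _mm_fmadd_sh computes __W[0] * __A[0] + __B[0] and copies the upper seven
   elements from __W (the Intel scalar-FMA convention).  The masked forms
   blend or zero the low element according to bit 0 of the mask.  The example
   assumes the _mm_set1_ph helper defined earlier in this header.

     __m128h __w = _mm_set1_ph ((_Float16) 2.0f);
     __m128h __a = _mm_set1_ph ((_Float16) 3.0f);
     __m128h __b = _mm_set1_ph ((_Float16) 1.0f);
     __m128h __r = _mm_fmadd_sh (__w, __a, __b);   // low element: 7.0
*/
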
2134 /* Intrinsics vfnmadd[132,213,231]sh. */
2135 extern __inline __m128h
2136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2137 _mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
2138 {
2139 return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
2140 (__v8hf) __A,
2141 (__v8hf) __B,
2142 (__mmask8) -1,
2143 _MM_FROUND_CUR_DIRECTION);
2144 }
2145
2146 extern __inline __m128h
2147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2148 _mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
2149 {
2150 return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
2151 (__v8hf) __A,
2152 (__v8hf) __B,
2153 (__mmask8) __U,
2154 _MM_FROUND_CUR_DIRECTION);
2155 }
2156
2157 extern __inline __m128h
2158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2159 _mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
2160 {
2161 return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
2162 (__v8hf) __A,
2163 (__v8hf) __B,
2164 (__mmask8) __U,
2165 _MM_FROUND_CUR_DIRECTION);
2166 }
2167
2168 extern __inline __m128h
2169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2170 _mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
2171 {
2172 return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
2173 (__v8hf) __A,
2174 (__v8hf) __B,
2175 (__mmask8) __U,
2176 _MM_FROUND_CUR_DIRECTION);
2177 }
2178
2179
2180 #ifdef __OPTIMIZE__
2181 extern __inline __m128h
2182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2183 _mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
2184 {
2185 return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
2186 (__v8hf) __A,
2187 (__v8hf) __B,
2188 (__mmask8) -1,
2189 __R);
2190 }
2191
2192 extern __inline __m128h
2193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194 _mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
2195 const int __R)
2196 {
2197 return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
2198 (__v8hf) __A,
2199 (__v8hf) __B,
2200 (__mmask8) __U, __R);
2201 }
2202
2203 extern __inline __m128h
2204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2205 _mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
2206 const int __R)
2207 {
2208 return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
2209 (__v8hf) __A,
2210 (__v8hf) __B,
2211 (__mmask8) __U, __R);
2212 }
2213
2214 extern __inline __m128h
2215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2216 _mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
2217 __m128h __B, const int __R)
2218 {
2219 return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
2220 (__v8hf) __A,
2221 (__v8hf) __B,
2222 (__mmask8) __U, __R);
2223 }
2224
2225 #else
2226 #define _mm_fnmadd_round_sh(A, B, C, R) \
2227 ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
2228 #define _mm_mask_fnmadd_round_sh(A, U, B, C, R) \
2229 ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
2230 #define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) \
2231 ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
2232 #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
2233 ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
2234
2235 #endif /* __OPTIMIZE__ */
2236
2237 /* Intrinsics vfmsub[132,213,231]sh. */
2238 extern __inline __m128h
2239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240 _mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
2241 {
2242 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2243 (__v8hf) __A,
2244 -(__v8hf) __B,
2245 (__mmask8) -1,
2246 _MM_FROUND_CUR_DIRECTION);
2247 }
2248
2249 extern __inline __m128h
2250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2251 _mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
2252 {
2253 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2254 (__v8hf) __A,
2255 -(__v8hf) __B,
2256 (__mmask8) __U,
2257 _MM_FROUND_CUR_DIRECTION);
2258 }
2259
2260 extern __inline __m128h
2261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262 _mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
2263 {
2264 return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
2265 (__v8hf) __A,
2266 (__v8hf) __B,
2267 (__mmask8) __U,
2268 _MM_FROUND_CUR_DIRECTION);
2269 }
2270
2271 extern __inline __m128h
2272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2273 _mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
2274 {
2275 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
2276 (__v8hf) __A,
2277 -(__v8hf) __B,
2278 (__mmask8) __U,
2279 _MM_FROUND_CUR_DIRECTION);
2280 }
2281
2282
2283 #ifdef __OPTIMIZE__
2284 extern __inline __m128h
2285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2286 _mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
2287 {
2288 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2289 (__v8hf) __A,
2290 -(__v8hf) __B,
2291 (__mmask8) -1,
2292 __R);
2293 }
2294
2295 extern __inline __m128h
2296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2297 _mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
2298 const int __R)
2299 {
2300 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2301 (__v8hf) __A,
2302 -(__v8hf) __B,
2303 (__mmask8) __U, __R);
2304 }
2305
2306 extern __inline __m128h
2307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2308 _mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
2309 const int __R)
2310 {
2311 return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
2312 (__v8hf) __A,
2313 (__v8hf) __B,
2314 (__mmask8) __U, __R);
2315 }
2316
2317 extern __inline __m128h
2318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2319 _mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
2320 __m128h __B, const int __R)
2321 {
2322 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
2323 (__v8hf) __A,
2324 -(__v8hf) __B,
2325 (__mmask8) __U, __R);
2326 }
2327
2328 #else
2329 #define _mm_fmsub_round_sh(A, B, C, R) \
2330 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
2331 #define _mm_mask_fmsub_round_sh(A, U, B, C, R) \
2332 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
2333 #define _mm_mask3_fmsub_round_sh(A, B, C, U, R) \
2334 ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
2335 #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
2336 ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
2337
2338 #endif /* __OPTIMIZE__ */
2339
2340 /* Intrinsics vfnmsub[132,213,231]sh. */
2341 extern __inline __m128h
2342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2343 _mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
2344 {
2345 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2346 -(__v8hf) __A,
2347 -(__v8hf) __B,
2348 (__mmask8) -1,
2349 _MM_FROUND_CUR_DIRECTION);
2350 }
2351
2352 extern __inline __m128h
2353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354 _mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
2355 {
2356 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2357 -(__v8hf) __A,
2358 -(__v8hf) __B,
2359 (__mmask8) __U,
2360 _MM_FROUND_CUR_DIRECTION);
2361 }
2362
2363 extern __inline __m128h
2364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2365 _mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
2366 {
2367 return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
2368 -(__v8hf) __A,
2369 (__v8hf) __B,
2370 (__mmask8) __U,
2371 _MM_FROUND_CUR_DIRECTION);
2372 }
2373
2374 extern __inline __m128h
2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376 _mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
2377 {
2378 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
2379 -(__v8hf) __A,
2380 -(__v8hf) __B,
2381 (__mmask8) __U,
2382 _MM_FROUND_CUR_DIRECTION);
2383 }
2384
2385
2386 #ifdef __OPTIMIZE__
2387 extern __inline __m128h
2388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389 _mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
2390 {
2391 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2392 -(__v8hf) __A,
2393 -(__v8hf) __B,
2394 (__mmask8) -1,
2395 __R);
2396 }
2397
2398 extern __inline __m128h
2399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2400 _mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
2401 const int __R)
2402 {
2403 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
2404 -(__v8hf) __A,
2405 -(__v8hf) __B,
2406 (__mmask8) __U, __R);
2407 }
2408
2409 extern __inline __m128h
2410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2411 _mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
2412 const int __R)
2413 {
2414 return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
2415 -(__v8hf) __A,
2416 (__v8hf) __B,
2417 (__mmask8) __U, __R);
2418 }
2419
2420 extern __inline __m128h
2421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2422 _mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
2423 __m128h __B, const int __R)
2424 {
2425 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
2426 -(__v8hf) __A,
2427 -(__v8hf) __B,
2428 (__mmask8) __U, __R);
2429 }
2430
2431 #else
2432 #define _mm_fnmsub_round_sh(A, B, C, R) \
2433 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
2434 #define _mm_mask_fnmsub_round_sh(A, U, B, C, R) \
2435 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
2436 #define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) \
2437 ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
2438 #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
2439 ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
2440
2441 #endif /* __OPTIMIZE__ */
2442
2443 /* Intrinsics vf[,c]maddcsh. */
2444 extern __inline __m128h
2445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2446 _mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
2447 {
2448 return (__m128h)
2449 __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
2450 (__v8hf) __C,
2451 (__v8hf) __D, __B,
2452 _MM_FROUND_CUR_DIRECTION);
2453 }
2454
2455 extern __inline __m128h
2456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2457 _mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
2458 {
2459 return (__m128h)
2460 __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
2461 (__v8hf) __B,
2462 (__v8hf) __C, __D,
2463 _MM_FROUND_CUR_DIRECTION);
2464 }
2465
2466 extern __inline __m128h
2467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2468 _mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
2469 {
2470 return (__m128h)
2471 __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
2472 (__v8hf) __C,
2473 (__v8hf) __D,
2474 __A, _MM_FROUND_CUR_DIRECTION);
2475 }
2476
2477 extern __inline __m128h
2478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2479 _mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
2480 {
2481 return (__m128h)
2482 __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
2483 (__v8hf) __B,
2484 (__v8hf) __C,
2485 _MM_FROUND_CUR_DIRECTION);
2486 }
2487
2488 extern __inline __m128h
2489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2490 _mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
2491 {
2492 return (__m128h)
2493 __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
2494 (__v8hf) __C,
2495 (__v8hf) __D, __B,
2496 _MM_FROUND_CUR_DIRECTION);
2497 }
2498
2499 extern __inline __m128h
2500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2501 _mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
2502 {
2503 return (__m128h)
2504 __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
2505 (__v8hf) __B,
2506 (__v8hf) __C, __D,
2507 _MM_FROUND_CUR_DIRECTION);
2508 }
2509
2510 extern __inline __m128h
2511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2512 _mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
2513 {
2514 return (__m128h)
2515 __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
2516 (__v8hf) __C,
2517 (__v8hf) __D,
2518 __A, _MM_FROUND_CUR_DIRECTION);
2519 }
2520
2521 extern __inline __m128h
2522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2523 _mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
2524 {
2525 return (__m128h)
2526 __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
2527 (__v8hf) __B,
2528 (__v8hf) __C,
2529 _MM_FROUND_CUR_DIRECTION);
2530 }
2531
2532 #ifdef __OPTIMIZE__
2533 extern __inline __m128h
2534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2535 _mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
2536 __m128h __D, const int __E)
2537 {
2538 return (__m128h)
2539 __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
2540 (__v8hf) __C,
2541 (__v8hf) __D,
2542 __B, __E);
2543 }
2544
2545 extern __inline __m128h
2546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2547 _mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
2548 __mmask8 __D, const int __E)
2549 {
2550 return (__m128h)
2551 __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
2552 (__v8hf) __B,
2553 (__v8hf) __C,
2554 __D, __E);
2555 }
2556
2557 extern __inline __m128h
2558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2559 _mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
2560 __m128h __D, const int __E)
2561 {
2562 return (__m128h)
2563 __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
2564 (__v8hf) __C,
2565 (__v8hf) __D,
2566 __A, __E);
2567 }
2568
2569 extern __inline __m128h
2570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571 _mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
2572 {
2573 return (__m128h)
2574 __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
2575 (__v8hf) __B,
2576 (__v8hf) __C,
2577 __D);
2578 }
2579
2580 extern __inline __m128h
2581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2582 _mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
2583 __m128h __D, const int __E)
2584 {
2585 return (__m128h)
2586 __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
2587 (__v8hf) __C,
2588 (__v8hf) __D,
2589 __B, __E);
2590 }
2591
2592 extern __inline __m128h
2593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2594 _mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
2595 __mmask8 __D, const int __E)
2596 {
2597 return (__m128h)
2598 __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
2599 (__v8hf) __B,
2600 (__v8hf) __C,
2601 __D, __E);
2602 }
2603
2604 extern __inline __m128h
2605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2606 _mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
2607 __m128h __D, const int __E)
2608 {
2609 return (__m128h)
2610 __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
2611 (__v8hf) __C,
2612 (__v8hf) __D,
2613 __A, __E);
2614 }
2615
2616 extern __inline __m128h
2617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2618 _mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
2619 {
2620 return (__m128h)
2621 __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
2622 (__v8hf) __B,
2623 (__v8hf) __C,
2624 __D);
2625 }
2626 #else
2627 #define _mm_mask_fcmadd_round_sch(A, B, C, D, E) \
2628 ((__m128h) \
2629 __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), \
2630 (__v8hf) (C), \
2631 (__v8hf) (D), \
2632 (B), (E)))
2633
2634
2635 #define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) \
2636 ((__m128h) \
2637 __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), \
2638 (__v8hf) (B), \
2639 (__v8hf) (C), \
2640 (D), (E)))
2641
2642 #define _mm_maskz_fcmadd_round_sch(A, B, C, D, E) \
2643 __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E))
2644
2645 #define _mm_fcmadd_round_sch(A, B, C, D) \
2646 __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D))
2647
2648 #define _mm_mask_fmadd_round_sch(A, B, C, D, E) \
2649 ((__m128h) \
2650 __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), \
2651 (__v8hf) (C), \
2652 (__v8hf) (D), \
2653 (B), (E)))
2654
2655 #define _mm_mask3_fmadd_round_sch(A, B, C, D, E) \
2656 ((__m128h) \
2657 __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), \
2658 (__v8hf) (B), \
2659 (__v8hf) (C), \
2660 (D), (E)))
2661
2662 #define _mm_maskz_fmadd_round_sch(A, B, C, D, E) \
2663 __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E))
2664
2665 #define _mm_fmadd_round_sch(A, B, C, D) \
2666 __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D))
2667
2668 #endif /* __OPTIMIZE__ */
2669
2670 /* Intrinsics vf[,c]mulcsh. */
2671 extern __inline __m128h
2672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673 _mm_fcmul_sch (__m128h __A, __m128h __B)
2674 {
2675 return (__m128h)
2676 __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
2677 (__v8hf) __B,
2678 _MM_FROUND_CUR_DIRECTION);
2679 }
2680
2681 extern __inline __m128h
2682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2683 _mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
2684 {
2685 return (__m128h)
2686 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
2687 (__v8hf) __D,
2688 (__v8hf) __A,
2689 __B, _MM_FROUND_CUR_DIRECTION);
2690 }
2691
2692 extern __inline __m128h
2693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2694 _mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
2695 {
2696 return (__m128h)
2697 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
2698 (__v8hf) __C,
2699 _mm_setzero_ph (),
2700 __A, _MM_FROUND_CUR_DIRECTION);
2701 }
2702
2703 extern __inline __m128h
2704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2705 _mm_fmul_sch (__m128h __A, __m128h __B)
2706 {
2707 return (__m128h)
2708 __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
2709 (__v8hf) __B,
2710 _MM_FROUND_CUR_DIRECTION);
2711 }
2712
2713 extern __inline __m128h
2714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2715 _mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
2716 {
2717 return (__m128h)
2718 __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
2719 (__v8hf) __D,
2720 (__v8hf) __A,
2721 __B, _MM_FROUND_CUR_DIRECTION);
2722 }
2723
2724 extern __inline __m128h
2725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726 _mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
2727 {
2728 return (__m128h)
2729 __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
2730 (__v8hf) __C,
2731 _mm_setzero_ph (),
2732 __A, _MM_FROUND_CUR_DIRECTION);
2733 }
2734
2735 #ifdef __OPTIMIZE__
2736 extern __inline __m128h
2737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2738 _mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
2739 {
2740 return (__m128h)
2741 __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
2742 (__v8hf) __B,
2743 __D);
2744 }
2745
2746 extern __inline __m128h
2747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2748 _mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
2749 __m128h __D, const int __E)
2750 {
2751 return (__m128h)
2752 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
2753 (__v8hf) __D,
2754 (__v8hf) __A,
2755 __B, __E);
2756 }
2757
2758 extern __inline __m128h
2759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2760 _mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
2761 const int __E)
2762 {
2763 return (__m128h)
2764 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
2765 (__v8hf) __C,
2766 _mm_setzero_ph (),
2767 __A, __E);
2768 }
2769
2770 extern __inline __m128h
2771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2772 _mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
2773 {
2774 return (__m128h)
2775 __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
2776 (__v8hf) __B, __D);
2777 }
2778
2779 extern __inline __m128h
2780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2781 _mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
2782 __m128h __D, const int __E)
2783 {
2784 return (__m128h)
2785 __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
2786 (__v8hf) __D,
2787 (__v8hf) __A,
2788 __B, __E);
2789 }
2790
2791 extern __inline __m128h
2792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2793 _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
2794 {
2795 return (__m128h)
2796 __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
2797 (__v8hf) __C,
2798 _mm_setzero_ph (),
2799 __A, __E);
2800 }
2801
2802 #else
2803 #define _mm_fcmul_round_sch(__A, __B, __D) \
2804 (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) (__A), \
2805 (__v8hf) (__B), (__D))
2806
2807 #define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E) \
2808 (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) (__C), \
2809 (__v8hf) (__D), \
2810 (__v8hf) (__A), \
2811 (__B), (__E))
2812
2813 #define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E) \
2814 (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) (__B), \
2815 (__v8hf) (__C), \
2816 _mm_setzero_ph (), \
2817 (__A), (__E))
2818
2819 #define _mm_fmul_round_sch(__A, __B, __D) \
2820 (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) (__A), \
2821 (__v8hf) (__B), (__D))
2822
2823 #define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E) \
2824 (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) (__C), \
2825 (__v8hf) (__D), \
2826 (__v8hf) (__A), \
2827 (__B), (__E))
2828
2829 #define _mm_maskz_fmul_round_sch(__A, __B, __C, __E) \
2830 (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) (__B), \
2831 (__v8hf) (__C), \
2832 _mm_setzero_ph (), \
2833 (__A), (__E))
2834
2835 #endif /* __OPTIMIZE__ */
2836
2837 #define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
2838 #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
2839 #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
2840 #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
2841 #define _mm_mask_mul_round_sch(W, U, A, B, R) \
2842 _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
2843 #define _mm_maskz_mul_round_sch(U, A, B, R) \
2844 _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
2845
2846 #define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
2847 #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
2848 #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
2849 #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
2850 #define _mm_mask_cmul_round_sch(W, U, A, B, R) \
2851 _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
2852 #define _mm_maskz_cmul_round_sch(U, A, B, R) \
2853 _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
2854
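/* Usage sketch (illustrative only): the *_sch intrinsics treat the low two
   _Float16 elements as one complex value (real in element 0, imaginary in
   element 1).  Assuming vfcmulcsh conjugates its second operand (check the
   ISA reference), the following multiplies (1 + 2i) by the conjugate of
   (3 + 4i), giving 11 + 2i in the low pair.

     __m128h __x = _mm_set_ph (0, 0, 0, 0, 0, 0, (_Float16) 2.0f,
                               (_Float16) 1.0f);
     __m128h __y = _mm_set_ph (0, 0, 0, 0, 0, 0, (_Float16) 4.0f,
                               (_Float16) 3.0f);
     __m128h __p = _mm_fcmul_sch (__x, __y);
*/
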
2855 #ifdef __DISABLE_AVX512FP16__
2856 #undef __DISABLE_AVX512FP16__
2857 #pragma GCC pop_options
2858 #endif /* __DISABLE_AVX512FP16__ */
2859
2860 #if !defined (__AVX512FP16__) || !defined (__EVEX512__)
2861 #pragma GCC push_options
2862 #pragma GCC target("avx512fp16,evex512")
2863 #define __DISABLE_AVX512FP16_512__
2864 #endif /* __AVX512FP16_512__ */
2865
2866 typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
2867 typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
2868 typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
2869 __may_alias__, __aligned__ (1)));
2870
2871 extern __inline __m512h
2872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2873 _mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
2874 _Float16 __A28, _Float16 __A27, _Float16 __A26,
2875 _Float16 __A25, _Float16 __A24, _Float16 __A23,
2876 _Float16 __A22, _Float16 __A21, _Float16 __A20,
2877 _Float16 __A19, _Float16 __A18, _Float16 __A17,
2878 _Float16 __A16, _Float16 __A15, _Float16 __A14,
2879 _Float16 __A13, _Float16 __A12, _Float16 __A11,
2880 _Float16 __A10, _Float16 __A9, _Float16 __A8,
2881 _Float16 __A7, _Float16 __A6, _Float16 __A5,
2882 _Float16 __A4, _Float16 __A3, _Float16 __A2,
2883 _Float16 __A1, _Float16 __A0)
2884 {
2885 return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
2886 __A4, __A5, __A6, __A7,
2887 __A8, __A9, __A10, __A11,
2888 __A12, __A13, __A14, __A15,
2889 __A16, __A17, __A18, __A19,
2890 __A20, __A21, __A22, __A23,
2891 __A24, __A25, __A26, __A27,
2892 __A28, __A29, __A30, __A31 };
2893 }
2894
2895 /* Create vectors of elements in the reversed order from the
2896 _mm512_set_ph function. */
2897
2898 extern __inline __m512h
2899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2900 _mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
2901 _Float16 __A3, _Float16 __A4, _Float16 __A5,
2902 _Float16 __A6, _Float16 __A7, _Float16 __A8,
2903 _Float16 __A9, _Float16 __A10, _Float16 __A11,
2904 _Float16 __A12, _Float16 __A13, _Float16 __A14,
2905 _Float16 __A15, _Float16 __A16, _Float16 __A17,
2906 _Float16 __A18, _Float16 __A19, _Float16 __A20,
2907 _Float16 __A21, _Float16 __A22, _Float16 __A23,
2908 _Float16 __A24, _Float16 __A25, _Float16 __A26,
2909 _Float16 __A27, _Float16 __A28, _Float16 __A29,
2910 _Float16 __A30, _Float16 __A31)
2911
2912 {
2913 return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
2914 __A24, __A23, __A22, __A21, __A20, __A19, __A18,
2915 __A17, __A16, __A15, __A14, __A13, __A12, __A11,
2916 __A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
2917 __A2, __A1, __A0);
2918 }
2919
2920 /* Broadcast _Float16 to vector. */
2921 extern __inline __m512h
2922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2923 _mm512_set1_ph (_Float16 __A)
2924 {
2925 return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
2926 __A, __A, __A, __A, __A, __A, __A, __A,
2927 __A, __A, __A, __A, __A, __A, __A, __A,
2928 __A, __A, __A, __A, __A, __A, __A, __A);
2929 }
2930
2931 /* Create a vector with all zeros. */
2932 extern __inline __m512h
2933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2934 _mm512_setzero_ph (void)
2935 {
2936 return _mm512_set1_ph (0.0f16);
2937 }
2938
2939 extern __inline __m512h
2940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2941 _mm512_undefined_ph (void)
2942 {
2943 #pragma GCC diagnostic push
2944 #pragma GCC diagnostic ignored "-Winit-self"
2945 __m512h __Y = __Y;
2946 #pragma GCC diagnostic pop
2947 return __Y;
2948 }
2949
2950 extern __inline _Float16
2951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2952 _mm512_cvtsh_h (__m512h __A)
2953 {
2954 return __A[0];
2955 }
2956
2957 extern __inline __m512
2958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2959 _mm512_castph_ps (__m512h __a)
2960 {
2961 return (__m512) __a;
2962 }
2963
2964 extern __inline __m512d
2965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2966 _mm512_castph_pd (__m512h __a)
2967 {
2968 return (__m512d) __a;
2969 }
2970
2971 extern __inline __m512i
2972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2973 _mm512_castph_si512 (__m512h __a)
2974 {
2975 return (__m512i) __a;
2976 }
2977
2978 extern __inline __m128h
2979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2980 _mm512_castph512_ph128 (__m512h __A)
2981 {
2982 union
2983 {
2984 __m128h __a[4];
2985 __m512h __v;
2986 } __u = { .__v = __A };
2987 return __u.__a[0];
2988 }
2989
2990 extern __inline __m256h
2991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992 _mm512_castph512_ph256 (__m512h __A)
2993 {
2994 union
2995 {
2996 __m256h __a[2];
2997 __m512h __v;
2998 } __u = { .__v = __A };
2999 return __u.__a[0];
3000 }
3001
3002 extern __inline __m512h
3003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3004 _mm512_castph128_ph512 (__m128h __A)
3005 {
3006 union
3007 {
3008 __m128h __a[4];
3009 __m512h __v;
3010 } __u;
3011 __u.__a[0] = __A;
3012 return __u.__v;
3013 }
3014
3015 extern __inline __m512h
3016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3017 _mm512_castph256_ph512 (__m256h __A)
3018 {
3019 union
3020 {
3021 __m256h __a[2];
3022 __m512h __v;
3023 } __u;
3024 __u.__a[0] = __A;
3025 return __u.__v;
3026 }
3027
3028 extern __inline __m512h
3029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3030 _mm512_zextph128_ph512 (__m128h __A)
3031 {
3032 return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
3033 (__m128) __A, 0);
3034 }
3035
3036 extern __inline __m512h
3037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3038 _mm512_zextph256_ph512 (__m256h __A)
3039 {
3040 return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
3041 (__m256d) __A, 0);
3042 }
3043
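/* Usage sketch (illustrative only): _mm512_castph128_ph512 leaves the upper
   384 bits undefined, while _mm512_zextph128_ph512 zeroes them; prefer the
   latter whenever the upper elements are read afterwards.

     __m128h __lo   = _mm_set1_ph ((_Float16) 1.0f);
     __m512h __fast = _mm512_castph128_ph512 (__lo);  // upper bits undefined
     __m512h __safe = _mm512_zextph128_ph512 (__lo);  // upper bits zeroed
*/
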
3044 extern __inline __m512h
3045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3046 _mm512_castps_ph (__m512 __a)
3047 {
3048 return (__m512h) __a;
3049 }
3050
3051 extern __inline __m512h
3052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3053 _mm512_castpd_ph (__m512d __a)
3054 {
3055 return (__m512h) __a;
3056 }
3057
3058 extern __inline __m512h
3059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3060 _mm512_castsi512_ph (__m512i __a)
3061 {
3062 return (__m512h) __a;
3063 }
3064
3065 /* Load 512 bits (32 _Float16 values) from aligned or unaligned memory. */
3066 extern __inline __m512h
3067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3068 _mm512_load_ph (void const *__P)
3069 {
3070 return *(const __m512h *) __P;
3071 }
3072
3073 extern __inline __m512h
3074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3075 _mm512_loadu_ph (void const *__P)
3076 {
3077 return *(const __m512h_u *) __P;
3078 }
3079
3080 /* Store 512 bits (32 _Float16 values) to aligned or unaligned memory. */
3081 extern __inline void
3082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3083 _mm512_store_ph (void *__P, __m512h __A)
3084 {
3085 *(__m512h *) __P = __A;
3086 }
3087
3088 extern __inline void
3089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3090 _mm512_storeu_ph (void *__P, __m512h __A)
3091 {
3092 *(__m512h_u *) __P = __A;
3093 }
3094
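/* Usage sketch (illustrative only): loading 32 half-precision values,
   operating on them, and storing the result.  _mm512_load_ph and
   _mm512_store_ph require 64-byte alignment; the unaligned forms do not.
   The buffers below are hypothetical.

     _Float16 __src[32], __dst[32];
     __m512h __v = _mm512_loadu_ph (__src);
     __v = _mm512_add_ph (__v, __v);
     _mm512_storeu_ph (__dst, __v);
*/
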
3095 extern __inline __m512h
3096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3097 _mm512_abs_ph (__m512h __A)
3098 {
3099 return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
3100 (__m512i) __A);
3101 }
3102
3103 /* Intrinsics v[add,sub,mul,div]ph. */
3104 extern __inline __m512h
3105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3106 _mm512_add_ph (__m512h __A, __m512h __B)
3107 {
3108 return (__m512h) ((__v32hf) __A + (__v32hf) __B);
3109 }
3110
3111 extern __inline __m512h
3112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3113 _mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
3114 {
3115 return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
3116 }
3117
3118 extern __inline __m512h
3119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3120 _mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
3121 {
3122 return __builtin_ia32_addph512_mask (__B, __C,
3123 _mm512_setzero_ph (), __A);
3124 }
3125
3126 extern __inline __m512h
3127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3128 _mm512_sub_ph (__m512h __A, __m512h __B)
3129 {
3130 return (__m512h) ((__v32hf) __A - (__v32hf) __B);
3131 }
3132
3133 extern __inline __m512h
3134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3135 _mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
3136 {
3137 return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
3138 }
3139
3140 extern __inline __m512h
3141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3142 _mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
3143 {
3144 return __builtin_ia32_subph512_mask (__B, __C,
3145 _mm512_setzero_ph (), __A);
3146 }
3147
3148 extern __inline __m512h
3149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3150 _mm512_mul_ph (__m512h __A, __m512h __B)
3151 {
3152 return (__m512h) ((__v32hf) __A * (__v32hf) __B);
3153 }
3154
3155 extern __inline __m512h
3156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3157 _mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
3158 {
3159 return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
3160 }
3161
3162 extern __inline __m512h
3163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164 _mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
3165 {
3166 return __builtin_ia32_mulph512_mask (__B, __C,
3167 _mm512_setzero_ph (), __A);
3168 }
3169
3170 extern __inline __m512h
3171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3172 _mm512_div_ph (__m512h __A, __m512h __B)
3173 {
3174 return (__m512h) ((__v32hf) __A / (__v32hf) __B);
3175 }
3176
3177 extern __inline __m512h
3178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3179 _mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
3180 {
3181 return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
3182 }
3183
3184 extern __inline __m512h
3185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3186 _mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
3187 {
3188 return __builtin_ia32_divph512_mask (__B, __C,
3189 _mm512_setzero_ph (), __A);
3190 }
3191
3192 #ifdef __OPTIMIZE__
3193 extern __inline __m512h
3194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3195 _mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
3196 {
3197 return __builtin_ia32_addph512_mask_round (__A, __B,
3198 _mm512_setzero_ph (),
3199 (__mmask32) -1, __C);
3200 }
3201
3202 extern __inline __m512h
3203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3204 _mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
3205 __m512h __D, const int __E)
3206 {
3207 return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
3208 }
3209
3210 extern __inline __m512h
3211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3212 _mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
3213 const int __D)
3214 {
3215 return __builtin_ia32_addph512_mask_round (__B, __C,
3216 _mm512_setzero_ph (),
3217 __A, __D);
3218 }
3219
3220 extern __inline __m512h
3221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3222 _mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
3223 {
3224 return __builtin_ia32_subph512_mask_round (__A, __B,
3225 _mm512_setzero_ph (),
3226 (__mmask32) -1, __C);
3227 }
3228
3229 extern __inline __m512h
3230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3231 _mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
3232 __m512h __D, const int __E)
3233 {
3234 return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
3235 }
3236
3237 extern __inline __m512h
3238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3239 _mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
3240 const int __D)
3241 {
3242 return __builtin_ia32_subph512_mask_round (__B, __C,
3243 _mm512_setzero_ph (),
3244 __A, __D);
3245 }
3246
3247 extern __inline __m512h
3248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3249 _mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
3250 {
3251 return __builtin_ia32_mulph512_mask_round (__A, __B,
3252 _mm512_setzero_ph (),
3253 (__mmask32) -1, __C);
3254 }
3255
3256 extern __inline __m512h
3257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3258 _mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
3259 __m512h __D, const int __E)
3260 {
3261 return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
3262 }
3263
3264 extern __inline __m512h
3265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3266 _mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
3267 const int __D)
3268 {
3269 return __builtin_ia32_mulph512_mask_round (__B, __C,
3270 _mm512_setzero_ph (),
3271 __A, __D);
3272 }
3273
3274 extern __inline __m512h
3275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3276 _mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
3277 {
3278 return __builtin_ia32_divph512_mask_round (__A, __B,
3279 _mm512_setzero_ph (),
3280 (__mmask32) -1, __C);
3281 }
3282
3283 extern __inline __m512h
3284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3285 _mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
3286 __m512h __D, const int __E)
3287 {
3288 return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
3289 }
3290
3291 extern __inline __m512h
3292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3293 _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
3294 const int __D)
3295 {
3296 return __builtin_ia32_divph512_mask_round (__B, __C,
3297 _mm512_setzero_ph (),
3298 __A, __D);
3299 }
3300 #else
3301 #define _mm512_add_round_ph(A, B, C) \
3302 ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), \
3303 _mm512_setzero_ph (), \
3304 (__mmask32)-1, (C)))
3305
3306 #define _mm512_mask_add_round_ph(A, B, C, D, E) \
3307 ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
3308
3309 #define _mm512_maskz_add_round_ph(A, B, C, D) \
3310 ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), \
3311 _mm512_setzero_ph (), \
3312 (A), (D)))
3313
3314 #define _mm512_sub_round_ph(A, B, C) \
3315 ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), \
3316 _mm512_setzero_ph (), \
3317 (__mmask32)-1, (C)))
3318
3319 #define _mm512_mask_sub_round_ph(A, B, C, D, E) \
3320 ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
3321
3322 #define _mm512_maskz_sub_round_ph(A, B, C, D) \
3323 ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), \
3324 _mm512_setzero_ph (), \
3325 (A), (D)))
3326
3327 #define _mm512_mul_round_ph(A, B, C) \
3328 ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), \
3329 _mm512_setzero_ph (), \
3330 (__mmask32)-1, (C)))
3331
3332 #define _mm512_mask_mul_round_ph(A, B, C, D, E) \
3333 ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
3334
3335 #define _mm512_maskz_mul_round_ph(A, B, C, D) \
3336 ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), \
3337 _mm512_setzero_ph (), \
3338 (A), (D)))
3339
3340 #define _mm512_div_round_ph(A, B, C) \
3341 ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), \
3342 _mm512_setzero_ph (), \
3343 (__mmask32)-1, (C)))
3344
3345 #define _mm512_mask_div_round_ph(A, B, C, D, E) \
3346 ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
3347
3348 #define _mm512_maskz_div_round_ph(A, B, C, D) \
3349 ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), \
3350 _mm512_setzero_ph (), \
3351 (A), (D)))
3352 #endif /* __OPTIMIZE__ */
3353
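/* Usage sketch (illustrative only): the *_round_ph forms take an explicit
   rounding control instead of the MXCSR setting; the plain forms above
   behave as if _MM_FROUND_CUR_DIRECTION had been passed.

     __m512h __a = _mm512_set1_ph ((_Float16) 1.0f);
     __m512h __b = _mm512_set1_ph ((_Float16) 3.0f);
     __m512h __q = _mm512_div_round_ph (__a, __b,
                                        _MM_FROUND_TO_ZERO
                                        | _MM_FROUND_NO_EXC);
*/
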
3354 extern __inline __m512h
3355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3356 _mm512_conj_pch (__m512h __A)
3357 {
3358 return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
3359 }
3360
3361 extern __inline __m512h
3362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3363 _mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
3364 {
3365 return (__m512h)
3366 __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
3367 (__v16sf) __W,
3368 (__mmask16) __U);
3369 }
3370
3371 extern __inline __m512h
3372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3373 _mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
3374 {
3375 return (__m512h)
3376 __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
3377 (__v16sf) _mm512_setzero_ps (),
3378 (__mmask16) __U);
3379 }
3380
3381 /* Intrinsics vmaxph, vminph. */
3382 extern __inline __m512h
3383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3384 _mm512_max_ph (__m512h __A, __m512h __B)
3385 {
3386 return __builtin_ia32_maxph512_mask (__A, __B,
3387 _mm512_setzero_ph (),
3388 (__mmask32) -1);
3389 }
3390
3391 extern __inline __m512h
3392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3393 _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
3394 {
3395 return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
3396 }
3397
3398 extern __inline __m512h
3399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3400 _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
3401 {
3402 return __builtin_ia32_maxph512_mask (__B, __C,
3403 _mm512_setzero_ph (), __A);
3404 }
3405
3406 extern __inline __m512h
3407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3408 _mm512_min_ph (__m512h __A, __m512h __B)
3409 {
3410 return __builtin_ia32_minph512_mask (__A, __B,
3411 _mm512_setzero_ph (),
3412 (__mmask32) -1);
3413 }
3414
3415 extern __inline __m512h
3416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417 _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
3418 {
3419 return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
3420 }
3421
3422 extern __inline __m512h
3423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3424 _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
3425 {
3426 return __builtin_ia32_minph512_mask (__B, __C,
3427 _mm512_setzero_ph (), __A);
3428 }
3429
3430 #ifdef __OPTIMIZE__
3431 extern __inline __m512h
3432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3433 _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
3434 {
3435 return __builtin_ia32_maxph512_mask_round (__A, __B,
3436 _mm512_setzero_ph (),
3437 (__mmask32) -1, __C);
3438 }
3439
3440 extern __inline __m512h
3441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3442 _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
3443 __m512h __D, const int __E)
3444 {
3445 return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
3446 }
3447
3448 extern __inline __m512h
3449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3450 _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
3451 const int __D)
3452 {
3453 return __builtin_ia32_maxph512_mask_round (__B, __C,
3454 _mm512_setzero_ph (),
3455 __A, __D);
3456 }
3457
3458 extern __inline __m512h
3459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3460 _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
3461 {
3462 return __builtin_ia32_minph512_mask_round (__A, __B,
3463 _mm512_setzero_ph (),
3464 (__mmask32) -1, __C);
3465 }
3466
3467 extern __inline __m512h
3468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3469 _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
3470 __m512h __D, const int __E)
3471 {
3472 return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
3473 }
3474
3475 extern __inline __m512h
3476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3477 _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
3478 const int __D)
3479 {
3480 return __builtin_ia32_minph512_mask_round (__B, __C,
3481 _mm512_setzero_ph (),
3482 __A, __D);
3483 }
3484
3485 #else
3486 #define _mm512_max_round_ph(A, B, C) \
3487 (__builtin_ia32_maxph512_mask_round ((A), (B), \
3488 _mm512_setzero_ph (), \
3489 (__mmask32)-1, (C)))
3490
3491 #define _mm512_mask_max_round_ph(A, B, C, D, E) \
3492 (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
3493
3494 #define _mm512_maskz_max_round_ph(A, B, C, D) \
3495 (__builtin_ia32_maxph512_mask_round ((B), (C), \
3496 _mm512_setzero_ph (), \
3497 (A), (D)))
3498
3499 #define _mm512_min_round_ph(A, B, C) \
3500 (__builtin_ia32_minph512_mask_round ((A), (B), \
3501 _mm512_setzero_ph (), \
3502 (__mmask32)-1, (C)))
3503
3504 #define _mm512_mask_min_round_ph(A, B, C, D, E) \
3505 (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
3506
3507 #define _mm512_maskz_min_round_ph(A, B, C, D) \
3508 (__builtin_ia32_minph512_mask_round ((B), (C), \
3509 _mm512_setzero_ph (), \
3510 (A), (D)))
3511 #endif /* __OPTIMIZE__ */
3512
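/* Illustrative usage of the vmaxph/vminph intrinsics above (a minimal
   sketch, not part of this header): clamp every element of a vector to
   the range [0, 1].  Assumes <immintrin.h> is included, AVX512FP16 code
   generation is enabled, and `x' is an __m512h defined elsewhere.

     __m512h lo = _mm512_set1_ph ((_Float16) 0.0);
     __m512h hi = _mm512_set1_ph ((_Float16) 1.0);
     __m512h clamped = _mm512_min_ph (_mm512_max_ph (x, lo), hi);

   The _mm512_mask_* forms merge lanes from their first argument where the
   corresponding mask bit is clear; the _mm512_maskz_* forms zero them.  */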
3513 /* Intrinsics vcmpph. */
3514 #ifdef __OPTIMIZE__
3515 extern __inline __mmask32
3516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3517 _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
3518 {
3519 return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
3520 (__mmask32) -1);
3521 }
3522
3523 extern __inline __mmask32
3524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3525 _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
3526 const int __D)
3527 {
3528 return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
3529 __A);
3530 }
3531
3532 extern __inline __mmask32
3533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534 _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
3535 const int __D)
3536 {
3537 return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
3538 __C, (__mmask32) -1,
3539 __D);
3540 }
3541
3542 extern __inline __mmask32
3543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3544 _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
3545 const int __D, const int __E)
3546 {
3547 return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
3548 __D, __A,
3549 __E);
3550 }
3551
3552 #else
3553 #define _mm512_cmp_ph_mask(A, B, C) \
3554 (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
3555
3556 #define _mm512_mask_cmp_ph_mask(A, B, C, D) \
3557 (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
3558
3559 #define _mm512_cmp_round_ph_mask(A, B, C, D) \
3560 (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
3561
3562 #define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \
3563 (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
3564
3565 #endif /* __OPTIMIZE__ */
3566
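/* Illustrative usage of _mm512_cmp_ph_mask above (a sketch, not part of
   this header): count how many of the 32 _Float16 lanes of `a' are less
   than the matching lane of `b'.  The predicate _CMP_LT_OS comes from the
   standard AVX comparison-predicate set; `a' and `b' are assumed to be
   __m512h values defined elsewhere.

     __mmask32 k = _mm512_cmp_ph_mask (a, b, _CMP_LT_OS);
     int n_less = __builtin_popcount ((unsigned int) k);

   Bit i of the returned __mmask32 is set when the predicate holds for
   lane i.  */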
3567 /* Intrinsics vsqrtph. */
3568 extern __inline __m512h
3569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3570 _mm512_sqrt_ph (__m512h __A)
3571 {
3572 return __builtin_ia32_sqrtph512_mask_round (__A,
3573 _mm512_setzero_ph (),
3574 (__mmask32) -1,
3575 _MM_FROUND_CUR_DIRECTION);
3576 }
3577
3578 extern __inline __m512h
3579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3580 _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
3581 {
3582 return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
3583 _MM_FROUND_CUR_DIRECTION);
3584 }
3585
3586 extern __inline __m512h
3587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3588 _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
3589 {
3590 return __builtin_ia32_sqrtph512_mask_round (__B,
3591 _mm512_setzero_ph (),
3592 __A,
3593 _MM_FROUND_CUR_DIRECTION);
3594 }
3595
3596 #ifdef __OPTIMIZE__
3597 extern __inline __m512h
3598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3599 _mm512_sqrt_round_ph (__m512h __A, const int __B)
3600 {
3601 return __builtin_ia32_sqrtph512_mask_round (__A,
3602 _mm512_setzero_ph (),
3603 (__mmask32) -1, __B);
3604 }
3605
3606 extern __inline __m512h
3607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3608 _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
3609 const int __D)
3610 {
3611 return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
3612 }
3613
3614 extern __inline __m512h
3615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3616 _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
3617 {
3618 return __builtin_ia32_sqrtph512_mask_round (__B,
3619 _mm512_setzero_ph (),
3620 __A, __C);
3621 }
3622
3623 #else
3624 #define _mm512_sqrt_round_ph(A, B) \
3625 (__builtin_ia32_sqrtph512_mask_round ((A), \
3626 _mm512_setzero_ph (), \
3627 (__mmask32)-1, (B)))
3628
3629 #define _mm512_mask_sqrt_round_ph(A, B, C, D) \
3630 (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
3631
3632 #define _mm512_maskz_sqrt_round_ph(A, B, C) \
3633 (__builtin_ia32_sqrtph512_mask_round ((B), \
3634 _mm512_setzero_ph (), \
3635 (A), (C)))
3636
3637 #endif /* __OPTIMIZE__ */
3638
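/* Illustrative usage of the vsqrtph intrinsics above (a sketch, not part
   of this header).  The explicit-rounding form requires a compile-time
   rounding constant, which is why the macro fallback exists when
   __OPTIMIZE__ is not defined; `x' is assumed to be an __m512h defined
   elsewhere.

     __m512h r0 = _mm512_sqrt_ph (x);
     __m512h r1 = _mm512_sqrt_round_ph (x, _MM_FROUND_TO_NEAREST_INT
					    | _MM_FROUND_NO_EXC);  */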
3639 /* Intrinsics vrsqrtph. */
3640 extern __inline __m512h
3641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3642 _mm512_rsqrt_ph (__m512h __A)
3643 {
3644 return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
3645 (__mmask32) -1);
3646 }
3647
3648 extern __inline __m512h
3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3650 _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
3651 {
3652 return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
3653 }
3654
3655 extern __inline __m512h
3656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3657 _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
3658 {
3659 return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
3660 __A);
3661 }
3662
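/* Illustrative usage of the vrsqrtph intrinsics above (a sketch, not part
   of this header): vrsqrtph computes a fast per-element approximation of
   1/sqrt(x) and takes no rounding argument.  `k' (__mmask32) and `x'
   (__m512h) are assumed to be defined elsewhere.

     __m512h r  = _mm512_rsqrt_ph (x);
     __m512h rz = _mm512_maskz_rsqrt_ph (k, x);

   The maskz form zeroes the lanes whose mask bit is clear.  */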
3663 /* Intrinsics vrcpph. */
3664 extern __inline __m512h
3665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3666 _mm512_rcp_ph (__m512h __A)
3667 {
3668 return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
3669 (__mmask32) -1);
3670 }
3671
3672 extern __inline __m512h
3673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3674 _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
3675 {
3676 return __builtin_ia32_rcpph512_mask (__C, __A, __B);
3677 }
3678
3679 extern __inline __m512h
3680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3681 _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
3682 {
3683 return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
3684 __A);
3685 }
3686
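/* Illustrative usage of the vrcpph intrinsics above (a sketch, not part
   of this header): approximate a/b with a multiply by the fast reciprocal
   approximation.  `a' and `b' are assumed to be __m512h values defined
   elsewhere; _mm512_div_ph, defined earlier in this header, should be
   used when a correctly rounded quotient is required.

     __m512h q_approx = _mm512_mul_ph (a, _mm512_rcp_ph (b));  */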
3687 /* Intrinsics vscalefph. */
3688 extern __inline __m512h
3689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690 _mm512_scalef_ph (__m512h __A, __m512h __B)
3691 {
3692 return __builtin_ia32_scalefph512_mask_round (__A, __B,
3693 _mm512_setzero_ph (),
3694 (__mmask32) -1,
3695 _MM_FROUND_CUR_DIRECTION);
3696 }
3697
3698 extern __inline __m512h
3699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700 _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
3701 {
3702 return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
3703 _MM_FROUND_CUR_DIRECTION);
3704 }
3705
3706 extern __inline __m512h
3707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3708 _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
3709 {
3710 return __builtin_ia32_scalefph512_mask_round (__B, __C,
3711 _mm512_setzero_ph (),
3712 __A,
3713 _MM_FROUND_CUR_DIRECTION);
3714 }
3715
3716 #ifdef __OPTIMIZE__
3717 extern __inline __m512h
3718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719 _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
3720 {
3721 return __builtin_ia32_scalefph512_mask_round (__A, __B,
3722 _mm512_setzero_ph (),
3723 (__mmask32) -1, __C);
3724 }
3725
3726 extern __inline __m512h
3727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728 _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
3729 __m512h __D, const int __E)
3730 {
3731 return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
3732 __E);
3733 }
3734
3735 extern __inline __m512h
3736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3737 _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
3738 const int __D)
3739 {
3740 return __builtin_ia32_scalefph512_mask_round (__B, __C,
3741 _mm512_setzero_ph (),
3742 __A, __D);
3743 }
3744
3745 #else
3746 #define _mm512_scalef_round_ph(A, B, C) \
3747 (__builtin_ia32_scalefph512_mask_round ((A), (B), \
3748 _mm512_setzero_ph (), \
3749 (__mmask32)-1, (C)))
3750
3751 #define _mm512_mask_scalef_round_ph(A, B, C, D, E) \
3752 (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
3753
3754 #define _mm512_maskz_scalef_round_ph(A, B, C, D) \
3755 (__builtin_ia32_scalefph512_mask_round ((B), (C), \
3756 _mm512_setzero_ph (), \
3757 (A), (D)))
3758
3759 #endif /* __OPTIMIZE__ */
3760
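/* Illustrative usage of the vscalefph intrinsics above (a sketch, not
   part of this header): vscalefph computes, per element,
   a[i] * 2^floor(b[i]).  Multiplying every lane of `x' (an __m512h
   assumed to be defined elsewhere) by 2^3:

     __m512h scaled = _mm512_scalef_ph (x, _mm512_set1_ph ((_Float16) 3.0));  */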
3761 /* Intrinsics vreduceph. */
3762 #ifdef __OPTIMIZE__
3763 extern __inline __m512h
3764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3765 _mm512_reduce_ph (__m512h __A, int __B)
3766 {
3767 return __builtin_ia32_reduceph512_mask_round (__A, __B,
3768 _mm512_setzero_ph (),
3769 (__mmask32) -1,
3770 _MM_FROUND_CUR_DIRECTION);
3771 }
3772
3773 extern __inline __m512h
3774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3775 _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
3776 {
3777 return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
3778 _MM_FROUND_CUR_DIRECTION);
3779 }
3780
3781 extern __inline __m512h
3782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3783 _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
3784 {
3785 return __builtin_ia32_reduceph512_mask_round (__B, __C,
3786 _mm512_setzero_ph (),
3787 __A,
3788 _MM_FROUND_CUR_DIRECTION);
3789 }
3790
3791 extern __inline __m512h
3792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793 _mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
3794 {
3795 return __builtin_ia32_reduceph512_mask_round (__A, __B,
3796 _mm512_setzero_ph (),
3797 (__mmask32) -1, __C);
3798 }
3799
3800 extern __inline __m512h
3801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3802 _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
3803 int __D, const int __E)
3804 {
3805 return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
3806 __E);
3807 }
3808
3809 extern __inline __m512h
3810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3811 _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
3812 const int __D)
3813 {
3814 return __builtin_ia32_reduceph512_mask_round (__B, __C,
3815 _mm512_setzero_ph (),
3816 __A, __D);
3817 }
3818
3819 #else
3820 #define _mm512_reduce_ph(A, B) \
3821 (__builtin_ia32_reduceph512_mask_round ((A), (B), \
3822 _mm512_setzero_ph (), \
3823 (__mmask32)-1, \
3824 _MM_FROUND_CUR_DIRECTION))
3825
3826 #define _mm512_mask_reduce_ph(A, B, C, D) \
3827 (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), \
3828 _MM_FROUND_CUR_DIRECTION))
3829
3830 #define _mm512_maskz_reduce_ph(A, B, C) \
3831 (__builtin_ia32_reduceph512_mask_round ((B), (C), \
3832 _mm512_setzero_ph (), \
3833 (A), _MM_FROUND_CUR_DIRECTION))
3834
3835 #define _mm512_reduce_round_ph(A, B, C) \
3836 (__builtin_ia32_reduceph512_mask_round ((A), (B), \
3837 _mm512_setzero_ph (), \
3838 (__mmask32)-1, (C)))
3839
3840 #define _mm512_mask_reduce_round_ph(A, B, C, D, E) \
3841 (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
3842
3843 #define _mm512_maskz_reduce_round_ph(A, B, C, D) \
3844 (__builtin_ia32_reduceph512_mask_round ((B), (C), \
3845 _mm512_setzero_ph (), \
3846 (A), (D)))
3847
3848 #endif /* __OPTIMIZE__ */
3849
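/* Illustrative usage of the vreduceph intrinsics above (a sketch, not
   part of this header): vreduceph subtracts from each element its value
   rounded to the precision selected by the immediate.  One common use,
   assuming the usual vreduce immediate encoding (zero fraction bits,
   round toward zero), is extracting the fractional part of `x', an
   __m512h assumed to be defined elsewhere.

     __m512h frac = _mm512_reduce_ph (x, _MM_FROUND_TO_ZERO);  */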
3850 /* Intrinsics vrndscaleph. */
3851 #ifdef __OPTIMIZE__
3852 extern __inline __m512h
3853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3854 _mm512_roundscale_ph (__m512h __A, int __B)
3855 {
3856 return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
3857 _mm512_setzero_ph (),
3858 (__mmask32) -1,
3859 _MM_FROUND_CUR_DIRECTION);
3860 }
3861
3862 extern __inline __m512h
3863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3864 _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
3865 __m512h __C, int __D)
3866 {
3867 return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
3868 _MM_FROUND_CUR_DIRECTION);
3869 }
3870
3871 extern __inline __m512h
3872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3873 _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
3874 {
3875 return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
3876 _mm512_setzero_ph (),
3877 __A,
3878 _MM_FROUND_CUR_DIRECTION);
3879 }
3880
3881 extern __inline __m512h
3882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3883 _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
3884 {
3885 return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
3886 _mm512_setzero_ph (),
3887 (__mmask32) -1,
3888 __C);
3889 }
3890
3891 extern __inline __m512h
3892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3893 _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
3894 __m512h __C, int __D, const int __E)
3895 {
3896 return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
3897 __B, __E);
3898 }
3899
3900 extern __inline __m512h
3901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3902 _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
3903 const int __D)
3904 {
3905 return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
3906 _mm512_setzero_ph (),
3907 __A, __D);
3908 }
3909
3910 #else
3911 #define _mm512_roundscale_ph(A, B) \
3912 (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
3913 _mm512_setzero_ph (), \
3914 (__mmask32)-1, \
3915 _MM_FROUND_CUR_DIRECTION))
3916
3917 #define _mm512_mask_roundscale_ph(A, B, C, D) \
3918 (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), \
3919 _MM_FROUND_CUR_DIRECTION))
3920
3921 #define _mm512_maskz_roundscale_ph(A, B, C) \
3922 (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
3923 _mm512_setzero_ph (), \
3924 (A), \
3925 _MM_FROUND_CUR_DIRECTION))
3926 #define _mm512_roundscale_round_ph(A, B, C) \
3927 (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
3928 _mm512_setzero_ph (), \
3929 (__mmask32)-1, (C)))
3930
3931 #define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \
3932 (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
3933
3934 #define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
3935 (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
3936 _mm512_setzero_ph (), \
3937 (A), (D)))
3938
3939 #endif /* __OPTIMIZE__ */
3940
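/* Illustrative usage of the vrndscaleph intrinsics above (a sketch, not
   part of this header): vrndscaleph rounds each element to 2^-M
   precision, where M sits in the upper bits of the immediate and the low
   bits select the rounding mode.  With M = 0 and round-toward-negative-
   infinity this gives a per-element floor; `x' is an __m512h assumed to
   be defined elsewhere.

     __m512h floored = _mm512_roundscale_ph (x, (0 << 4) | _MM_FROUND_TO_NEG_INF);  */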
3941 /* Intrinsics vfpclassph. */
3942 #ifdef __OPTIMIZE__
3943 extern __inline __mmask32
3944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3945 _mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
3946 const int __imm)
3947 {
3948 return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
3949 __imm, __U);
3950 }
3951
3952 extern __inline __mmask32
3953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3954 _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
3955 {
3956 return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
3957 __imm,
3958 (__mmask32) -1);
3959 }
3960
3961 #else
3962 #define _mm512_mask_fpclass_ph_mask(u, x, c) \
3963 ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
3964 (int) (c), (__mmask32) (u)))
3965
3966 #define _mm512_fpclass_ph_mask(x, c) \
3967 ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
3968 (int) (c), (__mmask32) -1))
3969 #endif /* __OPTIMIZE__ */
3970
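/* Illustrative usage of the vfpclassph intrinsics above (a sketch, not
   part of this header): the immediate selects value categories by the
   usual vfpclass encoding (bit 0 QNaN, bit 3 +Inf, bit 4 -Inf, bit 7
   SNaN, and so on), and the result mask has a bit set for every lane
   that falls in any selected category.  Flagging NaN or infinite lanes
   of `x' (an __m512h assumed to be defined elsewhere):

     __mmask32 special = _mm512_fpclass_ph_mask (x, 0x01 | 0x08 | 0x10 | 0x80);  */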
3971 /* Intrinsics vgetexpph. */
3972 extern __inline __m512h
3973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3974 _mm512_getexp_ph (__m512h __A)
3975 {
3976 return (__m512h)
3977 __builtin_ia32_getexpph512_mask ((__v32hf) __A,
3978 (__v32hf) _mm512_setzero_ph (),
3979 (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
3980 }
3981
3982 extern __inline __m512h
3983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3984 _mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
3985 {
3986 return (__m512h)
3987 __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
3988 (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
3989 }
3990
3991 extern __inline __m512h
3992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3993 _mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
3994 {
3995 return (__m512h)
3996 __builtin_ia32_getexpph512_mask ((__v32hf) __A,
3997 (__v32hf) _mm512_setzero_ph (),
3998 (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
3999 }
4000
4001 #ifdef __OPTIMIZE__
4002 extern __inline __m512h
4003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4004 _mm512_getexp_round_ph (__m512h __A, const int __R)
4005 {
4006 return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
4007 (__v32hf)
4008 _mm512_setzero_ph (),
4009 (__mmask32) -1, __R);
4010 }
4011
4012 extern __inline __m512h
4013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4014 _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
4015 const int __R)
4016 {
4017 return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
4018 (__v32hf) __W,
4019 (__mmask32) __U, __R);
4020 }
4021
4022 extern __inline __m512h
4023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4024 _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
4025 {
4026 return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
4027 (__v32hf)
4028 _mm512_setzero_ph (),
4029 (__mmask32) __U, __R);
4030 }
4031
4032 #else
4033 #define _mm512_getexp_round_ph(A, R) \
4034 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
4035 (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
4036
4037 #define _mm512_mask_getexp_round_ph(W, U, A, R) \
4038 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
4039 (__v32hf)(__m512h)(W), (__mmask32)(U), R))
4040
4041 #define _mm512_maskz_getexp_round_ph(U, A, R) \
4042 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
4043 (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
4044
4045 #endif /* __OPTIMIZE__ */
4046
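/* Illustrative usage of the vgetexpph intrinsics above (a sketch, not
   part of this header): vgetexpph returns, per element, the unbiased
   exponent floor(log2(|x|)) as a _Float16 value; `x' is an __m512h
   assumed to be defined elsewhere.

     __m512h exps = _mm512_getexp_ph (x);  */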
4047 /* Intrinsics vgetmantph. */
4048 #ifdef __OPTIMIZE__
4049 extern __inline __m512h
4050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4051 _mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
4052 _MM_MANTISSA_SIGN_ENUM __C)
4053 {
4054 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
4055 (__C << 2) | __B,
4056 _mm512_setzero_ph (),
4057 (__mmask32) -1,
4058 _MM_FROUND_CUR_DIRECTION);
4059 }
4060
4061 extern __inline __m512h
4062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4063 _mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
4064 _MM_MANTISSA_NORM_ENUM __B,
4065 _MM_MANTISSA_SIGN_ENUM __C)
4066 {
4067 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
4068 (__C << 2) | __B,
4069 (__v32hf) __W, __U,
4070 _MM_FROUND_CUR_DIRECTION);
4071 }
4072
4073 extern __inline __m512h
4074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4075 _mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
4076 _MM_MANTISSA_NORM_ENUM __B,
4077 _MM_MANTISSA_SIGN_ENUM __C)
4078 {
4079 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
4080 (__C << 2) | __B,
4081 (__v32hf)
4082 _mm512_setzero_ph (),
4083 __U,
4084 _MM_FROUND_CUR_DIRECTION);
4085 }
4086
4087 extern __inline __m512h
4088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4089 _mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
4090 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
4091 {
4092 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
4093 (__C << 2) | __B,
4094 _mm512_setzero_ph (),
4095 (__mmask32) -1, __R);
4096 }
4097
4098 extern __inline __m512h
4099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4100 _mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
4101 _MM_MANTISSA_NORM_ENUM __B,
4102 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
4103 {
4104 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
4105 (__C << 2) | __B,
4106 (__v32hf) __W, __U,
4107 __R);
4108 }
4109
4110 extern __inline __m512h
4111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4112 _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
4113 _MM_MANTISSA_NORM_ENUM __B,
4114 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
4115 {
4116 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
4117 (__C << 2) | __B,
4118 (__v32hf)
4119 _mm512_setzero_ph (),
4120 __U, __R);
4121 }
4122
4123 #else
4124 #define _mm512_getmant_ph(X, B, C) \
4125 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4126 (int)(((C)<<2) | (B)), \
4127 (__v32hf)(__m512h) \
4128 _mm512_setzero_ph(), \
4129 (__mmask32)-1, \
4130 _MM_FROUND_CUR_DIRECTION))
4131
4132 #define _mm512_mask_getmant_ph(W, U, X, B, C) \
4133 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4134 (int)(((C)<<2) | (B)), \
4135 (__v32hf)(__m512h)(W), \
4136 (__mmask32)(U), \
4137 _MM_FROUND_CUR_DIRECTION))
4138
4139
4140 #define _mm512_maskz_getmant_ph(U, X, B, C) \
4141 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4142 (int)(((C)<<2) | (B)), \
4143 (__v32hf)(__m512h) \
4144 _mm512_setzero_ph(), \
4145 (__mmask32)(U), \
4146 _MM_FROUND_CUR_DIRECTION))
4147
4148 #define _mm512_getmant_round_ph(X, B, C, R) \
4149 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4150 (int)(((C)<<2) | (B)), \
4151 (__v32hf)(__m512h) \
4152 _mm512_setzero_ph(), \
4153 (__mmask32)-1, \
4154 (R)))
4155
4156 #define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) \
4157 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4158 (int)(((C)<<2) | (B)), \
4159 (__v32hf)(__m512h)(W), \
4160 (__mmask32)(U), \
4161 (R)))
4162
4163
4164 #define _mm512_maskz_getmant_round_ph(U, X, B, C, R) \
4165 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
4166 (int)(((C)<<2) | (B)), \
4167 (__v32hf)(__m512h) \
4168 _mm512_setzero_ph(), \
4169 (__mmask32)(U), \
4170 (R)))
4171
4172 #endif /* __OPTIMIZE__ */
4173
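/* Illustrative usage of the vgetmantph intrinsics above (a sketch, not
   part of this header): vgetmantph extracts each element's mantissa,
   normalized into the interval chosen by the norm enumerator and signed
   per the sign enumerator.  For normal finite values the decomposition
   below reconstructs `x' (an __m512h assumed to be defined elsewhere)
   via the vscalefph intrinsic from earlier in this header.

     __m512h m = _mm512_getmant_ph (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
     __m512h e = _mm512_getexp_ph (x);
     __m512h y = _mm512_scalef_ph (m, e);

   y compares equal to x, lane by lane, for normal finite inputs.  */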
4174 /* Intrinsics vcvtph2dq. */
4175 extern __inline __m512i
4176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4177 _mm512_cvtph_epi32 (__m256h __A)
4178 {
4179 return (__m512i)
4180 __builtin_ia32_vcvtph2dq512_mask_round (__A,
4181 (__v16si)
4182 _mm512_setzero_si512 (),
4183 (__mmask16) -1,
4184 _MM_FROUND_CUR_DIRECTION);
4185 }
4186
4187 extern __inline __m512i
4188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4189 _mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
4190 {
4191 return (__m512i)
4192 __builtin_ia32_vcvtph2dq512_mask_round (__C,
4193 (__v16si) __A,
4194 __B,
4195 _MM_FROUND_CUR_DIRECTION);
4196 }
4197
4198 extern __inline __m512i
4199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4200 _mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B)
4201 {
4202 return (__m512i)
4203 __builtin_ia32_vcvtph2dq512_mask_round (__B,
4204 (__v16si)
4205 _mm512_setzero_si512 (),
4206 __A,
4207 _MM_FROUND_CUR_DIRECTION);
4208 }
4209
4210 #ifdef __OPTIMIZE__
4211 extern __inline __m512i
4212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4213 _mm512_cvt_roundph_epi32 (__m256h __A, int __B)
4214 {
4215 return (__m512i)
4216 __builtin_ia32_vcvtph2dq512_mask_round (__A,
4217 (__v16si)
4218 _mm512_setzero_si512 (),
4219 (__mmask16) -1,
4220 __B);
4221 }
4222
4223 extern __inline __m512i
4224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4225 _mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
4226 {
4227 return (__m512i)
4228 __builtin_ia32_vcvtph2dq512_mask_round (__C,
4229 (__v16si) __A,
4230 __B,
4231 __D);
4232 }
4233
4234 extern __inline __m512i
4235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4236 _mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
4237 {
4238 return (__m512i)
4239 __builtin_ia32_vcvtph2dq512_mask_round (__B,
4240 (__v16si)
4241 _mm512_setzero_si512 (),
4242 __A,
4243 __C);
4244 }
4245
4246 #else
4247 #define _mm512_cvt_roundph_epi32(A, B) \
4248 ((__m512i) \
4249 __builtin_ia32_vcvtph2dq512_mask_round ((A), \
4250 (__v16si) \
4251 _mm512_setzero_si512 (), \
4252 (__mmask16)-1, \
4253 (B)))
4254
4255 #define _mm512_mask_cvt_roundph_epi32(A, B, C, D) \
4256 ((__m512i) \
4257 __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))
4258
4259 #define _mm512_maskz_cvt_roundph_epi32(A, B, C) \
4260 ((__m512i) \
4261 __builtin_ia32_vcvtph2dq512_mask_round ((B), \
4262 (__v16si) \
4263 _mm512_setzero_si512 (), \
4264 (A), \
4265 (C)))
4266
4267 #endif /* __OPTIMIZE__ */
4268
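/* Illustrative usage of the vcvtph2dq intrinsics above (a sketch, not
   part of this header): 16 _Float16 elements of a __m256h are converted
   to 16 signed 32-bit integers in a __m512i, using the current rounding
   mode, or an explicit compile-time rounding constant with the _round
   form; `h' is a __m256h assumed to be defined elsewhere.

     __m512i i0 = _mm512_cvtph_epi32 (h);
     __m512i i1 = _mm512_cvt_roundph_epi32 (h, _MM_FROUND_TO_NEAREST_INT
					       | _MM_FROUND_NO_EXC);  */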
4269 /* Intrinsics vcvtph2udq. */
4270 extern __inline __m512i
4271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4272 _mm512_cvtph_epu32 (__m256h __A)
4273 {
4274 return (__m512i)
4275 __builtin_ia32_vcvtph2udq512_mask_round (__A,
4276 (__v16si)
4277 _mm512_setzero_si512 (),
4278 (__mmask16) -1,
4279 _MM_FROUND_CUR_DIRECTION);
4280 }
4281
4282 extern __inline __m512i
4283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4284 _mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
4285 {
4286 return (__m512i)
4287 __builtin_ia32_vcvtph2udq512_mask_round (__C,
4288 (__v16si) __A,
4289 __B,
4290 _MM_FROUND_CUR_DIRECTION);
4291 }
4292
4293 extern __inline __m512i
4294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4295 _mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B)
4296 {
4297 return (__m512i)
4298 __builtin_ia32_vcvtph2udq512_mask_round (__B,
4299 (__v16si)
4300 _mm512_setzero_si512 (),
4301 __A,
4302 _MM_FROUND_CUR_DIRECTION);
4303 }
4304
4305 #ifdef __OPTIMIZE__
4306 extern __inline __m512i
4307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4308 _mm512_cvt_roundph_epu32 (__m256h __A, int __B)
4309 {
4310 return (__m512i)
4311 __builtin_ia32_vcvtph2udq512_mask_round (__A,
4312 (__v16si)
4313 _mm512_setzero_si512 (),
4314 (__mmask16) -1,
4315 __B);
4316 }
4317
4318 extern __inline __m512i
4319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4320 _mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
4321 {
4322 return (__m512i)
4323 __builtin_ia32_vcvtph2udq512_mask_round (__C,
4324 (__v16si) __A,
4325 __B,
4326 __D);
4327 }
4328
4329 extern __inline __m512i
4330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4331 _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
4332 {
4333 return (__m512i)
4334 __builtin_ia32_vcvtph2udq512_mask_round (__B,
4335 (__v16si)
4336 _mm512_setzero_si512 (),
4337 __A,
4338 __C);
4339 }
4340
4341 #else
4342 #define _mm512_cvt_roundph_epu32(A, B) \
4343 ((__m512i) \
4344 __builtin_ia32_vcvtph2udq512_mask_round ((A), \
4345 (__v16si) \
4346 _mm512_setzero_si512 (), \
4347 (__mmask16)-1, \
4348 (B)))
4349
4350 #define _mm512_mask_cvt_roundph_epu32(A, B, C, D) \
4351 ((__m512i) \
4352 __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))
4353
4354 #define _mm512_maskz_cvt_roundph_epu32(A, B, C) \
4355 ((__m512i) \
4356 __builtin_ia32_vcvtph2udq512_mask_round ((B), \
4357 (__v16si) \
4358 _mm512_setzero_si512 (), \
4359 (A), \
4360 (C)))
4361
4362 #endif /* __OPTIMIZE__ */
4363
4364 /* Intrinsics vcvttph2dq. */
4365 extern __inline __m512i
4366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4367 _mm512_cvttph_epi32 (__m256h __A)
4368 {
4369 return (__m512i)
4370 __builtin_ia32_vcvttph2dq512_mask_round (__A,
4371 (__v16si)
4372 _mm512_setzero_si512 (),
4373 (__mmask16) -1,
4374 _MM_FROUND_CUR_DIRECTION);
4375 }
4376
4377 extern __inline __m512i
4378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379 _mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
4380 {
4381 return (__m512i)
4382 __builtin_ia32_vcvttph2dq512_mask_round (__C,
4383 (__v16si) __A,
4384 __B,
4385 _MM_FROUND_CUR_DIRECTION);
4386 }
4387
4388 extern __inline __m512i
4389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4390 _mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B)
4391 {
4392 return (__m512i)
4393 __builtin_ia32_vcvttph2dq512_mask_round (__B,
4394 (__v16si)
4395 _mm512_setzero_si512 (),
4396 __A,
4397 _MM_FROUND_CUR_DIRECTION);
4398 }
4399
4400 #ifdef __OPTIMIZE__
4401 extern __inline __m512i
4402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4403 _mm512_cvtt_roundph_epi32 (__m256h __A, int __B)
4404 {
4405 return (__m512i)
4406 __builtin_ia32_vcvttph2dq512_mask_round (__A,
4407 (__v16si)
4408 _mm512_setzero_si512 (),
4409 (__mmask16) -1,
4410 __B);
4411 }
4412
4413 extern __inline __m512i
4414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4415 _mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B,
4416 __m256h __C, int __D)
4417 {
4418 return (__m512i)
4419 __builtin_ia32_vcvttph2dq512_mask_round (__C,
4420 (__v16si) __A,
4421 __B,
4422 __D);
4423 }
4424
4425 extern __inline __m512i
4426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4427 _mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
4428 {
4429 return (__m512i)
4430 __builtin_ia32_vcvttph2dq512_mask_round (__B,
4431 (__v16si)
4432 _mm512_setzero_si512 (),
4433 __A,
4434 __C);
4435 }
4436
4437 #else
4438 #define _mm512_cvtt_roundph_epi32(A, B) \
4439 ((__m512i) \
4440 __builtin_ia32_vcvttph2dq512_mask_round ((A), \
4441 (__v16si) \
4442 (_mm512_setzero_si512 ()), \
4443 (__mmask16)(-1), (B)))
4444
4445 #define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) \
4446 ((__m512i) \
4447 __builtin_ia32_vcvttph2dq512_mask_round ((C), \
4448 (__v16si)(A), \
4449 (B), \
4450 (D)))
4451
4452 #define _mm512_maskz_cvtt_roundph_epi32(A, B, C) \
4453 ((__m512i) \
4454 __builtin_ia32_vcvttph2dq512_mask_round ((B), \
4455 (__v16si) \
4456 _mm512_setzero_si512 (), \
4457 (A), \
4458 (C)))
4459
4460 #endif /* __OPTIMIZE__ */
4461
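/* Illustrative usage of the vcvttph2dq intrinsics above (a sketch, not
   part of this header; the vcvttph2udq intrinsics that follow behave the
   same way for unsigned results): the vcvtt forms always truncate toward
   zero, so the constant accepted by the _round variants only controls
   exception suppression (_MM_FROUND_NO_EXC or _MM_FROUND_CUR_DIRECTION);
   `h' is a __m256h assumed to be defined elsewhere.

     __m512i t0 = _mm512_cvttph_epi32 (h);
     __m512i t1 = _mm512_cvtt_roundph_epi32 (h, _MM_FROUND_NO_EXC);  */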
4462 /* Intrinsics vcvttph2udq. */
4463 extern __inline __m512i
4464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4465 _mm512_cvttph_epu32 (__m256h __A)
4466 {
4467 return (__m512i)
4468 __builtin_ia32_vcvttph2udq512_mask_round (__A,
4469 (__v16si)
4470 _mm512_setzero_si512 (),
4471 (__mmask16) -1,
4472 _MM_FROUND_CUR_DIRECTION);
4473 }
4474
4475 extern __inline __m512i
4476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4477 _mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
4478 {
4479 return (__m512i)
4480 __builtin_ia32_vcvttph2udq512_mask_round (__C,
4481 (__v16si) __A,
4482 __B,
4483 _MM_FROUND_CUR_DIRECTION);
4484 }
4485
4486 extern __inline __m512i
4487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488 _mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B)
4489 {
4490 return (__m512i)
4491 __builtin_ia32_vcvttph2udq512_mask_round (__B,
4492 (__v16si)
4493 _mm512_setzero_si512 (),
4494 __A,
4495 _MM_FROUND_CUR_DIRECTION);
4496 }
4497
4498 #ifdef __OPTIMIZE__
4499 extern __inline __m512i
4500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4501 _mm512_cvtt_roundph_epu32 (__m256h __A, int __B)
4502 {
4503 return (__m512i)
4504 __builtin_ia32_vcvttph2udq512_mask_round (__A,
4505 (__v16si)
4506 _mm512_setzero_si512 (),
4507 (__mmask16) -1,
4508 __B);
4509 }
4510
4511 extern __inline __m512i
4512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4513 _mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B,
4514 __m256h __C, int __D)
4515 {
4516 return (__m512i)
4517 __builtin_ia32_vcvttph2udq512_mask_round (__C,
4518 (__v16si) __A,
4519 __B,
4520 __D);
4521 }
4522
4523 extern __inline __m512i
4524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525 _mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
4526 {
4527 return (__m512i)
4528 __builtin_ia32_vcvttph2udq512_mask_round (__B,
4529 (__v16si)
4530 _mm512_setzero_si512 (),
4531 __A,
4532 __C);
4533 }
4534
4535 #else
4536 #define _mm512_cvtt_roundph_epu32(A, B) \
4537 ((__m512i) \
4538 __builtin_ia32_vcvttph2udq512_mask_round ((A), \
4539 (__v16si) \
4540 _mm512_setzero_si512 (), \
4541 (__mmask16)-1, \
4542 (B)))
4543
4544 #define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) \
4545 ((__m512i) \
4546 __builtin_ia32_vcvttph2udq512_mask_round ((C), \
4547 (__v16si)(A), \
4548 (B), \
4549 (D)))
4550
4551 #define _mm512_maskz_cvtt_roundph_epu32(A, B, C) \
4552 ((__m512i) \
4553 __builtin_ia32_vcvttph2udq512_mask_round ((B), \
4554 (__v16si) \
4555 _mm512_setzero_si512 (), \
4556 (A), \
4557 (C)))
4558
4559 #endif /* __OPTIMIZE__ */
4560
4561 /* Intrinsics vcvtdq2ph. */
4562 extern __inline __m256h
4563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4564 _mm512_cvtepi32_ph (__m512i __A)
4565 {
4566 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
4567 _mm256_setzero_ph (),
4568 (__mmask16) -1,
4569 _MM_FROUND_CUR_DIRECTION);
4570 }
4571
4572 extern __inline __m256h
4573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4574 _mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C)
4575 {
4576 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
4577 __A,
4578 __B,
4579 _MM_FROUND_CUR_DIRECTION);
4580 }
4581
4582 extern __inline __m256h
4583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4584 _mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B)
4585 {
4586 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
4587 _mm256_setzero_ph (),
4588 __A,
4589 _MM_FROUND_CUR_DIRECTION);
4590 }
4591
4592 #ifdef __OPTIMIZE__
4593 extern __inline __m256h
4594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4595 _mm512_cvt_roundepi32_ph (__m512i __A, int __B)
4596 {
4597 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
4598 _mm256_setzero_ph (),
4599 (__mmask16) -1,
4600 __B);
4601 }
4602
4603 extern __inline __m256h
4604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4605 _mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
4606 {
4607 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
4608 __A,
4609 __B,
4610 __D);
4611 }
4612
4613 extern __inline __m256h
4614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4615 _mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C)
4616 {
4617 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
4618 _mm256_setzero_ph (),
4619 __A,
4620 __C);
4621 }
4622
4623 #else
4624 #define _mm512_cvt_roundepi32_ph(A, B) \
4625 (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), \
4626 _mm256_setzero_ph (), \
4627 (__mmask16)-1, \
4628 (B)))
4629
4630 #define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) \
4631 (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), \
4632 (A), \
4633 (B), \
4634 (D)))
4635
4636 #define _mm512_maskz_cvt_roundepi32_ph(A, B, C) \
4637 (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), \
4638 _mm256_setzero_ph (), \
4639 (A), \
4640 (C)))
4641
4642 #endif /* __OPTIMIZE__ */
4643
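/* Illustrative usage of the vcvtdq2ph intrinsics above (a sketch, not
   part of this header): 16 signed 32-bit integers in a __m512i are
   converted to 16 _Float16 values packed into a __m256h; `v' is a
   __m512i assumed to be defined elsewhere.

     __m256h h = _mm512_cvtepi32_ph (v);  */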
4644 /* Intrinsics vcvtudq2ph. */
4645 extern __inline __m256h
4646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4647 _mm512_cvtepu32_ph (__m512i __A)
4648 {
4649 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
4650 _mm256_setzero_ph (),
4651 (__mmask16) -1,
4652 _MM_FROUND_CUR_DIRECTION);
4653 }
4654
4655 extern __inline __m256h
4656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657 _mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C)
4658 {
4659 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
4660 __A,
4661 __B,
4662 _MM_FROUND_CUR_DIRECTION);
4663 }
4664
4665 extern __inline __m256h
4666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4667 _mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B)
4668 {
4669 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
4670 _mm256_setzero_ph (),
4671 __A,
4672 _MM_FROUND_CUR_DIRECTION);
4673 }
4674
4675 #ifdef __OPTIMIZE__
4676 extern __inline __m256h
4677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4678 _mm512_cvt_roundepu32_ph (__m512i __A, int __B)
4679 {
4680 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
4681 _mm256_setzero_ph (),
4682 (__mmask16) -1,
4683 __B);
4684 }
4685
4686 extern __inline __m256h
4687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4688 _mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
4689 {
4690 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
4691 __A,
4692 __B,
4693 __D);
4694 }
4695
4696 extern __inline __m256h
4697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698 _mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C)
4699 {
4700 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
4701 _mm256_setzero_ph (),
4702 __A,
4703 __C);
4704 }
4705
4706 #else
4707 #define _mm512_cvt_roundepu32_ph(A, B) \
4708 (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), \
4709 _mm256_setzero_ph (), \
4710 (__mmask16)-1, \
4711 (B)))
4712
4713 #define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) \
4714 (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(C), \
4715 (A), \
4716 (B), \
4717 (D)))
4718
4719 #define _mm512_maskz_cvt_roundepu32_ph(A, B, C) \
4720 (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(B), \
4721 _mm256_setzero_ph (), \
4722 (A), \
4723 (C)))
4724
4725 #endif /* __OPTIMIZE__ */
4726
4727 /* Intrinsics vcvtph2qq. */
4728 extern __inline __m512i
4729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4730 _mm512_cvtph_epi64 (__m128h __A)
4731 {
4732 return __builtin_ia32_vcvtph2qq512_mask_round (__A,
4733 _mm512_setzero_si512 (),
4734 (__mmask8) -1,
4735 _MM_FROUND_CUR_DIRECTION);
4736 }
4737
4738 extern __inline __m512i
4739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4740 _mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
4741 {
4742 return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B,
4743 _MM_FROUND_CUR_DIRECTION);
4744 }
4745
4746 extern __inline __m512i
4747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4748 _mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
4749 {
4750 return __builtin_ia32_vcvtph2qq512_mask_round (__B,
4751 _mm512_setzero_si512 (),
4752 __A,
4753 _MM_FROUND_CUR_DIRECTION);
4754 }
4755
4756 #ifdef __OPTIMIZE__
4757 extern __inline __m512i
4758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4759 _mm512_cvt_roundph_epi64 (__m128h __A, int __B)
4760 {
4761 return __builtin_ia32_vcvtph2qq512_mask_round (__A,
4762 _mm512_setzero_si512 (),
4763 (__mmask8) -1,
4764 __B);
4765 }
4766
4767 extern __inline __m512i
4768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4769 _mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
4770 {
4771 return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D);
4772 }
4773
4774 extern __inline __m512i
4775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4776 _mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
4777 {
4778 return __builtin_ia32_vcvtph2qq512_mask_round (__B,
4779 _mm512_setzero_si512 (),
4780 __A,
4781 __C);
4782 }
4783
4784 #else
4785 #define _mm512_cvt_roundph_epi64(A, B) \
4786 (__builtin_ia32_vcvtph2qq512_mask_round ((A), \
4787 _mm512_setzero_si512 (), \
4788 (__mmask8)-1, \
4789 (B)))
4790
4791 #define _mm512_mask_cvt_roundph_epi64(A, B, C, D) \
4792 (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D)))
4793
4794 #define _mm512_maskz_cvt_roundph_epi64(A, B, C) \
4795 (__builtin_ia32_vcvtph2qq512_mask_round ((B), \
4796 _mm512_setzero_si512 (), \
4797 (A), \
4798 (C)))
4799
4800 #endif /* __OPTIMIZE__ */
4801
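/* Illustrative usage of the vcvtph2qq intrinsics above (a sketch, not
   part of this header): because each result element is 64 bits wide,
   only the 8 _Float16 values of a __m128h source are converted, giving
   8 signed 64-bit integers in a __m512i; `h8' is a __m128h assumed to be
   defined elsewhere.

     __m512i q = _mm512_cvtph_epi64 (h8);  */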
4802 /* Intrinsics vcvtph2uqq. */
4803 extern __inline __m512i
4804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805 _mm512_cvtph_epu64 (__m128h __A)
4806 {
4807 return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
4808 _mm512_setzero_si512 (),
4809 (__mmask8) -1,
4810 _MM_FROUND_CUR_DIRECTION);
4811 }
4812
4813 extern __inline __m512i
4814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4815 _mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
4816 {
4817 return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B,
4818 _MM_FROUND_CUR_DIRECTION);
4819 }
4820
4821 extern __inline __m512i
4822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4823 _mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
4824 {
4825 return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
4826 _mm512_setzero_si512 (),
4827 __A,
4828 _MM_FROUND_CUR_DIRECTION);
4829 }
4830
4831 #ifdef __OPTIMIZE__
4832
4833 extern __inline __m512i
4834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4835 _mm512_cvt_roundph_epu64 (__m128h __A, int __B)
4836 {
4837 return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
4838 _mm512_setzero_si512 (),
4839 (__mmask8) -1,
4840 __B);
4841 }
4842
4843 extern __inline __m512i
4844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4845 _mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
4846 {
4847 return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D);
4848 }
4849
4850 extern __inline __m512i
4851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852 _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
4853 {
4854 return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
4855 _mm512_setzero_si512 (),
4856 __A,
4857 __C);
4858 }
4859
4860 #else
4861 #define _mm512_cvt_roundph_epu64(A, B) \
4862 (__builtin_ia32_vcvtph2uqq512_mask_round ((A), \
4863 _mm512_setzero_si512 (), \
4864 (__mmask8)-1, \
4865 (B)))
4866
4867 #define _mm512_mask_cvt_roundph_epu64(A, B, C, D) \
4868 (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D)))
4869
4870 #define _mm512_maskz_cvt_roundph_epu64(A, B, C) \
4871 (__builtin_ia32_vcvtph2uqq512_mask_round ((B), \
4872 _mm512_setzero_si512 (), \
4873 (A), \
4874 (C)))
4875
4876 #endif /* __OPTIMIZE__ */
4877
4878 /* Intrinsics vcvttph2qq. */
4879 extern __inline __m512i
4880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4881 _mm512_cvttph_epi64 (__m128h __A)
4882 {
4883 return __builtin_ia32_vcvttph2qq512_mask_round (__A,
4884 _mm512_setzero_si512 (),
4885 (__mmask8) -1,
4886 _MM_FROUND_CUR_DIRECTION);
4887 }
4888
4889 extern __inline __m512i
4890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4891 _mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
4892 {
4893 return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B,
4894 _MM_FROUND_CUR_DIRECTION);
4895 }
4896
4897 extern __inline __m512i
4898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4899 _mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
4900 {
4901 return __builtin_ia32_vcvttph2qq512_mask_round (__B,
4902 _mm512_setzero_si512 (),
4903 __A,
4904 _MM_FROUND_CUR_DIRECTION);
4905 }
4906
4907 #ifdef __OPTIMIZE__
4908 extern __inline __m512i
4909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4910 _mm512_cvtt_roundph_epi64 (__m128h __A, int __B)
4911 {
4912 return __builtin_ia32_vcvttph2qq512_mask_round (__A,
4913 _mm512_setzero_si512 (),
4914 (__mmask8) -1,
4915 __B);
4916 }
4917
4918 extern __inline __m512i
4919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4920 _mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
4921 {
4922 return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D);
4923 }
4924
4925 extern __inline __m512i
4926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4927 _mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
4928 {
4929 return __builtin_ia32_vcvttph2qq512_mask_round (__B,
4930 _mm512_setzero_si512 (),
4931 __A,
4932 __C);
4933 }
4934
4935 #else
4936 #define _mm512_cvtt_roundph_epi64(A, B) \
4937 (__builtin_ia32_vcvttph2qq512_mask_round ((A), \
4938 _mm512_setzero_si512 (), \
4939 (__mmask8)-1, \
4940 (B)))
4941
4942 #define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) \
4943 (__builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D)))
4944
4945 #define _mm512_maskz_cvtt_roundph_epi64(A, B, C) \
4946 (__builtin_ia32_vcvttph2qq512_mask_round ((B), \
4947 _mm512_setzero_si512 (), \
4948 (A), \
4949 (C)))
4950
4951 #endif /* __OPTIMIZE__ */
4952
4953 /* Intrinsics vcvttph2uqq. */
4954 extern __inline __m512i
4955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4956 _mm512_cvttph_epu64 (__m128h __A)
4957 {
4958 return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
4959 _mm512_setzero_si512 (),
4960 (__mmask8) -1,
4961 _MM_FROUND_CUR_DIRECTION);
4962 }
4963
4964 extern __inline __m512i
4965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4966 _mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
4967 {
4968 return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B,
4969 _MM_FROUND_CUR_DIRECTION);
4970 }
4971
4972 extern __inline __m512i
4973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4974 _mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
4975 {
4976 return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
4977 _mm512_setzero_si512 (),
4978 __A,
4979 _MM_FROUND_CUR_DIRECTION);
4980 }
4981
4982 #ifdef __OPTIMIZE__
4983 extern __inline __m512i
4984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4985 _mm512_cvtt_roundph_epu64 (__m128h __A, int __B)
4986 {
4987 return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
4988 _mm512_setzero_si512 (),
4989 (__mmask8) -1,
4990 __B);
4991 }
4992
4993 extern __inline __m512i
4994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4995 _mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
4996 {
4997 return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D);
4998 }
4999
5000 extern __inline __m512i
5001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5002 _mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
5003 {
5004 return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
5005 _mm512_setzero_si512 (),
5006 __A,
5007 __C);
5008 }
5009
5010 #else
5011 #define _mm512_cvtt_roundph_epu64(A, B) \
5012 (__builtin_ia32_vcvttph2uqq512_mask_round ((A), \
5013 _mm512_setzero_si512 (), \
5014 (__mmask8)-1, \
5015 (B)))
5016
5017 #define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) \
5018 (__builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D)))
5019
5020 #define _mm512_maskz_cvtt_roundph_epu64(A, B, C) \
5021 (__builtin_ia32_vcvttph2uqq512_mask_round ((B), \
5022 _mm512_setzero_si512 (), \
5023 (A), \
5024 (C)))
5025
5026 #endif /* __OPTIMIZE__ */
5027
5028 /* Intrinsics vcvtqq2ph. */
5029 extern __inline __m128h
5030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5031 _mm512_cvtepi64_ph (__m512i __A)
5032 {
5033 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
5034 _mm_setzero_ph (),
5035 (__mmask8) -1,
5036 _MM_FROUND_CUR_DIRECTION);
5037 }
5038
5039 extern __inline __m128h
5040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5041 _mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C)
5042 {
5043 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
5044 __A,
5045 __B,
5046 _MM_FROUND_CUR_DIRECTION);
5047 }
5048
5049 extern __inline __m128h
5050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5051 _mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B)
5052 {
5053 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
5054 _mm_setzero_ph (),
5055 __A,
5056 _MM_FROUND_CUR_DIRECTION);
5057 }
5058
5059 #ifdef __OPTIMIZE__
5060 extern __inline __m128h
5061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5062 _mm512_cvt_roundepi64_ph (__m512i __A, int __B)
5063 {
5064 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
5065 _mm_setzero_ph (),
5066 (__mmask8) -1,
5067 __B);
5068 }
5069
5070 extern __inline __m128h
5071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5072 _mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
5073 {
5074 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
5075 __A,
5076 __B,
5077 __D);
5078 }
5079
5080 extern __inline __m128h
5081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5082 _mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C)
5083 {
5084 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
5085 _mm_setzero_ph (),
5086 __A,
5087 __C);
5088 }
5089
5090 #else
5091 #define _mm512_cvt_roundepi64_ph(A, B) \
5092 (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), \
5093 _mm_setzero_ph (), \
5094 (__mmask8)-1, \
5095 (B)))
5096
5097 #define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) \
5098 (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
5099
5100 #define _mm512_maskz_cvt_roundepi64_ph(A, B, C) \
5101 (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), \
5102 _mm_setzero_ph (), \
5103 (A), \
5104 (C)))
5105
5106 #endif /* __OPTIMIZE__ */
5107
5108 /* Intrinsics vcvtuqq2ph. */
5109 extern __inline __m128h
5110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111 _mm512_cvtepu64_ph (__m512i __A)
5112 {
5113 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
5114 _mm_setzero_ph (),
5115 (__mmask8) -1,
5116 _MM_FROUND_CUR_DIRECTION);
5117 }
5118
5119 extern __inline __m128h
5120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121 _mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C)
5122 {
5123 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
5124 __A,
5125 __B,
5126 _MM_FROUND_CUR_DIRECTION);
5127 }
5128
5129 extern __inline __m128h
5130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5131 _mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B)
5132 {
5133 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
5134 _mm_setzero_ph (),
5135 __A,
5136 _MM_FROUND_CUR_DIRECTION);
5137 }
5138
5139 #ifdef __OPTIMIZE__
5140 extern __inline __m128h
5141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5142 _mm512_cvt_roundepu64_ph (__m512i __A, int __B)
5143 {
5144 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
5145 _mm_setzero_ph (),
5146 (__mmask8) -1,
5147 __B);
5148 }
5149
5150 extern __inline __m128h
5151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5152 _mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
5153 {
5154 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
5155 __A,
5156 __B,
5157 __D);
5158 }
5159
5160 extern __inline __m128h
5161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5162 _mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C)
5163 {
5164 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
5165 _mm_setzero_ph (),
5166 __A,
5167 __C);
5168 }
5169
5170 #else
5171 #define _mm512_cvt_roundepu64_ph(A, B) \
5172 (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), \
5173 _mm_setzero_ph (), \
5174 (__mmask8)-1, \
5175 (B)))
5176
5177 #define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) \
5178 (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
5179
5180 #define _mm512_maskz_cvt_roundepu64_ph(A, B, C) \
5181 (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), \
5182 _mm_setzero_ph (), \
5183 (A), \
5184 (C)))
5185
5186 #endif /* __OPTIMIZE__ */
5187
5188 /* Intrinsics vcvtph2w. */
5189 extern __inline __m512i
5190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5191 _mm512_cvtph_epi16 (__m512h __A)
5192 {
5193 return (__m512i)
5194 __builtin_ia32_vcvtph2w512_mask_round (__A,
5195 (__v32hi)
5196 _mm512_setzero_si512 (),
5197 (__mmask32) -1,
5198 _MM_FROUND_CUR_DIRECTION);
5199 }
5200
5201 extern __inline __m512i
5202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5203 _mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
5204 {
5205 return (__m512i)
5206 __builtin_ia32_vcvtph2w512_mask_round (__C,
5207 (__v32hi) __A,
5208 __B,
5209 _MM_FROUND_CUR_DIRECTION);
5210 }
5211
5212 extern __inline __m512i
5213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5214 _mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B)
5215 {
5216 return (__m512i)
5217 __builtin_ia32_vcvtph2w512_mask_round (__B,
5218 (__v32hi)
5219 _mm512_setzero_si512 (),
5220 __A,
5221 _MM_FROUND_CUR_DIRECTION);
5222 }
5223
5224 #ifdef __OPTIMIZE__
5225 extern __inline __m512i
5226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5227 _mm512_cvt_roundph_epi16 (__m512h __A, int __B)
5228 {
5229 return (__m512i)
5230 __builtin_ia32_vcvtph2w512_mask_round (__A,
5231 (__v32hi)
5232 _mm512_setzero_si512 (),
5233 (__mmask32) -1,
5234 __B);
5235 }
5236
5237 extern __inline __m512i
5238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5239 _mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
5240 {
5241 return (__m512i)
5242 __builtin_ia32_vcvtph2w512_mask_round (__C,
5243 (__v32hi) __A,
5244 __B,
5245 __D);
5246 }
5247
5248 extern __inline __m512i
5249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5250 _mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
5251 {
5252 return (__m512i)
5253 __builtin_ia32_vcvtph2w512_mask_round (__B,
5254 (__v32hi)
5255 _mm512_setzero_si512 (),
5256 __A,
5257 __C);
5258 }
5259
5260 #else
5261 #define _mm512_cvt_roundph_epi16(A, B) \
5262 ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), \
5263 (__v32hi) \
5264 _mm512_setzero_si512 (), \
5265 (__mmask32)-1, \
5266 (B)))
5267
5268 #define _mm512_mask_cvt_roundph_epi16(A, B, C, D) \
5269 ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), \
5270 (__v32hi)(A), \
5271 (B), \
5272 (D)))
5273
5274 #define _mm512_maskz_cvt_roundph_epi16(A, B, C) \
5275 ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), \
5276 (__v32hi) \
5277 _mm512_setzero_si512 (), \
5278 (A), \
5279 (C)))
5280
5281 #endif /* __OPTIMIZE__ */
5282
5283 /* Intrinsics vcvtph2uw. */
5284 extern __inline __m512i
5285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5286 _mm512_cvtph_epu16 (__m512h __A)
5287 {
5288 return (__m512i)
5289 __builtin_ia32_vcvtph2uw512_mask_round (__A,
5290 (__v32hi)
5291 _mm512_setzero_si512 (),
5292 (__mmask32) -1,
5293 _MM_FROUND_CUR_DIRECTION);
5294 }
5295
5296 extern __inline __m512i
5297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5298 _mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
5299 {
5300 return (__m512i)
5301 __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B,
5302 _MM_FROUND_CUR_DIRECTION);
5303 }
5304
5305 extern __inline __m512i
5306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5307 _mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B)
5308 {
5309 return (__m512i)
5310 __builtin_ia32_vcvtph2uw512_mask_round (__B,
5311 (__v32hi)
5312 _mm512_setzero_si512 (),
5313 __A,
5314 _MM_FROUND_CUR_DIRECTION);
5315 }
5316
5317 #ifdef __OPTIMIZE__
5318 extern __inline __m512i
5319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5320 _mm512_cvt_roundph_epu16 (__m512h __A, int __B)
5321 {
5322 return (__m512i)
5323 __builtin_ia32_vcvtph2uw512_mask_round (__A,
5324 (__v32hi)
5325 _mm512_setzero_si512 (),
5326 (__mmask32) -1,
5327 __B);
5328 }
5329
5330 extern __inline __m512i
5331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5332 _mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
5333 {
5334 return (__m512i)
5335 __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D);
5336 }
5337
5338 extern __inline __m512i
5339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340 _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
5341 {
5342 return (__m512i)
5343 __builtin_ia32_vcvtph2uw512_mask_round (__B,
5344 (__v32hi)
5345 _mm512_setzero_si512 (),
5346 __A,
5347 __C);
5348 }
5349
5350 #else
5351 #define _mm512_cvt_roundph_epu16(A, B) \
5352 ((__m512i) \
5353 __builtin_ia32_vcvtph2uw512_mask_round ((A), \
5354 (__v32hi) \
5355 _mm512_setzero_si512 (), \
5356 (__mmask32)-1, (B)))
5357
5358 #define _mm512_mask_cvt_roundph_epu16(A, B, C, D) \
5359 ((__m512i) \
5360 __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
5361
5362 #define _mm512_maskz_cvt_roundph_epu16(A, B, C) \
5363 ((__m512i) \
5364 __builtin_ia32_vcvtph2uw512_mask_round ((B), \
5365 (__v32hi) \
5366 _mm512_setzero_si512 (), \
5367 (A), \
5368 (C)))
5369
5370 #endif /* __OPTIMIZE__ */
5371
5372 /* Intrinsics vcvttph2w. */
5373 extern __inline __m512i
5374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5375 _mm512_cvttph_epi16 (__m512h __A)
5376 {
5377 return (__m512i)
5378 __builtin_ia32_vcvttph2w512_mask_round (__A,
5379 (__v32hi)
5380 _mm512_setzero_si512 (),
5381 (__mmask32) -1,
5382 _MM_FROUND_CUR_DIRECTION);
5383 }
5384
5385 extern __inline __m512i
5386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5387 _mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
5388 {
5389 return (__m512i)
5390 __builtin_ia32_vcvttph2w512_mask_round (__C,
5391 (__v32hi) __A,
5392 __B,
5393 _MM_FROUND_CUR_DIRECTION);
5394 }
5395
5396 extern __inline __m512i
5397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5398 _mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B)
5399 {
5400 return (__m512i)
5401 __builtin_ia32_vcvttph2w512_mask_round (__B,
5402 (__v32hi)
5403 _mm512_setzero_si512 (),
5404 __A,
5405 _MM_FROUND_CUR_DIRECTION);
5406 }
5407
5408 #ifdef __OPTIMIZE__
5409 extern __inline __m512i
5410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5411 _mm512_cvtt_roundph_epi16 (__m512h __A, int __B)
5412 {
5413 return (__m512i)
5414 __builtin_ia32_vcvttph2w512_mask_round (__A,
5415 (__v32hi)
5416 _mm512_setzero_si512 (),
5417 (__mmask32) -1,
5418 __B);
5419 }
5420
5421 extern __inline __m512i
5422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5423 _mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B,
5424 __m512h __C, int __D)
5425 {
5426 return (__m512i)
5427 __builtin_ia32_vcvttph2w512_mask_round (__C,
5428 (__v32hi) __A,
5429 __B,
5430 __D);
5431 }
5432
5433 extern __inline __m512i
5434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5435 _mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
5436 {
5437 return (__m512i)
5438 __builtin_ia32_vcvttph2w512_mask_round (__B,
5439 (__v32hi)
5440 _mm512_setzero_si512 (),
5441 __A,
5442 __C);
5443 }
5444
5445 #else
5446 #define _mm512_cvtt_roundph_epi16(A, B) \
5447 ((__m512i) \
5448 __builtin_ia32_vcvttph2w512_mask_round ((A), \
5449 (__v32hi) \
5450 _mm512_setzero_si512 (), \
5451 (__mmask32)-1, \
5452 (B)))
5453
5454 #define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) \
5455 ((__m512i) \
5456 __builtin_ia32_vcvttph2w512_mask_round ((C), \
5457 (__v32hi)(A), \
5458 (B), \
5459 (D)))
5460
5461 #define _mm512_maskz_cvtt_roundph_epi16(A, B, C) \
5462 ((__m512i) \
5463 __builtin_ia32_vcvttph2w512_mask_round ((B), \
5464 (__v32hi) \
5465 _mm512_setzero_si512 (), \
5466 (A), \
5467 (C)))
5468
5469 #endif /* __OPTIMIZE__ */
5470
5471 /* Intrinsics vcvttph2uw. */
5472 extern __inline __m512i
5473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5474 _mm512_cvttph_epu16 (__m512h __A)
5475 {
5476 return (__m512i)
5477 __builtin_ia32_vcvttph2uw512_mask_round (__A,
5478 (__v32hi)
5479 _mm512_setzero_si512 (),
5480 (__mmask32) -1,
5481 _MM_FROUND_CUR_DIRECTION);
5482 }
5483
5484 extern __inline __m512i
5485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5486 _mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
5487 {
5488 return (__m512i)
5489 __builtin_ia32_vcvttph2uw512_mask_round (__C,
5490 (__v32hi) __A,
5491 __B,
5492 _MM_FROUND_CUR_DIRECTION);
5493 }
5494
5495 extern __inline __m512i
5496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5497 _mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B)
5498 {
5499 return (__m512i)
5500 __builtin_ia32_vcvttph2uw512_mask_round (__B,
5501 (__v32hi)
5502 _mm512_setzero_si512 (),
5503 __A,
5504 _MM_FROUND_CUR_DIRECTION);
5505 }
5506
5507 #ifdef __OPTIMIZE__
5508 extern __inline __m512i
5509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5510 _mm512_cvtt_roundph_epu16 (__m512h __A, int __B)
5511 {
5512 return (__m512i)
5513 __builtin_ia32_vcvttph2uw512_mask_round (__A,
5514 (__v32hi)
5515 _mm512_setzero_si512 (),
5516 (__mmask32) -1,
5517 __B);
5518 }
5519
5520 extern __inline __m512i
5521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5522 _mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B,
5523 __m512h __C, int __D)
5524 {
5525 return (__m512i)
5526 __builtin_ia32_vcvttph2uw512_mask_round (__C,
5527 (__v32hi) __A,
5528 __B,
5529 __D);
5530 }
5531
5532 extern __inline __m512i
5533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534 _mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
5535 {
5536 return (__m512i)
5537 __builtin_ia32_vcvttph2uw512_mask_round (__B,
5538 (__v32hi)
5539 _mm512_setzero_si512 (),
5540 __A,
5541 __C);
5542 }
5543
5544 #else
5545 #define _mm512_cvtt_roundph_epu16(A, B) \
5546 ((__m512i) \
5547 __builtin_ia32_vcvttph2uw512_mask_round ((A), \
5548 (__v32hi) \
5549 _mm512_setzero_si512 (), \
5550 (__mmask32)-1, \
5551 (B)))
5552
5553 #define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) \
5554 ((__m512i) \
5555 __builtin_ia32_vcvttph2uw512_mask_round ((C), \
5556 (__v32hi)(A), \
5557 (B), \
5558 (D)))
5559
5560 #define _mm512_maskz_cvtt_roundph_epu16(A, B, C) \
5561 ((__m512i) \
5562 __builtin_ia32_vcvttph2uw512_mask_round ((B), \
5563 (__v32hi) \
5564 _mm512_setzero_si512 (), \
5565 (A), \
5566 (C)))
5567
5568 #endif /* __OPTIMIZE__ */
5569
5570 /* Intrinsics vcvtw2ph. */
5571 extern __inline __m512h
5572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5573 _mm512_cvtepi16_ph (__m512i __A)
5574 {
5575 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
5576 _mm512_setzero_ph (),
5577 (__mmask32) -1,
5578 _MM_FROUND_CUR_DIRECTION);
5579 }
5580
5581 extern __inline __m512h
5582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5583 _mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
5584 {
5585 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
5586 __A,
5587 __B,
5588 _MM_FROUND_CUR_DIRECTION);
5589 }
5590
5591 extern __inline __m512h
5592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5593 _mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
5594 {
5595 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
5596 _mm512_setzero_ph (),
5597 __A,
5598 _MM_FROUND_CUR_DIRECTION);
5599 }
5600
5601 #ifdef __OPTIMIZE__
5602 extern __inline __m512h
5603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5604 _mm512_cvt_roundepi16_ph (__m512i __A, int __B)
5605 {
5606 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
5607 _mm512_setzero_ph (),
5608 (__mmask32) -1,
5609 __B);
5610 }
5611
5612 extern __inline __m512h
5613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614 _mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
5615 {
5616 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
5617 __A,
5618 __B,
5619 __D);
5620 }
5621
5622 extern __inline __m512h
5623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5624 _mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
5625 {
5626 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
5627 _mm512_setzero_ph (),
5628 __A,
5629 __C);
5630 }
5631
5632 #else
5633 #define _mm512_cvt_roundepi16_ph(A, B) \
5634 (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A), \
5635 _mm512_setzero_ph (), \
5636 (__mmask32)-1, \
5637 (B)))
5638
5639 #define _mm512_mask_cvt_roundepi16_ph(A, B, C, D) \
5640 (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C), \
5641 (A), \
5642 (B), \
5643 (D)))
5644
5645 #define _mm512_maskz_cvt_roundepi16_ph(A, B, C) \
5646 (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B), \
5647 _mm512_setzero_ph (), \
5648 (A), \
5649 (C)))
5650
5651 #endif /* __OPTIMIZE__ */
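
/* Illustrative sketch (hypothetical helper, not part of the API):
   int16_t values of magnitude above 2048 are not all exactly
   representable in _Float16, so the round variant lets the caller fix
   the rounding mode instead of inheriting it from MXCSR.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_epi16_to_ph_truncating (__m512i __ints)
{
  /* Truncate toward zero, exceptions suppressed.  */
  return _mm512_cvt_roundepi16_ph (__ints,
				   _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}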
5652
5653 /* Intrinsics vcvtuw2ph. */
5654 extern __inline __m512h
5655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5656 _mm512_cvtepu16_ph (__m512i __A)
5657 {
5658 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
5659 _mm512_setzero_ph (),
5660 (__mmask32) -1,
5661 _MM_FROUND_CUR_DIRECTION);
5662 }
5663
5664 extern __inline __m512h
5665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5666 _mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
5667 {
5668 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
5669 __A,
5670 __B,
5671 _MM_FROUND_CUR_DIRECTION);
5672 }
5673
5674 extern __inline __m512h
5675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5676 _mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
5677 {
5678 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
5679 _mm512_setzero_ph (),
5680 __A,
5681 _MM_FROUND_CUR_DIRECTION);
5682 }
5683
5684 #ifdef __OPTIMIZE__
5685 extern __inline __m512h
5686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687 _mm512_cvt_roundepu16_ph (__m512i __A, int __B)
5688 {
5689 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
5690 _mm512_setzero_ph (),
5691 (__mmask32) -1,
5692 __B);
5693 }
5694
5695 extern __inline __m512h
5696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5697 _mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
5698 {
5699 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
5700 __A,
5701 __B,
5702 __D);
5703 }
5704
5705 extern __inline __m512h
5706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5707 _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
5708 {
5709 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
5710 _mm512_setzero_ph (),
5711 __A,
5712 __C);
5713 }
5714
5715 #else
5716 #define _mm512_cvt_roundepu16_ph(A, B) \
5717 (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A), \
5718 _mm512_setzero_ph (), \
5719 (__mmask32)-1, \
5720 (B)))
5721
5722 #define _mm512_mask_cvt_roundepu16_ph(A, B, C, D) \
5723 (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C), \
5724 (A), \
5725 (B), \
5726 (D)))
5727
5728 #define _mm512_maskz_cvt_roundepu16_ph(A, B, C) \
5729 (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B), \
5730 _mm512_setzero_ph (), \
5731 (A), \
5732 (C)))
5733
5734 #endif /* __OPTIMIZE__ */
5735
5736 /* Intrinsics vcvtph2pd. */
5737 extern __inline __m512d
5738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5739 _mm512_cvtph_pd (__m128h __A)
5740 {
5741 return __builtin_ia32_vcvtph2pd512_mask_round (__A,
5742 _mm512_setzero_pd (),
5743 (__mmask8) -1,
5744 _MM_FROUND_CUR_DIRECTION);
5745 }
5746
5747 extern __inline __m512d
5748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5749 _mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C)
5750 {
5751 return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B,
5752 _MM_FROUND_CUR_DIRECTION);
5753 }
5754
5755 extern __inline __m512d
5756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5757 _mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
5758 {
5759 return __builtin_ia32_vcvtph2pd512_mask_round (__B,
5760 _mm512_setzero_pd (),
5761 __A,
5762 _MM_FROUND_CUR_DIRECTION);
5763 }
5764
5765 #ifdef __OPTIMIZE__
5766 extern __inline __m512d
5767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5768 _mm512_cvt_roundph_pd (__m128h __A, int __B)
5769 {
5770 return __builtin_ia32_vcvtph2pd512_mask_round (__A,
5771 _mm512_setzero_pd (),
5772 (__mmask8) -1,
5773 __B);
5774 }
5775
5776 extern __inline __m512d
5777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5778 _mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D)
5779 {
5780 return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D);
5781 }
5782
5783 extern __inline __m512d
5784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5785 _mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C)
5786 {
5787 return __builtin_ia32_vcvtph2pd512_mask_round (__B,
5788 _mm512_setzero_pd (),
5789 __A,
5790 __C);
5791 }
5792
5793 #else
5794 #define _mm512_cvt_roundph_pd(A, B) \
5795 (__builtin_ia32_vcvtph2pd512_mask_round ((A), \
5796 _mm512_setzero_pd (), \
5797 (__mmask8)-1, \
5798 (B)))
5799
5800 #define _mm512_mask_cvt_roundph_pd(A, B, C, D) \
5801 (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))
5802
5803 #define _mm512_maskz_cvt_roundph_pd(A, B, C) \
5804 (__builtin_ia32_vcvtph2pd512_mask_round ((B), \
5805 _mm512_setzero_pd (), \
5806 (A), \
5807 (C)))
5808
5809 #endif /* __OPTIMIZE__ */
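
/* Illustrative sketch (hypothetical helper): widening eight _Float16
   values to double is always exact, so the rounding mode can never
   affect the result of this conversion.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_ph_to_pd (__m128h __eight_halves)
{
  return _mm512_cvtph_pd (__eight_halves);
}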
5810
5811 /* Intrinsics vcvtph2psx. */
5812 extern __inline __m512
5813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5814 _mm512_cvtxph_ps (__m256h __A)
5815 {
5816 return __builtin_ia32_vcvtph2psx512_mask_round (__A,
5817 _mm512_setzero_ps (),
5818 (__mmask16) -1,
5819 _MM_FROUND_CUR_DIRECTION);
5820 }
5821
5822 extern __inline __m512
5823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5824 _mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C)
5825 {
5826 return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B,
5827 _MM_FROUND_CUR_DIRECTION);
5828 }
5829
5830 extern __inline __m512
5831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5832 _mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B)
5833 {
5834 return __builtin_ia32_vcvtph2psx512_mask_round (__B,
5835 _mm512_setzero_ps (),
5836 __A,
5837 _MM_FROUND_CUR_DIRECTION);
5838 }
5839
5840 #ifdef __OPTIMIZE__
5841 extern __inline __m512
5842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5843 _mm512_cvtx_roundph_ps (__m256h __A, int __B)
5844 {
5845 return __builtin_ia32_vcvtph2psx512_mask_round (__A,
5846 _mm512_setzero_ps (),
5847 (__mmask16) -1,
5848 __B);
5849 }
5850
5851 extern __inline __m512
5852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5853 _mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D)
5854 {
5855 return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D);
5856 }
5857
5858 extern __inline __m512
5859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5860 _mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C)
5861 {
5862 return __builtin_ia32_vcvtph2psx512_mask_round (__B,
5863 _mm512_setzero_ps (),
5864 __A,
5865 __C);
5866 }
5867
5868 #else
5869 #define _mm512_cvtx_roundph_ps(A, B) \
5870 (__builtin_ia32_vcvtph2psx512_mask_round ((A), \
5871 _mm512_setzero_ps (), \
5872 (__mmask16)-1, \
5873 (B)))
5874
5875 #define _mm512_mask_cvtx_roundph_ps(A, B, C, D) \
5876 (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))
5877
5878 #define _mm512_maskz_cvtx_roundph_ps(A, B, C) \
5879 (__builtin_ia32_vcvtph2psx512_mask_round ((B), \
5880 _mm512_setzero_ps (), \
5881 (A), \
5882 (C)))
5883 #endif /* __OPTIMIZE__ */
5884
5885 /* Intrinsics vcvtps2phx. */
5886 extern __inline __m256h
5887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5888 _mm512_cvtxps_ph (__m512 __A)
5889 {
5890 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
5891 _mm256_setzero_ph (),
5892 (__mmask16) -1,
5893 _MM_FROUND_CUR_DIRECTION);
5894 }
5895
5896 extern __inline __m256h
5897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5898 _mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C)
5899 {
5900 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
5901 __A, __B,
5902 _MM_FROUND_CUR_DIRECTION);
5903 }
5904
5905 extern __inline __m256h
5906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5907 _mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B)
5908 {
5909 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
5910 _mm256_setzero_ph (),
5911 __A,
5912 _MM_FROUND_CUR_DIRECTION);
5913 }
5914
5915 #ifdef __OPTIMIZE__
5916 extern __inline __m256h
5917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5918 _mm512_cvtx_roundps_ph (__m512 __A, int __B)
5919 {
5920 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
5921 _mm256_setzero_ph (),
5922 (__mmask16) -1,
5923 __B);
5924 }
5925
5926 extern __inline __m256h
5927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5928 _mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D)
5929 {
5930 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
5931 __A, __B, __D);
5932 }
5933
5934 extern __inline __m256h
5935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5936 _mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C)
5937 {
5938 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
5939 _mm256_setzero_ph (),
5940 __A, __C);
5941 }
5942
5943 #else
5944 #define _mm512_cvtx_roundps_ph(A, B) \
5945 (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A), \
5946 _mm256_setzero_ph (),\
5947 (__mmask16)-1, (B)))
5948
5949 #define _mm512_mask_cvtx_roundps_ph(A, B, C, D) \
5950 (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C), \
5951 (A), (B), (D)))
5952
5953 #define _mm512_maskz_cvtx_roundps_ph(A, B, C) \
5954 (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B), \
5955 _mm256_setzero_ph (),\
5956 (A), (C)))
5957 #endif /* __OPTIMIZE__ */
5958
5959 /* Intrinsics vcvtpd2ph. */
5960 extern __inline __m128h
5961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5962 _mm512_cvtpd_ph (__m512d __A)
5963 {
5964 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
5965 _mm_setzero_ph (),
5966 (__mmask8) -1,
5967 _MM_FROUND_CUR_DIRECTION);
5968 }
5969
5970 extern __inline __m128h
5971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5972 _mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C)
5973 {
5974 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
5975 __A, __B,
5976 _MM_FROUND_CUR_DIRECTION);
5977 }
5978
5979 extern __inline __m128h
5980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5981 _mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B)
5982 {
5983 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
5984 _mm_setzero_ph (),
5985 __A,
5986 _MM_FROUND_CUR_DIRECTION);
5987 }
5988
5989 #ifdef __OPTIMIZE__
5990 extern __inline __m128h
5991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5992 _mm512_cvt_roundpd_ph (__m512d __A, int __B)
5993 {
5994 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
5995 _mm_setzero_ph (),
5996 (__mmask8) -1,
5997 __B);
5998 }
5999
6000 extern __inline __m128h
6001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6002 _mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D)
6003 {
6004 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
6005 __A, __B, __D);
6006 }
6007
6008 extern __inline __m128h
6009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6010 _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
6011 {
6012 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
6013 _mm_setzero_ph (),
6014 __A, __C);
6015 }
6016
6017 #else
6018 #define _mm512_cvt_roundpd_ph(A, B) \
6019 (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A), \
6020 _mm_setzero_ph (), \
6021 (__mmask8)-1, (B)))
6022
6023 #define _mm512_mask_cvt_roundpd_ph(A, B, C, D) \
6024 (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C), \
6025 (A), (B), (D)))
6026
6027 #define _mm512_maskz_cvt_roundpd_ph(A, B, C) \
6028 (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B), \
6029 _mm_setzero_ph (), \
6030 (A), (C)))
6031
6032 #endif /* __OPTIMIZE__ */
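
/* Illustrative sketch (hypothetical helper): narrowing double to
   _Float16 generally rounds and may overflow to infinity, so an
   explicit rounding mode is often preferable to the MXCSR default.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_pd_to_ph_upward (__m512d __eight_doubles)
{
  /* Round every result toward +inf, exceptions suppressed.  */
  return _mm512_cvt_roundpd_ph (__eight_doubles,
				_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
}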
6033
6034 /* Intrinsics vfmaddsub[132,213,231]ph. */
6035 extern __inline __m512h
6036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6037 _mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C)
6038 {
6039 return (__m512h)
6040 __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
6041 (__v32hf) __B,
6042 (__v32hf) __C,
6043 (__mmask32) -1,
6044 _MM_FROUND_CUR_DIRECTION);
6045 }
6046
6047 extern __inline __m512h
6048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6049 _mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
6050 {
6051 return (__m512h)
6052 __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
6053 (__v32hf) __B,
6054 (__v32hf) __C,
6055 (__mmask32) __U,
6056 _MM_FROUND_CUR_DIRECTION);
6057 }
6058
6059 extern __inline __m512h
6060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6061 _mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
6062 {
6063 return (__m512h)
6064 __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
6065 (__v32hf) __B,
6066 (__v32hf) __C,
6067 (__mmask32) __U,
6068 _MM_FROUND_CUR_DIRECTION);
6069 }
6070
6071 extern __inline __m512h
6072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073 _mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
6074 {
6075 return (__m512h)
6076 __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
6077 (__v32hf) __B,
6078 (__v32hf) __C,
6079 (__mmask32) __U,
6080 _MM_FROUND_CUR_DIRECTION);
6081 }
6082
6083 #ifdef __OPTIMIZE__
6084 extern __inline __m512h
6085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6086 _mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
6087 {
6088 return (__m512h)
6089 __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
6090 (__v32hf) __B,
6091 (__v32hf) __C,
6092 (__mmask32) -1, __R);
6093 }
6094
6095 extern __inline __m512h
6096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6097 _mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
6098 __m512h __C, const int __R)
6099 {
6100 return (__m512h)
6101 __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
6102 (__v32hf) __B,
6103 (__v32hf) __C,
6104 (__mmask32) __U, __R);
6105 }
6106
6107 extern __inline __m512h
6108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6109 _mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
6110 __mmask32 __U, const int __R)
6111 {
6112 return (__m512h)
6113 __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
6114 (__v32hf) __B,
6115 (__v32hf) __C,
6116 (__mmask32) __U, __R);
6117 }
6118
6119 extern __inline __m512h
6120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6121 _mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
6122 __m512h __C, const int __R)
6123 {
6124 return (__m512h)
6125 __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
6126 (__v32hf) __B,
6127 (__v32hf) __C,
6128 (__mmask32) __U, __R);
6129 }
6130
6131 #else
6132 #define _mm512_fmaddsub_round_ph(A, B, C, R) \
6133 ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))
6134
6135 #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
6136 ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))
6137
6138 #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
6139 ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))
6140
6141 #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
6142 ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))
6143
6144 #endif /* __OPTIMIZE__ */
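
/* Illustrative sketch (hypothetical helper): fmaddsub subtracts __C in
   the even-indexed _Float16 lanes and adds it in the odd-indexed lanes,
   the usual building block for interleaved real/imaginary data.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_fmaddsub_demo (__m512h __a, __m512h __b, __m512h __c)
{
  /* dst[2k]   = __a[2k]   * __b[2k]   - __c[2k]
     dst[2k+1] = __a[2k+1] * __b[2k+1] + __c[2k+1]  */
  return _mm512_fmaddsub_ph (__a, __b, __c);
}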
6145
6146 /* Intrinsics vfmsubadd[132,213,231]ph. */
6147 extern __inline __m512h
6148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6149 _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C)
6150 {
6151 return (__m512h)
6152 __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
6153 (__v32hf) __B,
6154 (__v32hf) __C,
6155 (__mmask32) -1,
6156 _MM_FROUND_CUR_DIRECTION);
6157 }
6158
6159 extern __inline __m512h
6160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6161 _mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U,
6162 __m512h __B, __m512h __C)
6163 {
6164 return (__m512h)
6165 __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
6166 (__v32hf) __B,
6167 (__v32hf) __C,
6168 (__mmask32) __U,
6169 _MM_FROUND_CUR_DIRECTION);
6170 }
6171
6172 extern __inline __m512h
6173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6174 _mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B,
6175 __m512h __C, __mmask32 __U)
6176 {
6177 return (__m512h)
6178 __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
6179 (__v32hf) __B,
6180 (__v32hf) __C,
6181 (__mmask32) __U,
6182 _MM_FROUND_CUR_DIRECTION);
6183 }
6184
6185 extern __inline __m512h
6186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6187 _mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A,
6188 __m512h __B, __m512h __C)
6189 {
6190 return (__m512h)
6191 __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
6192 (__v32hf) __B,
6193 (__v32hf) __C,
6194 (__mmask32) __U,
6195 _MM_FROUND_CUR_DIRECTION);
6196 }
6197
6198 #ifdef __OPTIMIZE__
6199 extern __inline __m512h
6200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6201 _mm512_fmsubadd_round_ph (__m512h __A, __m512h __B,
6202 __m512h __C, const int __R)
6203 {
6204 return (__m512h)
6205 __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
6206 (__v32hf) __B,
6207 (__v32hf) __C,
6208 (__mmask32) -1, __R);
6209 }
6210
6211 extern __inline __m512h
6212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6213 _mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
6214 __m512h __C, const int __R)
6215 {
6216 return (__m512h)
6217 __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
6218 (__v32hf) __B,
6219 (__v32hf) __C,
6220 (__mmask32) __U, __R);
6221 }
6222
6223 extern __inline __m512h
6224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6225 _mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
6226 __mmask32 __U, const int __R)
6227 {
6228 return (__m512h)
6229 __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
6230 (__v32hf) __B,
6231 (__v32hf) __C,
6232 (__mmask32) __U, __R);
6233 }
6234
6235 extern __inline __m512h
6236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6237 _mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
6238 __m512h __C, const int __R)
6239 {
6240 return (__m512h)
6241 __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
6242 (__v32hf) __B,
6243 (__v32hf) __C,
6244 (__mmask32) __U, __R);
6245 }
6246
6247 #else
6248 #define _mm512_fmsubadd_round_ph(A, B, C, R) \
6249 ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))
6250
6251 #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
6252 ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))
6253
6254 #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
6255 ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))
6256
6257 #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
6258 ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))
6259
6260 #endif /* __OPTIMIZE__ */
6261
6262 /* Intrinsics vfmadd[132,213,231]ph. */
6263 extern __inline __m512h
6264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6265 _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C)
6266 {
6267 return (__m512h)
6268 __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
6269 (__v32hf) __B,
6270 (__v32hf) __C,
6271 (__mmask32) -1,
6272 _MM_FROUND_CUR_DIRECTION);
6273 }
6274
6275 extern __inline __m512h
6276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6277 _mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
6278 {
6279 return (__m512h)
6280 __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
6281 (__v32hf) __B,
6282 (__v32hf) __C,
6283 (__mmask32) __U,
6284 _MM_FROUND_CUR_DIRECTION);
6285 }
6286
6287 extern __inline __m512h
6288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6289 _mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
6290 {
6291 return (__m512h)
6292 __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
6293 (__v32hf) __B,
6294 (__v32hf) __C,
6295 (__mmask32) __U,
6296 _MM_FROUND_CUR_DIRECTION);
6297 }
6298
6299 extern __inline __m512h
6300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6301 _mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
6302 {
6303 return (__m512h)
6304 __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
6305 (__v32hf) __B,
6306 (__v32hf) __C,
6307 (__mmask32) __U,
6308 _MM_FROUND_CUR_DIRECTION);
6309 }
6310
6311 #ifdef __OPTIMIZE__
6312 extern __inline __m512h
6313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6314 _mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
6315 {
6316 return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
6317 (__v32hf) __B,
6318 (__v32hf) __C,
6319 (__mmask32) -1, __R);
6320 }
6321
6322 extern __inline __m512h
6323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6324 _mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
6325 __m512h __C, const int __R)
6326 {
6327 return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
6328 (__v32hf) __B,
6329 (__v32hf) __C,
6330 (__mmask32) __U, __R);
6331 }
6332
6333 extern __inline __m512h
6334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6335 _mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
6336 __mmask32 __U, const int __R)
6337 {
6338 return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
6339 (__v32hf) __B,
6340 (__v32hf) __C,
6341 (__mmask32) __U, __R);
6342 }
6343
6344 extern __inline __m512h
6345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6346 _mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
6347 __m512h __C, const int __R)
6348 {
6349 return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
6350 (__v32hf) __B,
6351 (__v32hf) __C,
6352 (__mmask32) __U, __R);
6353 }
6354
6355 #else
6356 #define _mm512_fmadd_round_ph(A, B, C, R) \
6357 ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))
6358
6359 #define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
6360 ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))
6361
6362 #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
6363 ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))
6364
6365 #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
6366 ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))
6367
6368 #endif /* __OPTIMIZE__ */
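
/* Illustrative sketch (hypothetical helper): a masked fused
   multiply-add with the rounding mode fixed at the call site.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_masked_fma (__m512h __acc, __mmask32 __keep,
		      __m512h __x, __m512h __y)
{
  /* Selected lanes become __acc * __x + __y with a single rounding to
     nearest-even; unselected lanes keep __acc (the maskz form would
     zero them).  */
  return _mm512_mask_fmadd_round_ph (__acc, __keep, __x, __y,
				     _MM_FROUND_TO_NEAREST_INT
				     | _MM_FROUND_NO_EXC);
}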
6369
6370 /* Intrinsics vfnmadd[132,213,231]ph. */
6371 extern __inline __m512h
6372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6373 _mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C)
6374 {
6375 return (__m512h)
6376 __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
6377 (__v32hf) __B,
6378 (__v32hf) __C,
6379 (__mmask32) -1,
6380 _MM_FROUND_CUR_DIRECTION);
6381 }
6382
6383 extern __inline __m512h
6384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6385 _mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
6386 {
6387 return (__m512h)
6388 __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
6389 (__v32hf) __B,
6390 (__v32hf) __C,
6391 (__mmask32) __U,
6392 _MM_FROUND_CUR_DIRECTION);
6393 }
6394
6395 extern __inline __m512h
6396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6397 _mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
6398 {
6399 return (__m512h)
6400 __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
6401 (__v32hf) __B,
6402 (__v32hf) __C,
6403 (__mmask32) __U,
6404 _MM_FROUND_CUR_DIRECTION);
6405 }
6406
6407 extern __inline __m512h
6408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6409 _mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
6410 {
6411 return (__m512h)
6412 __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
6413 (__v32hf) __B,
6414 (__v32hf) __C,
6415 (__mmask32) __U,
6416 _MM_FROUND_CUR_DIRECTION);
6417 }
6418
6419 #ifdef __OPTIMIZE__
6420 extern __inline __m512h
6421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6422 _mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
6423 {
6424 return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
6425 (__v32hf) __B,
6426 (__v32hf) __C,
6427 (__mmask32) -1, __R);
6428 }
6429
6430 extern __inline __m512h
6431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6432 _mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
6433 __m512h __C, const int __R)
6434 {
6435 return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
6436 (__v32hf) __B,
6437 (__v32hf) __C,
6438 (__mmask32) __U, __R);
6439 }
6440
6441 extern __inline __m512h
6442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6443 _mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
6444 __mmask32 __U, const int __R)
6445 {
6446 return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
6447 (__v32hf) __B,
6448 (__v32hf) __C,
6449 (__mmask32) __U, __R);
6450 }
6451
6452 extern __inline __m512h
6453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6454 _mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
6455 __m512h __C, const int __R)
6456 {
6457 return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
6458 (__v32hf) __B,
6459 (__v32hf) __C,
6460 (__mmask32) __U, __R);
6461 }
6462
6463 #else
6464 #define _mm512_fnmadd_round_ph(A, B, C, R) \
6465 ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))
6466
6467 #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
6468 ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))
6469
6470 #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
6471 ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))
6472
6473 #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
6474 ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))
6475
6476 #endif /* __OPTIMIZE__ */
6477
6478 /* Intrinsics vfmsub[132,213,231]ph. */
6479 extern __inline __m512h
6480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6481 _mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C)
6482 {
6483 return (__m512h)
6484 __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
6485 (__v32hf) __B,
6486 (__v32hf) __C,
6487 (__mmask32) -1,
6488 _MM_FROUND_CUR_DIRECTION);
6489 }
6490
6491 extern __inline __m512h
6492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6493 _mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
6494 {
6495 return (__m512h)
6496 __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
6497 (__v32hf) __B,
6498 (__v32hf) __C,
6499 (__mmask32) __U,
6500 _MM_FROUND_CUR_DIRECTION);
6501 }
6502
6503 extern __inline __m512h
6504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6505 _mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
6506 {
6507 return (__m512h)
6508 __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
6509 (__v32hf) __B,
6510 (__v32hf) __C,
6511 (__mmask32) __U,
6512 _MM_FROUND_CUR_DIRECTION);
6513 }
6514
6515 extern __inline __m512h
6516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6517 _mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
6518 {
6519 return (__m512h)
6520 __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
6521 (__v32hf) __B,
6522 (__v32hf) __C,
6523 (__mmask32) __U,
6524 _MM_FROUND_CUR_DIRECTION);
6525 }
6526
6527 #ifdef __OPTIMIZE__
6528 extern __inline __m512h
6529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530 _mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
6531 {
6532 return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
6533 (__v32hf) __B,
6534 (__v32hf) __C,
6535 (__mmask32) -1, __R);
6536 }
6537
6538 extern __inline __m512h
6539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6540 _mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
6541 __m512h __C, const int __R)
6542 {
6543 return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
6544 (__v32hf) __B,
6545 (__v32hf) __C,
6546 (__mmask32) __U, __R);
6547 }
6548
6549 extern __inline __m512h
6550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551 _mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
6552 __mmask32 __U, const int __R)
6553 {
6554 return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
6555 (__v32hf) __B,
6556 (__v32hf) __C,
6557 (__mmask32) __U, __R);
6558 }
6559
6560 extern __inline __m512h
6561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6562 _mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
6563 __m512h __C, const int __R)
6564 {
6565 return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
6566 (__v32hf) __B,
6567 (__v32hf) __C,
6568 (__mmask32) __U, __R);
6569 }
6570
6571 #else
6572 #define _mm512_fmsub_round_ph(A, B, C, R) \
6573 ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))
6574
6575 #define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
6576 ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))
6577
6578 #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
6579 ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))
6580
6581 #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
6582 ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))
6583
6584 #endif /* __OPTIMIZE__ */
6585
6586 /* Intrinsics vfnmsub[132,213,231]ph. */
6587 extern __inline __m512h
6588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6589 _mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C)
6590 {
6591 return (__m512h)
6592 __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
6593 (__v32hf) __B,
6594 (__v32hf) __C,
6595 (__mmask32) -1,
6596 _MM_FROUND_CUR_DIRECTION);
6597 }
6598
6599 extern __inline __m512h
6600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6601 _mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
6602 {
6603 return (__m512h)
6604 __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
6605 (__v32hf) __B,
6606 (__v32hf) __C,
6607 (__mmask32) __U,
6608 _MM_FROUND_CUR_DIRECTION);
6609 }
6610
6611 extern __inline __m512h
6612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6613 _mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
6614 {
6615 return (__m512h)
6616 __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
6617 (__v32hf) __B,
6618 (__v32hf) __C,
6619 (__mmask32) __U,
6620 _MM_FROUND_CUR_DIRECTION);
6621 }
6622
6623 extern __inline __m512h
6624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6625 _mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
6626 {
6627 return (__m512h)
6628 __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
6629 (__v32hf) __B,
6630 (__v32hf) __C,
6631 (__mmask32) __U,
6632 _MM_FROUND_CUR_DIRECTION);
6633 }
6634
6635 #ifdef __OPTIMIZE__
6636 extern __inline __m512h
6637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6638 _mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
6639 {
6640 return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
6641 (__v32hf) __B,
6642 (__v32hf) __C,
6643 (__mmask32) -1, __R);
6644 }
6645
6646 extern __inline __m512h
6647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6648 _mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
6649 __m512h __C, const int __R)
6650 {
6651 return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
6652 (__v32hf) __B,
6653 (__v32hf) __C,
6654 (__mmask32) __U, __R);
6655 }
6656
6657 extern __inline __m512h
6658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6659 _mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
6660 __mmask32 __U, const int __R)
6661 {
6662 return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
6663 (__v32hf) __B,
6664 (__v32hf) __C,
6665 (__mmask32) __U, __R);
6666 }
6667
6668 extern __inline __m512h
6669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6670 _mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
6671 __m512h __C, const int __R)
6672 {
6673 return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
6674 (__v32hf) __B,
6675 (__v32hf) __C,
6676 (__mmask32) __U, __R);
6677 }
6678
6679 #else
6680 #define _mm512_fnmsub_round_ph(A, B, C, R) \
6681 ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))
6682
6683 #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
6684 ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))
6685
6686 #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
6687 ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))
6688
6689 #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
6690 ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))
6691
6692 #endif /* __OPTIMIZE__ */
6693
6694 /* Intrinsics vf[,c]maddcph. */
6695 extern __inline __m512h
6696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6697 _mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C)
6698 {
6699 return (__m512h)
6700 __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
6701 (__v32hf) __B,
6702 (__v32hf) __C,
6703 _MM_FROUND_CUR_DIRECTION);
6704 }
6705
6706 extern __inline __m512h
6707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6708 _mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6709 {
6710 return (__m512h)
6711 __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
6712 (__v32hf) __C,
6713 (__v32hf) __D, __B,
6714 _MM_FROUND_CUR_DIRECTION);
6715 }
6716
6717 extern __inline __m512h
6718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6719 _mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
6720 {
6721 return (__m512h)
6722 __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
6723 (__v32hf) __B,
6724 (__v32hf) __C,
6725 __D, _MM_FROUND_CUR_DIRECTION);
6726 }
6727
6728 extern __inline __m512h
6729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6730 _mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
6731 {
6732 return (__m512h)
6733 __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
6734 (__v32hf) __C,
6735 (__v32hf) __D,
6736 __A, _MM_FROUND_CUR_DIRECTION);
6737 }
6738
6739 extern __inline __m512h
6740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6741 _mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C)
6742 {
6743 return (__m512h)
6744 __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
6745 (__v32hf) __B,
6746 (__v32hf) __C,
6747 _MM_FROUND_CUR_DIRECTION);
6748 }
6749
6750 extern __inline __m512h
6751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6752 _mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6753 {
6754 return (__m512h)
6755 __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
6756 (__v32hf) __C,
6757 (__v32hf) __D, __B,
6758 _MM_FROUND_CUR_DIRECTION);
6759 }
6760
6761 extern __inline __m512h
6762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6763 _mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
6764 {
6765 return (__m512h)
6766 __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
6767 (__v32hf) __B,
6768 (__v32hf) __C,
6769 __D, _MM_FROUND_CUR_DIRECTION);
6770 }
6771
6772 extern __inline __m512h
6773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6774 _mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
6775 {
6776 return (__m512h)
6777 __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
6778 (__v32hf) __C,
6779 (__v32hf) __D,
6780 __A, _MM_FROUND_CUR_DIRECTION);
6781 }
6782
6783 #ifdef __OPTIMIZE__
6784 extern __inline __m512h
6785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6786 _mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
6787 {
6788 return (__m512h)
6789 __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
6790 (__v32hf) __B,
6791 (__v32hf) __C,
6792 __D);
6793 }
6794
6795 extern __inline __m512h
6796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6797 _mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6798 __m512h __D, const int __E)
6799 {
6800 return (__m512h)
6801 __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
6802 (__v32hf) __C,
6803 (__v32hf) __D, __B,
6804 __E);
6805 }
6806
6807 extern __inline __m512h
6808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6809 _mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
6810 __mmask16 __D, const int __E)
6811 {
6812 return (__m512h)
6813 __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
6814 (__v32hf) __B,
6815 (__v32hf) __C,
6816 __D, __E);
6817 }
6818
6819 extern __inline __m512h
6820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6821 _mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
6822 __m512h __D, const int __E)
6823 {
6824 return (__m512h)
6825 __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
6826 (__v32hf) __C,
6827 (__v32hf) __D,
6828 __A, __E);
6829 }
6830
6831 extern __inline __m512h
6832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6833 _mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
6834 {
6835 return (__m512h)
6836 __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
6837 (__v32hf) __B,
6838 (__v32hf) __C,
6839 __D);
6840 }
6841
6842 extern __inline __m512h
6843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6844 _mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6845 __m512h __D, const int __E)
6846 {
6847 return (__m512h)
6848 __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
6849 (__v32hf) __C,
6850 (__v32hf) __D, __B,
6851 __E);
6852 }
6853
6854 extern __inline __m512h
6855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856 _mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
6857 __mmask16 __D, const int __E)
6858 {
6859 return (__m512h)
6860 __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
6861 (__v32hf) __B,
6862 (__v32hf) __C,
6863 __D, __E);
6864 }
6865
6866 extern __inline __m512h
6867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6868 _mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
6869 __m512h __D, const int __E)
6870 {
6871 return (__m512h)
6872 __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
6873 (__v32hf) __C,
6874 (__v32hf) __D,
6875 __A, __E);
6876 }
6877
6878 #else
6879 #define _mm512_fcmadd_round_pch(A, B, C, D) \
6880 (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D))
6881
6882 #define _mm512_mask_fcmadd_round_pch(A, B, C, D, E) \
6883 ((__m512h) \
6884 __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A), \
6885 (__v32hf) (C), \
6886 (__v32hf) (D), \
6887 (B), (E)))
6888
6889
6890 #define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E) \
6891 ((__m512h) \
6892 __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
6893
6894 #define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E) \
6895 (__m512h) \
6896 __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E))
6897
6898 #define _mm512_fmadd_round_pch(A, B, C, D) \
6899 (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D))
6900
6901 #define _mm512_mask_fmadd_round_pch(A, B, C, D, E) \
6902 ((__m512h) \
6903 __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A), \
6904 (__v32hf) (C), \
6905 (__v32hf) (D), \
6906 (B), (E)))
6907
6908 #define _mm512_mask3_fmadd_round_pch(A, B, C, D, E) \
6909 (__m512h) \
6910 __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E))
6911
6912 #define _mm512_maskz_fmadd_round_pch(A, B, C, D, E) \
6913 (__m512h) \
6914 __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E))
6915
6916 #endif /* __OPTIMIZE__ */
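
/* Illustrative sketch (hypothetical helper): the _pch intrinsics treat
   each adjacent pair of _Float16 lanes as one complex value (real part
   in the even lane, imaginary part in the odd lane), which is why their
   masks are __mmask16, one bit per pair, rather than __mmask32.  The
   fcmadd forms additionally conjugate one multiplicand (the second
   operand, per Intel's documentation).  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_complex_fma (__m512h __a, __m512h __b, __m512h __acc)
{
  /* 16 complex half-precision multiply-accumulates: __a * __b + __acc.  */
  return _mm512_fmadd_pch (__a, __b, __acc);
}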
6917
6918 /* Intrinsics vf[,c]mulcph. */
6919 extern __inline __m512h
6920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6921 _mm512_fcmul_pch (__m512h __A, __m512h __B)
6922 {
6923 return (__m512h)
6924 __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
6925 (__v32hf) __B,
6926 _MM_FROUND_CUR_DIRECTION);
6927 }
6928
6929 extern __inline __m512h
6930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6931 _mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6932 {
6933 return (__m512h)
6934 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
6935 (__v32hf) __D,
6936 (__v32hf) __A,
6937 __B, _MM_FROUND_CUR_DIRECTION);
6938 }
6939
6940 extern __inline __m512h
6941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6942 _mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
6943 {
6944 return (__m512h)
6945 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
6946 (__v32hf) __C,
6947 _mm512_setzero_ph (),
6948 __A, _MM_FROUND_CUR_DIRECTION);
6949 }
6950
6951 extern __inline __m512h
6952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6953 _mm512_fmul_pch (__m512h __A, __m512h __B)
6954 {
6955 return (__m512h)
6956 __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
6957 (__v32hf) __B,
6958 _MM_FROUND_CUR_DIRECTION);
6959 }
6960
6961 extern __inline __m512h
6962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6963 _mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6964 {
6965 return (__m512h)
6966 __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
6967 (__v32hf) __D,
6968 (__v32hf) __A,
6969 __B, _MM_FROUND_CUR_DIRECTION);
6970 }
6971
6972 extern __inline __m512h
6973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6974 _mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
6975 {
6976 return (__m512h)
6977 __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
6978 (__v32hf) __C,
6979 _mm512_setzero_ph (),
6980 __A, _MM_FROUND_CUR_DIRECTION);
6981 }
6982
6983 #ifdef __OPTIMIZE__
6984 extern __inline __m512h
6985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6986 _mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D)
6987 {
6988 return (__m512h)
6989 __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
6990 (__v32hf) __B, __D);
6991 }
6992
6993 extern __inline __m512h
6994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6995 _mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6996 __m512h __D, const int __E)
6997 {
6998 return (__m512h)
6999 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
7000 (__v32hf) __D,
7001 (__v32hf) __A,
7002 __B, __E);
7003 }
7004
7005 extern __inline __m512h
7006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7007 _mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B,
7008 __m512h __C, const int __E)
7009 {
7010 return (__m512h)
7011 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
7012 (__v32hf) __C,
7013 _mm512_setzero_ph (),
7014 __A, __E);
7015 }
7016
7017 extern __inline __m512h
7018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7019 _mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D)
7020 {
7021 return (__m512h)
7022 __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
7023 (__v32hf) __B,
7024 __D);
7025 }
7026
7027 extern __inline __m512h
7028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7029 _mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
7030 __m512h __D, const int __E)
7031 {
7032 return (__m512h)
7033 __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
7034 (__v32hf) __D,
7035 (__v32hf) __A,
7036 __B, __E);
7037 }
7038
7039 extern __inline __m512h
7040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7041 _mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B,
7042 __m512h __C, const int __E)
7043 {
7044 return (__m512h)
7045 __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
7046 (__v32hf) __C,
7047 _mm512_setzero_ph (),
7048 __A, __E);
7049 }
7050
7051 #else
7052 #define _mm512_fcmul_round_pch(A, B, D) \
7053 (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D))
7054
7055 #define _mm512_mask_fcmul_round_pch(A, B, C, D, E) \
7056 (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E))
7057
7058 #define _mm512_maskz_fcmul_round_pch(A, B, C, E) \
7059 (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C), \
7060 (__v32hf) \
7061 _mm512_setzero_ph (), \
7062 (A), (E))
7063
7064 #define _mm512_fmul_round_pch(A, B, D) \
7065 (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D))
7066
7067 #define _mm512_mask_fmul_round_pch(A, B, C, D, E) \
7068 (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E))
7069
7070 #define _mm512_maskz_fmul_round_pch(A, B, C, E) \
7071 (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C), \
7072 (__v32hf) \
7073 _mm512_setzero_ph (), \
7074 (A), (E))
7075
7076 #endif /* __OPTIMIZE__ */
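
/* Illustrative sketch (hypothetical helper): complex multiplication of
   16 pairs of _Float16 values; the fcmul variant conjugates one
   multiplicand, as used in dot products of the form sum a[i]*conj(b[i]).
   Note that the maskz forms above pass _mm512_setzero_ph () as the
   merge source, so masked-off pairs come back as zero.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_complex_mul (__m512h __a, __m512h __b)
{
  return _mm512_fmul_pch (__a, __b);
}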
7077
7078 #define _MM512_REDUCE_OP(op) \
7079 __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
7080 __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
7081 __m256h __T3 = (__T1 op __T2); \
7082 __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
7083 __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
7084 __m128h __T6 = (__T4 op __T5); \
7085 __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
7086 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
7087 __m128h __T8 = (__T6 op __T7); \
7088 __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
7089 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \
7090 __m128h __T10 = __T8 op __T9; \
7091 return __T10[0] op __T10[1]
7092
7093 // TODO reduce
7094 extern __inline _Float16
7095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7096 _mm512_reduce_add_ph (__m512h __A)
7097 {
7098 _MM512_REDUCE_OP (+);
7099 }
7100
7101 extern __inline _Float16
7102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7103 _mm512_reduce_mul_ph (__m512h __A)
7104 {
7105 _MM512_REDUCE_OP (*);
7106 }
7107
7108 #undef _MM512_REDUCE_OP
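
/* Illustrative sketch (hypothetical helper): the reductions above fold
   all 32 lanes into a single _Float16.  The partial sums are formed
   pairwise and in half precision, so the result can differ slightly
   from a sequential scalar accumulation.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_horizontal_sum (__m512h __v)
{
  return _mm512_reduce_add_ph (__v);
}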
7109
7110 #ifdef __AVX512VL__
7111
7112 #define _MM512_REDUCE_OP(op) \
7113 __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
7114 __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
7115 __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2, \
7116 _mm256_setzero_ph (), (__mmask16) -1); \
7117 __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
7118 __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
7119 __m128h __T6 = __builtin_ia32_##op##ph128_mask \
7120 (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1); \
7121 __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
7122 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
7123 __m128h __T8 = (__m128h) __builtin_ia32_##op##ph128_mask \
7124 (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1); \
7125 __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
7126 (__v8hi) { 4, 5 }); \
7127 __m128h __T10 = __builtin_ia32_##op##ph128_mask \
7128 (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1); \
7129 __m128h __T11 = (__m128h) __builtin_shuffle (__T10, \
7130 (__v8hi) { 1, 0 }); \
7131 __m128h __T12 = __builtin_ia32_##op##ph128_mask \
7132 (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1); \
7133 return __T12[0]
7134
7135 #else
7136
7137 #define _MM512_REDUCE_OP(op) \
7138 __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A, \
7139 (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 }); \
7140 __m512h __T2 = _mm512_##op##_ph (__A, __T1); \
7141 __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2, \
7142 (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 }); \
7143 __m512h __T4 = _mm512_##op##_ph (__T2, __T3); \
7144 __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4, \
7145 (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 }); \
7146 __m512h __T6 = _mm512_##op##_ph (__T4, __T5); \
7147 __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6, \
7148 (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0, \
7149 0, 0, 0, 0, 0, 0, 0, 0 }); \
7150 __m512h __T8 = _mm512_##op##_ph (__T6, __T7); \
7151 __m512h __T9 = (__m512h) __builtin_shuffle (__T8, \
7152 (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0, \
7153 0, 0, 0, 0, 0, 0, 0, 0, \
7154 0, 0, 0, 0, 0, 0, 0, 0, \
7155 0, 0, 0, 0, 0, 0, 0, 0 }); \
7156 __m512h __T10 = _mm512_##op##_ph (__T8, __T9); \
7157 return __T10[0]
7158 #endif
7159
7160 extern __inline _Float16
7161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7162 _mm512_reduce_min_ph (__m512h __A)
7163 {
7164 _MM512_REDUCE_OP (min);
7165 }
7166
7167 extern __inline _Float16
7168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7169 _mm512_reduce_max_ph (__m512h __A)
7170 {
7171 _MM512_REDUCE_OP (max);
7172 }
7173
7174 #undef _MM512_REDUCE_OP
7175
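/* Blend: take element i from __W when bit i of __U is set, otherwise
   from __A.  */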
7176 extern __inline __m512h
7177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7178 _mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W)
7179 {
7180 return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W,
7181 (__v32hi) __A,
7182 (__mmask32) __U);
7184 }
7185
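/* Gather elements from the concatenation of __A and __B: bits [4:0] of
   each 16-bit index in __I select the element, bit 5 selects between
   __A (0) and __B (1).  */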
7186 extern __inline __m512h
7187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7188 _mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B)
7189 {
7190 return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
7191 (__v32hi) __I,
7192 (__v32hi) __B,
7193 (__mmask32)-1);
7194 }
7195
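/* Permute the elements of __B, using the low five bits of each 16-bit
   index in __A to pick the source element.  */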
7196 extern __inline __m512h
7197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7198 _mm512_permutexvar_ph (__m512i __A, __m512h __B)
7199 {
7200 return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
7201 (__v32hi) __A,
7202 (__v32hi)
7203 (_mm512_setzero_ph ()),
7204 (__mmask32)-1);
7205 }
7206
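/* Broadcast one _Float16 complex value to all sixteen complex positions:
   the (real, imaginary) pair is reinterpreted as a single 32-bit float
   and broadcast with _mm512_set1_ps.  */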
7207 extern __inline __m512h
7208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7209 _mm512_set1_pch (_Float16 _Complex __A)
7210 {
7211 union
7212 {
7213 _Float16 _Complex __a;
7214 float __b;
7215 } __u = { .__a = __A};
7216
7217 return (__m512h) _mm512_set1_ps (__u.__b);
7218 }
7219
7220 /* The intrinsics below are aliases for the corresponding f*mul_*ch intrinsics.  */
7221 #define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B))
7222 #define _mm512_mask_mul_pch(W, U, A, B) \
7223 _mm512_mask_fmul_pch ((W), (U), (A), (B))
7224 #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
7225 #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
7226 #define _mm512_mask_mul_round_pch(W, U, A, B, R) \
7227 _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
7228 #define _mm512_maskz_mul_round_pch(U, A, B, R) \
7229 _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))
7230
7231 #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
7232 #define _mm512_mask_cmul_pch(W, U, A, B) \
7233 _mm512_mask_fcmul_pch ((W), (U), (A), (B))
7234 #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
7235 #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R))
7236 #define _mm512_mask_cmul_round_pch(W, U, A, B, R) \
7237 _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
7238 #define _mm512_maskz_cmul_round_pch(U, A, B, R) \
7239 _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
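
#if 0
/* Illustrative usage only, not part of the original header: each __m512h
   holds sixteen complex _Float16 values as (real, imaginary) pairs, so
   multiplying broadcasts of 2+0i and 3+0i gives 6+0i in every pair.  */
__m512h __x = _mm512_set1_pch ((_Float16) 2.0f);
__m512h __y = _mm512_set1_pch ((_Float16) 3.0f);
__m512h __xy = _mm512_mul_pch (__x, __y);
#endif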
7240
7241 #ifdef __DISABLE_AVX512FP16_512__
7242 #undef __DISABLE_AVX512FP16_512__
7243 #pragma GCC pop_options
7244 #endif /* __DISABLE_AVX512FP16_512__ */
7245
7246 #endif /* _AVX512FP16INTRIN_H_INCLUDED */