1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
36
37 /* Internal data types for implementing the intrinsics. */
38 typedef double __v8df __attribute__ ((__vector_size__ (64)));
39 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
40 typedef long long __v8di __attribute__ ((__vector_size__ (64)));
41 typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
42 typedef int __v16si __attribute__ ((__vector_size__ (64)));
43 typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
44 typedef short __v32hi __attribute__ ((__vector_size__ (64)));
45 typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
46 typedef char __v64qi __attribute__ ((__vector_size__ (64)));
47 typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
48
49 /* The Intel API is flexible enough that we must allow aliasing with other
50 vector types, and their scalar components. */
51 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
52 typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
53 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
54
55 /* Unaligned version of the same type. */
56 typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
57 typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
58 typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
60 typedef unsigned char __mmask8;
61 typedef unsigned short __mmask16;
62
63 extern __inline __mmask16
64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65 _mm512_int2mask (int __M)
66 {
67 return (__mmask16) __M;
68 }
69
70 extern __inline int
71 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72 _mm512_mask2int (__mmask16 __M)
73 {
74 return (int) __M;
75 }
76
77 extern __inline __m512i
78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 _mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
82 {
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
85 }
86
87 /* Create the vector [A B C D E F G H I J K L M N O P]. */
88 extern __inline __m512i
89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
94 {
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
98 }
99
100 extern __inline __m512d
101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
102 _mm512_set_pd (double __A, double __B, double __C, double __D,
103 double __E, double __F, double __G, double __H)
104 {
105 return __extension__ (__m512d)
106 { __H, __G, __F, __E, __D, __C, __B, __A };
107 }
108
109 extern __inline __m512
110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
111 _mm512_set_ps (float __A, float __B, float __C, float __D,
112 float __E, float __F, float __G, float __H,
113 float __I, float __J, float __K, float __L,
114 float __M, float __N, float __O, float __P)
115 {
116 return __extension__ (__m512)
117 { __P, __O, __N, __M, __L, __K, __J, __I,
118 __H, __G, __F, __E, __D, __C, __B, __A };
119 }
120
121 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
122 _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
123
124 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
125 e8,e9,e10,e11,e12,e13,e14,e15) \
126 _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
127
128 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
129 _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
130
131 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
132 _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
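/* The _mm512_set_* intrinsics take arguments from the highest element
   down to element 0, while the _mm512_setr_* forms take them in memory
   (reversed) order.  As an illustrative sketch, assuming <immintrin.h>
   has been included, the two calls below build the same vector, with 0
   in element 0:

     __m512i a = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
                                   7, 6, 5, 4, 3, 2, 1, 0);
     __m512i b = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                    8, 9, 10, 11, 12, 13, 14, 15);  */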
133
134 extern __inline __m512
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm512_undefined_ps (void)
137 {
138 __m512 __Y = __Y;
139 return __Y;
140 }
141
142 #define _mm512_undefined _mm512_undefined_ps
143
144 extern __inline __m512d
145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146 _mm512_undefined_pd (void)
147 {
148 __m512d __Y = __Y;
149 return __Y;
150 }
151
152 extern __inline __m512i
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 _mm512_undefined_epi32 (void)
155 {
156 __m512i __Y = __Y;
157 return __Y;
158 }
159
160 #define _mm512_undefined_si512 _mm512_undefined_epi32
161
162 extern __inline __m512i
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm512_set1_epi8 (char __A)
165 {
166 return __extension__ (__m512i)(__v64qi)
167 { __A, __A, __A, __A, __A, __A, __A, __A,
168 __A, __A, __A, __A, __A, __A, __A, __A,
169 __A, __A, __A, __A, __A, __A, __A, __A,
170 __A, __A, __A, __A, __A, __A, __A, __A,
171 __A, __A, __A, __A, __A, __A, __A, __A,
172 __A, __A, __A, __A, __A, __A, __A, __A,
173 __A, __A, __A, __A, __A, __A, __A, __A,
174 __A, __A, __A, __A, __A, __A, __A, __A };
175 }
176
177 extern __inline __m512i
178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179 _mm512_set1_epi16 (short __A)
180 {
181 return __extension__ (__m512i)(__v32hi)
182 { __A, __A, __A, __A, __A, __A, __A, __A,
183 __A, __A, __A, __A, __A, __A, __A, __A,
184 __A, __A, __A, __A, __A, __A, __A, __A,
185 __A, __A, __A, __A, __A, __A, __A, __A };
186 }
187
188 extern __inline __m512d
189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190 _mm512_set1_pd (double __A)
191 {
192 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
193 (__v2df) { __A, },
194 (__v8df)
195 _mm512_undefined_pd (),
196 (__mmask8) -1);
197 }
198
199 extern __inline __m512
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm512_set1_ps (float __A)
202 {
203 return (__m512) __builtin_ia32_broadcastss512 (__extension__
204 (__v4sf) { __A, },
205 (__v16sf)
206 _mm512_undefined_ps (),
207 (__mmask16) -1);
208 }
209
210 /* Create the vector [A B C D A B C D A B C D A B C D]. */
211 extern __inline __m512i
212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
214 {
215 return __extension__ (__m512i)(__v16si)
216 { __D, __C, __B, __A, __D, __C, __B, __A,
217 __D, __C, __B, __A, __D, __C, __B, __A };
218 }
219
220 extern __inline __m512i
221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
222 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
223 long long __D)
224 {
225 return __extension__ (__m512i) (__v8di)
226 { __D, __C, __B, __A, __D, __C, __B, __A };
227 }
228
229 extern __inline __m512d
230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
231 _mm512_set4_pd (double __A, double __B, double __C, double __D)
232 {
233 return __extension__ (__m512d)
234 { __D, __C, __B, __A, __D, __C, __B, __A };
235 }
236
237 extern __inline __m512
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm512_set4_ps (float __A, float __B, float __C, float __D)
240 {
241 return __extension__ (__m512)
242 { __D, __C, __B, __A, __D, __C, __B, __A,
243 __D, __C, __B, __A, __D, __C, __B, __A };
244 }
245
246 #define _mm512_setr4_epi64(e0,e1,e2,e3) \
247 _mm512_set4_epi64(e3,e2,e1,e0)
248
249 #define _mm512_setr4_epi32(e0,e1,e2,e3) \
250 _mm512_set4_epi32(e3,e2,e1,e0)
251
252 #define _mm512_setr4_pd(e0,e1,e2,e3) \
253 _mm512_set4_pd(e3,e2,e1,e0)
254
255 #define _mm512_setr4_ps(e0,e1,e2,e3) \
256 _mm512_set4_ps(e3,e2,e1,e0)
257
258 extern __inline __m512
259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
260 _mm512_setzero_ps (void)
261 {
262 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
263 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
264 }
265
266 extern __inline __m512d
267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268 _mm512_setzero_pd (void)
269 {
270 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
271 }
272
273 extern __inline __m512i
274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275 _mm512_setzero_epi32 (void)
276 {
277 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
278 }
279
280 extern __inline __m512i
281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282 _mm512_setzero_si512 (void)
283 {
284 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
285 }
286
287 extern __inline __m512d
288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
290 {
291 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
292 (__v8df) __W,
293 (__mmask8) __U);
294 }
295
296 extern __inline __m512d
297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
299 {
300 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
301 (__v8df)
302 _mm512_setzero_pd (),
303 (__mmask8) __U);
304 }
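/* In the _mm512_mask_* forms each element of the result comes from __A
   where the corresponding mask bit is set and from __W where it is
   clear; the _mm512_maskz_* forms use zero instead of __W.  A minimal
   sketch, assuming <immintrin.h> is included:

     __m512d src = _mm512_set1_pd (1.0);
     __m512d old = _mm512_set1_pd (2.0);
     __m512d merged = _mm512_mask_mov_pd (old, 0x0f, src);
     __m512d zeroed = _mm512_maskz_mov_pd (0x0f, src);

   merged holds 1.0 in elements 0-3 and 2.0 in elements 4-7; zeroed
   holds 1.0 in elements 0-3 and 0.0 elsewhere.  */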
305
306 extern __inline __m512
307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
309 {
310 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
311 (__v16sf) __W,
312 (__mmask16) __U);
313 }
314
315 extern __inline __m512
316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
317 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
318 {
319 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
320 (__v16sf)
321 _mm512_setzero_ps (),
322 (__mmask16) __U);
323 }
324
325 extern __inline __m512d
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm512_load_pd (void const *__P)
328 {
329 return *(__m512d *) __P;
330 }
331
332 extern __inline __m512d
333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
335 {
336 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
337 (__v8df) __W,
338 (__mmask8) __U);
339 }
340
341 extern __inline __m512d
342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
344 {
345 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
346 (__v8df)
347 _mm512_setzero_pd (),
348 (__mmask8) __U);
349 }
350
351 extern __inline void
352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353 _mm512_store_pd (void *__P, __m512d __A)
354 {
355 *(__m512d *) __P = __A;
356 }
357
358 extern __inline void
359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
361 {
362 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
363 (__mmask8) __U);
364 }
365
366 extern __inline __m512
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 _mm512_load_ps (void const *__P)
369 {
370 return *(__m512 *) __P;
371 }
372
373 extern __inline __m512
374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
376 {
377 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
378 (__v16sf) __W,
379 (__mmask16) __U);
380 }
381
382 extern __inline __m512
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
385 {
386 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
387 (__v16sf)
388 _mm512_setzero_ps (),
389 (__mmask16) __U);
390 }
391
392 extern __inline void
393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
394 _mm512_store_ps (void *__P, __m512 __A)
395 {
396 *(__m512 *) __P = __A;
397 }
398
399 extern __inline void
400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
401 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
402 {
403 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
404 (__mmask16) __U);
405 }
406
407 extern __inline __m512i
408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
409 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
410 {
411 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
412 (__v8di) __W,
413 (__mmask8) __U);
414 }
415
416 extern __inline __m512i
417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
419 {
420 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
421 (__v8di)
422 _mm512_setzero_si512 (),
423 (__mmask8) __U);
424 }
425
426 extern __inline __m512i
427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428 _mm512_load_epi64 (void const *__P)
429 {
430 return *(__m512i *) __P;
431 }
432
433 extern __inline __m512i
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
436 {
437 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
438 (__v8di) __W,
439 (__mmask8) __U);
440 }
441
442 extern __inline __m512i
443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
445 {
446 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
447 (__v8di)
448 _mm512_setzero_si512 (),
449 (__mmask8) __U);
450 }
451
452 extern __inline void
453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454 _mm512_store_epi64 (void *__P, __m512i __A)
455 {
456 *(__m512i *) __P = __A;
457 }
458
459 extern __inline void
460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
462 {
463 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
464 (__mmask8) __U);
465 }
466
467 extern __inline __m512i
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
470 {
471 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
472 (__v16si) __W,
473 (__mmask16) __U);
474 }
475
476 extern __inline __m512i
477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
479 {
480 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
481 (__v16si)
482 _mm512_setzero_si512 (),
483 (__mmask16) __U);
484 }
485
486 extern __inline __m512i
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_load_si512 (void const *__P)
489 {
490 return *(__m512i *) __P;
491 }
492
493 extern __inline __m512i
494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495 _mm512_load_epi32 (void const *__P)
496 {
497 return *(__m512i *) __P;
498 }
499
500 extern __inline __m512i
501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
503 {
504 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
505 (__v16si) __W,
506 (__mmask16) __U);
507 }
508
509 extern __inline __m512i
510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
512 {
513 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
514 (__v16si)
515 _mm512_setzero_si512 (),
516 (__mmask16) __U);
517 }
518
519 extern __inline void
520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521 _mm512_store_si512 (void *__P, __m512i __A)
522 {
523 *(__m512i *) __P = __A;
524 }
525
526 extern __inline void
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 _mm512_store_epi32 (void *__P, __m512i __A)
529 {
530 *(__m512i *) __P = __A;
531 }
532
533 extern __inline void
534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
536 {
537 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
538 (__mmask16) __U);
539 }
540
541 extern __inline __m512i
542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
544 {
545 return (__m512i) ((__v16su) __A * (__v16su) __B);
546 }
547
548 extern __inline __m512i
549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
551 {
552 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
553 (__v16si) __B,
554 (__v16si)
555 _mm512_setzero_si512 (),
556 __M);
557 }
558
559 extern __inline __m512i
560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
562 {
563 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
564 (__v16si) __B,
565 (__v16si) __W, __M);
566 }
567
568 extern __inline __m512i
569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
571 {
572 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
573 (__v16si) __Y,
574 (__v16si)
575 _mm512_undefined_epi32 (),
576 (__mmask16) -1);
577 }
578
579 extern __inline __m512i
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
582 {
583 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
584 (__v16si) __Y,
585 (__v16si) __W,
586 (__mmask16) __U);
587 }
588
589 extern __inline __m512i
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
592 {
593 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
594 (__v16si) __Y,
595 (__v16si)
596 _mm512_setzero_si512 (),
597 (__mmask16) __U);
598 }
599
600 extern __inline __m512i
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
603 {
604 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
605 (__v16si) __Y,
606 (__v16si)
607 _mm512_undefined_epi32 (),
608 (__mmask16) -1);
609 }
610
611 extern __inline __m512i
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
614 {
615 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
616 (__v16si) __Y,
617 (__v16si) __W,
618 (__mmask16) __U);
619 }
620
621 extern __inline __m512i
622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
624 {
625 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
626 (__v16si) __Y,
627 (__v16si)
628 _mm512_setzero_si512 (),
629 (__mmask16) __U);
630 }
631
632 extern __inline __m512i
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
635 {
636 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
637 (__v16si) __Y,
638 (__v16si)
639 _mm512_undefined_epi32 (),
640 (__mmask16) -1);
641 }
642
643 extern __inline __m512i
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
646 {
647 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
648 (__v16si) __Y,
649 (__v16si) __W,
650 (__mmask16) __U);
651 }
652
653 extern __inline __m512i
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
656 {
657 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
658 (__v16si) __Y,
659 (__v16si)
660 _mm512_setzero_si512 (),
661 (__mmask16) __U);
662 }
663
664 extern __inline __m512i
665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666 _mm512_add_epi64 (__m512i __A, __m512i __B)
667 {
668 return (__m512i) ((__v8du) __A + (__v8du) __B);
669 }
670
671 extern __inline __m512i
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
674 {
675 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
676 (__v8di) __B,
677 (__v8di) __W,
678 (__mmask8) __U);
679 }
680
681 extern __inline __m512i
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
684 {
685 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
686 (__v8di) __B,
687 (__v8di)
688 _mm512_setzero_si512 (),
689 (__mmask8) __U);
690 }
691
692 extern __inline __m512i
693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694 _mm512_sub_epi64 (__m512i __A, __m512i __B)
695 {
696 return (__m512i) ((__v8du) __A - (__v8du) __B);
697 }
698
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702 {
703 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
704 (__v8di) __B,
705 (__v8di) __W,
706 (__mmask8) __U);
707 }
708
709 extern __inline __m512i
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
712 {
713 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
714 (__v8di) __B,
715 (__v8di)
716 _mm512_setzero_si512 (),
717 (__mmask8) __U);
718 }
719
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
723 {
724 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
725 (__v8di) __Y,
726 (__v8di)
727 _mm512_undefined_epi32 (),
728 (__mmask8) -1);
729 }
730
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
734 {
735 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
736 (__v8di) __Y,
737 (__v8di) __W,
738 (__mmask8) __U);
739 }
740
741 extern __inline __m512i
742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
744 {
745 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
746 (__v8di) __Y,
747 (__v8di)
748 _mm512_setzero_si512 (),
749 (__mmask8) __U);
750 }
751
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
755 {
756 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
757 (__v8di) __Y,
758 (__v8di)
759 _mm512_undefined_epi32 (),
760 (__mmask8) -1);
761 }
762
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
766 {
767 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
768 (__v8di) __Y,
769 (__v8di) __W,
770 (__mmask8) __U);
771 }
772
773 extern __inline __m512i
774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
776 {
777 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
778 (__v8di) __Y,
779 (__v8di)
780 _mm512_setzero_si512 (),
781 (__mmask8) __U);
782 }
783
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
787 {
788 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
789 (__v8di) __Y,
790 (__v8di)
791 _mm512_undefined_epi32 (),
792 (__mmask8) -1);
793 }
794
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
798 {
799 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
800 (__v8di) __Y,
801 (__v8di) __W,
802 (__mmask8) __U);
803 }
804
805 extern __inline __m512i
806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
808 {
809 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
810 (__v8di) __Y,
811 (__v8di)
812 _mm512_setzero_si512 (),
813 (__mmask8) __U);
814 }
815
816 extern __inline __m512i
817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818 _mm512_add_epi32 (__m512i __A, __m512i __B)
819 {
820 return (__m512i) ((__v16su) __A + (__v16su) __B);
821 }
822
823 extern __inline __m512i
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
826 {
827 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
828 (__v16si) __B,
829 (__v16si) __W,
830 (__mmask16) __U);
831 }
832
833 extern __inline __m512i
834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
836 {
837 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
838 (__v16si) __B,
839 (__v16si)
840 _mm512_setzero_si512 (),
841 (__mmask16) __U);
842 }
843
844 extern __inline __m512i
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
847 {
848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
849 (__v16si) __Y,
850 (__v8di)
851 _mm512_undefined_epi32 (),
852 (__mmask8) -1);
853 }
854
855 extern __inline __m512i
856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
858 {
859 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
860 (__v16si) __Y,
861 (__v8di) __W, __M);
862 }
863
864 extern __inline __m512i
865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
867 {
868 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
869 (__v16si) __Y,
870 (__v8di)
871 _mm512_setzero_si512 (),
872 __M);
873 }
874
875 extern __inline __m512i
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877 _mm512_sub_epi32 (__m512i __A, __m512i __B)
878 {
879 return (__m512i) ((__v16su) __A - (__v16su) __B);
880 }
881
882 extern __inline __m512i
883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
885 {
886 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
887 (__v16si) __B,
888 (__v16si) __W,
889 (__mmask16) __U);
890 }
891
892 extern __inline __m512i
893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
895 {
896 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
897 (__v16si) __B,
898 (__v16si)
899 _mm512_setzero_si512 (),
900 (__mmask16) __U);
901 }
902
903 extern __inline __m512i
904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
905 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
906 {
907 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
908 (__v16si) __Y,
909 (__v8di)
910 _mm512_undefined_epi32 (),
911 (__mmask8) -1);
912 }
913
914 extern __inline __m512i
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
917 {
918 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
919 (__v16si) __Y,
920 (__v8di) __W, __M);
921 }
922
923 extern __inline __m512i
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
926 {
927 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
928 (__v16si) __Y,
929 (__v8di)
930 _mm512_setzero_si512 (),
931 __M);
932 }
933
934 #ifdef __OPTIMIZE__
935 extern __inline __m512i
936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
938 {
939 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
940 (__v8di)
941 _mm512_undefined_epi32 (),
942 (__mmask8) -1);
943 }
944
945 extern __inline __m512i
946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
948 unsigned int __B)
949 {
950 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
951 (__v8di) __W,
952 (__mmask8) __U);
953 }
954
955 extern __inline __m512i
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
958 {
959 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
960 (__v8di)
961 _mm512_setzero_si512 (),
962 (__mmask8) __U);
963 }
964 #else
965 #define _mm512_slli_epi64(X, C) \
966 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
967 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
968 (__mmask8)-1))
969
970 #define _mm512_mask_slli_epi64(W, U, X, C) \
971 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
972 (__v8di)(__m512i)(W),\
973 (__mmask8)(U)))
974
975 #define _mm512_maskz_slli_epi64(U, X, C) \
976 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
977 (__v8di)(__m512i)_mm512_setzero_si512 (),\
978 (__mmask8)(U)))
979 #endif
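/* As with the other immediate-count intrinsics in this file, the
   function forms are only used under __OPTIMIZE__; otherwise the macro
   forms above pass the shift count through as an integer constant
   expression.  An illustrative use, assuming <immintrin.h> is included:

     __m512i v = _mm512_set1_epi64 (1);
     __m512i s = _mm512_slli_epi64 (v, 3);

   leaves eight copies of 8 in s.  */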
980
981 extern __inline __m512i
982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983 _mm512_sll_epi64 (__m512i __A, __m128i __B)
984 {
985 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
986 (__v2di) __B,
987 (__v8di)
988 _mm512_undefined_epi32 (),
989 (__mmask8) -1);
990 }
991
992 extern __inline __m512i
993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
995 {
996 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
997 (__v2di) __B,
998 (__v8di) __W,
999 (__mmask8) __U);
1000 }
1001
1002 extern __inline __m512i
1003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1004 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1005 {
1006 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1007 (__v2di) __B,
1008 (__v8di)
1009 _mm512_setzero_si512 (),
1010 (__mmask8) __U);
1011 }
1012
1013 #ifdef __OPTIMIZE__
1014 extern __inline __m512i
1015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
1017 {
1018 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1019 (__v8di)
1020 _mm512_undefined_epi32 (),
1021 (__mmask8) -1);
1022 }
1023
1024 extern __inline __m512i
1025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1027 __m512i __A, unsigned int __B)
1028 {
1029 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1030 (__v8di) __W,
1031 (__mmask8) __U);
1032 }
1033
1034 extern __inline __m512i
1035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1037 {
1038 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1039 (__v8di)
1040 _mm512_setzero_si512 (),
1041 (__mmask8) __U);
1042 }
1043 #else
1044 #define _mm512_srli_epi64(X, C) \
1045 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1046 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
1047 (__mmask8)-1))
1048
1049 #define _mm512_mask_srli_epi64(W, U, X, C) \
1050 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1051 (__v8di)(__m512i)(W),\
1052 (__mmask8)(U)))
1053
1054 #define _mm512_maskz_srli_epi64(U, X, C) \
1055 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1056 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1057 (__mmask8)(U)))
1058 #endif
1059
1060 extern __inline __m512i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1063 {
1064 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1065 (__v2di) __B,
1066 (__v8di)
1067 _mm512_undefined_epi32 (),
1068 (__mmask8) -1);
1069 }
1070
1071 extern __inline __m512i
1072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1074 {
1075 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1076 (__v2di) __B,
1077 (__v8di) __W,
1078 (__mmask8) __U);
1079 }
1080
1081 extern __inline __m512i
1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1084 {
1085 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1086 (__v2di) __B,
1087 (__v8di)
1088 _mm512_setzero_si512 (),
1089 (__mmask8) __U);
1090 }
1091
1092 #ifdef __OPTIMIZE__
1093 extern __inline __m512i
1094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1095 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1096 {
1097 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1098 (__v8di)
1099 _mm512_undefined_epi32 (),
1100 (__mmask8) -1);
1101 }
1102
1103 extern __inline __m512i
1104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1105 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1106 unsigned int __B)
1107 {
1108 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1109 (__v8di) __W,
1110 (__mmask8) __U);
1111 }
1112
1113 extern __inline __m512i
1114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1115 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1116 {
1117 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1118 (__v8di)
1119 _mm512_setzero_si512 (),
1120 (__mmask8) __U);
1121 }
1122 #else
1123 #define _mm512_srai_epi64(X, C) \
1124 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1125 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
1126 (__mmask8)-1))
1127
1128 #define _mm512_mask_srai_epi64(W, U, X, C) \
1129 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1130 (__v8di)(__m512i)(W),\
1131 (__mmask8)(U)))
1132
1133 #define _mm512_maskz_srai_epi64(U, X, C) \
1134 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1135 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1136 (__mmask8)(U)))
1137 #endif
1138
1139 extern __inline __m512i
1140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1142 {
1143 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1144 (__v2di) __B,
1145 (__v8di)
1146 _mm512_undefined_epi32 (),
1147 (__mmask8) -1);
1148 }
1149
1150 extern __inline __m512i
1151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1152 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1153 {
1154 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1155 (__v2di) __B,
1156 (__v8di) __W,
1157 (__mmask8) __U);
1158 }
1159
1160 extern __inline __m512i
1161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1162 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1163 {
1164 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1165 (__v2di) __B,
1166 (__v8di)
1167 _mm512_setzero_si512 (),
1168 (__mmask8) __U);
1169 }
1170
1171 #ifdef __OPTIMIZE__
1172 extern __inline __m512i
1173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1174 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1175 {
1176 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1177 (__v16si)
1178 _mm512_undefined_epi32 (),
1179 (__mmask16) -1);
1180 }
1181
1182 extern __inline __m512i
1183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1184 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1185 unsigned int __B)
1186 {
1187 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1188 (__v16si) __W,
1189 (__mmask16) __U);
1190 }
1191
1192 extern __inline __m512i
1193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1194 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1195 {
1196 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1197 (__v16si)
1198 _mm512_setzero_si512 (),
1199 (__mmask16) __U);
1200 }
1201 #else
1202 #define _mm512_slli_epi32(X, C) \
1203 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1204 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
1205 (__mmask16)-1))
1206
1207 #define _mm512_mask_slli_epi32(W, U, X, C) \
1208 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1209 (__v16si)(__m512i)(W),\
1210 (__mmask16)(U)))
1211
1212 #define _mm512_maskz_slli_epi32(U, X, C) \
1213 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1214 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1215 (__mmask16)(U)))
1216 #endif
1217
1218 extern __inline __m512i
1219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1221 {
1222 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1223 (__v4si) __B,
1224 (__v16si)
1225 _mm512_undefined_epi32 (),
1226 (__mmask16) -1);
1227 }
1228
1229 extern __inline __m512i
1230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1232 {
1233 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1234 (__v4si) __B,
1235 (__v16si) __W,
1236 (__mmask16) __U);
1237 }
1238
1239 extern __inline __m512i
1240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1241 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1242 {
1243 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1244 (__v4si) __B,
1245 (__v16si)
1246 _mm512_setzero_si512 (),
1247 (__mmask16) __U);
1248 }
1249
1250 #ifdef __OPTIMIZE__
1251 extern __inline __m512i
1252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1253 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1254 {
1255 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1256 (__v16si)
1257 _mm512_undefined_epi32 (),
1258 (__mmask16) -1);
1259 }
1260
1261 extern __inline __m512i
1262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1264 __m512i __A, unsigned int __B)
1265 {
1266 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1267 (__v16si) __W,
1268 (__mmask16) __U);
1269 }
1270
1271 extern __inline __m512i
1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1274 {
1275 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1276 (__v16si)
1277 _mm512_setzero_si512 (),
1278 (__mmask16) __U);
1279 }
1280 #else
1281 #define _mm512_srli_epi32(X, C) \
1282 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1283 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
1284 (__mmask16)-1))
1285
1286 #define _mm512_mask_srli_epi32(W, U, X, C) \
1287 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1288 (__v16si)(__m512i)(W),\
1289 (__mmask16)(U)))
1290
1291 #define _mm512_maskz_srli_epi32(U, X, C) \
1292 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1293 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1294 (__mmask16)(U)))
1295 #endif
1296
1297 extern __inline __m512i
1298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1300 {
1301 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1302 (__v4si) __B,
1303 (__v16si)
1304 _mm512_undefined_epi32 (),
1305 (__mmask16) -1);
1306 }
1307
1308 extern __inline __m512i
1309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1311 {
1312 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1313 (__v4si) __B,
1314 (__v16si) __W,
1315 (__mmask16) __U);
1316 }
1317
1318 extern __inline __m512i
1319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1320 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1321 {
1322 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1323 (__v4si) __B,
1324 (__v16si)
1325 _mm512_setzero_si512 (),
1326 (__mmask16) __U);
1327 }
1328
1329 #ifdef __OPTIMIZE__
1330 extern __inline __m512i
1331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1332 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1333 {
1334 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1335 (__v16si)
1336 _mm512_undefined_epi32 (),
1337 (__mmask16) -1);
1338 }
1339
1340 extern __inline __m512i
1341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1342 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1343 unsigned int __B)
1344 {
1345 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1346 (__v16si) __W,
1347 (__mmask16) __U);
1348 }
1349
1350 extern __inline __m512i
1351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1353 {
1354 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1355 (__v16si)
1356 _mm512_setzero_si512 (),
1357 (__mmask16) __U);
1358 }
1359 #else
1360 #define _mm512_srai_epi32(X, C) \
1361 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1362 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
1363 (__mmask16)-1))
1364
1365 #define _mm512_mask_srai_epi32(W, U, X, C) \
1366 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1367 (__v16si)(__m512i)(W),\
1368 (__mmask16)(U)))
1369
1370 #define _mm512_maskz_srai_epi32(U, X, C) \
1371 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1372 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1373 (__mmask16)(U)))
1374 #endif
1375
1376 extern __inline __m512i
1377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1379 {
1380 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1381 (__v4si) __B,
1382 (__v16si)
1383 _mm512_undefined_epi32 (),
1384 (__mmask16) -1);
1385 }
1386
1387 extern __inline __m512i
1388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1390 {
1391 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1392 (__v4si) __B,
1393 (__v16si) __W,
1394 (__mmask16) __U);
1395 }
1396
1397 extern __inline __m512i
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1400 {
1401 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1402 (__v4si) __B,
1403 (__v16si)
1404 _mm512_setzero_si512 (),
1405 (__mmask16) __U);
1406 }
1407
1408 #ifdef __OPTIMIZE__
1409 extern __inline __m128d
1410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1411 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1412 {
1413 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1414 (__v2df) __B,
1415 __R);
1416 }
1417
1418 extern __inline __m128d
1419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1420 _mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1421 __m128d __B, const int __R)
1422 {
1423 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1424 (__v2df) __B,
1425 (__v2df) __W,
1426 (__mmask8) __U, __R);
1427 }
1428
1429 extern __inline __m128d
1430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1431 _mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1432 const int __R)
1433 {
1434 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1435 (__v2df) __B,
1436 (__v2df)
1437 _mm_setzero_pd (),
1438 (__mmask8) __U, __R);
1439 }
1440
1441 extern __inline __m128
1442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1443 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1444 {
1445 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1446 (__v4sf) __B,
1447 __R);
1448 }
1449
1450 extern __inline __m128
1451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1452 _mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1453 __m128 __B, const int __R)
1454 {
1455 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1456 (__v4sf) __B,
1457 (__v4sf) __W,
1458 (__mmask8) __U, __R);
1459 }
1460
1461 extern __inline __m128
1462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1463 _mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1464 const int __R)
1465 {
1466 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1467 (__v4sf) __B,
1468 (__v4sf)
1469 _mm_setzero_ps (),
1470 (__mmask8) __U, __R);
1471 }
1472
1473 extern __inline __m128d
1474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1475 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1476 {
1477 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1478 (__v2df) __B,
1479 __R);
1480 }
1481
1482 extern __inline __m128d
1483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484 _mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1485 __m128d __B, const int __R)
1486 {
1487 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1488 (__v2df) __B,
1489 (__v2df) __W,
1490 (__mmask8) __U, __R);
1491 }
1492
1493 extern __inline __m128d
1494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1495 _mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1496 const int __R)
1497 {
1498 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1499 (__v2df) __B,
1500 (__v2df)
1501 _mm_setzero_pd (),
1502 (__mmask8) __U, __R);
1503 }
1504
1505 extern __inline __m128
1506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1507 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1508 {
1509 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1510 (__v4sf) __B,
1511 __R);
1512 }
1513
1514 extern __inline __m128
1515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516 _mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1517 __m128 __B, const int __R)
1518 {
1519 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1520 (__v4sf) __B,
1521 (__v4sf) __W,
1522 (__mmask8) __U, __R);
1523 }
1524
1525 extern __inline __m128
1526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1527 _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1528 const int __R)
1529 {
1530 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1531 (__v4sf) __B,
1532 (__v4sf)
1533 _mm_setzero_ps (),
1534 (__mmask8) __U, __R);
1535 }
1536
1537 #else
1538 #define _mm_add_round_sd(A, B, C) \
1539 (__m128d)__builtin_ia32_addsd_round(A, B, C)
1540
1541 #define _mm_mask_add_round_sd(W, U, A, B, C) \
1542 (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)
1543
1544 #define _mm_maskz_add_round_sd(U, A, B, C) \
1545 (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
1546
1547 #define _mm_add_round_ss(A, B, C) \
1548 (__m128)__builtin_ia32_addss_round(A, B, C)
1549
1550 #define _mm_mask_add_round_ss(W, U, A, B, C) \
1551 (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)
1552
1553 #define _mm_maskz_add_round_ss(U, A, B, C) \
1554 (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
1555
1556 #define _mm_sub_round_sd(A, B, C) \
1557 (__m128d)__builtin_ia32_subsd_round(A, B, C)
1558
1559 #define _mm_mask_sub_round_sd(W, U, A, B, C) \
1560 (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)
1561
1562 #define _mm_maskz_sub_round_sd(U, A, B, C) \
1563 (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
1564
1565 #define _mm_sub_round_ss(A, B, C) \
1566 (__m128)__builtin_ia32_subss_round(A, B, C)
1567
1568 #define _mm_mask_sub_round_ss(W, U, A, B, C) \
1569 (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)
1570
1571 #define _mm_maskz_sub_round_ss(U, A, B, C) \
1572 (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
1573
1574 #endif
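/* The rounding argument of the *_round_* intrinsics must be a
   compile-time constant built from the _MM_FROUND_* flags defined in
   <smmintrin.h>, which <immintrin.h> pulls in.  An illustrative call,
   adding the low doubles with round-to-nearest and exceptions
   suppressed:

     __m128d a = _mm_set_sd (1.5);
     __m128d b = _mm_set_sd (2.25);
     __m128d r = _mm_add_round_sd (a, b,
                                   _MM_FROUND_TO_NEAREST_INT
                                   | _MM_FROUND_NO_EXC);  */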
1575
1576 #ifdef __OPTIMIZE__
1577 extern __inline __m512i
1578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1579 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1580 const int __imm)
1581 {
1582 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1583 (__v8di) __B,
1584 (__v8di) __C, __imm,
1585 (__mmask8) -1);
1586 }
1587
1588 extern __inline __m512i
1589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1591 __m512i __C, const int __imm)
1592 {
1593 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1594 (__v8di) __B,
1595 (__v8di) __C, __imm,
1596 (__mmask8) __U);
1597 }
1598
1599 extern __inline __m512i
1600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1602 __m512i __C, const int __imm)
1603 {
1604 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1605 (__v8di) __B,
1606 (__v8di) __C,
1607 __imm, (__mmask8) __U);
1608 }
1609
1610 extern __inline __m512i
1611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1612 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1613 const int __imm)
1614 {
1615 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1616 (__v16si) __B,
1617 (__v16si) __C,
1618 __imm, (__mmask16) -1);
1619 }
1620
1621 extern __inline __m512i
1622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1623 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1624 __m512i __C, const int __imm)
1625 {
1626 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1627 (__v16si) __B,
1628 (__v16si) __C,
1629 __imm, (__mmask16) __U);
1630 }
1631
1632 extern __inline __m512i
1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1635 __m512i __C, const int __imm)
1636 {
1637 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1638 (__v16si) __B,
1639 (__v16si) __C,
1640 __imm, (__mmask16) __U);
1641 }
1642 #else
1643 #define _mm512_ternarylogic_epi64(A, B, C, I) \
1644 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1645 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
1646 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
1647 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1648 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1649 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
1650 ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
1651 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1652 #define _mm512_ternarylogic_epi32(A, B, C, I) \
1653 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1654 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1655 (__mmask16)-1))
1656 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
1657 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1658 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1659 (__mmask16)(U)))
1660 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
1661 ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
1662 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1663 (__mmask16)(U)))
1664 #endif
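/* The immediate selects one of the 256 possible three-input boolean
   functions: bit ((a << 2) | (b << 1) | c) of the immediate gives the
   result bit for input bits a, b, c taken from the first, second and
   third operands.  For instance, 0x96 is the three-way XOR, so for
   __m512i values a, b and c (sketch, assuming <immintrin.h> is
   included):

     __m512i x = _mm512_ternarylogic_epi32 (a, b, c, 0x96);

   computes a ^ b ^ c bitwise.  */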
1665
1666 extern __inline __m512d
1667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1668 _mm512_rcp14_pd (__m512d __A)
1669 {
1670 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1671 (__v8df)
1672 _mm512_undefined_pd (),
1673 (__mmask8) -1);
1674 }
1675
1676 extern __inline __m512d
1677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1679 {
1680 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1681 (__v8df) __W,
1682 (__mmask8) __U);
1683 }
1684
1685 extern __inline __m512d
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1688 {
1689 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1690 (__v8df)
1691 _mm512_setzero_pd (),
1692 (__mmask8) __U);
1693 }
1694
1695 extern __inline __m512
1696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1697 _mm512_rcp14_ps (__m512 __A)
1698 {
1699 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1700 (__v16sf)
1701 _mm512_undefined_ps (),
1702 (__mmask16) -1);
1703 }
1704
1705 extern __inline __m512
1706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1708 {
1709 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1710 (__v16sf) __W,
1711 (__mmask16) __U);
1712 }
1713
1714 extern __inline __m512
1715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1716 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1717 {
1718 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1719 (__v16sf)
1720 _mm512_setzero_ps (),
1721 (__mmask16) __U);
1722 }
1723
1724 extern __inline __m128d
1725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1726 _mm_rcp14_sd (__m128d __A, __m128d __B)
1727 {
1728 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1729 (__v2df) __A);
1730 }
1731
1732 extern __inline __m128d
1733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1735 {
1736 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1737 (__v2df) __A,
1738 (__v2df) __W,
1739 (__mmask8) __U);
1740 }
1741
1742 extern __inline __m128d
1743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1745 {
1746 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1747 (__v2df) __A,
1748                                                 (__v2df) _mm_setzero_pd (),
1749 (__mmask8) __U);
1750 }
1751
1752 extern __inline __m128
1753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1754 _mm_rcp14_ss (__m128 __A, __m128 __B)
1755 {
1756 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1757 (__v4sf) __A);
1758 }
1759
1760 extern __inline __m128
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1763 {
1764 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1765 (__v4sf) __A,
1766 (__v4sf) __W,
1767 (__mmask8) __U);
1768 }
1769
1770 extern __inline __m128
1771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1772 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1773 {
1774 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1775 (__v4sf) __A,
1776 (__v4sf) _mm_setzero_ps (),
1777 (__mmask8) __U);
1778 }
1779
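/* Approximate reciprocal square root (VRSQRT14), packed and scalar forms,
   also accurate to a relative error of 2^-14. */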
1780 extern __inline __m512d
1781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1782 _mm512_rsqrt14_pd (__m512d __A)
1783 {
1784 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1785 (__v8df)
1786 _mm512_undefined_pd (),
1787 (__mmask8) -1);
1788 }
1789
1790 extern __inline __m512d
1791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1793 {
1794 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1795 (__v8df) __W,
1796 (__mmask8) __U);
1797 }
1798
1799 extern __inline __m512d
1800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1802 {
1803 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1804 (__v8df)
1805 _mm512_setzero_pd (),
1806 (__mmask8) __U);
1807 }
1808
1809 extern __inline __m512
1810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811 _mm512_rsqrt14_ps (__m512 __A)
1812 {
1813 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1814 (__v16sf)
1815 _mm512_undefined_ps (),
1816 (__mmask16) -1);
1817 }
1818
1819 extern __inline __m512
1820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1821 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1822 {
1823 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1824 (__v16sf) __W,
1825 (__mmask16) __U);
1826 }
1827
1828 extern __inline __m512
1829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1830 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1831 {
1832 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1833 (__v16sf)
1834 _mm512_setzero_ps (),
1835 (__mmask16) __U);
1836 }
1837
1838 extern __inline __m128d
1839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1840 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1841 {
1842 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1843 (__v2df) __A);
1844 }
1845
1846 extern __inline __m128d
1847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1848 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1849 {
1850 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1851 (__v2df) __A,
1852 (__v2df) __W,
1853 (__mmask8) __U);
1854 }
1855
1856 extern __inline __m128d
1857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1859 {
1860 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1861 (__v2df) __A,
1862 (__v2df) _mm_setzero_pd (),
1863 (__mmask8) __U);
1864 }
1865
1866 extern __inline __m128
1867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1868 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1869 {
1870 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1871 (__v4sf) __A);
1872 }
1873
1874 extern __inline __m128
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1877 {
1878 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1879 (__v4sf) __A,
1880 (__v4sf) __W,
1881 (__mmask8) __U);
1882 }
1883
1884 extern __inline __m128
1885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1886 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1887 {
1888 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1889 (__v4sf) __A,
1890 (__v4sf) _mm_setzero_ps (),
1891 (__mmask8) __U);
1892 }
1893
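/* Square root with an explicit rounding-mode argument, e.g.
   _mm512_sqrt_round_pd (x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC).
   The rounding operand of the underlying builtins must reduce to a
   compile-time constant, which is only guaranteed when optimizing; without
   optimization the same operations are provided as macros instead. */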
1894 #ifdef __OPTIMIZE__
1895 extern __inline __m512d
1896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1897 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1898 {
1899 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1900 (__v8df)
1901 _mm512_undefined_pd (),
1902 (__mmask8) -1, __R);
1903 }
1904
1905 extern __inline __m512d
1906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1907 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1908 const int __R)
1909 {
1910 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1911 (__v8df) __W,
1912 (__mmask8) __U, __R);
1913 }
1914
1915 extern __inline __m512d
1916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1917 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1918 {
1919 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1920 (__v8df)
1921 _mm512_setzero_pd (),
1922 (__mmask8) __U, __R);
1923 }
1924
1925 extern __inline __m512
1926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1927 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1928 {
1929 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1930 (__v16sf)
1931 _mm512_undefined_ps (),
1932 (__mmask16) -1, __R);
1933 }
1934
1935 extern __inline __m512
1936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1937 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1938 {
1939 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1940 (__v16sf) __W,
1941 (__mmask16) __U, __R);
1942 }
1943
1944 extern __inline __m512
1945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1946 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1947 {
1948 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1949 (__v16sf)
1950 _mm512_setzero_ps (),
1951 (__mmask16) __U, __R);
1952 }
1953
1954 extern __inline __m128d
1955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1956 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1957 {
1958 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1959 (__v2df) __A,
1960 __R);
1961 }
1962
1963 extern __inline __m128
1964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1965 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1966 {
1967 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1968 (__v4sf) __A,
1969 __R);
1970 }
1971 #else
1972 #define _mm512_sqrt_round_pd(A, C) \
1973 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
1974
1975 #define _mm512_mask_sqrt_round_pd(W, U, A, C) \
1976 (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
1977
1978 #define _mm512_maskz_sqrt_round_pd(U, A, C) \
1979 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
1980
1981 #define _mm512_sqrt_round_ps(A, C) \
1982 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
1983
1984 #define _mm512_mask_sqrt_round_ps(W, U, A, C) \
1985 (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
1986
1987 #define _mm512_maskz_sqrt_round_ps(U, A, C) \
1988 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
1989
1990 #define _mm_sqrt_round_sd(A, B, C) \
1991 (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)
1992
1993 #define _mm_sqrt_round_ss(A, B, C) \
1994 (__m128)__builtin_ia32_sqrtss_round(A, B, C)
1995 #endif
1996
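/* Sign-extending conversions (VPMOVSX*) from narrower integer elements to
   the 32-bit or 64-bit lanes of a 512-bit vector. */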
1997 extern __inline __m512i
1998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999 _mm512_cvtepi8_epi32 (__m128i __A)
2000 {
2001 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2002 (__v16si)
2003 _mm512_undefined_epi32 (),
2004 (__mmask16) -1);
2005 }
2006
2007 extern __inline __m512i
2008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2010 {
2011 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2012 (__v16si) __W,
2013 (__mmask16) __U);
2014 }
2015
2016 extern __inline __m512i
2017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2018 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2019 {
2020 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2021 (__v16si)
2022 _mm512_setzero_si512 (),
2023 (__mmask16) __U);
2024 }
2025
2026 extern __inline __m512i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm512_cvtepi8_epi64 (__m128i __A)
2029 {
2030 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2031 (__v8di)
2032 _mm512_undefined_epi32 (),
2033 (__mmask8) -1);
2034 }
2035
2036 extern __inline __m512i
2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2039 {
2040 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2041 (__v8di) __W,
2042 (__mmask8) __U);
2043 }
2044
2045 extern __inline __m512i
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2048 {
2049 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2050 (__v8di)
2051 _mm512_setzero_si512 (),
2052 (__mmask8) __U);
2053 }
2054
2055 extern __inline __m512i
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm512_cvtepi16_epi32 (__m256i __A)
2058 {
2059 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2060 (__v16si)
2061 _mm512_undefined_epi32 (),
2062 (__mmask16) -1);
2063 }
2064
2065 extern __inline __m512i
2066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2067 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2068 {
2069 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2070 (__v16si) __W,
2071 (__mmask16) __U);
2072 }
2073
2074 extern __inline __m512i
2075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2076 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2077 {
2078 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2079 (__v16si)
2080 _mm512_setzero_si512 (),
2081 (__mmask16) __U);
2082 }
2083
2084 extern __inline __m512i
2085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086 _mm512_cvtepi16_epi64 (__m128i __A)
2087 {
2088 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2089 (__v8di)
2090 _mm512_undefined_epi32 (),
2091 (__mmask8) -1);
2092 }
2093
2094 extern __inline __m512i
2095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2096 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2097 {
2098 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2099 (__v8di) __W,
2100 (__mmask8) __U);
2101 }
2102
2103 extern __inline __m512i
2104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2105 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2106 {
2107 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2108 (__v8di)
2109 _mm512_setzero_si512 (),
2110 (__mmask8) __U);
2111 }
2112
2113 extern __inline __m512i
2114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2115 _mm512_cvtepi32_epi64 (__m256i __X)
2116 {
2117 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2118 (__v8di)
2119 _mm512_undefined_epi32 (),
2120 (__mmask8) -1);
2121 }
2122
2123 extern __inline __m512i
2124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2125 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2126 {
2127 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2128 (__v8di) __W,
2129 (__mmask8) __U);
2130 }
2131
2132 extern __inline __m512i
2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2135 {
2136 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2137 (__v8di)
2138 _mm512_setzero_si512 (),
2139 (__mmask8) __U);
2140 }
2141
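/* Zero-extending conversions (VPMOVZX*) of unsigned narrower elements. */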
2142 extern __inline __m512i
2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 _mm512_cvtepu8_epi32 (__m128i __A)
2145 {
2146 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2147 (__v16si)
2148 _mm512_undefined_epi32 (),
2149 (__mmask16) -1);
2150 }
2151
2152 extern __inline __m512i
2153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2155 {
2156 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2157 (__v16si) __W,
2158 (__mmask16) __U);
2159 }
2160
2161 extern __inline __m512i
2162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2163 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2164 {
2165 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2166 (__v16si)
2167 _mm512_setzero_si512 (),
2168 (__mmask16) __U);
2169 }
2170
2171 extern __inline __m512i
2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173 _mm512_cvtepu8_epi64 (__m128i __A)
2174 {
2175 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2176 (__v8di)
2177 _mm512_undefined_epi32 (),
2178 (__mmask8) -1);
2179 }
2180
2181 extern __inline __m512i
2182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2183 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2184 {
2185 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2186 (__v8di) __W,
2187 (__mmask8) __U);
2188 }
2189
2190 extern __inline __m512i
2191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2192 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2193 {
2194 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2195 (__v8di)
2196 _mm512_setzero_si512 (),
2197 (__mmask8) __U);
2198 }
2199
2200 extern __inline __m512i
2201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202 _mm512_cvtepu16_epi32 (__m256i __A)
2203 {
2204 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2205 (__v16si)
2206 _mm512_undefined_epi32 (),
2207 (__mmask16) -1);
2208 }
2209
2210 extern __inline __m512i
2211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2212 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2213 {
2214 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2215 (__v16si) __W,
2216 (__mmask16) __U);
2217 }
2218
2219 extern __inline __m512i
2220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2221 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2222 {
2223 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2224 (__v16si)
2225 _mm512_setzero_si512 (),
2226 (__mmask16) __U);
2227 }
2228
2229 extern __inline __m512i
2230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2231 _mm512_cvtepu16_epi64 (__m128i __A)
2232 {
2233 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2234 (__v8di)
2235 _mm512_undefined_epi32 (),
2236 (__mmask8) -1);
2237 }
2238
2239 extern __inline __m512i
2240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2242 {
2243 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2244 (__v8di) __W,
2245 (__mmask8) __U);
2246 }
2247
2248 extern __inline __m512i
2249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2250 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2251 {
2252 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2253 (__v8di)
2254 _mm512_setzero_si512 (),
2255 (__mmask8) __U);
2256 }
2257
2258 extern __inline __m512i
2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 _mm512_cvtepu32_epi64 (__m256i __X)
2261 {
2262 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2263 (__v8di)
2264 _mm512_undefined_epi32 (),
2265 (__mmask8) -1);
2266 }
2267
2268 extern __inline __m512i
2269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2270 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2271 {
2272 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2273 (__v8di) __W,
2274 (__mmask8) __U);
2275 }
2276
2277 extern __inline __m512i
2278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2280 {
2281 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2282 (__v8di)
2283 _mm512_setzero_si512 (),
2284 (__mmask8) __U);
2285 }
2286
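/* Packed add/sub with an explicit rounding-mode argument (see the note
   above the sqrt_round group for the __OPTIMIZE__ split). */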
2287 #ifdef __OPTIMIZE__
2288 extern __inline __m512d
2289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2290 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2291 {
2292 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2293 (__v8df) __B,
2294 (__v8df)
2295 _mm512_undefined_pd (),
2296 (__mmask8) -1, __R);
2297 }
2298
2299 extern __inline __m512d
2300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2301 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2302 __m512d __B, const int __R)
2303 {
2304 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2305 (__v8df) __B,
2306 (__v8df) __W,
2307 (__mmask8) __U, __R);
2308 }
2309
2310 extern __inline __m512d
2311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2313 const int __R)
2314 {
2315 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2316 (__v8df) __B,
2317 (__v8df)
2318 _mm512_setzero_pd (),
2319 (__mmask8) __U, __R);
2320 }
2321
2322 extern __inline __m512
2323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2324 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2325 {
2326 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2327 (__v16sf) __B,
2328 (__v16sf)
2329 _mm512_undefined_ps (),
2330 (__mmask16) -1, __R);
2331 }
2332
2333 extern __inline __m512
2334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2335 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2336 __m512 __B, const int __R)
2337 {
2338 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2339 (__v16sf) __B,
2340 (__v16sf) __W,
2341 (__mmask16) __U, __R);
2342 }
2343
2344 extern __inline __m512
2345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2346 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2347 {
2348 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2349 (__v16sf) __B,
2350 (__v16sf)
2351 _mm512_setzero_ps (),
2352 (__mmask16) __U, __R);
2353 }
2354
2355 extern __inline __m512d
2356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2357 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2358 {
2359 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2360 (__v8df) __B,
2361 (__v8df)
2362 _mm512_undefined_pd (),
2363 (__mmask8) -1, __R);
2364 }
2365
2366 extern __inline __m512d
2367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2368 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2369 __m512d __B, const int __R)
2370 {
2371 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2372 (__v8df) __B,
2373 (__v8df) __W,
2374 (__mmask8) __U, __R);
2375 }
2376
2377 extern __inline __m512d
2378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2379 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2380 const int __R)
2381 {
2382 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2383 (__v8df) __B,
2384 (__v8df)
2385 _mm512_setzero_pd (),
2386 (__mmask8) __U, __R);
2387 }
2388
2389 extern __inline __m512
2390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2391 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2392 {
2393 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2394 (__v16sf) __B,
2395 (__v16sf)
2396 _mm512_undefined_ps (),
2397 (__mmask16) -1, __R);
2398 }
2399
2400 extern __inline __m512
2401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2402 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2403 __m512 __B, const int __R)
2404 {
2405 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2406 (__v16sf) __B,
2407 (__v16sf) __W,
2408 (__mmask16) __U, __R);
2409 }
2410
2411 extern __inline __m512
2412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2413 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2414 {
2415 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2416 (__v16sf) __B,
2417 (__v16sf)
2418 _mm512_setzero_ps (),
2419 (__mmask16) __U, __R);
2420 }
2421 #else
2422 #define _mm512_add_round_pd(A, B, C) \
2423 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2424
2425 #define _mm512_mask_add_round_pd(W, U, A, B, C) \
2426 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
2427
2428 #define _mm512_maskz_add_round_pd(U, A, B, C) \
2429 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2430
2431 #define _mm512_add_round_ps(A, B, C) \
2432 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2433
2434 #define _mm512_mask_add_round_ps(W, U, A, B, C) \
2435 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
2436
2437 #define _mm512_maskz_add_round_ps(U, A, B, C) \
2438 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2439
2440 #define _mm512_sub_round_pd(A, B, C) \
2441 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2442
2443 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
2444 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
2445
2446 #define _mm512_maskz_sub_round_pd(U, A, B, C) \
2447 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2448
2449 #define _mm512_sub_round_ps(A, B, C) \
2450 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2451
2452 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
2453 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
2454
2455 #define _mm512_maskz_sub_round_ps(U, A, B, C) \
2456 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2457 #endif
2458
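/* Packed and scalar multiply/divide with an explicit rounding-mode
   argument. */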
2459 #ifdef __OPTIMIZE__
2460 extern __inline __m512d
2461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2462 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2463 {
2464 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2465 (__v8df) __B,
2466 (__v8df)
2467 _mm512_undefined_pd (),
2468 (__mmask8) -1, __R);
2469 }
2470
2471 extern __inline __m512d
2472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2473 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2474 __m512d __B, const int __R)
2475 {
2476 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2477 (__v8df) __B,
2478 (__v8df) __W,
2479 (__mmask8) __U, __R);
2480 }
2481
2482 extern __inline __m512d
2483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2484 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2485 const int __R)
2486 {
2487 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2488 (__v8df) __B,
2489 (__v8df)
2490 _mm512_setzero_pd (),
2491 (__mmask8) __U, __R);
2492 }
2493
2494 extern __inline __m512
2495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2496 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2497 {
2498 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2499 (__v16sf) __B,
2500 (__v16sf)
2501 _mm512_undefined_ps (),
2502 (__mmask16) -1, __R);
2503 }
2504
2505 extern __inline __m512
2506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2507 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2508 __m512 __B, const int __R)
2509 {
2510 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2511 (__v16sf) __B,
2512 (__v16sf) __W,
2513 (__mmask16) __U, __R);
2514 }
2515
2516 extern __inline __m512
2517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2518 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2519 {
2520 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2521 (__v16sf) __B,
2522 (__v16sf)
2523 _mm512_setzero_ps (),
2524 (__mmask16) __U, __R);
2525 }
2526
2527 extern __inline __m512d
2528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2529 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2530 {
2531 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2532 (__v8df) __V,
2533 (__v8df)
2534 _mm512_undefined_pd (),
2535 (__mmask8) -1, __R);
2536 }
2537
2538 extern __inline __m512d
2539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2540 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2541 __m512d __V, const int __R)
2542 {
2543 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2544 (__v8df) __V,
2545 (__v8df) __W,
2546 (__mmask8) __U, __R);
2547 }
2548
2549 extern __inline __m512d
2550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2551 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2552 const int __R)
2553 {
2554 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2555 (__v8df) __V,
2556 (__v8df)
2557 _mm512_setzero_pd (),
2558 (__mmask8) __U, __R);
2559 }
2560
2561 extern __inline __m512
2562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2563 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2564 {
2565 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2566 (__v16sf) __B,
2567 (__v16sf)
2568 _mm512_undefined_ps (),
2569 (__mmask16) -1, __R);
2570 }
2571
2572 extern __inline __m512
2573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2574 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2575 __m512 __B, const int __R)
2576 {
2577 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2578 (__v16sf) __B,
2579 (__v16sf) __W,
2580 (__mmask16) __U, __R);
2581 }
2582
2583 extern __inline __m512
2584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2585 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2586 {
2587 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2588 (__v16sf) __B,
2589 (__v16sf)
2590 _mm512_setzero_ps (),
2591 (__mmask16) __U, __R);
2592 }
2593
2594 extern __inline __m128d
2595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2596 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2597 {
2598 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2599 (__v2df) __B,
2600 __R);
2601 }
2602
2603 extern __inline __m128d
2604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605 _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2606 __m128d __B, const int __R)
2607 {
2608 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2609 (__v2df) __B,
2610 (__v2df) __W,
2611 (__mmask8) __U, __R);
2612 }
2613
2614 extern __inline __m128d
2615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2616 _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2617 const int __R)
2618 {
2619 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2620 (__v2df) __B,
2621 (__v2df)
2622 _mm_setzero_pd (),
2623 (__mmask8) __U, __R);
2624 }
2625
2626 extern __inline __m128
2627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2628 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2629 {
2630 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2631 (__v4sf) __B,
2632 __R);
2633 }
2634
2635 extern __inline __m128
2636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2637 _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2638 __m128 __B, const int __R)
2639 {
2640 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2641 (__v4sf) __B,
2642 (__v4sf) __W,
2643 (__mmask8) __U, __R);
2644 }
2645
2646 extern __inline __m128
2647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2648 _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2649 const int __R)
2650 {
2651 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2652 (__v4sf) __B,
2653 (__v4sf)
2654 _mm_setzero_ps (),
2655 (__mmask8) __U, __R);
2656 }
2657
2658 extern __inline __m128d
2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2661 {
2662 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2663 (__v2df) __B,
2664 __R);
2665 }
2666
2667 extern __inline __m128d
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2670 __m128d __B, const int __R)
2671 {
2672 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2673 (__v2df) __B,
2674 (__v2df) __W,
2675 (__mmask8) __U, __R);
2676 }
2677
2678 extern __inline __m128d
2679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680 _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2681 const int __R)
2682 {
2683 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2684 (__v2df) __B,
2685 (__v2df)
2686 _mm_setzero_pd (),
2687 (__mmask8) __U, __R);
2688 }
2689
2690 extern __inline __m128
2691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2693 {
2694 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2695 (__v4sf) __B,
2696 __R);
2697 }
2698
2699 extern __inline __m128
2700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701 _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2702 __m128 __B, const int __R)
2703 {
2704 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2705 (__v4sf) __B,
2706 (__v4sf) __W,
2707 (__mmask8) __U, __R);
2708 }
2709
2710 extern __inline __m128
2711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2712 _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2713 const int __R)
2714 {
2715 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2716 (__v4sf) __B,
2717 (__v4sf)
2718 _mm_setzero_ps (),
2719 (__mmask8) __U, __R);
2720 }
2721
2722 #else
2723 #define _mm512_mul_round_pd(A, B, C) \
2724 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2725
2726 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
2727 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
2728
2729 #define _mm512_maskz_mul_round_pd(U, A, B, C) \
2730 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2731
2732 #define _mm512_mul_round_ps(A, B, C) \
2733 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2734
2735 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
2736 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
2737
2738 #define _mm512_maskz_mul_round_ps(U, A, B, C) \
2739 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2740
2741 #define _mm512_div_round_pd(A, B, C) \
2742 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2743
2744 #define _mm512_mask_div_round_pd(W, U, A, B, C) \
2745 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
2746
2747 #define _mm512_maskz_div_round_pd(U, A, B, C) \
2748 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2749
2750 #define _mm512_div_round_ps(A, B, C) \
2751 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2752
2753 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
2754 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
2755
2756 #define _mm512_maskz_div_round_ps(U, A, B, C) \
2757 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2758
2759 #define _mm_mul_round_sd(A, B, C) \
2760 (__m128d)__builtin_ia32_mulsd_round(A, B, C)
2761
2762 #define _mm_mask_mul_round_sd(W, U, A, B, C) \
2763 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)
2764
2765 #define _mm_maskz_mul_round_sd(U, A, B, C) \
2766 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
2767
2768 #define _mm_mul_round_ss(A, B, C) \
2769 (__m128)__builtin_ia32_mulss_round(A, B, C)
2770
2771 #define _mm_mask_mul_round_ss(W, U, A, B, C) \
2772 (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)
2773
2774 #define _mm_maskz_mul_round_ss(U, A, B, C) \
2775 (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
2776
2777 #define _mm_div_round_sd(A, B, C) \
2778 (__m128d)__builtin_ia32_divsd_round(A, B, C)
2779
2780 #define _mm_mask_div_round_sd(W, U, A, B, C) \
2781 (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)
2782
2783 #define _mm_maskz_div_round_sd(U, A, B, C) \
2784 (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
2785
2786 #define _mm_div_round_ss(A, B, C) \
2787 (__m128)__builtin_ia32_divss_round(A, B, C)
2788
2789 #define _mm_mask_div_round_ss(W, U, A, B, C) \
2790 (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)
2791
2792 #define _mm_maskz_div_round_ss(U, A, B, C) \
2793 (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
2794
2795 #endif
2796
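/* Packed max/min with an explicit exception-suppression (SAE) argument;
   max/min do not round, so __R is typically _MM_FROUND_CUR_DIRECTION or
   _MM_FROUND_NO_EXC. */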
2797 #ifdef __OPTIMIZE__
2798 extern __inline __m512d
2799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2800 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2801 {
2802 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2803 (__v8df) __B,
2804 (__v8df)
2805 _mm512_undefined_pd (),
2806 (__mmask8) -1, __R);
2807 }
2808
2809 extern __inline __m512d
2810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2811 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2812 __m512d __B, const int __R)
2813 {
2814 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2815 (__v8df) __B,
2816 (__v8df) __W,
2817 (__mmask8) __U, __R);
2818 }
2819
2820 extern __inline __m512d
2821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2823 const int __R)
2824 {
2825 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2826 (__v8df) __B,
2827 (__v8df)
2828 _mm512_setzero_pd (),
2829 (__mmask8) __U, __R);
2830 }
2831
2832 extern __inline __m512
2833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2834 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2835 {
2836 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2837 (__v16sf) __B,
2838 (__v16sf)
2839 _mm512_undefined_ps (),
2840 (__mmask16) -1, __R);
2841 }
2842
2843 extern __inline __m512
2844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2846 __m512 __B, const int __R)
2847 {
2848 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2849 (__v16sf) __B,
2850 (__v16sf) __W,
2851 (__mmask16) __U, __R);
2852 }
2853
2854 extern __inline __m512
2855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2856 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2857 {
2858 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2859 (__v16sf) __B,
2860 (__v16sf)
2861 _mm512_setzero_ps (),
2862 (__mmask16) __U, __R);
2863 }
2864
2865 extern __inline __m512d
2866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2867 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2868 {
2869 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2870 (__v8df) __B,
2871 (__v8df)
2872 _mm512_undefined_pd (),
2873 (__mmask8) -1, __R);
2874 }
2875
2876 extern __inline __m512d
2877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2878 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2879 __m512d __B, const int __R)
2880 {
2881 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2882 (__v8df) __B,
2883 (__v8df) __W,
2884 (__mmask8) __U, __R);
2885 }
2886
2887 extern __inline __m512d
2888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2889 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2890 const int __R)
2891 {
2892 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2893 (__v8df) __B,
2894 (__v8df)
2895 _mm512_setzero_pd (),
2896 (__mmask8) __U, __R);
2897 }
2898
2899 extern __inline __m512
2900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2901 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2902 {
2903 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2904 (__v16sf) __B,
2905 (__v16sf)
2906 _mm512_undefined_ps (),
2907 (__mmask16) -1, __R);
2908 }
2909
2910 extern __inline __m512
2911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2912 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2913 __m512 __B, const int __R)
2914 {
2915 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2916 (__v16sf) __B,
2917 (__v16sf) __W,
2918 (__mmask16) __U, __R);
2919 }
2920
2921 extern __inline __m512
2922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2923 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2924 {
2925 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2926 (__v16sf) __B,
2927 (__v16sf)
2928 _mm512_setzero_ps (),
2929 (__mmask16) __U, __R);
2930 }
2931 #else
2932 #define _mm512_max_round_pd(A, B, R) \
2933 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
2934
2935 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
2936 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
2937
2938 #define _mm512_maskz_max_round_pd(U, A, B, R) \
2939 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2940
2941 #define _mm512_max_round_ps(A, B, R) \
2942     (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
2943
2944 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
2945 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
2946
2947 #define _mm512_maskz_max_round_ps(U, A, B, R) \
2948 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2949
2950 #define _mm512_min_round_pd(A, B, R) \
2951 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
2952
2953 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
2954 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
2955
2956 #define _mm512_maskz_min_round_pd(U, A, B, R) \
2957 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2958
2959 #define _mm512_min_round_ps(A, B, R) \
2960 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
2961
2962 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
2963 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
2964
2965 #define _mm512_maskz_min_round_ps(U, A, B, R) \
2966 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2967 #endif
2968
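/* VSCALEF: scale each element of __A by 2 raised to the power
   floor (__B), with an explicit rounding-mode argument. */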
2969 #ifdef __OPTIMIZE__
2970 extern __inline __m512d
2971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2972 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2973 {
2974 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2975 (__v8df) __B,
2976 (__v8df)
2977 _mm512_undefined_pd (),
2978 (__mmask8) -1, __R);
2979 }
2980
2981 extern __inline __m512d
2982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2983 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2984 __m512d __B, const int __R)
2985 {
2986 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2987 (__v8df) __B,
2988 (__v8df) __W,
2989 (__mmask8) __U, __R);
2990 }
2991
2992 extern __inline __m512d
2993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2994 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2995 const int __R)
2996 {
2997 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2998 (__v8df) __B,
2999 (__v8df)
3000 _mm512_setzero_pd (),
3001 (__mmask8) __U, __R);
3002 }
3003
3004 extern __inline __m512
3005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3007 {
3008 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3009 (__v16sf) __B,
3010 (__v16sf)
3011 _mm512_undefined_ps (),
3012 (__mmask16) -1, __R);
3013 }
3014
3015 extern __inline __m512
3016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3017 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3018 __m512 __B, const int __R)
3019 {
3020 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3021 (__v16sf) __B,
3022 (__v16sf) __W,
3023 (__mmask16) __U, __R);
3024 }
3025
3026 extern __inline __m512
3027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3028 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3029 const int __R)
3030 {
3031 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3032 (__v16sf) __B,
3033 (__v16sf)
3034 _mm512_setzero_ps (),
3035 (__mmask16) __U, __R);
3036 }
3037
3038 extern __inline __m128d
3039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3040 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3041 {
3042 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
3043 (__v2df) __B,
3044 __R);
3045 }
3046
3047 extern __inline __m128
3048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3049 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3050 {
3051 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
3052 (__v4sf) __B,
3053 __R);
3054 }
3055 #else
3056 #define _mm512_scalef_round_pd(A, B, C) \
3057 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
3058
3059 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
3060 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
3061
3062 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
3063 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
3064
3065 #define _mm512_scalef_round_ps(A, B, C) \
3066 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
3067
3068 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
3069 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
3070
3071 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
3072 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
3073
3074 #define _mm_scalef_round_sd(A, B, C) \
3075 (__m128d)__builtin_ia32_scalefsd_round(A, B, C)
3076
3077 #define _mm_scalef_round_ss(A, B, C) \
3078 (__m128)__builtin_ia32_scalefss_round(A, B, C)
3079 #endif
3080
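/* Fused multiply-add family (VFMADD/VFMSUB/VFNMADD/VFNMSUB and the
   interleaved add/sub forms) with an explicit rounding-mode argument.
   The _mask, _mask3 and _maskz variants take unselected elements from
   __A, from __C, or zero them, respectively. */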
3081 #ifdef __OPTIMIZE__
3082 extern __inline __m512d
3083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3084 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3085 {
3086 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3087 (__v8df) __B,
3088 (__v8df) __C,
3089 (__mmask8) -1, __R);
3090 }
3091
3092 extern __inline __m512d
3093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3094 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3095 __m512d __C, const int __R)
3096 {
3097 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3098 (__v8df) __B,
3099 (__v8df) __C,
3100 (__mmask8) __U, __R);
3101 }
3102
3103 extern __inline __m512d
3104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3105 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3106 __mmask8 __U, const int __R)
3107 {
3108 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3109 (__v8df) __B,
3110 (__v8df) __C,
3111 (__mmask8) __U, __R);
3112 }
3113
3114 extern __inline __m512d
3115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3116 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3117 __m512d __C, const int __R)
3118 {
3119 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3120 (__v8df) __B,
3121 (__v8df) __C,
3122 (__mmask8) __U, __R);
3123 }
3124
3125 extern __inline __m512
3126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3127 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3128 {
3129 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3130 (__v16sf) __B,
3131 (__v16sf) __C,
3132 (__mmask16) -1, __R);
3133 }
3134
3135 extern __inline __m512
3136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3137 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3138 __m512 __C, const int __R)
3139 {
3140 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3141 (__v16sf) __B,
3142 (__v16sf) __C,
3143 (__mmask16) __U, __R);
3144 }
3145
3146 extern __inline __m512
3147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3149 __mmask16 __U, const int __R)
3150 {
3151 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3152 (__v16sf) __B,
3153 (__v16sf) __C,
3154 (__mmask16) __U, __R);
3155 }
3156
3157 extern __inline __m512
3158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3159 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3160 __m512 __C, const int __R)
3161 {
3162 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3163 (__v16sf) __B,
3164 (__v16sf) __C,
3165 (__mmask16) __U, __R);
3166 }
3167
3168 extern __inline __m512d
3169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3170 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3171 {
3172 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3173 (__v8df) __B,
3174 -(__v8df) __C,
3175 (__mmask8) -1, __R);
3176 }
3177
3178 extern __inline __m512d
3179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3180 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3181 __m512d __C, const int __R)
3182 {
3183 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3184 (__v8df) __B,
3185 -(__v8df) __C,
3186 (__mmask8) __U, __R);
3187 }
3188
3189 extern __inline __m512d
3190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3191 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3192 __mmask8 __U, const int __R)
3193 {
3194 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3195 (__v8df) __B,
3196 (__v8df) __C,
3197 (__mmask8) __U, __R);
3198 }
3199
3200 extern __inline __m512d
3201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3202 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3203 __m512d __C, const int __R)
3204 {
3205 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3206 (__v8df) __B,
3207 -(__v8df) __C,
3208 (__mmask8) __U, __R);
3209 }
3210
3211 extern __inline __m512
3212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3213 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3214 {
3215 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3216 (__v16sf) __B,
3217 -(__v16sf) __C,
3218 (__mmask16) -1, __R);
3219 }
3220
3221 extern __inline __m512
3222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3224 __m512 __C, const int __R)
3225 {
3226 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3227 (__v16sf) __B,
3228 -(__v16sf) __C,
3229 (__mmask16) __U, __R);
3230 }
3231
3232 extern __inline __m512
3233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3234 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3235 __mmask16 __U, const int __R)
3236 {
3237 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3238 (__v16sf) __B,
3239 (__v16sf) __C,
3240 (__mmask16) __U, __R);
3241 }
3242
3243 extern __inline __m512
3244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3246 __m512 __C, const int __R)
3247 {
3248 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3249 (__v16sf) __B,
3250 -(__v16sf) __C,
3251 (__mmask16) __U, __R);
3252 }
3253
3254 extern __inline __m512d
3255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3256 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3257 {
3258 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3259 (__v8df) __B,
3260 (__v8df) __C,
3261 (__mmask8) -1, __R);
3262 }
3263
3264 extern __inline __m512d
3265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3266 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3267 __m512d __C, const int __R)
3268 {
3269 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3270 (__v8df) __B,
3271 (__v8df) __C,
3272 (__mmask8) __U, __R);
3273 }
3274
3275 extern __inline __m512d
3276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3277 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3278 __mmask8 __U, const int __R)
3279 {
3280 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3281 (__v8df) __B,
3282 (__v8df) __C,
3283 (__mmask8) __U, __R);
3284 }
3285
3286 extern __inline __m512d
3287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3288 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3289 __m512d __C, const int __R)
3290 {
3291 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3292 (__v8df) __B,
3293 (__v8df) __C,
3294 (__mmask8) __U, __R);
3295 }
3296
3297 extern __inline __m512
3298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3299 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3300 {
3301 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3302 (__v16sf) __B,
3303 (__v16sf) __C,
3304 (__mmask16) -1, __R);
3305 }
3306
3307 extern __inline __m512
3308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3309 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3310 __m512 __C, const int __R)
3311 {
3312 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3313 (__v16sf) __B,
3314 (__v16sf) __C,
3315 (__mmask16) __U, __R);
3316 }
3317
3318 extern __inline __m512
3319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3320 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3321 __mmask16 __U, const int __R)
3322 {
3323 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3324 (__v16sf) __B,
3325 (__v16sf) __C,
3326 (__mmask16) __U, __R);
3327 }
3328
3329 extern __inline __m512
3330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3332 __m512 __C, const int __R)
3333 {
3334 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3335 (__v16sf) __B,
3336 (__v16sf) __C,
3337 (__mmask16) __U, __R);
3338 }
3339
3340 extern __inline __m512d
3341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3343 {
3344 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3345 (__v8df) __B,
3346 -(__v8df) __C,
3347 (__mmask8) -1, __R);
3348 }
3349
3350 extern __inline __m512d
3351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3353 __m512d __C, const int __R)
3354 {
3355 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3356 (__v8df) __B,
3357 -(__v8df) __C,
3358 (__mmask8) __U, __R);
3359 }
3360
3361 extern __inline __m512d
3362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3363 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3364 __mmask8 __U, const int __R)
3365 {
3366 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3367 (__v8df) __B,
3368 (__v8df) __C,
3369 (__mmask8) __U, __R);
3370 }
3371
3372 extern __inline __m512d
3373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3374 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3375 __m512d __C, const int __R)
3376 {
3377 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3378 (__v8df) __B,
3379 -(__v8df) __C,
3380 (__mmask8) __U, __R);
3381 }
3382
3383 extern __inline __m512
3384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3385 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3386 {
3387 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3388 (__v16sf) __B,
3389 -(__v16sf) __C,
3390 (__mmask16) -1, __R);
3391 }
3392
3393 extern __inline __m512
3394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3395 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3396 __m512 __C, const int __R)
3397 {
3398 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3399 (__v16sf) __B,
3400 -(__v16sf) __C,
3401 (__mmask16) __U, __R);
3402 }
3403
3404 extern __inline __m512
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3407 __mmask16 __U, const int __R)
3408 {
3409 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3410 (__v16sf) __B,
3411 (__v16sf) __C,
3412 (__mmask16) __U, __R);
3413 }
3414
3415 extern __inline __m512
3416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3418 __m512 __C, const int __R)
3419 {
3420 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3421 (__v16sf) __B,
3422 -(__v16sf) __C,
3423 (__mmask16) __U, __R);
3424 }
3425
3426 extern __inline __m512d
3427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3429 {
3430 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3431 (__v8df) __B,
3432 (__v8df) __C,
3433 (__mmask8) -1, __R);
3434 }
3435
3436 extern __inline __m512d
3437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3439 __m512d __C, const int __R)
3440 {
3441 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3442 (__v8df) __B,
3443 (__v8df) __C,
3444 (__mmask8) __U, __R);
3445 }
3446
3447 extern __inline __m512d
3448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3449 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3450 __mmask8 __U, const int __R)
3451 {
3452 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3453 (__v8df) __B,
3454 (__v8df) __C,
3455 (__mmask8) __U, __R);
3456 }
3457
3458 extern __inline __m512d
3459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3460 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3461 __m512d __C, const int __R)
3462 {
3463 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3464 (__v8df) __B,
3465 (__v8df) __C,
3466 (__mmask8) __U, __R);
3467 }
3468
3469 extern __inline __m512
3470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3471 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3472 {
3473 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3474 (__v16sf) __B,
3475 (__v16sf) __C,
3476 (__mmask16) -1, __R);
3477 }
3478
3479 extern __inline __m512
3480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3481 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3482 __m512 __C, const int __R)
3483 {
3484 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3485 (__v16sf) __B,
3486 (__v16sf) __C,
3487 (__mmask16) __U, __R);
3488 }
3489
3490 extern __inline __m512
3491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3493 __mmask16 __U, const int __R)
3494 {
3495 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3496 (__v16sf) __B,
3497 (__v16sf) __C,
3498 (__mmask16) __U, __R);
3499 }
3500
3501 extern __inline __m512
3502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3503 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3504 __m512 __C, const int __R)
3505 {
3506 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3507 (__v16sf) __B,
3508 (__v16sf) __C,
3509 (__mmask16) __U, __R);
3510 }
3511
3512 extern __inline __m512d
3513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3514 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3515 {
3516 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3517 (__v8df) __B,
3518 -(__v8df) __C,
3519 (__mmask8) -1, __R);
3520 }
3521
3522 extern __inline __m512d
3523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3524 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3525 __m512d __C, const int __R)
3526 {
3527 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3528 (__v8df) __B,
3529 (__v8df) __C,
3530 (__mmask8) __U, __R);
3531 }
3532
3533 extern __inline __m512d
3534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3536 __mmask8 __U, const int __R)
3537 {
3538 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3539 (__v8df) __B,
3540 (__v8df) __C,
3541 (__mmask8) __U, __R);
3542 }
3543
3544 extern __inline __m512d
3545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3546 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3547 __m512d __C, const int __R)
3548 {
3549 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3550 (__v8df) __B,
3551 -(__v8df) __C,
3552 (__mmask8) __U, __R);
3553 }
3554
3555 extern __inline __m512
3556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3557 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3558 {
3559 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3560 (__v16sf) __B,
3561 -(__v16sf) __C,
3562 (__mmask16) -1, __R);
3563 }
3564
3565 extern __inline __m512
3566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3568 __m512 __C, const int __R)
3569 {
3570 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3571 (__v16sf) __B,
3572 (__v16sf) __C,
3573 (__mmask16) __U, __R);
3574 }
3575
3576 extern __inline __m512
3577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3579 __mmask16 __U, const int __R)
3580 {
3581 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3582 (__v16sf) __B,
3583 (__v16sf) __C,
3584 (__mmask16) __U, __R);
3585 }
3586
3587 extern __inline __m512
3588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3590 __m512 __C, const int __R)
3591 {
3592 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3593 (__v16sf) __B,
3594 -(__v16sf) __C,
3595 (__mmask16) __U, __R);
3596 }
3597 #else
3598 #define _mm512_fmadd_round_pd(A, B, C, R) \
3599 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3600
3601 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3602 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3603
3604 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3605 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3606
3607 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3608 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3609
3610 #define _mm512_fmadd_round_ps(A, B, C, R) \
3611 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3612
3613 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3614 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3615
3616 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3617 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3618
3619 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3620 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3621
3622 #define _mm512_fmsub_round_pd(A, B, C, R) \
3623 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
3624
3625 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3626 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
3627
3628 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3629 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3630
3631 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3632 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
3633
3634 #define _mm512_fmsub_round_ps(A, B, C, R) \
3635 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
3636
3637 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3638 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
3639
3640 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3641 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3642
3643 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3644 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
3645
3646 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3647 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3648
3649 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3650 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3651
3652 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3653 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3654
3655 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3656 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3657
3658 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3659 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3660
3661 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3662 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3663
3664 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3665 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3666
3667 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3668 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3669
3670 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3671 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3672
3673 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3674 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3675
3676 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3677 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3678
3679 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3680 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3681
3682 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3683 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3684
3685 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3686 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3687
3688 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3689 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3690
3691 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3692 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3693
3694 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3695 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
3696
3697 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3698 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3699
3700 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3701 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
3702
3703 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3704 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
3705
3706 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3707 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
3708
3709 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3710 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3711
3712 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3713 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
3714
3715 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3716 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
3717
3718 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3719 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
3720
3721 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3722 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3723
3724 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3725 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3726
3727 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3728 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
3729
3730 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3731 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
3732
3733 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3734 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3735
3736 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3737 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3738
3739 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3740 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
3741 #endif
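/* When __OPTIMIZE__ is not defined, the *_round_* forms above are provided
   as macros rather than inline functions so that the rounding-mode operand
   still reaches the builtin as a compile-time constant.  A minimal usage
   sketch, assuming <immintrin.h> has been included, AVX512F is enabled and
   a, b and c are caller-supplied __m512d values:

     __m512d r = _mm512_fmadd_round_pd (a, b, c,
                                        _MM_FROUND_TO_NEAREST_INT
                                        | _MM_FROUND_NO_EXC);  */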
3742
3743 extern __inline __m512i
3744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3745 _mm512_abs_epi64 (__m512i __A)
3746 {
3747 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3748 (__v8di)
3749 _mm512_undefined_epi32 (),
3750 (__mmask8) -1);
3751 }
3752
3753 extern __inline __m512i
3754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3755 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3756 {
3757 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3758 (__v8di) __W,
3759 (__mmask8) __U);
3760 }
3761
3762 extern __inline __m512i
3763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3765 {
3766 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3767 (__v8di)
3768 _mm512_setzero_si512 (),
3769 (__mmask8) __U);
3770 }
3771
3772 extern __inline __m512i
3773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3774 _mm512_abs_epi32 (__m512i __A)
3775 {
3776 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3777 (__v16si)
3778 _mm512_undefined_epi32 (),
3779 (__mmask16) -1);
3780 }
3781
3782 extern __inline __m512i
3783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3784 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3785 {
3786 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3787 (__v16si) __W,
3788 (__mmask16) __U);
3789 }
3790
3791 extern __inline __m512i
3792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3794 {
3795 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3796 (__v16si)
3797 _mm512_setzero_si512 (),
3798 (__mmask16) __U);
3799 }
3800
3801 extern __inline __m512
3802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3803 _mm512_broadcastss_ps (__m128 __A)
3804 {
3805 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3806 (__v16sf)
3807 _mm512_undefined_ps (),
3808 (__mmask16) -1);
3809 }
3810
3811 extern __inline __m512
3812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3813 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3814 {
3815 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3816 (__v16sf) __O, __M);
3817 }
3818
3819 extern __inline __m512
3820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3821 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3822 {
3823 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3824 (__v16sf)
3825 _mm512_setzero_ps (),
3826 __M);
3827 }
3828
3829 extern __inline __m512d
3830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3831 _mm512_broadcastsd_pd (__m128d __A)
3832 {
3833 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3834 (__v8df)
3835 _mm512_undefined_pd (),
3836 (__mmask8) -1);
3837 }
3838
3839 extern __inline __m512d
3840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3842 {
3843 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3844 (__v8df) __O, __M);
3845 }
3846
3847 extern __inline __m512d
3848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3849 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3850 {
3851 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3852 (__v8df)
3853 _mm512_setzero_pd (),
3854 __M);
3855 }
3856
3857 extern __inline __m512i
3858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 _mm512_broadcastd_epi32 (__m128i __A)
3860 {
3861 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3862 (__v16si)
3863 _mm512_undefined_epi32 (),
3864 (__mmask16) -1);
3865 }
3866
3867 extern __inline __m512i
3868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3869 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3870 {
3871 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3872 (__v16si) __O, __M);
3873 }
3874
3875 extern __inline __m512i
3876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3877 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3878 {
3879 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3880 (__v16si)
3881 _mm512_setzero_si512 (),
3882 __M);
3883 }
3884
3885 extern __inline __m512i
3886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3887 _mm512_set1_epi32 (int __A)
3888 {
3889 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3890 (__v16si)
3891 _mm512_undefined_epi32 (),
3892 (__mmask16)(-1));
3893 }
3894
3895 extern __inline __m512i
3896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3897 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3898 {
3899 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3900 __M);
3901 }
3902
3903 extern __inline __m512i
3904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3905 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3906 {
3907 return (__m512i)
3908 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3909 (__v16si) _mm512_setzero_si512 (),
3910 __M);
3911 }
3912
3913 extern __inline __m512i
3914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3915 _mm512_broadcastq_epi64 (__m128i __A)
3916 {
3917 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3918 (__v8di)
3919 _mm512_undefined_epi32 (),
3920 (__mmask8) -1);
3921 }
3922
3923 extern __inline __m512i
3924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3926 {
3927 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3928 (__v8di) __O, __M);
3929 }
3930
3931 extern __inline __m512i
3932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3933 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3934 {
3935 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3936 (__v8di)
3937 _mm512_setzero_si512 (),
3938 __M);
3939 }
3940
3941 extern __inline __m512i
3942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943 _mm512_set1_epi64 (long long __A)
3944 {
3945 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3946 (__v8di)
3947 _mm512_undefined_epi32 (),
3948 (__mmask8)(-1));
3949 }
3950
3951 extern __inline __m512i
3952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3953 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3954 {
3955 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3956 __M);
3957 }
3958
3959 extern __inline __m512i
3960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3961 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3962 {
3963 return (__m512i)
3964 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3965 (__v8di) _mm512_setzero_si512 (),
3966 __M);
3967 }
3968
3969 extern __inline __m512
3970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3971 _mm512_broadcast_f32x4 (__m128 __A)
3972 {
3973 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3974 (__v16sf)
3975 _mm512_undefined_ps (),
3976 (__mmask16) -1);
3977 }
3978
3979 extern __inline __m512
3980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3981 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3982 {
3983 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3984 (__v16sf) __O,
3985 __M);
3986 }
3987
3988 extern __inline __m512
3989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3991 {
3992 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3993 (__v16sf)
3994 _mm512_setzero_ps (),
3995 __M);
3996 }
3997
3998 extern __inline __m512i
3999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4000 _mm512_broadcast_i32x4 (__m128i __A)
4001 {
4002 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4003 (__v16si)
4004 _mm512_undefined_epi32 (),
4005 (__mmask16) -1);
4006 }
4007
4008 extern __inline __m512i
4009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4010 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4011 {
4012 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4013 (__v16si) __O,
4014 __M);
4015 }
4016
4017 extern __inline __m512i
4018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4019 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4020 {
4021 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4022 (__v16si)
4023 _mm512_setzero_si512 (),
4024 __M);
4025 }
4026
4027 extern __inline __m512d
4028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4029 _mm512_broadcast_f64x4 (__m256d __A)
4030 {
4031 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4032 (__v8df)
4033 _mm512_undefined_pd (),
4034 (__mmask8) -1);
4035 }
4036
4037 extern __inline __m512d
4038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4039 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4040 {
4041 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4042 (__v8df) __O,
4043 __M);
4044 }
4045
4046 extern __inline __m512d
4047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4048 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4049 {
4050 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4051 (__v8df)
4052 _mm512_setzero_pd (),
4053 __M);
4054 }
4055
4056 extern __inline __m512i
4057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058 _mm512_broadcast_i64x4 (__m256i __A)
4059 {
4060 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4061 (__v8di)
4062 _mm512_undefined_epi32 (),
4063 (__mmask8) -1);
4064 }
4065
4066 extern __inline __m512i
4067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4068 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4069 {
4070 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4071 (__v8di) __O,
4072 __M);
4073 }
4074
4075 extern __inline __m512i
4076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4077 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4078 {
4079 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4080 (__v8di)
4081 _mm512_setzero_si512 (),
4082 __M);
4083 }
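/* The broadcast intrinsics above come in two flavours: the scalar forms
   (_mm512_broadcastss_ps, _mm512_set1_epi32 and friends) replicate a single
   element across the whole 512-bit result, while the x4 forms
   (_mm512_broadcast_f32x4, _mm512_broadcast_i64x4 and friends) replicate an
   entire 128-bit or 256-bit block.  A rough sketch, assuming v is a
   caller-supplied __m128 value:

     __m512 all_lanes = _mm512_broadcastss_ps (v);
     __m512 tiled     = _mm512_broadcast_f32x4 (v);

   The first result holds element 0 of v in all sixteen lanes; the second
   holds v repeated four times.  */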
4084
4085 typedef enum
4086 {
4087 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
4088 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
4089 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
4090 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
4091 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
4092 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
4093 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
4094 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
4095 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
4096 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
4097 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
4098 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
4099 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
4100 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
4101 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
4102 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
4103 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
4104 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
4105 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
4106 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
4107 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
4108 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
4109 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
4110 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
4111 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
4112 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
4113 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
4114 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
4115 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
4116 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
4117 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
4118 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
4119 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
4120 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
4121 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
4122 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
4123 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
4124 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
4125 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
4126 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
4127 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
4128 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
4129 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
4130 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
4131 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
4132 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
4133 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
4134 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
4135 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
4136 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
4137 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
4138 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
4139 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
4140 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
4141 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
4142 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
4143 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
4144 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
4145 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
4146 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
4147 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
4148 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
4149 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
4150 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
4151 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
4152 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
4153 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
4154 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
4155 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
4156 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
4157 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
4158 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
4159 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
4160 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
4161 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
4162 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
4163 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
4164 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
4165 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
4166 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
4167 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
4168 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
4169 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
4170 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
4171 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
4172 _MM_PERM_DDDD = 0xFF
4173 } _MM_PERM_ENUM;
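/* Each _MM_PERM_xyzw value encodes one two-bit selector per 32-bit element
   position, with A = 0, B = 1, C = 2 and D = 3; the first letter selects the
   source element for the highest position within each 128-bit lane and the
   last letter the lowest.  _MM_PERM_DCBA (0xE4) is therefore the identity
   permutation and _MM_PERM_AAAA (0x00) replicates element 0 of every lane.
   A rough sketch, assuming x is a caller-supplied __m512i value:

     __m512i splat0 = _mm512_shuffle_epi32 (x, _MM_PERM_AAAA);  */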
4174
4175 #ifdef __OPTIMIZE__
4176 extern __inline __m512i
4177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4178 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4179 {
4180 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4181 __mask,
4182 (__v16si)
4183 _mm512_undefined_epi32 (),
4184 (__mmask16) -1);
4185 }
4186
4187 extern __inline __m512i
4188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4189 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4190 _MM_PERM_ENUM __mask)
4191 {
4192 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4193 __mask,
4194 (__v16si) __W,
4195 (__mmask16) __U);
4196 }
4197
4198 extern __inline __m512i
4199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4200 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4201 {
4202 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4203 __mask,
4204 (__v16si)
4205 _mm512_setzero_si512 (),
4206 (__mmask16) __U);
4207 }
4208
4209 extern __inline __m512i
4210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4211 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4212 {
4213 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4214 (__v8di) __B, __imm,
4215 (__v8di)
4216 _mm512_undefined_epi32 (),
4217 (__mmask8) -1);
4218 }
4219
4220 extern __inline __m512i
4221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4222 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4223 __m512i __B, const int __imm)
4224 {
4225 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4226 (__v8di) __B, __imm,
4227 (__v8di) __W,
4228 (__mmask8) __U);
4229 }
4230
4231 extern __inline __m512i
4232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4233 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4234 const int __imm)
4235 {
4236 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4237 (__v8di) __B, __imm,
4238 (__v8di)
4239 _mm512_setzero_si512 (),
4240 (__mmask8) __U);
4241 }
4242
4243 extern __inline __m512i
4244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4245 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4246 {
4247 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4248 (__v16si) __B,
4249 __imm,
4250 (__v16si)
4251 _mm512_undefined_epi32 (),
4252 (__mmask16) -1);
4253 }
4254
4255 extern __inline __m512i
4256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4257 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4258 __m512i __B, const int __imm)
4259 {
4260 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4261 (__v16si) __B,
4262 __imm,
4263 (__v16si) __W,
4264 (__mmask16) __U);
4265 }
4266
4267 extern __inline __m512i
4268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4270 const int __imm)
4271 {
4272 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4273 (__v16si) __B,
4274 __imm,
4275 (__v16si)
4276 _mm512_setzero_si512 (),
4277 (__mmask16) __U);
4278 }
4279
4280 extern __inline __m512d
4281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4282 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4283 {
4284 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4285 (__v8df) __B, __imm,
4286 (__v8df)
4287 _mm512_undefined_pd (),
4288 (__mmask8) -1);
4289 }
4290
4291 extern __inline __m512d
4292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4293 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4294 __m512d __B, const int __imm)
4295 {
4296 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4297 (__v8df) __B, __imm,
4298 (__v8df) __W,
4299 (__mmask8) __U);
4300 }
4301
4302 extern __inline __m512d
4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4305 const int __imm)
4306 {
4307 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4308 (__v8df) __B, __imm,
4309 (__v8df)
4310 _mm512_setzero_pd (),
4311 (__mmask8) __U);
4312 }
4313
4314 extern __inline __m512
4315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4317 {
4318 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4319 (__v16sf) __B, __imm,
4320 (__v16sf)
4321 _mm512_undefined_ps (),
4322 (__mmask16) -1);
4323 }
4324
4325 extern __inline __m512
4326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4327 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4328 __m512 __B, const int __imm)
4329 {
4330 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4331 (__v16sf) __B, __imm,
4332 (__v16sf) __W,
4333 (__mmask16) __U);
4334 }
4335
4336 extern __inline __m512
4337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4339 const int __imm)
4340 {
4341 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4342 (__v16sf) __B, __imm,
4343 (__v16sf)
4344 _mm512_setzero_ps (),
4345 (__mmask16) __U);
4346 }
4347
4348 #else
4349 #define _mm512_shuffle_epi32(X, C) \
4350 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4351 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
4352 (__mmask16)-1))
4353
4354 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
4355 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4356 (__v16si)(__m512i)(W),\
4357 (__mmask16)(U)))
4358
4359 #define _mm512_maskz_shuffle_epi32(U, X, C) \
4360 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4361 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4362 (__mmask16)(U)))
4363
4364 #define _mm512_shuffle_i64x2(X, Y, C) \
4365 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4366 (__v8di)(__m512i)(Y), (int)(C),\
4367 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
4368 (__mmask8)-1))
4369
4370 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
4371 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4372 (__v8di)(__m512i)(Y), (int)(C),\
4373 (__v8di)(__m512i)(W),\
4374 (__mmask8)(U)))
4375
4376 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
4377 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4378 (__v8di)(__m512i)(Y), (int)(C),\
4379 (__v8di)(__m512i)_mm512_setzero_si512 (),\
4380 (__mmask8)(U)))
4381
4382 #define _mm512_shuffle_i32x4(X, Y, C) \
4383 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4384 (__v16si)(__m512i)(Y), (int)(C),\
4385 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
4386 (__mmask16)-1))
4387
4388 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
4389 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4390 (__v16si)(__m512i)(Y), (int)(C),\
4391 (__v16si)(__m512i)(W),\
4392 (__mmask16)(U)))
4393
4394 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
4395 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4396 (__v16si)(__m512i)(Y), (int)(C),\
4397 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4398 (__mmask16)(U)))
4399
4400 #define _mm512_shuffle_f64x2(X, Y, C) \
4401 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4402 (__v8df)(__m512d)(Y), (int)(C),\
4403 (__v8df)(__m512d)_mm512_undefined_pd(),\
4404 (__mmask8)-1))
4405
4406 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
4407 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4408 (__v8df)(__m512d)(Y), (int)(C),\
4409 (__v8df)(__m512d)(W),\
4410 (__mmask8)(U)))
4411
4412 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
4413 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4414 (__v8df)(__m512d)(Y), (int)(C),\
4415 (__v8df)(__m512d)_mm512_setzero_pd(),\
4416 (__mmask8)(U)))
4417
4418 #define _mm512_shuffle_f32x4(X, Y, C) \
4419 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4420 (__v16sf)(__m512)(Y), (int)(C),\
4421 (__v16sf)(__m512)_mm512_undefined_ps(),\
4422 (__mmask16)-1))
4423
4424 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
4425 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4426 (__v16sf)(__m512)(Y), (int)(C),\
4427 (__v16sf)(__m512)(W),\
4428 (__mmask16)(U)))
4429
4430 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
4431 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4432 (__v16sf)(__m512)(Y), (int)(C),\
4433 (__v16sf)(__m512)_mm512_setzero_ps(),\
4434 (__mmask16)(U)))
4435 #endif
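/* The _mm512_shuffle_{i32x4,i64x2,f32x4,f64x2} intrinsics above permute
   whole 128-bit blocks rather than individual elements: each two-bit field
   of the immediate selects one of the four blocks of a source, the low half
   of the result being taken from the first operand and the high half from
   the second.  A rough sketch, assuming x and y are caller-supplied __m512i
   values:

     __m512i low_halves = _mm512_shuffle_i32x4 (x, y, 0x44);

   With 0x44 (two-bit fields 01 00 01 00) the result should hold blocks 0
   and 1 of x followed by blocks 0 and 1 of y.  */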
4436
4437 extern __inline __m512i
4438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4440 {
4441 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4442 (__v16si) __B,
4443 (__v16si)
4444 _mm512_undefined_epi32 (),
4445 (__mmask16) -1);
4446 }
4447
4448 extern __inline __m512i
4449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4450 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4451 {
4452 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4453 (__v16si) __B,
4454 (__v16si) __W,
4455 (__mmask16) __U);
4456 }
4457
4458 extern __inline __m512i
4459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4461 {
4462 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4463 (__v16si) __B,
4464 (__v16si)
4465 _mm512_setzero_si512 (),
4466 (__mmask16) __U);
4467 }
4468
4469 extern __inline __m512i
4470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4472 {
4473 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4474 (__v16si) __B,
4475 (__v16si)
4476 _mm512_undefined_epi32 (),
4477 (__mmask16) -1);
4478 }
4479
4480 extern __inline __m512i
4481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4482 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4483 {
4484 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4485 (__v16si) __B,
4486 (__v16si) __W,
4487 (__mmask16) __U);
4488 }
4489
4490 extern __inline __m512i
4491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4493 {
4494 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4495 (__v16si) __B,
4496 (__v16si)
4497 _mm512_setzero_si512 (),
4498 (__mmask16) __U);
4499 }
4500
4501 extern __inline __m512i
4502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4504 {
4505 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4506 (__v8di) __B,
4507 (__v8di)
4508 _mm512_undefined_epi32 (),
4509 (__mmask8) -1);
4510 }
4511
4512 extern __inline __m512i
4513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4515 {
4516 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4517 (__v8di) __B,
4518 (__v8di) __W,
4519 (__mmask8) __U);
4520 }
4521
4522 extern __inline __m512i
4523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4524 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4525 {
4526 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4527 (__v8di) __B,
4528 (__v8di)
4529 _mm512_setzero_si512 (),
4530 (__mmask8) __U);
4531 }
4532
4533 extern __inline __m512i
4534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4535 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4536 {
4537 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4538 (__v8di) __B,
4539 (__v8di)
4540 _mm512_undefined_epi32 (),
4541 (__mmask8) -1);
4542 }
4543
4544 extern __inline __m512i
4545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4546 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4547 {
4548 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4549 (__v8di) __B,
4550 (__v8di) __W,
4551 (__mmask8) __U);
4552 }
4553
4554 extern __inline __m512i
4555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4556 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4557 {
4558 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4559 (__v8di) __B,
4560 (__v8di)
4561 _mm512_setzero_si512 (),
4562 (__mmask8) __U);
4563 }
4564
4565 #ifdef __OPTIMIZE__
4566 extern __inline __m256i
4567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4568 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4569 {
4570 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4571 (__v8si)
4572 _mm256_undefined_si256 (),
4573 (__mmask8) -1, __R);
4574 }
4575
4576 extern __inline __m256i
4577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4578 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4579 const int __R)
4580 {
4581 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4582 (__v8si) __W,
4583 (__mmask8) __U, __R);
4584 }
4585
4586 extern __inline __m256i
4587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4588 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4589 {
4590 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4591 (__v8si)
4592 _mm256_setzero_si256 (),
4593 (__mmask8) __U, __R);
4594 }
4595
4596 extern __inline __m256i
4597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4599 {
4600 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4601 (__v8si)
4602 _mm256_undefined_si256 (),
4603 (__mmask8) -1, __R);
4604 }
4605
4606 extern __inline __m256i
4607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4608 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4609 const int __R)
4610 {
4611 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4612 (__v8si) __W,
4613 (__mmask8) __U, __R);
4614 }
4615
4616 extern __inline __m256i
4617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4618 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4619 {
4620 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4621 (__v8si)
4622 _mm256_setzero_si256 (),
4623 (__mmask8) __U, __R);
4624 }
4625 #else
4626 #define _mm512_cvtt_roundpd_epi32(A, B) \
4627 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4628
4629 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
4630 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
4631
4632 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
4633 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4634
4635 #define _mm512_cvtt_roundpd_epu32(A, B) \
4636 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4637
4638 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
4639 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
4640
4641 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
4642 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4643 #endif
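/* The truncating conversions above always round toward zero, so the
   rounding argument is only used to request exception suppression; callers
   normally pass either _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC.
   A rough sketch, assuming v is a caller-supplied __m512d value:

     __m256i t = _mm512_cvtt_roundpd_epi32 (v, _MM_FROUND_NO_EXC);  */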
4644
4645 #ifdef __OPTIMIZE__
4646 extern __inline __m256i
4647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4648 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4649 {
4650 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4651 (__v8si)
4652 _mm256_undefined_si256 (),
4653 (__mmask8) -1, __R);
4654 }
4655
4656 extern __inline __m256i
4657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4658 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4659 const int __R)
4660 {
4661 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4662 (__v8si) __W,
4663 (__mmask8) __U, __R);
4664 }
4665
4666 extern __inline __m256i
4667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4668 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4669 {
4670 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4671 (__v8si)
4672 _mm256_setzero_si256 (),
4673 (__mmask8) __U, __R);
4674 }
4675
4676 extern __inline __m256i
4677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4678 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4679 {
4680 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4681 (__v8si)
4682 _mm256_undefined_si256 (),
4683 (__mmask8) -1, __R);
4684 }
4685
4686 extern __inline __m256i
4687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4688 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4689 const int __R)
4690 {
4691 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4692 (__v8si) __W,
4693 (__mmask8) __U, __R);
4694 }
4695
4696 extern __inline __m256i
4697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4699 {
4700 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4701 (__v8si)
4702 _mm256_setzero_si256 (),
4703 (__mmask8) __U, __R);
4704 }
4705 #else
4706 #define _mm512_cvt_roundpd_epi32(A, B) \
4707 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4708
4709 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
4710 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
4711
4712 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
4713 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4714
4715 #define _mm512_cvt_roundpd_epu32(A, B) \
4716 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4717
4718 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
4719 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
4720
4721 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
4722 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4723 #endif
4724
4725 #ifdef __OPTIMIZE__
4726 extern __inline __m512i
4727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4728 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4729 {
4730 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4731 (__v16si)
4732 _mm512_undefined_epi32 (),
4733 (__mmask16) -1, __R);
4734 }
4735
4736 extern __inline __m512i
4737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4738 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4739 const int __R)
4740 {
4741 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4742 (__v16si) __W,
4743 (__mmask16) __U, __R);
4744 }
4745
4746 extern __inline __m512i
4747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4748 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4749 {
4750 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4751 (__v16si)
4752 _mm512_setzero_si512 (),
4753 (__mmask16) __U, __R);
4754 }
4755
4756 extern __inline __m512i
4757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4758 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4759 {
4760 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4761 (__v16si)
4762 _mm512_undefined_epi32 (),
4763 (__mmask16) -1, __R);
4764 }
4765
4766 extern __inline __m512i
4767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4768 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4769 const int __R)
4770 {
4771 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4772 (__v16si) __W,
4773 (__mmask16) __U, __R);
4774 }
4775
4776 extern __inline __m512i
4777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4778 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4779 {
4780 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4781 (__v16si)
4782 _mm512_setzero_si512 (),
4783 (__mmask16) __U, __R);
4784 }
4785 #else
4786 #define _mm512_cvtt_roundps_epi32(A, B) \
4787 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4788
4789 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
4790 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
4791
4792 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4793 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4794
4795 #define _mm512_cvtt_roundps_epu32(A, B) \
4796 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4797
4798 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4799 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4800
4801 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4802 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4803 #endif
4804
4805 #ifdef __OPTIMIZE__
4806 extern __inline __m512i
4807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4809 {
4810 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4811 (__v16si)
4812 _mm512_undefined_epi32 (),
4813 (__mmask16) -1, __R);
4814 }
4815
4816 extern __inline __m512i
4817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4818 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4819 const int __R)
4820 {
4821 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4822 (__v16si) __W,
4823 (__mmask16) __U, __R);
4824 }
4825
4826 extern __inline __m512i
4827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4828 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4829 {
4830 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4831 (__v16si)
4832 _mm512_setzero_si512 (),
4833 (__mmask16) __U, __R);
4834 }
4835
4836 extern __inline __m512i
4837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4839 {
4840 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4841 (__v16si)
4842 _mm512_undefined_epi32 (),
4843 (__mmask16) -1, __R);
4844 }
4845
4846 extern __inline __m512i
4847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4848 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4849 const int __R)
4850 {
4851 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4852 (__v16si) __W,
4853 (__mmask16) __U, __R);
4854 }
4855
4856 extern __inline __m512i
4857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4858 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4859 {
4860 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4861 (__v16si)
4862 _mm512_setzero_si512 (),
4863 (__mmask16) __U, __R);
4864 }
4865 #else
4866 #define _mm512_cvt_roundps_epi32(A, B) \
4867 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4868
4869 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
4870 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
4871
4872 #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
4873 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4874
4875 #define _mm512_cvt_roundps_epu32(A, B) \
4876 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4877
4878 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
4879 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
4880
4881 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
4882 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4883 #endif
4884
4885 extern __inline __m128d
4886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4887 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4888 {
4889 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4890 }
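/* _mm_cvtu32_sd takes no rounding argument because every 32-bit unsigned
   value is exactly representable as a double; the 64-bit conversions below
   can be inexact, which is why they accept an explicit rounding mode.
   A rough sketch, assuming v is a caller-supplied __m128d value and n an
   unsigned int:

     __m128d r = _mm_cvtu32_sd (v, n);  */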
4891
4892 #ifdef __x86_64__
4893 #ifdef __OPTIMIZE__
4894 extern __inline __m128d
4895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4896 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4897 {
4898 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4899 }
4900
4901 extern __inline __m128d
4902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4904 {
4905 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4906 }
4907
4908 extern __inline __m128d
4909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4910 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4911 {
4912 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4913 }
4914 #else
4915 #define _mm_cvt_roundu64_sd(A, B, C) \
4916 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
4917
4918 #define _mm_cvt_roundi64_sd(A, B, C) \
4919 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4920
4921 #define _mm_cvt_roundsi64_sd(A, B, C) \
4922 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4923 #endif
4924
4925 #endif
4926
4927 #ifdef __OPTIMIZE__
4928 extern __inline __m128
4929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4931 {
4932 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4933 }
4934
4935 extern __inline __m128
4936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4937 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4938 {
4939 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4940 }
4941
4942 extern __inline __m128
4943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4944 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4945 {
4946 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4947 }
4948 #else
4949 #define _mm_cvt_roundu32_ss(A, B, C) \
4950 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
4951
4952 #define _mm_cvt_roundi32_ss(A, B, C) \
4953 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4954
4955 #define _mm_cvt_roundsi32_ss(A, B, C) \
4956 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4957 #endif
4958
4959 #ifdef __x86_64__
4960 #ifdef __OPTIMIZE__
4961 extern __inline __m128
4962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4963 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4964 {
4965 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4966 }
4967
4968 extern __inline __m128
4969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4970 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4971 {
4972 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4973 }
4974
4975 extern __inline __m128
4976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4978 {
4979 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4980 }
4981 #else
4982 #define _mm_cvt_roundu64_ss(A, B, C) \
4983 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
4984
4985 #define _mm_cvt_roundi64_ss(A, B, C) \
4986 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4987
4988 #define _mm_cvt_roundsi64_ss(A, B, C) \
4989 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4990 #endif
4991
4992 #endif
4993
4994 extern __inline __m128i
4995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996 _mm512_cvtepi32_epi8 (__m512i __A)
4997 {
4998 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4999 (__v16qi)
5000 _mm_undefined_si128 (),
5001 (__mmask16) -1);
5002 }
5003
5004 extern __inline void
5005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5006 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5007 {
5008 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5009 }
5010
5011 extern __inline __m128i
5012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5013 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5014 {
5015 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5016 (__v16qi) __O, __M);
5017 }
5018
5019 extern __inline __m128i
5020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5021 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5022 {
5023 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5024 (__v16qi)
5025 _mm_setzero_si128 (),
5026 __M);
5027 }
5028
5029 extern __inline __m128i
5030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5031 _mm512_cvtsepi32_epi8 (__m512i __A)
5032 {
5033 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5034 (__v16qi)
5035 _mm_undefined_si128 (),
5036 (__mmask16) -1);
5037 }
5038
5039 extern __inline void
5040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5041 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5042 {
5043 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5044 }
5045
5046 extern __inline __m128i
5047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5048 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5049 {
5050 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5051 (__v16qi) __O, __M);
5052 }
5053
5054 extern __inline __m128i
5055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5057 {
5058 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5059 (__v16qi)
5060 _mm_setzero_si128 (),
5061 __M);
5062 }
5063
5064 extern __inline __m128i
5065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5066 _mm512_cvtusepi32_epi8 (__m512i __A)
5067 {
5068 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5069 (__v16qi)
5070 _mm_undefined_si128 (),
5071 (__mmask16) -1);
5072 }
5073
5074 extern __inline void
5075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5076 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5077 {
5078 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5079 }
5080
5081 extern __inline __m128i
5082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5084 {
5085 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5086 (__v16qi) __O,
5087 __M);
5088 }
5089
5090 extern __inline __m128i
5091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5093 {
5094 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5095 (__v16qi)
5096 _mm_setzero_si128 (),
5097 __M);
5098 }
5099
5100 extern __inline __m256i
5101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5102 _mm512_cvtepi32_epi16 (__m512i __A)
5103 {
5104 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5105 (__v16hi)
5106 _mm256_undefined_si256 (),
5107 (__mmask16) -1);
5108 }
5109
5110 extern __inline void
5111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5112 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5113 {
5114 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5115 }
5116
5117 extern __inline __m256i
5118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5119 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5120 {
5121 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5122 (__v16hi) __O, __M);
5123 }
5124
5125 extern __inline __m256i
5126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5127 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5128 {
5129 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5130 (__v16hi)
5131 _mm256_setzero_si256 (),
5132 __M);
5133 }
5134
5135 extern __inline __m256i
5136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5137 _mm512_cvtsepi32_epi16 (__m512i __A)
5138 {
5139 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5140 (__v16hi)
5141 _mm256_undefined_si256 (),
5142 (__mmask16) -1);
5143 }
5144
5145 extern __inline void
5146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5147 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5148 {
5149 __builtin_ia32_pmovsdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5150 }
5151
5152 extern __inline __m256i
5153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5154 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5155 {
5156 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5157 (__v16hi) __O, __M);
5158 }
5159
5160 extern __inline __m256i
5161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5162 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5163 {
5164 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5165 (__v16hi)
5166 _mm256_setzero_si256 (),
5167 __M);
5168 }
5169
5170 extern __inline __m256i
5171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5172 _mm512_cvtusepi32_epi16 (__m512i __A)
5173 {
5174 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5175 (__v16hi)
5176 _mm256_undefined_si256 (),
5177 (__mmask16) -1);
5178 }
5179
5180 extern __inline void
5181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5182 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5183 {
5184 __builtin_ia32_pmovusdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5185 }
5186
5187 extern __inline __m256i
5188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5189 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5190 {
5191 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5192 (__v16hi) __O,
5193 __M);
5194 }
5195
5196 extern __inline __m256i
5197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5198 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5199 {
5200 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5201 (__v16hi)
5202 _mm256_setzero_si256 (),
5203 __M);
5204 }
5205
5206 extern __inline __m256i
5207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5208 _mm512_cvtepi64_epi32 (__m512i __A)
5209 {
5210 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5211 (__v8si)
5212 _mm256_undefined_si256 (),
5213 (__mmask8) -1);
5214 }
5215
5216 extern __inline void
5217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5218 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5219 {
5220 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5221 }
5222
5223 extern __inline __m256i
5224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5226 {
5227 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5228 (__v8si) __O, __M);
5229 }
5230
5231 extern __inline __m256i
5232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5233 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5234 {
5235 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5236 (__v8si)
5237 _mm256_setzero_si256 (),
5238 __M);
5239 }
5240
5241 extern __inline __m256i
5242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5243 _mm512_cvtsepi64_epi32 (__m512i __A)
5244 {
5245 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5246 (__v8si)
5247 _mm256_undefined_si256 (),
5248 (__mmask8) -1);
5249 }
5250
5251 extern __inline void
5252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5253 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5254 {
5255 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5256 }
5257
5258 extern __inline __m256i
5259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5260 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5261 {
5262 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5263 (__v8si) __O, __M);
5264 }
5265
5266 extern __inline __m256i
5267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5268 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5269 {
5270 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5271 (__v8si)
5272 _mm256_setzero_si256 (),
5273 __M);
5274 }
5275
5276 extern __inline __m256i
5277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5278 _mm512_cvtusepi64_epi32 (__m512i __A)
5279 {
5280 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5281 (__v8si)
5282 _mm256_undefined_si256 (),
5283 (__mmask8) -1);
5284 }
5285
5286 extern __inline void
5287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5288 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5289 {
5290 __builtin_ia32_pmovusqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5291 }
5292
5293 extern __inline __m256i
5294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5295 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5296 {
5297 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5298 (__v8si) __O, __M);
5299 }
5300
5301 extern __inline __m256i
5302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5303 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5304 {
5305 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5306 (__v8si)
5307 _mm256_setzero_si256 (),
5308 __M);
5309 }
5310
5311 extern __inline __m128i
5312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313 _mm512_cvtepi64_epi16 (__m512i __A)
5314 {
5315 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5316 (__v8hi)
5317 _mm_undefined_si128 (),
5318 (__mmask8) -1);
5319 }
5320
5321 extern __inline void
5322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5323 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5324 {
5325 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5326 }
5327
5328 extern __inline __m128i
5329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5330 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5331 {
5332 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5333 (__v8hi) __O, __M);
5334 }
5335
5336 extern __inline __m128i
5337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5339 {
5340 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5341 (__v8hi)
5342 _mm_setzero_si128 (),
5343 __M);
5344 }
5345
5346 extern __inline __m128i
5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348 _mm512_cvtsepi64_epi16 (__m512i __A)
5349 {
5350 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5351 (__v8hi)
5352 _mm_undefined_si128 (),
5353 (__mmask8) -1);
5354 }
5355
5356 extern __inline void
5357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5359 {
5360 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5361 }
5362
5363 extern __inline __m128i
5364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5365 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5366 {
5367 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5368 (__v8hi) __O, __M);
5369 }
5370
5371 extern __inline __m128i
5372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5373 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5374 {
5375 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5376 (__v8hi)
5377 _mm_setzero_si128 (),
5378 __M);
5379 }
5380
5381 extern __inline __m128i
5382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5383 _mm512_cvtusepi64_epi16 (__m512i __A)
5384 {
5385 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5386 (__v8hi)
5387 _mm_undefined_si128 (),
5388 (__mmask8) -1);
5389 }
5390
5391 extern __inline void
5392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5393 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5394 {
5395 __builtin_ia32_pmovusqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5396 }
5397
5398 extern __inline __m128i
5399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5400 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5401 {
5402 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5403 (__v8hi) __O, __M);
5404 }
5405
5406 extern __inline __m128i
5407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5408 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5409 {
5410 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5411 (__v8hi)
5412 _mm_setzero_si128 (),
5413 __M);
5414 }
5415
5416 extern __inline __m128i
5417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5418 _mm512_cvtepi64_epi8 (__m512i __A)
5419 {
5420 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5421 (__v16qi)
5422 _mm_undefined_si128 (),
5423 (__mmask8) -1);
5424 }
5425
5426 extern __inline void
5427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5428 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5429 {
5430 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5431 }
5432
5433 extern __inline __m128i
5434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5435 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5436 {
5437 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5438 (__v16qi) __O, __M);
5439 }
5440
5441 extern __inline __m128i
5442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5443 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5444 {
5445 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5446 (__v16qi)
5447 _mm_setzero_si128 (),
5448 __M);
5449 }
5450
5451 extern __inline __m128i
5452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5453 _mm512_cvtsepi64_epi8 (__m512i __A)
5454 {
5455 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5456 (__v16qi)
5457 _mm_undefined_si128 (),
5458 (__mmask8) -1);
5459 }
5460
5461 extern __inline void
5462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5463 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5464 {
5465 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5466 }
5467
5468 extern __inline __m128i
5469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5470 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5471 {
5472 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5473 (__v16qi) __O, __M);
5474 }
5475
5476 extern __inline __m128i
5477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5478 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5479 {
5480 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5481 (__v16qi)
5482 _mm_setzero_si128 (),
5483 __M);
5484 }
5485
5486 extern __inline __m128i
5487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5488 _mm512_cvtusepi64_epi8 (__m512i __A)
5489 {
5490 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5491 (__v16qi)
5492 _mm_undefined_si128 (),
5493 (__mmask8) -1);
5494 }
5495
5496 extern __inline void
5497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5498 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5499 {
5500 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5501 }
5502
5503 extern __inline __m128i
5504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5505 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5506 {
5507 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5508 (__v16qi) __O,
5509 __M);
5510 }
5511
5512 extern __inline __m128i
5513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5514 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5515 {
5516 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5517 (__v16qi)
5518 _mm_setzero_si128 (),
5519 __M);
5520 }
5521
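/* Widening conversions of packed 32-bit integers to double precision;
   cvtepi32 treats the elements as signed, cvtepu32 as unsigned.  */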
5522 extern __inline __m512d
5523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5524 _mm512_cvtepi32_pd (__m256i __A)
5525 {
5526 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5527 (__v8df)
5528 _mm512_undefined_pd (),
5529 (__mmask8) -1);
5530 }
5531
5532 extern __inline __m512d
5533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5535 {
5536 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5537 (__v8df) __W,
5538 (__mmask8) __U);
5539 }
5540
5541 extern __inline __m512d
5542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5544 {
5545 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5546 (__v8df)
5547 _mm512_setzero_pd (),
5548 (__mmask8) __U);
5549 }
5550
5551 extern __inline __m512d
5552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5553 _mm512_cvtepu32_pd (__m256i __A)
5554 {
5555 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5556 (__v8df)
5557 _mm512_undefined_pd (),
5558 (__mmask8) -1);
5559 }
5560
5561 extern __inline __m512d
5562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5563 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5564 {
5565 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5566 (__v8df) __W,
5567 (__mmask8) __U);
5568 }
5569
5570 extern __inline __m512d
5571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5572 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5573 {
5574 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5575 (__v8df)
5576 _mm512_setzero_pd (),
5577 (__mmask8) __U);
5578 }
5579
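/* Packed 32-bit integer to single-precision conversions with an explicit
   rounding mode.  Illustrative use (not part of the original header):
     __m512 f = _mm512_cvt_roundepi32_ps (v, _MM_FROUND_CUR_DIRECTION);  */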
5580 #ifdef __OPTIMIZE__
5581 extern __inline __m512
5582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5583 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5584 {
5585 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5586 (__v16sf)
5587 _mm512_undefined_ps (),
5588 (__mmask16) -1, __R);
5589 }
5590
5591 extern __inline __m512
5592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5593 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5594 const int __R)
5595 {
5596 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5597 (__v16sf) __W,
5598 (__mmask16) __U, __R);
5599 }
5600
5601 extern __inline __m512
5602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5604 {
5605 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5606 (__v16sf)
5607 _mm512_setzero_ps (),
5608 (__mmask16) __U, __R);
5609 }
5610
5611 extern __inline __m512
5612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5613 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5614 {
5615 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5616 (__v16sf)
5617 _mm512_undefined_ps (),
5618 (__mmask16) -1, __R);
5619 }
5620
5621 extern __inline __m512
5622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5623 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5624 const int __R)
5625 {
5626 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5627 (__v16sf) __W,
5628 (__mmask16) __U, __R);
5629 }
5630
5631 extern __inline __m512
5632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5633 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5634 {
5635 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5636 (__v16sf)
5637 _mm512_setzero_ps (),
5638 (__mmask16) __U, __R);
5639 }
5640
5641 #else
5642 #define _mm512_cvt_roundepi32_ps(A, B) \
5643 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5644
5645 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
5646 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)
5647
5648 #define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
5649 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5650
5651 #define _mm512_cvt_roundepu32_ps(A, B) \
5652 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5653
5654 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
5655 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)
5656
5657 #define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
5658 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5659 #endif
5660
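/* Extract the 128-bit or 256-bit lane selected by the immediate from a
   512-bit vector.  */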
5661 #ifdef __OPTIMIZE__
5662 extern __inline __m256d
5663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5664 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5665 {
5666 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5667 __imm,
5668 (__v4df)
5669 _mm256_undefined_pd (),
5670 (__mmask8) -1);
5671 }
5672
5673 extern __inline __m256d
5674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5675 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5676 const int __imm)
5677 {
5678 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5679 __imm,
5680 (__v4df) __W,
5681 (__mmask8) __U);
5682 }
5683
5684 extern __inline __m256d
5685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5686 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5687 {
5688 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5689 __imm,
5690 (__v4df)
5691 _mm256_setzero_pd (),
5692 (__mmask8) __U);
5693 }
5694
5695 extern __inline __m128
5696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5697 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5698 {
5699 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5700 __imm,
5701 (__v4sf)
5702 _mm_undefined_ps (),
5703 (__mmask8) -1);
5704 }
5705
5706 extern __inline __m128
5707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5708 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5709 const int __imm)
5710 {
5711 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5712 __imm,
5713 (__v4sf) __W,
5714 (__mmask8) __U);
5715 }
5716
5717 extern __inline __m128
5718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5719 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5720 {
5721 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5722 __imm,
5723 (__v4sf)
5724 _mm_setzero_ps (),
5725 (__mmask8) __U);
5726 }
5727
5728 extern __inline __m256i
5729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5730 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5731 {
5732 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5733 __imm,
5734 (__v4di)
5735 _mm256_undefined_si256 (),
5736 (__mmask8) -1);
5737 }
5738
5739 extern __inline __m256i
5740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5741 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5742 const int __imm)
5743 {
5744 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5745 __imm,
5746 (__v4di) __W,
5747 (__mmask8) __U);
5748 }
5749
5750 extern __inline __m256i
5751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5752 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5753 {
5754 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5755 __imm,
5756 (__v4di)
5757 _mm256_setzero_si256 (),
5758 (__mmask8) __U);
5759 }
5760
5761 extern __inline __m128i
5762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5764 {
5765 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5766 __imm,
5767 (__v4si)
5768 _mm_undefined_si128 (),
5769 (__mmask8) -1);
5770 }
5771
5772 extern __inline __m128i
5773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5774 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5775 const int __imm)
5776 {
5777 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5778 __imm,
5779 (__v4si) __W,
5780 (__mmask8) __U);
5781 }
5782
5783 extern __inline __m128i
5784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5785 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5786 {
5787 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5788 __imm,
5789 (__v4si)
5790 _mm_setzero_si128 (),
5791 (__mmask8) __U);
5792 }
5793 #else
5794
5795 #define _mm512_extractf64x4_pd(X, C) \
5796 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5797 (int) (C),\
5798 (__v4df)(__m256d)_mm256_undefined_pd(),\
5799 (__mmask8)-1))
5800
5801 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5802 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5803 (int) (C),\
5804 (__v4df)(__m256d)(W),\
5805 (__mmask8)(U)))
5806
5807 #define _mm512_maskz_extractf64x4_pd(U, X, C) \
5808 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5809 (int) (C),\
5810 (__v4df)(__m256d)_mm256_setzero_pd(),\
5811 (__mmask8)(U)))
5812
5813 #define _mm512_extractf32x4_ps(X, C) \
5814 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5815 (int) (C),\
5816 (__v4sf)(__m128)_mm_undefined_ps(),\
5817 (__mmask8)-1))
5818
5819 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
5820 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5821 (int) (C),\
5822 (__v4sf)(__m128)(W),\
5823 (__mmask8)(U)))
5824
5825 #define _mm512_maskz_extractf32x4_ps(U, X, C) \
5826 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5827 (int) (C),\
5828 (__v4sf)(__m128)_mm_setzero_ps(),\
5829 (__mmask8)(U)))
5830
5831 #define _mm512_extracti64x4_epi64(X, C) \
5832 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5833 (int) (C),\
5834 (__v4di)(__m256i)_mm256_undefined_si256 (),\
5835 (__mmask8)-1))
5836
5837 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
5838 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5839 (int) (C),\
5840 (__v4di)(__m256i)(W),\
5841 (__mmask8)(U)))
5842
5843 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
5844 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5845 (int) (C),\
5846 (__v4di)(__m256i)_mm256_setzero_si256 (),\
5847 (__mmask8)(U)))
5848
5849 #define _mm512_extracti32x4_epi32(X, C) \
5850 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5851 (int) (C),\
5852 (__v4si)(__m128i)_mm_undefined_si128 (),\
5853 (__mmask8)-1))
5854
5855 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
5856 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5857 (int) (C),\
5858 (__v4si)(__m128i)(W),\
5859 (__mmask8)(U)))
5860
5861 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
5862 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5863 (int) (C),\
5864 (__v4si)(__m128i)_mm_setzero_si128 (),\
5865 (__mmask8)(U)))
5866 #endif
5867
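/* Insert a 128-bit or 256-bit lane, at the position selected by the
   immediate, into a 512-bit vector.  */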
5868 #ifdef __OPTIMIZE__
5869 extern __inline __m512i
5870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5871 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5872 {
5873 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5874 (__v4si) __B,
5875 __imm,
5876 (__v16si) __A, -1);
5877 }
5878
5879 extern __inline __m512
5880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5881 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5882 {
5883 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5884 (__v4sf) __B,
5885 __imm,
5886 (__v16sf) __A, -1);
5887 }
5888
5889 extern __inline __m512i
5890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5891 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5892 {
5893 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5894 (__v4di) __B,
5895 __imm,
5896 (__v8di)
5897 _mm512_undefined_epi32 (),
5898 (__mmask8) -1);
5899 }
5900
5901 extern __inline __m512i
5902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5903 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5904 __m256i __B, const int __imm)
5905 {
5906 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5907 (__v4di) __B,
5908 __imm,
5909 (__v8di) __W,
5910 (__mmask8) __U);
5911 }
5912
5913 extern __inline __m512i
5914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5915 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5916 const int __imm)
5917 {
5918 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5919 (__v4di) __B,
5920 __imm,
5921 (__v8di)
5922 _mm512_setzero_si512 (),
5923 (__mmask8) __U);
5924 }
5925
5926 extern __inline __m512d
5927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5928 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5929 {
5930 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5931 (__v4df) __B,
5932 __imm,
5933 (__v8df)
5934 _mm512_undefined_pd (),
5935 (__mmask8) -1);
5936 }
5937
5938 extern __inline __m512d
5939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5941 __m256d __B, const int __imm)
5942 {
5943 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5944 (__v4df) __B,
5945 __imm,
5946 (__v8df) __W,
5947 (__mmask8) __U);
5948 }
5949
5950 extern __inline __m512d
5951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5952 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5953 const int __imm)
5954 {
5955 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5956 (__v4df) __B,
5957 __imm,
5958 (__v8df)
5959 _mm512_setzero_pd (),
5960 (__mmask8) __U);
5961 }
5962 #else
5963 #define _mm512_insertf32x4(X, Y, C) \
5964 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
5965 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
5966
5967 #define _mm512_inserti32x4(X, Y, C) \
5968 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
5969 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
5970
5971 #define _mm512_insertf64x4(X, Y, C) \
5972 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5973 (__v4df)(__m256d) (Y), (int) (C), \
5974 (__v8df)(__m512d)_mm512_undefined_pd(), \
5975 (__mmask8)-1))
5976
5977 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
5978 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5979 (__v4df)(__m256d) (Y), (int) (C), \
5980 (__v8df)(__m512d)(W), \
5981 (__mmask8)(U)))
5982
5983 #define _mm512_maskz_insertf64x4(U, X, Y, C) \
5984 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5985 (__v4df)(__m256d) (Y), (int) (C), \
5986 (__v8df)(__m512d)_mm512_setzero_pd(), \
5987 (__mmask8)(U)))
5988
5989 #define _mm512_inserti64x4(X, Y, C) \
5990 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5991 (__v4di)(__m256i) (Y), (int) (C), \
5992 (__v8di)(__m512i)_mm512_undefined_epi32 (), \
5993 (__mmask8)-1))
5994
5995 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
5996 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5997 (__v4di)(__m256i) (Y), (int) (C),\
5998 (__v8di)(__m512i)(W),\
5999 (__mmask8)(U)))
6000
6001 #define _mm512_maskz_inserti64x4(U, X, Y, C) \
6002 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
6003 (__v4di)(__m256i) (Y), (int) (C), \
6004 (__v8di)(__m512i)_mm512_setzero_si512 (), \
6005 (__mmask8)(U)))
6006 #endif
6007
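/* Unaligned loads and stores.  The unmasked forms go through the
   unaligned __m512*_u types; the mask/maskz forms load or store only the
   selected elements.  */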
6008 extern __inline __m512d
6009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6010 _mm512_loadu_pd (void const *__P)
6011 {
6012 return *(__m512d_u *)__P;
6013 }
6014
6015 extern __inline __m512d
6016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6017 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6018 {
6019 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6020 (__v8df) __W,
6021 (__mmask8) __U);
6022 }
6023
6024 extern __inline __m512d
6025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6026 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6027 {
6028 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6029 (__v8df)
6030 _mm512_setzero_pd (),
6031 (__mmask8) __U);
6032 }
6033
6034 extern __inline void
6035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6036 _mm512_storeu_pd (void *__P, __m512d __A)
6037 {
6038 *(__m512d_u *)__P = __A;
6039 }
6040
6041 extern __inline void
6042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6043 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6044 {
6045 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
6046 (__mmask8) __U);
6047 }
6048
6049 extern __inline __m512
6050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6051 _mm512_loadu_ps (void const *__P)
6052 {
6053 return *(__m512_u *)__P;
6054 }
6055
6056 extern __inline __m512
6057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6058 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6059 {
6060 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6061 (__v16sf) __W,
6062 (__mmask16) __U);
6063 }
6064
6065 extern __inline __m512
6066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6068 {
6069 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6070 (__v16sf)
6071 _mm512_setzero_ps (),
6072 (__mmask16) __U);
6073 }
6074
6075 extern __inline void
6076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6077 _mm512_storeu_ps (void *__P, __m512 __A)
6078 {
6079 *(__m512_u *)__P = __A;
6080 }
6081
6082 extern __inline void
6083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6084 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6085 {
6086 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
6087 (__mmask16) __U);
6088 }
6089
6090 extern __inline __m512i
6091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6093 {
6094 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6095 (__v8di) __W,
6096 (__mmask8) __U);
6097 }
6098
6099 extern __inline __m512i
6100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6101 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6102 {
6103 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6104 (__v8di)
6105 _mm512_setzero_si512 (),
6106 (__mmask8) __U);
6107 }
6108
6109 extern __inline void
6110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6111 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6112 {
6113 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
6114 (__mmask8) __U);
6115 }
6116
6117 extern __inline __m512i
6118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6119 _mm512_loadu_si512 (void const *__P)
6120 {
6121 return *(__m512i_u *)__P;
6122 }
6123
6124 extern __inline __m512i
6125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6126 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6127 {
6128 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6129 (__v16si) __W,
6130 (__mmask16) __U);
6131 }
6132
6133 extern __inline __m512i
6134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6135 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6136 {
6137 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6138 (__v16si)
6139 _mm512_setzero_si512 (),
6140 (__mmask16) __U);
6141 }
6142
6143 extern __inline void
6144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6145 _mm512_storeu_si512 (void *__P, __m512i __A)
6146 {
6147 *(__m512i_u *)__P = __A;
6148 }
6149
6150 extern __inline void
6151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6153 {
6154 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
6155 (__mmask16) __U);
6156 }
6157
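/* Permutes of double- and single-precision elements within each 128-bit
   lane, with per-element control taken from a vector (VPERMILPD and
   VPERMILPS).  */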
6158 extern __inline __m512d
6159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6160 _mm512_permutevar_pd (__m512d __A, __m512i __C)
6161 {
6162 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6163 (__v8di) __C,
6164 (__v8df)
6165 _mm512_undefined_pd (),
6166 (__mmask8) -1);
6167 }
6168
6169 extern __inline __m512d
6170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6171 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6172 {
6173 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6174 (__v8di) __C,
6175 (__v8df) __W,
6176 (__mmask8) __U);
6177 }
6178
6179 extern __inline __m512d
6180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6181 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6182 {
6183 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6184 (__v8di) __C,
6185 (__v8df)
6186 _mm512_setzero_pd (),
6187 (__mmask8) __U);
6188 }
6189
6190 extern __inline __m512
6191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6192 _mm512_permutevar_ps (__m512 __A, __m512i __C)
6193 {
6194 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6195 (__v16si) __C,
6196 (__v16sf)
6197 _mm512_undefined_ps (),
6198 (__mmask16) -1);
6199 }
6200
6201 extern __inline __m512
6202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6203 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6204 {
6205 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6206 (__v16si) __C,
6207 (__v16sf) __W,
6208 (__mmask16) __U);
6209 }
6210
6211 extern __inline __m512
6212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6213 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6214 {
6215 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6216 (__v16si) __C,
6217 (__v16sf)
6218 _mm512_setzero_ps (),
6219 (__mmask16) __U);
6220 }
6221
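/* Two-source permutes: each destination element is selected from the
   concatenation of the two data operands by the corresponding index in
   __I.  */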
6222 extern __inline __m512i
6223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6224 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6225 {
6226 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6227 /* idx */ ,
6228 (__v8di) __A,
6229 (__v8di) __B,
6230 (__mmask8) -1);
6231 }
6232
6233 extern __inline __m512i
6234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6235 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6236 __m512i __B)
6237 {
6238 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6239 /* idx */ ,
6240 (__v8di) __A,
6241 (__v8di) __B,
6242 (__mmask8) __U);
6243 }
6244
6245 extern __inline __m512i
6246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6247 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6248 __mmask8 __U, __m512i __B)
6249 {
6250 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6251 (__v8di) __I
6252 /* idx */ ,
6253 (__v8di) __B,
6254 (__mmask8) __U);
6255 }
6256
6257 extern __inline __m512i
6258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6259 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6260 __m512i __I, __m512i __B)
6261 {
6262 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6263 /* idx */ ,
6264 (__v8di) __A,
6265 (__v8di) __B,
6266 (__mmask8) __U);
6267 }
6268
6269 extern __inline __m512i
6270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6271 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6272 {
6273 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6274 /* idx */ ,
6275 (__v16si) __A,
6276 (__v16si) __B,
6277 (__mmask16) -1);
6278 }
6279
6280 extern __inline __m512i
6281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6282 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6283 __m512i __I, __m512i __B)
6284 {
6285 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6286 /* idx */ ,
6287 (__v16si) __A,
6288 (__v16si) __B,
6289 (__mmask16) __U);
6290 }
6291
6292 extern __inline __m512i
6293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6294 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6295 __mmask16 __U, __m512i __B)
6296 {
6297 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6298 (__v16si) __I
6299 /* idx */ ,
6300 (__v16si) __B,
6301 (__mmask16) __U);
6302 }
6303
6304 extern __inline __m512i
6305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6306 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6307 __m512i __I, __m512i __B)
6308 {
6309 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6310 /* idx */ ,
6311 (__v16si) __A,
6312 (__v16si) __B,
6313 (__mmask16) __U);
6314 }
6315
6316 extern __inline __m512d
6317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6318 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6319 {
6320 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6321 /* idx */ ,
6322 (__v8df) __A,
6323 (__v8df) __B,
6324 (__mmask8) -1);
6325 }
6326
6327 extern __inline __m512d
6328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6329 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6330 __m512d __B)
6331 {
6332 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6333 /* idx */ ,
6334 (__v8df) __A,
6335 (__v8df) __B,
6336 (__mmask8) __U);
6337 }
6338
6339 extern __inline __m512d
6340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6341 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6342 __m512d __B)
6343 {
6344 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6345 (__v8di) __I
6346 /* idx */ ,
6347 (__v8df) __B,
6348 (__mmask8) __U);
6349 }
6350
6351 extern __inline __m512d
6352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6353 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6354 __m512d __B)
6355 {
6356 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6357 /* idx */ ,
6358 (__v8df) __A,
6359 (__v8df) __B,
6360 (__mmask8) __U);
6361 }
6362
6363 extern __inline __m512
6364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6365 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6366 {
6367 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6368 /* idx */ ,
6369 (__v16sf) __A,
6370 (__v16sf) __B,
6371 (__mmask16) -1);
6372 }
6373
6374 extern __inline __m512
6375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6376 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6377 {
6378 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6379 /* idx */ ,
6380 (__v16sf) __A,
6381 (__v16sf) __B,
6382 (__mmask16) __U);
6383 }
6384
6385 extern __inline __m512
6386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6387 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6388 __m512 __B)
6389 {
6390 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6391 (__v16si) __I
6392 /* idx */ ,
6393 (__v16sf) __B,
6394 (__mmask16) __U);
6395 }
6396
6397 extern __inline __m512
6398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6399 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6400 __m512 __B)
6401 {
6402 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6403 /* idx */ ,
6404 (__v16sf) __A,
6405 (__v16sf) __B,
6406 (__mmask16) __U);
6407 }
6408
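/* In-lane permutes of double- and single-precision elements with the
   control encoded in an immediate.  */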
6409 #ifdef __OPTIMIZE__
6410 extern __inline __m512d
6411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412 _mm512_permute_pd (__m512d __X, const int __C)
6413 {
6414 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6415 (__v8df)
6416 _mm512_undefined_pd (),
6417 (__mmask8) -1);
6418 }
6419
6420 extern __inline __m512d
6421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6422 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6423 {
6424 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6425 (__v8df) __W,
6426 (__mmask8) __U);
6427 }
6428
6429 extern __inline __m512d
6430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6431 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6432 {
6433 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6434 (__v8df)
6435 _mm512_setzero_pd (),
6436 (__mmask8) __U);
6437 }
6438
6439 extern __inline __m512
6440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6441 _mm512_permute_ps (__m512 __X, const int __C)
6442 {
6443 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6444 (__v16sf)
6445 _mm512_undefined_ps (),
6446 (__mmask16) -1);
6447 }
6448
6449 extern __inline __m512
6450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6451 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6452 {
6453 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6454 (__v16sf) __W,
6455 (__mmask16) __U);
6456 }
6457
6458 extern __inline __m512
6459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6460 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6461 {
6462 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6463 (__v16sf)
6464 _mm512_setzero_ps (),
6465 (__mmask16) __U);
6466 }
6467 #else
6468 #define _mm512_permute_pd(X, C) \
6469 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6470 (__v8df)(__m512d)_mm512_undefined_pd(),\
6471 (__mmask8)(-1)))
6472
6473 #define _mm512_mask_permute_pd(W, U, X, C) \
6474 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6475 (__v8df)(__m512d)(W), \
6476 (__mmask8)(U)))
6477
6478 #define _mm512_maskz_permute_pd(U, X, C) \
6479 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6480 (__v8df)(__m512d)_mm512_setzero_pd(), \
6481 (__mmask8)(U)))
6482
6483 #define _mm512_permute_ps(X, C) \
6484 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6485 (__v16sf)(__m512)_mm512_undefined_ps(),\
6486 (__mmask16)(-1)))
6487
6488 #define _mm512_mask_permute_ps(W, U, X, C) \
6489 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6490 (__v16sf)(__m512)(W), \
6491 (__mmask16)(U)))
6492
6493 #define _mm512_maskz_permute_ps(U, X, C) \
6494 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6495 (__v16sf)(__m512)_mm512_setzero_ps(), \
6496 (__mmask16)(U)))
6497 #endif
6498
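/* Cross-lane permutes of 64-bit elements with the control encoded in an
   immediate.  */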
6499 #ifdef __OPTIMIZE__
6500 extern __inline __m512i
6501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6502 _mm512_permutex_epi64 (__m512i __X, const int __I)
6503 {
6504 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6505 (__v8di)
6506 _mm512_undefined_epi32 (),
6507 (__mmask8) (-1));
6508 }
6509
6510 extern __inline __m512i
6511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6512 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6513 __m512i __X, const int __I)
6514 {
6515 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6516 (__v8di) __W,
6517 (__mmask8) __M);
6518 }
6519
6520 extern __inline __m512i
6521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6522 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6523 {
6524 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6525 (__v8di)
6526 _mm512_setzero_si512 (),
6527 (__mmask8) __M);
6528 }
6529
6530 extern __inline __m512d
6531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6532 _mm512_permutex_pd (__m512d __X, const int __M)
6533 {
6534 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6535 (__v8df)
6536 _mm512_undefined_pd (),
6537 (__mmask8) -1);
6538 }
6539
6540 extern __inline __m512d
6541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6542 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6543 {
6544 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6545 (__v8df) __W,
6546 (__mmask8) __U);
6547 }
6548
6549 extern __inline __m512d
6550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6552 {
6553 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6554 (__v8df)
6555 _mm512_setzero_pd (),
6556 (__mmask8) __U);
6557 }
6558 #else
6559 #define _mm512_permutex_pd(X, M) \
6560 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6561 (__v8df)(__m512d)_mm512_undefined_pd(),\
6562 (__mmask8)-1))
6563
6564 #define _mm512_mask_permutex_pd(W, U, X, M) \
6565 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6566 (__v8df)(__m512d)(W), (__mmask8)(U)))
6567
6568 #define _mm512_maskz_permutex_pd(U, X, M) \
6569 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6570 (__v8df)(__m512d)_mm512_setzero_pd(),\
6571 (__mmask8)(U)))
6572
6573 #define _mm512_permutex_epi64(X, I) \
6574 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6575 (int)(I), \
6576 (__v8di)(__m512i) \
6577 (_mm512_undefined_epi32 ()),\
6578 (__mmask8)(-1)))
6579
6580 #define _mm512_maskz_permutex_epi64(M, X, I) \
6581 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6582 (int)(I), \
6583 (__v8di)(__m512i) \
6584 (_mm512_setzero_si512 ()),\
6585 (__mmask8)(M)))
6586
6587 #define _mm512_mask_permutex_epi64(W, M, X, I) \
6588 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6589 (int)(I), \
6590 (__v8di)(__m512i)(W), \
6591 (__mmask8)(M)))
6592 #endif
6593
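/* Cross-lane permutes with the control taken from a vector of indices
   (the first operand).  */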
6594 extern __inline __m512i
6595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6596 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6597 {
6598 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6599 (__v8di) __X,
6600 (__v8di)
6601 _mm512_setzero_si512 (),
6602 __M);
6603 }
6604
6605 extern __inline __m512i
6606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6607 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6608 {
6609 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6610 (__v8di) __X,
6611 (__v8di)
6612 _mm512_undefined_epi32 (),
6613 (__mmask8) -1);
6614 }
6615
6616 extern __inline __m512i
6617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6618 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6619 __m512i __Y)
6620 {
6621 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6622 (__v8di) __X,
6623 (__v8di) __W,
6624 __M);
6625 }
6626
6627 extern __inline __m512i
6628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6629 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6630 {
6631 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6632 (__v16si) __X,
6633 (__v16si)
6634 _mm512_setzero_si512 (),
6635 __M);
6636 }
6637
6638 extern __inline __m512i
6639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6640 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6641 {
6642 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6643 (__v16si) __X,
6644 (__v16si)
6645 _mm512_undefined_epi32 (),
6646 (__mmask16) -1);
6647 }
6648
6649 extern __inline __m512i
6650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6651 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6652 __m512i __Y)
6653 {
6654 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6655 (__v16si) __X,
6656 (__v16si) __W,
6657 __M);
6658 }
6659
6660 extern __inline __m512d
6661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6662 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6663 {
6664 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6665 (__v8di) __X,
6666 (__v8df)
6667 _mm512_undefined_pd (),
6668 (__mmask8) -1);
6669 }
6670
6671 extern __inline __m512d
6672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6673 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6674 {
6675 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6676 (__v8di) __X,
6677 (__v8df) __W,
6678 (__mmask8) __U);
6679 }
6680
6681 extern __inline __m512d
6682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6683 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6684 {
6685 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6686 (__v8di) __X,
6687 (__v8df)
6688 _mm512_setzero_pd (),
6689 (__mmask8) __U);
6690 }
6691
6692 extern __inline __m512
6693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6694 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6695 {
6696 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6697 (__v16si) __X,
6698 (__v16sf)
6699 _mm512_undefined_ps (),
6700 (__mmask16) -1);
6701 }
6702
6703 extern __inline __m512
6704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6705 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6706 {
6707 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6708 (__v16si) __X,
6709 (__v16sf) __W,
6710 (__mmask16) __U);
6711 }
6712
6713 extern __inline __m512
6714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6715 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6716 {
6717 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6718 (__v16si) __X,
6719 (__v16sf)
6720 _mm512_setzero_ps (),
6721 (__mmask16) __U);
6722 }
6723
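/* Shuffles of two 512-bit sources controlled by an immediate.  */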
6724 #ifdef __OPTIMIZE__
6725 extern __inline __m512
6726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6727 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6728 {
6729 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6730 (__v16sf) __V, __imm,
6731 (__v16sf)
6732 _mm512_undefined_ps (),
6733 (__mmask16) -1);
6734 }
6735
6736 extern __inline __m512
6737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6738 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6739 __m512 __V, const int __imm)
6740 {
6741 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6742 (__v16sf) __V, __imm,
6743 (__v16sf) __W,
6744 (__mmask16) __U);
6745 }
6746
6747 extern __inline __m512
6748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6749 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6750 {
6751 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6752 (__v16sf) __V, __imm,
6753 (__v16sf)
6754 _mm512_setzero_ps (),
6755 (__mmask16) __U);
6756 }
6757
6758 extern __inline __m512d
6759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6760 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6761 {
6762 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6763 (__v8df) __V, __imm,
6764 (__v8df)
6765 _mm512_undefined_pd (),
6766 (__mmask8) -1);
6767 }
6768
6769 extern __inline __m512d
6770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6771 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6772 __m512d __V, const int __imm)
6773 {
6774 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6775 (__v8df) __V, __imm,
6776 (__v8df) __W,
6777 (__mmask8) __U);
6778 }
6779
6780 extern __inline __m512d
6781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6782 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6783 const int __imm)
6784 {
6785 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6786 (__v8df) __V, __imm,
6787 (__v8df)
6788 _mm512_setzero_pd (),
6789 (__mmask8) __U);
6790 }
6791
6792 extern __inline __m512d
6793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6794 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6795 const int __imm, const int __R)
6796 {
6797 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6798 (__v8df) __B,
6799 (__v8di) __C,
6800 __imm,
6801 (__mmask8) -1, __R);
6802 }
6803
6804 extern __inline __m512d
6805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6806 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6807 __m512i __C, const int __imm, const int __R)
6808 {
6809 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6810 (__v8df) __B,
6811 (__v8di) __C,
6812 __imm,
6813 (__mmask8) __U, __R);
6814 }
6815
6816 extern __inline __m512d
6817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6818 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6819 __m512i __C, const int __imm, const int __R)
6820 {
6821 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6822 (__v8df) __B,
6823 (__v8di) __C,
6824 __imm,
6825 (__mmask8) __U, __R);
6826 }
6827
6828 extern __inline __m512
6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6830 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6831 const int __imm, const int __R)
6832 {
6833 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6834 (__v16sf) __B,
6835 (__v16si) __C,
6836 __imm,
6837 (__mmask16) -1, __R);
6838 }
6839
6840 extern __inline __m512
6841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6842 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6843 __m512i __C, const int __imm, const int __R)
6844 {
6845 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6846 (__v16sf) __B,
6847 (__v16si) __C,
6848 __imm,
6849 (__mmask16) __U, __R);
6850 }
6851
6852 extern __inline __m512
6853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6854 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6855 __m512i __C, const int __imm, const int __R)
6856 {
6857 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6858 (__v16sf) __B,
6859 (__v16si) __C,
6860 __imm,
6861 (__mmask16) __U, __R);
6862 }
6863
6864 extern __inline __m128d
6865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6866 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6867 const int __imm, const int __R)
6868 {
6869 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6870 (__v2df) __B,
6871 (__v2di) __C, __imm,
6872 (__mmask8) -1, __R);
6873 }
6874
6875 extern __inline __m128d
6876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6877 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6878 __m128i __C, const int __imm, const int __R)
6879 {
6880 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6881 (__v2df) __B,
6882 (__v2di) __C, __imm,
6883 (__mmask8) __U, __R);
6884 }
6885
6886 extern __inline __m128d
6887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6888 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6889 __m128i __C, const int __imm, const int __R)
6890 {
6891 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6892 (__v2df) __B,
6893 (__v2di) __C,
6894 __imm,
6895 (__mmask8) __U, __R);
6896 }
6897
6898 extern __inline __m128
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6901 const int __imm, const int __R)
6902 {
6903 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6904 (__v4sf) __B,
6905 (__v4si) __C, __imm,
6906 (__mmask8) -1, __R);
6907 }
6908
6909 extern __inline __m128
6910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6912 __m128i __C, const int __imm, const int __R)
6913 {
6914 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6915 (__v4sf) __B,
6916 (__v4si) __C, __imm,
6917 (__mmask8) __U, __R);
6918 }
6919
6920 extern __inline __m128
6921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6923 __m128i __C, const int __imm, const int __R)
6924 {
6925 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6926 (__v4sf) __B,
6927 (__v4si) __C, __imm,
6928 (__mmask8) __U, __R);
6929 }
6930
6931 #else
6932 #define _mm512_shuffle_pd(X, Y, C) \
6933 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6934 (__v8df)(__m512d)(Y), (int)(C),\
6935 (__v8df)(__m512d)_mm512_undefined_pd(),\
6936 (__mmask8)-1))
6937
6938 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
6939 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6940 (__v8df)(__m512d)(Y), (int)(C),\
6941 (__v8df)(__m512d)(W),\
6942 (__mmask8)(U)))
6943
6944 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
6945 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6946 (__v8df)(__m512d)(Y), (int)(C),\
6947 (__v8df)(__m512d)_mm512_setzero_pd(),\
6948 (__mmask8)(U)))
6949
6950 #define _mm512_shuffle_ps(X, Y, C) \
6951 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6952 (__v16sf)(__m512)(Y), (int)(C),\
6953 (__v16sf)(__m512)_mm512_undefined_ps(),\
6954 (__mmask16)-1))
6955
6956 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
6957 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6958 (__v16sf)(__m512)(Y), (int)(C),\
6959 (__v16sf)(__m512)(W),\
6960 (__mmask16)(U)))
6961
6962 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
6963 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6964 (__v16sf)(__m512)(Y), (int)(C),\
6965 (__v16sf)(__m512)_mm512_setzero_ps(),\
6966 (__mmask16)(U)))
6967
6968 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
6969 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6970 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6971 (__mmask8)(-1), (R)))
6972
6973 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
6974 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6975 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6976 (__mmask8)(U), (R)))
6977
6978 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
6979 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
6980 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6981 (__mmask8)(U), (R)))
6982
6983 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
6984 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6985 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6986 (__mmask16)(-1), (R)))
6987
6988 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6989 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6990 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6991 (__mmask16)(U), (R)))
6992
6993 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6994 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6995 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6996 (__mmask16)(U), (R)))
6997
6998 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
6999 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7000 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7001 (__mmask8)(-1), (R)))
7002
7003 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
7004 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7005 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7006 (__mmask8)(U), (R)))
7007
7008 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
7009 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
7010 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7011 (__mmask8)(U), (R)))
7012
7013 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
7014 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7015 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7016 (__mmask8)(-1), (R)))
7017
7018 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
7019 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7020 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7021 (__mmask8)(U), (R)))
7022
7023 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
7024 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
7025 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7026 (__mmask8)(U), (R)))
7027 #endif
7028
7029 extern __inline __m512
7030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7031 _mm512_movehdup_ps (__m512 __A)
7032 {
7033 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7034 (__v16sf)
7035 _mm512_undefined_ps (),
7036 (__mmask16) -1);
7037 }
7038
7039 extern __inline __m512
7040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7041 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7042 {
7043 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7044 (__v16sf) __W,
7045 (__mmask16) __U);
7046 }
7047
7048 extern __inline __m512
7049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7050 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7051 {
7052 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7053 (__v16sf)
7054 _mm512_setzero_ps (),
7055 (__mmask16) __U);
7056 }
7057
7058 extern __inline __m512
7059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7060 _mm512_moveldup_ps (__m512 __A)
7061 {
7062 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7063 (__v16sf)
7064 _mm512_undefined_ps (),
7065 (__mmask16) -1);
7066 }
7067
7068 extern __inline __m512
7069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7070 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7071 {
7072 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7073 (__v16sf) __W,
7074 (__mmask16) __U);
7075 }
7076
7077 extern __inline __m512
7078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7079 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7080 {
7081 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7082 (__v16sf)
7083 _mm512_setzero_ps (),
7084 (__mmask16) __U);
7085 }
7086
7087 extern __inline __m512i
7088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7089 _mm512_or_si512 (__m512i __A, __m512i __B)
7090 {
7091 return (__m512i) ((__v16su) __A | (__v16su) __B);
7092 }
7093
7094 extern __inline __m512i
7095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7096 _mm512_or_epi32 (__m512i __A, __m512i __B)
7097 {
7098 return (__m512i) ((__v16su) __A | (__v16su) __B);
7099 }
7100
7101 extern __inline __m512i
7102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7103 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7104 {
7105 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7106 (__v16si) __B,
7107 (__v16si) __W,
7108 (__mmask16) __U);
7109 }
7110
7111 extern __inline __m512i
7112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7113 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7114 {
7115 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7116 (__v16si) __B,
7117 (__v16si)
7118 _mm512_setzero_si512 (),
7119 (__mmask16) __U);
7120 }
7121
7122 extern __inline __m512i
7123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7124 _mm512_or_epi64 (__m512i __A, __m512i __B)
7125 {
7126 return (__m512i) ((__v8du) __A | (__v8du) __B);
7127 }
7128
7129 extern __inline __m512i
7130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7132 {
7133 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7134 (__v8di) __B,
7135 (__v8di) __W,
7136 (__mmask8) __U);
7137 }
7138
7139 extern __inline __m512i
7140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7141 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7142 {
7143 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7144 (__v8di) __B,
7145 (__v8di)
7146 _mm512_setzero_si512 (),
7147 (__mmask8) __U);
7148 }
7149
7150 extern __inline __m512i
7151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7152 _mm512_xor_si512 (__m512i __A, __m512i __B)
7153 {
7154 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7155 }
7156
7157 extern __inline __m512i
7158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7159 _mm512_xor_epi32 (__m512i __A, __m512i __B)
7160 {
7161 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7162 }
7163
7164 extern __inline __m512i
7165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7166 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7167 {
7168 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7169 (__v16si) __B,
7170 (__v16si) __W,
7171 (__mmask16) __U);
7172 }
7173
7174 extern __inline __m512i
7175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7176 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7177 {
7178 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7179 (__v16si) __B,
7180 (__v16si)
7181 _mm512_setzero_si512 (),
7182 (__mmask16) __U);
7183 }
7184
7185 extern __inline __m512i
7186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7187 _mm512_xor_epi64 (__m512i __A, __m512i __B)
7188 {
7189 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
7190 }
7191
7192 extern __inline __m512i
7193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7195 {
7196 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7197 (__v8di) __B,
7198 (__v8di) __W,
7199 (__mmask8) __U);
7200 }
7201
7202 extern __inline __m512i
7203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7204 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7205 {
7206 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7207 (__v8di) __B,
7208 (__v8di)
7209 _mm512_setzero_si512 (),
7210 (__mmask8) __U);
7211 }
7212
7213 #ifdef __OPTIMIZE__
7214 extern __inline __m512i
7215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7216 _mm512_rol_epi32 (__m512i __A, const int __B)
7217 {
7218 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7219 (__v16si)
7220 _mm512_undefined_epi32 (),
7221 (__mmask16) -1);
7222 }
7223
7224 extern __inline __m512i
7225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7226 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
7227 {
7228 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7229 (__v16si) __W,
7230 (__mmask16) __U);
7231 }
7232
7233 extern __inline __m512i
7234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
7236 {
7237 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7238 (__v16si)
7239 _mm512_setzero_si512 (),
7240 (__mmask16) __U);
7241 }
7242
7243 extern __inline __m512i
7244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7245 _mm512_ror_epi32 (__m512i __A, int __B)
7246 {
7247 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7248 (__v16si)
7249 _mm512_undefined_epi32 (),
7250 (__mmask16) -1);
7251 }
7252
7253 extern __inline __m512i
7254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7255 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
7256 {
7257 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7258 (__v16si) __W,
7259 (__mmask16) __U);
7260 }
7261
7262 extern __inline __m512i
7263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7264 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
7265 {
7266 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7267 (__v16si)
7268 _mm512_setzero_si512 (),
7269 (__mmask16) __U);
7270 }
7271
7272 extern __inline __m512i
7273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7274 _mm512_rol_epi64 (__m512i __A, const int __B)
7275 {
7276 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7277 (__v8di)
7278 _mm512_undefined_epi32 (),
7279 (__mmask8) -1);
7280 }
7281
7282 extern __inline __m512i
7283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7285 {
7286 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7287 (__v8di) __W,
7288 (__mmask8) __U);
7289 }
7290
7291 extern __inline __m512i
7292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7294 {
7295 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7296 (__v8di)
7297 _mm512_setzero_si512 (),
7298 (__mmask8) __U);
7299 }
7300
7301 extern __inline __m512i
7302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7303 _mm512_ror_epi64 (__m512i __A, int __B)
7304 {
7305 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7306 (__v8di)
7307 _mm512_undefined_epi32 (),
7308 (__mmask8) -1);
7309 }
7310
7311 extern __inline __m512i
7312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7313 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7314 {
7315 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7316 (__v8di) __W,
7317 (__mmask8) __U);
7318 }
7319
7320 extern __inline __m512i
7321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7322 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7323 {
7324 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7325 (__v8di)
7326 _mm512_setzero_si512 (),
7327 (__mmask8) __U);
7328 }
7329
7330 #else
7331 #define _mm512_rol_epi32(A, B) \
7332 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7333 (int)(B), \
7334 (__v16si)_mm512_undefined_epi32 (), \
7335 (__mmask16)(-1)))
7336 #define _mm512_mask_rol_epi32(W, U, A, B) \
7337 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7338 (int)(B), \
7339 (__v16si)(__m512i)(W), \
7340 (__mmask16)(U)))
7341 #define _mm512_maskz_rol_epi32(U, A, B) \
7342 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7343 (int)(B), \
7344 (__v16si)_mm512_setzero_si512 (), \
7345 (__mmask16)(U)))
7346 #define _mm512_ror_epi32(A, B) \
7347 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7348 (int)(B), \
7349 (__v16si)_mm512_undefined_epi32 (), \
7350 (__mmask16)(-1)))
7351 #define _mm512_mask_ror_epi32(W, U, A, B) \
7352 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7353 (int)(B), \
7354 (__v16si)(__m512i)(W), \
7355 (__mmask16)(U)))
7356 #define _mm512_maskz_ror_epi32(U, A, B) \
7357 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7358 (int)(B), \
7359 (__v16si)_mm512_setzero_si512 (), \
7360 (__mmask16)(U)))
7361 #define _mm512_rol_epi64(A, B) \
7362 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7363 (int)(B), \
7364 (__v8di)_mm512_undefined_epi32 (), \
7365 (__mmask8)(-1)))
7366 #define _mm512_mask_rol_epi64(W, U, A, B) \
7367 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7368 (int)(B), \
7369 (__v8di)(__m512i)(W), \
7370 (__mmask8)(U)))
7371 #define _mm512_maskz_rol_epi64(U, A, B) \
7372 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7373 (int)(B), \
7374 (__v8di)_mm512_setzero_si512 (), \
7375 (__mmask8)(U)))
7376
7377 #define _mm512_ror_epi64(A, B) \
7378 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7379 (int)(B), \
7380 (__v8di)_mm512_undefined_epi32 (), \
7381 (__mmask8)(-1)))
7382 #define _mm512_mask_ror_epi64(W, U, A, B) \
7383 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7384 (int)(B), \
7385 (__v8di)(__m512i)(W), \
7386 (__mmask8)(U)))
7387 #define _mm512_maskz_ror_epi64(U, A, B) \
7388 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7389 (int)(B), \
7390 (__v8di)_mm512_setzero_si512 (), \
7391 (__mmask8)(U)))
7392 #endif
7393
7394 extern __inline __m512i
7395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7396 _mm512_and_si512 (__m512i __A, __m512i __B)
7397 {
7398 return (__m512i) ((__v16su) __A & (__v16su) __B);
7399 }
7400
7401 extern __inline __m512i
7402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7403 _mm512_and_epi32 (__m512i __A, __m512i __B)
7404 {
7405 return (__m512i) ((__v16su) __A & (__v16su) __B);
7406 }
7407
7408 extern __inline __m512i
7409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7410 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7411 {
7412 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7413 (__v16si) __B,
7414 (__v16si) __W,
7415 (__mmask16) __U);
7416 }
7417
7418 extern __inline __m512i
7419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7420 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7421 {
7422 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7423 (__v16si) __B,
7424 (__v16si)
7425 _mm512_setzero_si512 (),
7426 (__mmask16) __U);
7427 }
7428
7429 extern __inline __m512i
7430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7431 _mm512_and_epi64 (__m512i __A, __m512i __B)
7432 {
7433 return (__m512i) ((__v8du) __A & (__v8du) __B);
7434 }
7435
7436 extern __inline __m512i
7437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7438 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7439 {
7440 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7441 (__v8di) __B,
7442 (__v8di) __W, __U);
7443 }
7444
7445 extern __inline __m512i
7446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7447 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7448 {
7449 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7450 (__v8di) __B,
7451 (__v8di)
7452 _mm512_setzero_si512 (),
7453 __U);
7454 }
7455
7456 extern __inline __m512i
7457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7459 {
7460 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7461 (__v16si) __B,
7462 (__v16si)
7463 _mm512_undefined_epi32 (),
7464 (__mmask16) -1);
7465 }
7466
7467 extern __inline __m512i
7468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7470 {
7471 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7472 (__v16si) __B,
7473 (__v16si)
7474 _mm512_undefined_epi32 (),
7475 (__mmask16) -1);
7476 }
7477
7478 extern __inline __m512i
7479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7480 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7481 {
7482 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7483 (__v16si) __B,
7484 (__v16si) __W,
7485 (__mmask16) __U);
7486 }
7487
7488 extern __inline __m512i
7489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7491 {
7492 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7493 (__v16si) __B,
7494 (__v16si)
7495 _mm512_setzero_si512 (),
7496 (__mmask16) __U);
7497 }
7498
7499 extern __inline __m512i
7500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7502 {
7503 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7504 (__v8di) __B,
7505 (__v8di)
7506 _mm512_undefined_epi32 (),
7507 (__mmask8) -1);
7508 }
7509
7510 extern __inline __m512i
7511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7512 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7513 {
7514 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7515 (__v8di) __B,
7516 (__v8di) __W, __U);
7517 }
7518
7519 extern __inline __m512i
7520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7521 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7522 {
7523 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7524 (__v8di) __B,
7525 (__v8di)
7526 _mm512_setzero_si512 (),
7527 __U);
7528 }
7529
7530 extern __inline __mmask16
7531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7533 {
7534 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7535 (__v16si) __B,
7536 (__mmask16) -1);
7537 }
7538
7539 extern __inline __mmask16
7540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7541 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7542 {
7543 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7544 (__v16si) __B, __U);
7545 }
7546
7547 extern __inline __mmask8
7548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7549 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7550 {
7551 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7552 (__v8di) __B,
7553 (__mmask8) -1);
7554 }
7555
7556 extern __inline __mmask8
7557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7558 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7559 {
7560 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7561 }
7562
7563 extern __inline __mmask16
7564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7566 {
7567 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7568 (__v16si) __B,
7569 (__mmask16) -1);
7570 }
7571
7572 extern __inline __mmask16
7573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7574 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7575 {
7576 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7577 (__v16si) __B, __U);
7578 }
7579
7580 extern __inline __mmask8
7581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7583 {
7584 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7585 (__v8di) __B,
7586 (__mmask8) -1);
7587 }
7588
7589 extern __inline __mmask8
7590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7591 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7592 {
7593 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7594 (__v8di) __B, __U);
7595 }
7596
7597 extern __inline __m512
7598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7599 _mm512_abs_ps (__m512 __A)
7600 {
7601 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7602 _mm512_set1_epi32 (0x7fffffff));
7603 }
7604
7605 extern __inline __m512
7606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7607 _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7608 {
7609 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7610 _mm512_set1_epi32 (0x7fffffff));
7611 }
7612
7613 extern __inline __m512d
7614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7615 _mm512_abs_pd (__m512d __A)
7616 {
7617 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7618 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7619 }
7620
7621 extern __inline __m512d
7622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7623 _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
7624 {
7625 return (__m512d)
7626 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7627 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7628 }
7629
7630 extern __inline __m512i
7631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7632 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7633 {
7634 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7635 (__v16si) __B,
7636 (__v16si)
7637 _mm512_undefined_epi32 (),
7638 (__mmask16) -1);
7639 }
7640
7641 extern __inline __m512i
7642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7643 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7644 __m512i __B)
7645 {
7646 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7647 (__v16si) __B,
7648 (__v16si) __W,
7649 (__mmask16) __U);
7650 }
7651
7652 extern __inline __m512i
7653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7654 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7655 {
7656 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7657 (__v16si) __B,
7658 (__v16si)
7659 _mm512_setzero_si512 (),
7660 (__mmask16) __U);
7661 }
7662
7663 extern __inline __m512i
7664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7665 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7666 {
7667 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7668 (__v8di) __B,
7669 (__v8di)
7670 _mm512_undefined_epi32 (),
7671 (__mmask8) -1);
7672 }
7673
7674 extern __inline __m512i
7675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7676 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7677 {
7678 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7679 (__v8di) __B,
7680 (__v8di) __W,
7681 (__mmask8) __U);
7682 }
7683
7684 extern __inline __m512i
7685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7686 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7687 {
7688 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7689 (__v8di) __B,
7690 (__v8di)
7691 _mm512_setzero_si512 (),
7692 (__mmask8) __U);
7693 }
7694
7695 extern __inline __m512i
7696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7697 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7698 {
7699 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7700 (__v16si) __B,
7701 (__v16si)
7702 _mm512_undefined_epi32 (),
7703 (__mmask16) -1);
7704 }
7705
7706 extern __inline __m512i
7707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7708 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7709 __m512i __B)
7710 {
7711 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7712 (__v16si) __B,
7713 (__v16si) __W,
7714 (__mmask16) __U);
7715 }
7716
7717 extern __inline __m512i
7718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7719 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7720 {
7721 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7722 (__v16si) __B,
7723 (__v16si)
7724 _mm512_setzero_si512 (),
7725 (__mmask16) __U);
7726 }
7727
7728 extern __inline __m512i
7729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7731 {
7732 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7733 (__v8di) __B,
7734 (__v8di)
7735 _mm512_undefined_epi32 (),
7736 (__mmask8) -1);
7737 }
7738
7739 extern __inline __m512i
7740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7742 {
7743 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7744 (__v8di) __B,
7745 (__v8di) __W,
7746 (__mmask8) __U);
7747 }
7748
7749 extern __inline __m512i
7750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7751 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7752 {
7753 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7754 (__v8di) __B,
7755 (__v8di)
7756 _mm512_setzero_si512 (),
7757 (__mmask8) __U);
7758 }
7759
7760 #ifdef __x86_64__
7761 #ifdef __OPTIMIZE__
7762 extern __inline unsigned long long
7763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7764 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7765 {
7766 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7767 }
7768
7769 extern __inline long long
7770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7771 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7772 {
7773 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7774 }
7775
7776 extern __inline long long
7777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7778 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7779 {
7780 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7781 }
7782
7783 extern __inline unsigned long long
7784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7785 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7786 {
7787 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7788 }
7789
7790 extern __inline long long
7791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7792 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7793 {
7794 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7795 }
7796
7797 extern __inline long long
7798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7799 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7800 {
7801 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7802 }
7803 #else
7804 #define _mm_cvt_roundss_u64(A, B) \
7805 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
7806
7807 #define _mm_cvt_roundss_si64(A, B) \
7808 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7809
7810 #define _mm_cvt_roundss_i64(A, B) \
7811 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7812
7813 #define _mm_cvtt_roundss_u64(A, B) \
7814 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
7815
7816 #define _mm_cvtt_roundss_i64(A, B) \
7817 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7818
7819 #define _mm_cvtt_roundss_si64(A, B) \
7820 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7821 #endif
7822 #endif
7823
7824 #ifdef __OPTIMIZE__
7825 extern __inline unsigned
7826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7827 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7828 {
7829 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7830 }
7831
7832 extern __inline int
7833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7834 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7835 {
7836 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7837 }
7838
7839 extern __inline int
7840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7841 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7842 {
7843 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7844 }
7845
7846 extern __inline unsigned
7847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7849 {
7850 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7851 }
7852
7853 extern __inline int
7854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7855 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7856 {
7857 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7858 }
7859
7860 extern __inline int
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7863 {
7864 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7865 }
7866 #else
7867 #define _mm_cvt_roundss_u32(A, B) \
7868 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
7869
7870 #define _mm_cvt_roundss_si32(A, B) \
7871 ((int)__builtin_ia32_vcvtss2si32(A, B))
7872
7873 #define _mm_cvt_roundss_i32(A, B) \
7874 ((int)__builtin_ia32_vcvtss2si32(A, B))
7875
7876 #define _mm_cvtt_roundss_u32(A, B) \
7877 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
7878
7879 #define _mm_cvtt_roundss_si32(A, B) \
7880 ((int)__builtin_ia32_vcvttss2si32(A, B))
7881
7882 #define _mm_cvtt_roundss_i32(A, B) \
7883 ((int)__builtin_ia32_vcvttss2si32(A, B))
7884 #endif
7885
7886 #ifdef __x86_64__
7887 #ifdef __OPTIMIZE__
7888 extern __inline unsigned long long
7889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7891 {
7892 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7893 }
7894
7895 extern __inline long long
7896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7897 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7898 {
7899 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7900 }
7901
7902 extern __inline long long
7903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7904 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7905 {
7906 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7907 }
7908
7909 extern __inline unsigned long long
7910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7911 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7912 {
7913 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7914 }
7915
7916 extern __inline long long
7917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7918 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7919 {
7920 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7921 }
7922
7923 extern __inline long long
7924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7925 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7926 {
7927 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7928 }
7929 #else
7930 #define _mm_cvt_roundsd_u64(A, B) \
7931 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
7932
7933 #define _mm_cvt_roundsd_si64(A, B) \
7934 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7935
7936 #define _mm_cvt_roundsd_i64(A, B) \
7937 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7938
7939 #define _mm_cvtt_roundsd_u64(A, B) \
7940 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
7941
7942 #define _mm_cvtt_roundsd_si64(A, B) \
7943 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7944
7945 #define _mm_cvtt_roundsd_i64(A, B) \
7946 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7947 #endif
7948 #endif
7949
7950 #ifdef __OPTIMIZE__
7951 extern __inline unsigned
7952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7953 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7954 {
7955 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7956 }
7957
7958 extern __inline int
7959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7960 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7961 {
7962 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7963 }
7964
7965 extern __inline int
7966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7967 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7968 {
7969 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7970 }
7971
7972 extern __inline unsigned
7973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7974 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7975 {
7976 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7977 }
7978
7979 extern __inline int
7980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7981 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7982 {
7983 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7984 }
7985
7986 extern __inline int
7987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7988 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7989 {
7990 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7991 }
7992 #else
7993 #define _mm_cvt_roundsd_u32(A, B) \
7994 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
7995
7996 #define _mm_cvt_roundsd_si32(A, B) \
7997 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7998
7999 #define _mm_cvt_roundsd_i32(A, B) \
8000 ((int)__builtin_ia32_vcvtsd2si32(A, B))
8001
8002 #define _mm_cvtt_roundsd_u32(A, B) \
8003 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
8004
8005 #define _mm_cvtt_roundsd_si32(A, B) \
8006 ((int)__builtin_ia32_vcvttsd2si32(A, B))
8007
8008 #define _mm_cvtt_roundsd_i32(A, B) \
8009 ((int)__builtin_ia32_vcvttsd2si32(A, B))
8010 #endif
8011
8012 extern __inline __m512d
8013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8014 _mm512_movedup_pd (__m512d __A)
8015 {
8016 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8017 (__v8df)
8018 _mm512_undefined_pd (),
8019 (__mmask8) -1);
8020 }
8021
8022 extern __inline __m512d
8023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8024 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8025 {
8026 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8027 (__v8df) __W,
8028 (__mmask8) __U);
8029 }
8030
8031 extern __inline __m512d
8032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8033 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8034 {
8035 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8036 (__v8df)
8037 _mm512_setzero_pd (),
8038 (__mmask8) __U);
8039 }
8040
8041 extern __inline __m512d
8042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8043 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
8044 {
8045 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8046 (__v8df) __B,
8047 (__v8df)
8048 _mm512_undefined_pd (),
8049 (__mmask8) -1);
8050 }
8051
8052 extern __inline __m512d
8053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8055 {
8056 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8057 (__v8df) __B,
8058 (__v8df) __W,
8059 (__mmask8) __U);
8060 }
8061
8062 extern __inline __m512d
8063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8064 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8065 {
8066 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8067 (__v8df) __B,
8068 (__v8df)
8069 _mm512_setzero_pd (),
8070 (__mmask8) __U);
8071 }
8072
8073 extern __inline __m512d
8074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8075 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
8076 {
8077 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8078 (__v8df) __B,
8079 (__v8df)
8080 _mm512_undefined_pd (),
8081 (__mmask8) -1);
8082 }
8083
8084 extern __inline __m512d
8085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8086 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8087 {
8088 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8089 (__v8df) __B,
8090 (__v8df) __W,
8091 (__mmask8) __U);
8092 }
8093
8094 extern __inline __m512d
8095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8096 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8097 {
8098 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8099 (__v8df) __B,
8100 (__v8df)
8101 _mm512_setzero_pd (),
8102 (__mmask8) __U);
8103 }
8104
8105 extern __inline __m512
8106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8107 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
8108 {
8109 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8110 (__v16sf) __B,
8111 (__v16sf)
8112 _mm512_undefined_ps (),
8113 (__mmask16) -1);
8114 }
8115
8116 extern __inline __m512
8117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8118 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8119 {
8120 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8121 (__v16sf) __B,
8122 (__v16sf) __W,
8123 (__mmask16) __U);
8124 }
8125
8126 extern __inline __m512
8127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8128 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8129 {
8130 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8131 (__v16sf) __B,
8132 (__v16sf)
8133 _mm512_setzero_ps (),
8134 (__mmask16) __U);
8135 }
8136
8137 #ifdef __OPTIMIZE__
8138 extern __inline __m512d
8139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8140 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
8141 {
8142 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8143 (__v8df)
8144 _mm512_undefined_pd (),
8145 (__mmask8) -1, __R);
8146 }
8147
8148 extern __inline __m512d
8149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8150 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
8151 const int __R)
8152 {
8153 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8154 (__v8df) __W,
8155 (__mmask8) __U, __R);
8156 }
8157
8158 extern __inline __m512d
8159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8160 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
8161 {
8162 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8163 (__v8df)
8164 _mm512_setzero_pd (),
8165 (__mmask8) __U, __R);
8166 }
8167
8168 extern __inline __m512
8169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8170 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
8171 {
8172 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8173 (__v16sf)
8174 _mm512_undefined_ps (),
8175 (__mmask16) -1, __R);
8176 }
8177
8178 extern __inline __m512
8179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
8181 const int __R)
8182 {
8183 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8184 (__v16sf) __W,
8185 (__mmask16) __U, __R);
8186 }
8187
8188 extern __inline __m512
8189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8190 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
8191 {
8192 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8193 (__v16sf)
8194 _mm512_setzero_ps (),
8195 (__mmask16) __U, __R);
8196 }
8197
8198 extern __inline __m256i
8199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8200 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
8201 {
8202 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8203 __I,
8204 (__v16hi)
8205 _mm256_undefined_si256 (),
8206 -1);
8207 }
8208
8209 extern __inline __m256i
8210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8211 _mm512_cvtps_ph (__m512 __A, const int __I)
8212 {
8213 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8214 __I,
8215 (__v16hi)
8216 _mm256_undefined_si256 (),
8217 -1);
8218 }
8219
8220 extern __inline __m256i
8221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8222 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
8223 const int __I)
8224 {
8225 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8226 __I,
8227 (__v16hi) __U,
8228 (__mmask16) __W);
8229 }
8230
8231 extern __inline __m256i
8232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8233 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
8234 {
8235 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8236 __I,
8237 (__v16hi) __U,
8238 (__mmask16) __W);
8239 }
8240
8241 extern __inline __m256i
8242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8243 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
8244 {
8245 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8246 __I,
8247 (__v16hi)
8248 _mm256_setzero_si256 (),
8249 (__mmask16) __W);
8250 }
8251
8252 extern __inline __m256i
8253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8254 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
8255 {
8256 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8257 __I,
8258 (__v16hi)
8259 _mm256_setzero_si256 (),
8260 (__mmask16) __W);
8261 }
8262 #else
8263 #define _mm512_cvt_roundps_pd(A, B) \
8264 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
8265
8266 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
8267 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
8268
8269 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
8270 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
8271
8272 #define _mm512_cvt_roundph_ps(A, B) \
8273 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
8274
8275 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
8276 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
8277
8278 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
8279 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
8280
8281 #define _mm512_cvt_roundps_ph(A, I) \
8282 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
8283 (__v16hi)_mm256_undefined_si256 (), -1))
8284 #define _mm512_cvtps_ph(A, I) \
8285 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
8286 (__v16hi)_mm256_undefined_si256 (), -1))
8287 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
8288 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
8289 (__v16hi)(__m256i)(U), (__mmask16) (W)))
8290 #define _mm512_mask_cvtps_ph(U, W, A, I) \
8291 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
8292 (__v16hi)(__m256i)(U), (__mmask16) (W)))
8293 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
8294 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
8295 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
8296 #define _mm512_maskz_cvtps_ph(W, A, I) \
8297 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
8298 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
8299 #endif
8300
8301 #ifdef __OPTIMIZE__
8302 extern __inline __m256
8303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8304 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
8305 {
8306 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8307 (__v8sf)
8308 _mm256_undefined_ps (),
8309 (__mmask8) -1, __R);
8310 }
8311
8312 extern __inline __m256
8313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8315 const int __R)
8316 {
8317 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8318 (__v8sf) __W,
8319 (__mmask8) __U, __R);
8320 }
8321
8322 extern __inline __m256
8323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8324 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8325 {
8326 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8327 (__v8sf)
8328 _mm256_setzero_ps (),
8329 (__mmask8) __U, __R);
8330 }
8331
8332 extern __inline __m128
8333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8334 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8335 {
8336 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8337 (__v2df) __B,
8338 __R);
8339 }
8340
8341 extern __inline __m128d
8342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8343 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8344 {
8345 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8346 (__v4sf) __B,
8347 __R);
8348 }
8349 #else
8350 #define _mm512_cvt_roundpd_ps(A, B) \
8351 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
8352
8353 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
8354 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
8355
8356 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
8357 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
8358
8359 #define _mm_cvt_roundsd_ss(A, B, C) \
8360 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
8361
8362 #define _mm_cvt_roundss_sd(A, B, C) \
8363 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
8364 #endif
8365
8366 extern __inline void
8367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8368 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8369 {
8370 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8371 }
8372
8373 extern __inline void
8374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8375 _mm512_stream_ps (float *__P, __m512 __A)
8376 {
8377 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8378 }
8379
8380 extern __inline void
8381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8382 _mm512_stream_pd (double *__P, __m512d __A)
8383 {
8384 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8385 }
8386
8387 extern __inline __m512i
8388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8389 _mm512_stream_load_si512 (void *__P)
8390 {
8391 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8392 }
8393
8394 /* Constants for mantissa extraction */
8395 typedef enum
8396 {
8397 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8398 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8399 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8400 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8401 } _MM_MANTISSA_NORM_ENUM;
8402
8403 typedef enum
8404 {
8405 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8406 _MM_MANT_SIGN_zero, /* sign = 0 */
8407 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8408 } _MM_MANTISSA_SIGN_ENUM;
8409
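/* Usage sketch for the two enums above (illustrative only; it assumes
   _MM_FROUND_CUR_DIRECTION from <smmintrin.h> is visible via
   <immintrin.h>).  The getmant wrappers below combine the two values
   into the builtin's immediate as (sign << 2) | norm.  For example,
   extracting each element's mantissa normalized to [1, 2) while keeping
   the source sign:

     __m512d __x = _mm512_set1_pd (-12.5);
     __m512d __m = _mm512_getmant_round_pd (__x, _MM_MANT_NORM_1_2,
                                            _MM_MANT_SIGN_src,
                                            _MM_FROUND_CUR_DIRECTION);  */
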
8410 #ifdef __OPTIMIZE__
8411 extern __inline __m128
8412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8413 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8414 {
8415 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8416 (__v4sf) __B,
8417 __R);
8418 }
8419
8420 extern __inline __m128d
8421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8422 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8423 {
8424 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8425 (__v2df) __B,
8426 __R);
8427 }
8428
8429 extern __inline __m512
8430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8431 _mm512_getexp_round_ps (__m512 __A, const int __R)
8432 {
8433 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8434 (__v16sf)
8435 _mm512_undefined_ps (),
8436 (__mmask16) -1, __R);
8437 }
8438
8439 extern __inline __m512
8440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8441 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8442 const int __R)
8443 {
8444 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8445 (__v16sf) __W,
8446 (__mmask16) __U, __R);
8447 }
8448
8449 extern __inline __m512
8450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8451 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8452 {
8453 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8454 (__v16sf)
8455 _mm512_setzero_ps (),
8456 (__mmask16) __U, __R);
8457 }
8458
8459 extern __inline __m512d
8460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8461 _mm512_getexp_round_pd (__m512d __A, const int __R)
8462 {
8463 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8464 (__v8df)
8465 _mm512_undefined_pd (),
8466 (__mmask8) -1, __R);
8467 }
8468
8469 extern __inline __m512d
8470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8471 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8472 const int __R)
8473 {
8474 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8475 (__v8df) __W,
8476 (__mmask8) __U, __R);
8477 }
8478
8479 extern __inline __m512d
8480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8481 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8482 {
8483 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8484 (__v8df)
8485 _mm512_setzero_pd (),
8486 (__mmask8) __U, __R);
8487 }
8488
8489 extern __inline __m512d
8490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8491 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8492 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8493 {
8494 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8495 (__C << 2) | __B,
8496 _mm512_undefined_pd (),
8497 (__mmask8) -1, __R);
8498 }
8499
8500 extern __inline __m512d
8501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8502 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8503 _MM_MANTISSA_NORM_ENUM __B,
8504 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8505 {
8506 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8507 (__C << 2) | __B,
8508 (__v8df) __W, __U,
8509 __R);
8510 }
8511
8512 extern __inline __m512d
8513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8514 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8515 _MM_MANTISSA_NORM_ENUM __B,
8516 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8517 {
8518 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8519 (__C << 2) | __B,
8520 (__v8df)
8521 _mm512_setzero_pd (),
8522 __U, __R);
8523 }
8524
8525 extern __inline __m512
8526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8527 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8528 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8529 {
8530 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8531 (__C << 2) | __B,
8532 _mm512_undefined_ps (),
8533 (__mmask16) -1, __R);
8534 }
8535
8536 extern __inline __m512
8537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8538 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8539 _MM_MANTISSA_NORM_ENUM __B,
8540 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8541 {
8542 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8543 (__C << 2) | __B,
8544 (__v16sf) __W, __U,
8545 __R);
8546 }
8547
8548 extern __inline __m512
8549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8550 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8551 _MM_MANTISSA_NORM_ENUM __B,
8552 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8553 {
8554 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8555 (__C << 2) | __B,
8556 (__v16sf)
8557 _mm512_setzero_ps (),
8558 __U, __R);
8559 }
8560
8561 extern __inline __m128d
8562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8563 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8564 _MM_MANTISSA_NORM_ENUM __C,
8565 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8566 {
8567 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8568 (__v2df) __B,
8569 (__D << 2) | __C,
8570 __R);
8571 }
8572
8573 extern __inline __m128
8574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8575 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8576 _MM_MANTISSA_NORM_ENUM __C,
8577 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8578 {
8579 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8580 (__v4sf) __B,
8581 (__D << 2) | __C,
8582 __R);
8583 }
8584
8585 #else
8586 #define _mm512_getmant_round_pd(X, B, C, R) \
8587 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8588 (int)(((C)<<2) | (B)), \
8589 (__v8df)(__m512d)_mm512_undefined_pd(), \
8590 (__mmask8)-1,\
8591 (R)))
8592
8593 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8594 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8595 (int)(((C)<<2) | (B)), \
8596 (__v8df)(__m512d)(W), \
8597 (__mmask8)(U),\
8598 (R)))
8599
8600 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8601 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8602 (int)(((C)<<2) | (B)), \
8603 (__v8df)(__m512d)_mm512_setzero_pd(), \
8604 (__mmask8)(U),\
8605 (R)))
8606 #define _mm512_getmant_round_ps(X, B, C, R) \
8607 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8608 (int)(((C)<<2) | (B)), \
8609 (__v16sf)(__m512)_mm512_undefined_ps(), \
8610 (__mmask16)-1,\
8611 (R)))
8612
8613 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8614 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8615 (int)(((C)<<2) | (B)), \
8616 (__v16sf)(__m512)(W), \
8617 (__mmask16)(U),\
8618 (R)))
8619
8620 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8621 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8622 (int)(((C)<<2) | (B)), \
8623 (__v16sf)(__m512)_mm512_setzero_ps(), \
8624 (__mmask16)(U),\
8625 (R)))
8626 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8627 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8628 (__v2df)(__m128d)(Y), \
8629 (int)(((D)<<2) | (C)), \
8630 (R)))
8631
8632 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8633 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8634 (__v4sf)(__m128)(Y), \
8635 (int)(((D)<<2) | (C)), \
8636 (R)))
8637
8638 #define _mm_getexp_round_ss(A, B, R) \
8639 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8640
8641 #define _mm_getexp_round_sd(A, B, R) \
8642 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8643
8644 #define _mm512_getexp_round_ps(A, R) \
8645 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8646 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
8647
8648 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8649 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8650 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8651
8652 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8653 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8654 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8655
8656 #define _mm512_getexp_round_pd(A, R) \
8657 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8658 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
8659
8660 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8661 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8662 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8663
8664 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8665 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8666 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8667 #endif
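
/* For the getexp/getmant intrinsics above the rounding operand only
   controls exception suppression: pass either _MM_FROUND_CUR_DIRECTION
   or _MM_FROUND_NO_EXC, since the results themselves are exact.
   Getexp returns, per element, floor(log2(|x|)) as a floating-point
   value for finite non-zero inputs, e.g.

     __m512 e = _mm512_getexp_round_ps (v, _MM_FROUND_NO_EXC);

   where v is any __m512 operand.  */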
8668
8669 #ifdef __OPTIMIZE__
8670 extern __inline __m512
8671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8673 {
8674 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8675 (__v16sf)
8676 _mm512_undefined_ps (),
8677 -1, __R);
8678 }
8679
8680 extern __inline __m512
8681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8682 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8683 const int __imm, const int __R)
8684 {
8685 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8686 (__v16sf) __A,
8687 (__mmask16) __B, __R);
8688 }
8689
8690 extern __inline __m512
8691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8692 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8693 const int __imm, const int __R)
8694 {
8695 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8696 __imm,
8697 (__v16sf)
8698 _mm512_setzero_ps (),
8699 (__mmask16) __A, __R);
8700 }
8701
8702 extern __inline __m512d
8703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8704 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8705 {
8706 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8707 (__v8df)
8708 _mm512_undefined_pd (),
8709 -1, __R);
8710 }
8711
8712 extern __inline __m512d
8713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8714 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8715 __m512d __C, const int __imm, const int __R)
8716 {
8717 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8718 (__v8df) __A,
8719 (__mmask8) __B, __R);
8720 }
8721
8722 extern __inline __m512d
8723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8724 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8725 const int __imm, const int __R)
8726 {
8727 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8728 __imm,
8729 (__v8df)
8730 _mm512_setzero_pd (),
8731 (__mmask8) __A, __R);
8732 }
8733
8734 extern __inline __m128
8735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8737 {
8738 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8739 (__v4sf) __B, __imm, __R);
8740 }
8741
8742 extern __inline __m128d
8743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8744 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8745 const int __R)
8746 {
8747 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8748 (__v2df) __B, __imm, __R);
8749 }
8750
8751 #else
8752 #define _mm512_roundscale_round_ps(A, B, R) \
8753 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
8754 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
8755 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
8756 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8757 (int)(D), \
8758 (__v16sf)(__m512)(A), \
8759 (__mmask16)(B), R))
8760 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
8761 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8762 (int)(C), \
8763 (__v16sf)_mm512_setzero_ps(),\
8764 (__mmask16)(A), R))
8765 #define _mm512_roundscale_round_pd(A, B, R) \
8766 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
8767 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
8768 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
8769 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8770 (int)(D), \
8771 (__v8df)(__m512d)(A), \
8772 (__mmask8)(B), R))
8773 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
8774 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8775 (int)(C), \
8776 (__v8df)_mm512_setzero_pd(),\
8777 (__mmask8)(A), R))
8778 #define _mm_roundscale_round_ss(A, B, C, R) \
8779 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
8780 (__v4sf)(__m128)(B), (int)(C), R))
8781 #define _mm_roundscale_round_sd(A, B, C, R) \
8782 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
8783 (__v2df)(__m128d)(B), (int)(C), R))
8784 #endif
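
/* In the roundscale immediate the high nibble gives the number of
   fraction bits to keep (0 rounds to an integer) and the low bits
   select the rounding behaviour via the _MM_FROUND_* flags.  The
   floor/ceil helpers below are roundscale with no scaling and a fixed
   direction; _mm512_floor_pd (x), for instance, behaves like

     _mm512_roundscale_round_pd (x, _MM_FROUND_FLOOR,
                                 _MM_FROUND_CUR_DIRECTION);

   for an arbitrary __m512d x.  */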
8785
8786 extern __inline __m512
8787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8788 _mm512_floor_ps (__m512 __A)
8789 {
8790 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8791 _MM_FROUND_FLOOR,
8792 (__v16sf) __A, -1,
8793 _MM_FROUND_CUR_DIRECTION);
8794 }
8795
8796 extern __inline __m512d
8797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8798 _mm512_floor_pd (__m512d __A)
8799 {
8800 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8801 _MM_FROUND_FLOOR,
8802 (__v8df) __A, -1,
8803 _MM_FROUND_CUR_DIRECTION);
8804 }
8805
8806 extern __inline __m512
8807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8808 _mm512_ceil_ps (__m512 __A)
8809 {
8810 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8811 _MM_FROUND_CEIL,
8812 (__v16sf) __A, -1,
8813 _MM_FROUND_CUR_DIRECTION);
8814 }
8815
8816 extern __inline __m512d
8817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8818 _mm512_ceil_pd (__m512d __A)
8819 {
8820 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8821 _MM_FROUND_CEIL,
8822 (__v8df) __A, -1,
8823 _MM_FROUND_CUR_DIRECTION);
8824 }
8825
8826 extern __inline __m512
8827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8828 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8829 {
8830 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8831 _MM_FROUND_FLOOR,
8832 (__v16sf) __W, __U,
8833 _MM_FROUND_CUR_DIRECTION);
8834 }
8835
8836 extern __inline __m512d
8837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8839 {
8840 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8841 _MM_FROUND_FLOOR,
8842 (__v8df) __W, __U,
8843 _MM_FROUND_CUR_DIRECTION);
8844 }
8845
8846 extern __inline __m512
8847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8848 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8849 {
8850 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8851 _MM_FROUND_CEIL,
8852 (__v16sf) __W, __U,
8853 _MM_FROUND_CUR_DIRECTION);
8854 }
8855
8856 extern __inline __m512d
8857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8858 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8859 {
8860 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8861 _MM_FROUND_CEIL,
8862 (__v8df) __W, __U,
8863 _MM_FROUND_CUR_DIRECTION);
8864 }
8865
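/* VALIGND/VALIGNQ: the alignr intrinsics below concatenate __B (low
   half) and __A (high half) into a 1024-bit value, shift it right by
   __imm 32-bit or 64-bit elements (elements, not bytes as in the older
   palignr), and return the low 512 bits.  Rotating the lanes of a
   vector by one position can therefore be written as

     __m512i r = _mm512_alignr_epi32 (v, v, 1);

   for any __m512i v.  */
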
8866 #ifdef __OPTIMIZE__
8867 extern __inline __m512i
8868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8869 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8870 {
8871 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8872 (__v16si) __B, __imm,
8873 (__v16si)
8874 _mm512_undefined_epi32 (),
8875 (__mmask16) -1);
8876 }
8877
8878 extern __inline __m512i
8879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8880 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8881 __m512i __B, const int __imm)
8882 {
8883 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8884 (__v16si) __B, __imm,
8885 (__v16si) __W,
8886 (__mmask16) __U);
8887 }
8888
8889 extern __inline __m512i
8890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8891 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8892 const int __imm)
8893 {
8894 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8895 (__v16si) __B, __imm,
8896 (__v16si)
8897 _mm512_setzero_si512 (),
8898 (__mmask16) __U);
8899 }
8900
8901 extern __inline __m512i
8902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8903 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8904 {
8905 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8906 (__v8di) __B, __imm,
8907 (__v8di)
8908 _mm512_undefined_epi32 (),
8909 (__mmask8) -1);
8910 }
8911
8912 extern __inline __m512i
8913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8914 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8915 __m512i __B, const int __imm)
8916 {
8917 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8918 (__v8di) __B, __imm,
8919 (__v8di) __W,
8920 (__mmask8) __U);
8921 }
8922
8923 extern __inline __m512i
8924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8925 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8926 const int __imm)
8927 {
8928 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8929 (__v8di) __B, __imm,
8930 (__v8di)
8931 _mm512_setzero_si512 (),
8932 (__mmask8) __U);
8933 }
8934 #else
8935 #define _mm512_alignr_epi32(X, Y, C) \
8936 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8937 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
8938 (__mmask16)-1))
8939
8940 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
8941 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8942 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
8943 (__mmask16)(U)))
8944
8945 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
8946 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8947 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
8948 (__mmask16)(U)))
8949
8950 #define _mm512_alignr_epi64(X, Y, C) \
8951 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8952 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \
8953 (__mmask8)-1))
8954
8955 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
8956 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8957 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
8958
8959 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
8960 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8961 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
8962 (__mmask8)(U)))
8963 #endif
8964
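/* Each 512-bit integer comparison below produces one result bit per
   element, collected in an __mmask16 (32-bit lanes) or __mmask8
   (64-bit lanes); the _mask_ variants additionally AND the result with
   the incoming mask.  A typical use is to drive a masked operation:

     __mmask16 k = _mm512_cmpgt_epi32_mask (a, b);
     __m512i   r = _mm512_mask_add_epi32 (b, k, a, b);

   with a and b arbitrary __m512i values.  */
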
8965 extern __inline __mmask16
8966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8967 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8968 {
8969 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8970 (__v16si) __B,
8971 (__mmask16) -1);
8972 }
8973
8974 extern __inline __mmask16
8975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8976 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8977 {
8978 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8979 (__v16si) __B, __U);
8980 }
8981
8982 extern __inline __mmask8
8983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8985 {
8986 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8987 (__v8di) __B, __U);
8988 }
8989
8990 extern __inline __mmask8
8991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8992 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8993 {
8994 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8995 (__v8di) __B,
8996 (__mmask8) -1);
8997 }
8998
8999 extern __inline __mmask16
9000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9001 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9002 {
9003 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9004 (__v16si) __B,
9005 (__mmask16) -1);
9006 }
9007
9008 extern __inline __mmask16
9009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9010 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9011 {
9012 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9013 (__v16si) __B, __U);
9014 }
9015
9016 extern __inline __mmask8
9017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9018 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9019 {
9020 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9021 (__v8di) __B, __U);
9022 }
9023
9024 extern __inline __mmask8
9025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9026 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9027 {
9028 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9029 (__v8di) __B,
9030 (__mmask8) -1);
9031 }
9032
9033 extern __inline __mmask16
9034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9035 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9036 {
9037 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9038 (__v16si) __Y, 5,
9039 (__mmask16) -1);
9040 }
9041
9042 extern __inline __mmask16
9043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9044 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9045 {
9046 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9047 (__v16si) __Y, 5,
9048 (__mmask16) __M);
9049 }
9050
9051 extern __inline __mmask16
9052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9053 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9054 {
9055 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9056 (__v16si) __Y, 5,
9057 (__mmask16) __M);
9058 }
9059
9060 extern __inline __mmask16
9061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9062 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9063 {
9064 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9065 (__v16si) __Y, 5,
9066 (__mmask16) -1);
9067 }
9068
9069 extern __inline __mmask8
9070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9071 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9072 {
9073 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9074 (__v8di) __Y, 5,
9075 (__mmask8) __M);
9076 }
9077
9078 extern __inline __mmask8
9079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9080 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9081 {
9082 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9083 (__v8di) __Y, 5,
9084 (__mmask8) -1);
9085 }
9086
9087 extern __inline __mmask8
9088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9089 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9090 {
9091 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9092 (__v8di) __Y, 5,
9093 (__mmask8) __M);
9094 }
9095
9096 extern __inline __mmask8
9097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9098 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9099 {
9100 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9101 (__v8di) __Y, 5,
9102 (__mmask8) -1);
9103 }
9104
9105 extern __inline __mmask16
9106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9107 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9108 {
9109 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9110 (__v16si) __Y, 2,
9111 (__mmask16) __M);
9112 }
9113
9114 extern __inline __mmask16
9115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9116 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9117 {
9118 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9119 (__v16si) __Y, 2,
9120 (__mmask16) -1);
9121 }
9122
9123 extern __inline __mmask16
9124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9125 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9126 {
9127 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9128 (__v16si) __Y, 2,
9129 (__mmask16) __M);
9130 }
9131
9132 extern __inline __mmask16
9133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9134 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9135 {
9136 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9137 (__v16si) __Y, 2,
9138 (__mmask16) -1);
9139 }
9140
9141 extern __inline __mmask8
9142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9143 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9144 {
9145 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9146 (__v8di) __Y, 2,
9147 (__mmask8) __M);
9148 }
9149
9150 extern __inline __mmask8
9151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9152 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9153 {
9154 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9155 (__v8di) __Y, 2,
9156 (__mmask8) -1);
9157 }
9158
9159 extern __inline __mmask8
9160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9162 {
9163 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9164 (__v8di) __Y, 2,
9165 (__mmask8) __M);
9166 }
9167
9168 extern __inline __mmask8
9169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9170 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9171 {
9172 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9173 (__v8di) __Y, 2,
9174 (__mmask8) -1);
9175 }
9176
9177 extern __inline __mmask16
9178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9179 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9180 {
9181 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9182 (__v16si) __Y, 1,
9183 (__mmask16) __M);
9184 }
9185
9186 extern __inline __mmask16
9187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9188 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9189 {
9190 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9191 (__v16si) __Y, 1,
9192 (__mmask16) -1);
9193 }
9194
9195 extern __inline __mmask16
9196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9197 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9198 {
9199 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9200 (__v16si) __Y, 1,
9201 (__mmask16) __M);
9202 }
9203
9204 extern __inline __mmask16
9205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9206 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9207 {
9208 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9209 (__v16si) __Y, 1,
9210 (__mmask16) -1);
9211 }
9212
9213 extern __inline __mmask8
9214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9215 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9216 {
9217 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9218 (__v8di) __Y, 1,
9219 (__mmask8) __M);
9220 }
9221
9222 extern __inline __mmask8
9223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9224 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9225 {
9226 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9227 (__v8di) __Y, 1,
9228 (__mmask8) -1);
9229 }
9230
9231 extern __inline __mmask8
9232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9233 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9234 {
9235 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9236 (__v8di) __Y, 1,
9237 (__mmask8) __M);
9238 }
9239
9240 extern __inline __mmask8
9241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9242 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9243 {
9244 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9245 (__v8di) __Y, 1,
9246 (__mmask8) -1);
9247 }
9248
9249 extern __inline __mmask16
9250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9251 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9252 {
9253 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9254 (__v16si) __Y, 4,
9255 (__mmask16) -1);
9256 }
9257
9258 extern __inline __mmask16
9259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9260 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9261 {
9262 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9263 (__v16si) __Y, 4,
9264 (__mmask16) __M);
9265 }
9266
9267 extern __inline __mmask16
9268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9269 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9270 {
9271 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9272 (__v16si) __Y, 4,
9273 (__mmask16) __M);
9274 }
9275
9276 extern __inline __mmask16
9277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9278 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9279 {
9280 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9281 (__v16si) __Y, 4,
9282 (__mmask16) -1);
9283 }
9284
9285 extern __inline __mmask8
9286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9287 _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9288 {
9289 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9290 (__v8di) __Y, 4,
9291 (__mmask8) __M);
9292 }
9293
9294 extern __inline __mmask8
9295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9296 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9297 {
9298 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9299 (__v8di) __Y, 4,
9300 (__mmask8) -1);
9301 }
9302
9303 extern __inline __mmask8
9304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9305 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9306 {
9307 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9308 (__v8di) __Y, 4,
9309 (__mmask8) __M);
9310 }
9311
9312 extern __inline __mmask8
9313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9314 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9315 {
9316 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9317 (__v8di) __Y, 4,
9318 (__mmask8) -1);
9319 }
9320
9321 #define _MM_CMPINT_EQ 0x0
9322 #define _MM_CMPINT_LT 0x1
9323 #define _MM_CMPINT_LE 0x2
9324 #define _MM_CMPINT_UNUSED 0x3
9325 #define _MM_CMPINT_NE 0x4
9326 #define _MM_CMPINT_NLT 0x5
9327 #define _MM_CMPINT_GE 0x5
9328 #define _MM_CMPINT_NLE 0x6
9329 #define _MM_CMPINT_GT 0x6
9330
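/* These predicates form the third operand of the generic _mm512_cmp_*
   intrinsics defined below; the fixed-predicate wrappers above simply
   hard-code the same encodings (1 for LT, 2 for LE, 4 for NE, 5 for
   GE/NLT).  So, for unsigned 32-bit lanes,

     _mm512_cmp_epu32_mask (x, y, _MM_CMPINT_LT)

   computes the same mask as _mm512_cmplt_epu32_mask (x, y).  */
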
9331 #ifdef __OPTIMIZE__
9332 extern __inline __mmask16
9333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9334 _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9335 {
9336 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9337 (__mmask8) __B);
9338 }
9339
9340 extern __inline __mmask16
9341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9342 _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9343 {
9344 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9345 (__mmask8) __B);
9346 }
9347
9348 extern __inline __mmask8
9349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9350 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9351 {
9352 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9353 (__v8di) __Y, __P,
9354 (__mmask8) -1);
9355 }
9356
9357 extern __inline __mmask16
9358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9359 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9360 {
9361 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9362 (__v16si) __Y, __P,
9363 (__mmask16) -1);
9364 }
9365
9366 extern __inline __mmask8
9367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9368 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9369 {
9370 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9371 (__v8di) __Y, __P,
9372 (__mmask8) -1);
9373 }
9374
9375 extern __inline __mmask16
9376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9377 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9378 {
9379 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9380 (__v16si) __Y, __P,
9381 (__mmask16) -1);
9382 }
9383
9384 extern __inline __mmask8
9385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9386 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9387 const int __R)
9388 {
9389 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9390 (__v8df) __Y, __P,
9391 (__mmask8) -1, __R);
9392 }
9393
9394 extern __inline __mmask16
9395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9396 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9397 {
9398 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9399 (__v16sf) __Y, __P,
9400 (__mmask16) -1, __R);
9401 }
9402
9403 extern __inline __mmask8
9404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9405 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9406 const int __P)
9407 {
9408 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9409 (__v8di) __Y, __P,
9410 (__mmask8) __U);
9411 }
9412
9413 extern __inline __mmask16
9414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9415 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9416 const int __P)
9417 {
9418 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9419 (__v16si) __Y, __P,
9420 (__mmask16) __U);
9421 }
9422
9423 extern __inline __mmask8
9424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9425 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9426 const int __P)
9427 {
9428 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9429 (__v8di) __Y, __P,
9430 (__mmask8) __U);
9431 }
9432
9433 extern __inline __mmask16
9434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9435 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9436 const int __P)
9437 {
9438 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9439 (__v16si) __Y, __P,
9440 (__mmask16) __U);
9441 }
9442
9443 extern __inline __mmask8
9444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9445 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9446 const int __P, const int __R)
9447 {
9448 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9449 (__v8df) __Y, __P,
9450 (__mmask8) __U, __R);
9451 }
9452
9453 extern __inline __mmask16
9454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9455 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9456 const int __P, const int __R)
9457 {
9458 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9459 (__v16sf) __Y, __P,
9460 (__mmask16) __U, __R);
9461 }
9462
9463 extern __inline __mmask8
9464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9465 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9466 {
9467 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9468 (__v2df) __Y, __P,
9469 (__mmask8) -1, __R);
9470 }
9471
9472 extern __inline __mmask8
9473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9474 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9475 const int __P, const int __R)
9476 {
9477 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9478 (__v2df) __Y, __P,
9479 (__mmask8) __M, __R);
9480 }
9481
9482 extern __inline __mmask8
9483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9485 {
9486 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9487 (__v4sf) __Y, __P,
9488 (__mmask8) -1, __R);
9489 }
9490
9491 extern __inline __mmask8
9492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9494 const int __P, const int __R)
9495 {
9496 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9497 (__v4sf) __Y, __P,
9498 (__mmask8) __M, __R);
9499 }
9500
9501 #else
9502 #define _kshiftli_mask16(X, Y) \
9503 ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
9504
9505 #define _kshiftri_mask16(X, Y) \
9506 ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
9507
9508 #define _mm512_cmp_epi64_mask(X, Y, P) \
9509 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9510 (__v8di)(__m512i)(Y), (int)(P),\
9511 (__mmask8)-1))
9512
9513 #define _mm512_cmp_epi32_mask(X, Y, P) \
9514 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9515 (__v16si)(__m512i)(Y), (int)(P), \
9516 (__mmask16)-1))
9517
9518 #define _mm512_cmp_epu64_mask(X, Y, P) \
9519 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9520 (__v8di)(__m512i)(Y), (int)(P),\
9521 (__mmask8)-1))
9522
9523 #define _mm512_cmp_epu32_mask(X, Y, P) \
9524 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9525 (__v16si)(__m512i)(Y), (int)(P), \
9526 (__mmask16)-1))
9527
9528 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
9529 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9530 (__v8df)(__m512d)(Y), (int)(P),\
9531 (__mmask8)-1, R))
9532
9533 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
9534 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9535 (__v16sf)(__m512)(Y), (int)(P),\
9536 (__mmask16)-1, R))
9537
9538 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
9539 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9540 (__v8di)(__m512i)(Y), (int)(P),\
9541 (__mmask8)M))
9542
9543 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
9544 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9545 (__v16si)(__m512i)(Y), (int)(P), \
9546 (__mmask16)M))
9547
9548 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
9549 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9550 (__v8di)(__m512i)(Y), (int)(P),\
9551 (__mmask8)M))
9552
9553 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
9554 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9555 (__v16si)(__m512i)(Y), (int)(P), \
9556 (__mmask16)M))
9557
9558 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
9559 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9560 (__v8df)(__m512d)(Y), (int)(P),\
9561 (__mmask8)M, R))
9562
9563 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
9564 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9565 (__v16sf)(__m512)(Y), (int)(P),\
9566 (__mmask16)M, R))
9567
9568 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
9569 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9570 (__v2df)(__m128d)(Y), (int)(P),\
9571 (__mmask8)-1, R))
9572
9573 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
9574 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9575 (__v2df)(__m128d)(Y), (int)(P),\
9576 (__mmask8)(M), R))
9577
9578 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
9579 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9580 (__v4sf)(__m128)(Y), (int)(P), \
9581 (__mmask8)-1, R))
9582
9583 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
9584 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9585 (__v4sf)(__m128)(Y), (int)(P), \
9586 (__mmask8)(M), R))
9587 #endif
9588
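/* The gathers below load, and the scatters store, one element per mask
   bit at address __addr + __index[i] * __scale, where __scale must be
   1, 2, 4 or 8.  Elements whose mask bit is clear keep the value from
   __v1_old (gathers) or leave memory untouched (scatters).  A masked
   gather of doubles through 32-bit indices looks roughly like

     __m512d d = _mm512_mask_i32gather_pd (src, k, idx, base, 8);

   where src is the pass-through vector, k an __mmask8, idx a __m256i of
   indices and base a pointer to the table being gathered from.  */
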
9589 #ifdef __OPTIMIZE__
9590 extern __inline __m512
9591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9592 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
9593 {
9594 __m512 __v1_old = _mm512_undefined_ps ();
9595 __mmask16 __mask = 0xFFFF;
9596
9597 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9598 __addr,
9599 (__v16si) __index,
9600 __mask, __scale);
9601 }
9602
9603 extern __inline __m512
9604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9605 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
9606 __m512i __index, void const *__addr, int __scale)
9607 {
9608 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9609 __addr,
9610 (__v16si) __index,
9611 __mask, __scale);
9612 }
9613
9614 extern __inline __m512d
9615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9616 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
9617 {
9618 __m512d __v1_old = _mm512_undefined_pd ();
9619 __mmask8 __mask = 0xFF;
9620
9621 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9622 __addr,
9623 (__v8si) __index, __mask,
9624 __scale);
9625 }
9626
9627 extern __inline __m512d
9628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9629 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9630 __m256i __index, void const *__addr, int __scale)
9631 {
9632 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9633 __addr,
9634 (__v8si) __index,
9635 __mask, __scale);
9636 }
9637
9638 extern __inline __m256
9639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9640 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
9641 {
9642 __m256 __v1_old = _mm256_undefined_ps ();
9643 __mmask8 __mask = 0xFF;
9644
9645 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9646 __addr,
9647 (__v8di) __index, __mask,
9648 __scale);
9649 }
9650
9651 extern __inline __m256
9652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9653 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9654 __m512i __index, void const *__addr, int __scale)
9655 {
9656 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9657 __addr,
9658 (__v8di) __index,
9659 __mask, __scale);
9660 }
9661
9662 extern __inline __m512d
9663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9664 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
9665 {
9666 __m512d __v1_old = _mm512_undefined_pd ();
9667 __mmask8 __mask = 0xFF;
9668
9669 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9670 __addr,
9671 (__v8di) __index, __mask,
9672 __scale);
9673 }
9674
9675 extern __inline __m512d
9676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9677 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9678 __m512i __index, void const *__addr, int __scale)
9679 {
9680 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9681 __addr,
9682 (__v8di) __index,
9683 __mask, __scale);
9684 }
9685
9686 extern __inline __m512i
9687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9688 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
9689 {
9690 __m512i __v1_old = _mm512_undefined_epi32 ();
9691 __mmask16 __mask = 0xFFFF;
9692
9693 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9694 __addr,
9695 (__v16si) __index,
9696 __mask, __scale);
9697 }
9698
9699 extern __inline __m512i
9700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9701 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9702 __m512i __index, void const *__addr, int __scale)
9703 {
9704 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9705 __addr,
9706 (__v16si) __index,
9707 __mask, __scale);
9708 }
9709
9710 extern __inline __m512i
9711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9712 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
9713 {
9714 __m512i __v1_old = _mm512_undefined_epi32 ();
9715 __mmask8 __mask = 0xFF;
9716
9717 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9718 __addr,
9719 (__v8si) __index, __mask,
9720 __scale);
9721 }
9722
9723 extern __inline __m512i
9724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9725 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9726 __m256i __index, void const *__addr,
9727 int __scale)
9728 {
9729 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9730 __addr,
9731 (__v8si) __index,
9732 __mask, __scale);
9733 }
9734
9735 extern __inline __m256i
9736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9737 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
9738 {
9739 __m256i __v1_old = _mm256_undefined_si256 ();
9740 __mmask8 __mask = 0xFF;
9741
9742 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9743 __addr,
9744 (__v8di) __index,
9745 __mask, __scale);
9746 }
9747
9748 extern __inline __m256i
9749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9750 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9751 __m512i __index, void const *__addr, int __scale)
9752 {
9753 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9754 __addr,
9755 (__v8di) __index,
9756 __mask, __scale);
9757 }
9758
9759 extern __inline __m512i
9760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9761 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
9762 {
9763 __m512i __v1_old = _mm512_undefined_epi32 ();
9764 __mmask8 __mask = 0xFF;
9765
9766 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9767 __addr,
9768 (__v8di) __index, __mask,
9769 __scale);
9770 }
9771
9772 extern __inline __m512i
9773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9774 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9775 __m512i __index, void const *__addr,
9776 int __scale)
9777 {
9778 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9779 __addr,
9780 (__v8di) __index,
9781 __mask, __scale);
9782 }
9783
9784 extern __inline void
9785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9786 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
9787 {
9788 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9789 (__v16si) __index, (__v16sf) __v1, __scale);
9790 }
9791
9792 extern __inline void
9793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9794 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
9795 __m512i __index, __m512 __v1, int __scale)
9796 {
9797 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9798 (__v16sf) __v1, __scale);
9799 }
9800
9801 extern __inline void
9802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9803 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
9804 int __scale)
9805 {
9806 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9807 (__v8si) __index, (__v8df) __v1, __scale);
9808 }
9809
9810 extern __inline void
9811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9812 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
9813 __m256i __index, __m512d __v1, int __scale)
9814 {
9815 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9816 (__v8df) __v1, __scale);
9817 }
9818
9819 extern __inline void
9820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9821 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
9822 {
9823 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9824 (__v8di) __index, (__v8sf) __v1, __scale);
9825 }
9826
9827 extern __inline void
9828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9829 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
9830 __m512i __index, __m256 __v1, int __scale)
9831 {
9832 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9833 (__v8sf) __v1, __scale);
9834 }
9835
9836 extern __inline void
9837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9838 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
9839 int __scale)
9840 {
9841 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9842 (__v8di) __index, (__v8df) __v1, __scale);
9843 }
9844
9845 extern __inline void
9846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9847 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
9848 __m512i __index, __m512d __v1, int __scale)
9849 {
9850 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9851 (__v8df) __v1, __scale);
9852 }
9853
9854 extern __inline void
9855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9856 _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
9857 __m512i __v1, int __scale)
9858 {
9859 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9860 (__v16si) __index, (__v16si) __v1, __scale);
9861 }
9862
9863 extern __inline void
9864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9865 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
9866 __m512i __index, __m512i __v1, int __scale)
9867 {
9868 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9869 (__v16si) __v1, __scale);
9870 }
9871
9872 extern __inline void
9873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9874 _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
9875 __m512i __v1, int __scale)
9876 {
9877 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9878 (__v8si) __index, (__v8di) __v1, __scale);
9879 }
9880
9881 extern __inline void
9882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9883 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
9884 __m256i __index, __m512i __v1, int __scale)
9885 {
9886 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9887 (__v8di) __v1, __scale);
9888 }
9889
9890 extern __inline void
9891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9892 _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
9893 __m256i __v1, int __scale)
9894 {
9895 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9896 (__v8di) __index, (__v8si) __v1, __scale);
9897 }
9898
9899 extern __inline void
9900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9901 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
9902 __m512i __index, __m256i __v1, int __scale)
9903 {
9904 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9905 (__v8si) __v1, __scale);
9906 }
9907
9908 extern __inline void
9909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9910 _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
9911 __m512i __v1, int __scale)
9912 {
9913 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9914 (__v8di) __index, (__v8di) __v1, __scale);
9915 }
9916
9917 extern __inline void
9918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9919 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
9920 __m512i __index, __m512i __v1, int __scale)
9921 {
9922 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9923 (__v8di) __v1, __scale);
9924 }
9925 #else
9926 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
9927 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
9928 (void const *)ADDR, \
9929 (__v16si)(__m512i)INDEX, \
9930 (__mmask16)0xFFFF, (int)SCALE)
9931
9932 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9933 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \
9934 (void const *)ADDR, \
9935 (__v16si)(__m512i)INDEX, \
9936 (__mmask16)MASK, (int)SCALE)
9937
9938 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
9939 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
9940 (void const *)ADDR, \
9941 (__v8si)(__m256i)INDEX, \
9942 (__mmask8)0xFF, (int)SCALE)
9943
9944 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9945 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \
9946 (void const *)ADDR, \
9947 (__v8si)(__m256i)INDEX, \
9948 (__mmask8)MASK, (int)SCALE)
9949
9950 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
9951 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
9952 (void const *)ADDR, \
9953 (__v8di)(__m512i)INDEX, \
9954 (__mmask8)0xFF, (int)SCALE)
9955
9956 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9957 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \
9958 (void const *)ADDR, \
9959 (__v8di)(__m512i)INDEX, \
9960 (__mmask8)MASK, (int)SCALE)
9961
9962 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
9963 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
9964 (void const *)ADDR, \
9965 (__v8di)(__m512i)INDEX, \
9966 (__mmask8)0xFF, (int)SCALE)
9967
9968 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9969 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \
9970 (void const *)ADDR, \
9971 (__v8di)(__m512i)INDEX, \
9972 (__mmask8)MASK, (int)SCALE)
9973
9974 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
9975 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
9976 (void const *)ADDR, \
9977 (__v16si)(__m512i)INDEX, \
9978 (__mmask16)0xFFFF, (int)SCALE)
9979
9980 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9981 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \
9982 (void const *)ADDR, \
9983 (__v16si)(__m512i)INDEX, \
9984 (__mmask16)MASK, (int)SCALE)
9985
9986 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
9987 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
9988 (void const *)ADDR, \
9989 (__v8si)(__m256i)INDEX, \
9990 (__mmask8)0xFF, (int)SCALE)
9991
9992 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9993 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \
9994 (void const *)ADDR, \
9995 (__v8si)(__m256i)INDEX, \
9996 (__mmask8)MASK, (int)SCALE)
9997
9998 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
9999 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
10000 (void const *)ADDR, \
10001 (__v8di)(__m512i)INDEX, \
10002 (__mmask8)0xFF, (int)SCALE)
10003
10004 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
10005 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \
10006 (void const *)ADDR, \
10007 (__v8di)(__m512i)INDEX, \
10008 (__mmask8)MASK, (int)SCALE)
10009
10010 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
10011 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
10012 (void const *)ADDR, \
10013 (__v8di)(__m512i)INDEX, \
10014 (__mmask8)0xFF, (int)SCALE)
10015
10016 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
10017 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \
10018 (void const *)ADDR, \
10019 (__v8di)(__m512i)INDEX, \
10020 (__mmask8)MASK, (int)SCALE)
10021
10022 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
10023 __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF, \
10024 (__v16si)(__m512i)INDEX, \
10025 (__v16sf)(__m512)V1, (int)SCALE)
10026
10027 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
10028 __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK, \
10029 (__v16si)(__m512i)INDEX, \
10030 (__v16sf)(__m512)V1, (int)SCALE)
10031
10032 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
10033 __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF, \
10034 (__v8si)(__m256i)INDEX, \
10035 (__v8df)(__m512d)V1, (int)SCALE)
10036
10037 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
10038 __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK, \
10039 (__v8si)(__m256i)INDEX, \
10040 (__v8df)(__m512d)V1, (int)SCALE)
10041
10042 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
10043 __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF, \
10044 (__v8di)(__m512i)INDEX, \
10045 (__v8sf)(__m256)V1, (int)SCALE)
10046
10047 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
10048 __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)MASK, \
10049 (__v8di)(__m512i)INDEX, \
10050 (__v8sf)(__m256)V1, (int)SCALE)
10051
10052 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
10053 __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF, \
10054 (__v8di)(__m512i)INDEX, \
10055 (__v8df)(__m512d)V1, (int)SCALE)
10056
10057 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
10058 __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK, \
10059 (__v8di)(__m512i)INDEX, \
10060 (__v8df)(__m512d)V1, (int)SCALE)
10061
10062 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
10063 __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF, \
10064 (__v16si)(__m512i)INDEX, \
10065 (__v16si)(__m512i)V1, (int)SCALE)
10066
10067 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
10068 __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK, \
10069 (__v16si)(__m512i)INDEX, \
10070 (__v16si)(__m512i)V1, (int)SCALE)
10071
10072 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
10073 __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF, \
10074 (__v8si)(__m256i)INDEX, \
10075 (__v8di)(__m512i)V1, (int)SCALE)
10076
10077 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
10078 __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK, \
10079 (__v8si)(__m256i)INDEX, \
10080 (__v8di)(__m512i)V1, (int)SCALE)
10081
10082 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
10083 __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF, \
10084 (__v8di)(__m512i)INDEX, \
10085 (__v8si)(__m256i)V1, (int)SCALE)
10086
10087 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
10088 __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK, \
10089 (__v8di)(__m512i)INDEX, \
10090 (__v8si)(__m256i)V1, (int)SCALE)
10091
10092 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
10093 __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF, \
10094 (__v8di)(__m512i)INDEX, \
10095 (__v8di)(__m512i)V1, (int)SCALE)
10096
10097 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
10098 __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK, \
10099 (__v8di)(__m512i)INDEX, \
10100 (__v8di)(__m512i)V1, (int)SCALE)
10101 #endif
10102
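/* Usage sketch (illustration only, not part of the header's interface):
   a 64-bit-index scatter stores element i of V1 at ADDR + INDEX[i]*SCALE
   for every lane whose mask bit is set.  The buffer and values below are
   hypothetical.

     double __buf[8];
     __m512i __idx = _mm512_set_epi64 (7, 6, 5, 4, 3, 2, 1, 0);
     __m512d __val = _mm512_set1_pd (1.0);
     _mm512_mask_i64scatter_pd (__buf, (__mmask8) 0x0F, __idx, __val, 8);

   Only __buf[0] through __buf[3] are written, since the mask selects the
   low four lanes.  */
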
10103 extern __inline __m512d
10104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10106 {
10107 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10108 (__v8df) __W,
10109 (__mmask8) __U);
10110 }
10111
10112 extern __inline __m512d
10113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10114 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10115 {
10116 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10117 (__v8df)
10118 _mm512_setzero_pd (),
10119 (__mmask8) __U);
10120 }
10121
10122 extern __inline void
10123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10124 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10125 {
10126 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10127 (__mmask8) __U);
10128 }
10129
10130 extern __inline __m512
10131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10132 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10133 {
10134 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10135 (__v16sf) __W,
10136 (__mmask16) __U);
10137 }
10138
10139 extern __inline __m512
10140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10141 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10142 {
10143 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10144 (__v16sf)
10145 _mm512_setzero_ps (),
10146 (__mmask16) __U);
10147 }
10148
10149 extern __inline void
10150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10151 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10152 {
10153 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10154 (__mmask16) __U);
10155 }
10156
10157 extern __inline __m512i
10158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10159 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10160 {
10161 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10162 (__v8di) __W,
10163 (__mmask8) __U);
10164 }
10165
10166 extern __inline __m512i
10167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10168 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10169 {
10170 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10171 (__v8di)
10172 _mm512_setzero_si512 (),
10173 (__mmask8) __U);
10174 }
10175
10176 extern __inline void
10177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10178 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10179 {
10180 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10181 (__mmask8) __U);
10182 }
10183
10184 extern __inline __m512i
10185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10186 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10187 {
10188 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10189 (__v16si) __W,
10190 (__mmask16) __U);
10191 }
10192
10193 extern __inline __m512i
10194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10195 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10196 {
10197 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10198 (__v16si)
10199 _mm512_setzero_si512 (),
10200 (__mmask16) __U);
10201 }
10202
10203 extern __inline void
10204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10206 {
10207 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10208 (__mmask16) __U);
10209 }
10210
10211 extern __inline __m512d
10212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10213 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10214 {
10215 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10216 (__v8df) __W,
10217 (__mmask8) __U);
10218 }
10219
10220 extern __inline __m512d
10221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10222 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10223 {
10224 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10225 (__v8df)
10226 _mm512_setzero_pd (),
10227 (__mmask8) __U);
10228 }
10229
10230 extern __inline __m512d
10231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10232 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10233 {
10234 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10235 (__v8df) __W,
10236 (__mmask8) __U);
10237 }
10238
10239 extern __inline __m512d
10240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10241 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10242 {
10243 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10244 (__v8df)
10245 _mm512_setzero_pd (),
10246 (__mmask8) __U);
10247 }
10248
10249 extern __inline __m512
10250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10251 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
10252 {
10253 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
10254 (__v16sf) __W,
10255 (__mmask16) __U);
10256 }
10257
10258 extern __inline __m512
10259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10260 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
10261 {
10262 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
10263 (__v16sf)
10264 _mm512_setzero_ps (),
10265 (__mmask16) __U);
10266 }
10267
10268 extern __inline __m512
10269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10270 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
10271 {
10272 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
10273 (__v16sf) __W,
10274 (__mmask16) __U);
10275 }
10276
10277 extern __inline __m512
10278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10279 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
10280 {
10281 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
10282 (__v16sf)
10283 _mm512_setzero_ps (),
10284 (__mmask16) __U);
10285 }
10286
10287 extern __inline __m512i
10288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10289 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10290 {
10291 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
10292 (__v8di) __W,
10293 (__mmask8) __U);
10294 }
10295
10296 extern __inline __m512i
10297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10298 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
10299 {
10300 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
10301 (__v8di)
10302 _mm512_setzero_si512 (),
10303 (__mmask8) __U);
10304 }
10305
10306 extern __inline __m512i
10307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10308 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
10309 {
10310 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10311 (__v8di) __W,
10312 (__mmask8) __U);
10313 }
10314
10315 extern __inline __m512i
10316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10317 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10318 {
10319 return (__m512i)
10320 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10321 (__v8di)
10322 _mm512_setzero_si512 (),
10323 (__mmask8) __U);
10324 }
10325
10326 extern __inline __m512i
10327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10328 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10329 {
10330 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10331 (__v16si) __W,
10332 (__mmask16) __U);
10333 }
10334
10335 extern __inline __m512i
10336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10337 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10338 {
10339 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10340 (__v16si)
10341 _mm512_setzero_si512 (),
10342 (__mmask16) __U);
10343 }
10344
10345 extern __inline __m512i
10346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10347 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10348 {
10349 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10350 (__v16si) __W,
10351 (__mmask16) __U);
10352 }
10353
10354 extern __inline __m512i
10355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10356 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10357 {
10358 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10359 (__v16si)
10360 _mm512_setzero_si512 (),
10361 (__mmask16) __U);
10362 }
10363
10364 /* Mask arithmetic operations. */
10365 #define _kand_mask16 _mm512_kand
10366 #define _kandn_mask16 _mm512_kandn
10367 #define _knot_mask16 _mm512_knot
10368 #define _kor_mask16 _mm512_kor
10369 #define _kxnor_mask16 _mm512_kxnor
10370 #define _kxor_mask16 _mm512_kxor
10371
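/* Usage sketch (illustration only): the aliases above make the 16-bit
   mask operations available under both the _mm512_k* and the _k*_mask16
   spellings.  Combining two hypothetical comparison results:

     __mmask16 __gt  = _mm512_cmpgt_epi32_mask (__a, __b);
     __mmask16 __ne  = _mm512_cmpneq_epi32_mask (__a, __c);
     __mmask16 __sel = _kand_mask16 (__gt, __ne);

   __a, __b and __c stand for caller-supplied __m512i values; the
   comparison intrinsics are assumed to be the ones defined earlier in
   this header.  */
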
10372 extern __inline unsigned char
10373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10374 _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
10375 {
10376 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10377 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10378 }
10379
10380 extern __inline unsigned char
10381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10382 _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10383 {
10384 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10385 (__mmask16) __B);
10386 }
10387
10388 extern __inline unsigned char
10389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10390 _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10391 {
10392 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10393 (__mmask16) __B);
10394 }
10395
10396 extern __inline unsigned int
10397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10398 _cvtmask16_u32 (__mmask16 __A)
10399 {
10400 return (unsigned int) __builtin_ia32_kmovw ((__mmask16) __A);
10401 }
10402
10403 extern __inline __mmask16
10404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10405 _cvtu32_mask16 (unsigned int __A)
10406 {
10407 return (__mmask16) __builtin_ia32_kmovw ((__mmask16) __A);
10408 }
10409
10410 extern __inline __mmask16
10411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10412 _load_mask16 (__mmask16 *__A)
10413 {
10414 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10415 }
10416
10417 extern __inline void
10418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10419 _store_mask16 (__mmask16 *__A, __mmask16 __B)
10420 {
10421 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10422 }
10423
10424 extern __inline __mmask16
10425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10426 _mm512_kand (__mmask16 __A, __mmask16 __B)
10427 {
10428 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10429 }
10430
10431 extern __inline __mmask16
10432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10433 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10434 {
10435 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10436 (__mmask16) __B);
10437 }
10438
10439 extern __inline __mmask16
10440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10441 _mm512_kor (__mmask16 __A, __mmask16 __B)
10442 {
10443 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10444 }
10445
10446 extern __inline int
10447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10448 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10449 {
10450 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10451 (__mmask16) __B);
10452 }
10453
10454 extern __inline int
10455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10456 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10457 {
10458 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10459 (__mmask16) __B);
10460 }
10461
10462 extern __inline __mmask16
10463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10464 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10465 {
10466 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10467 }
10468
10469 extern __inline __mmask16
10470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10472 {
10473 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10474 }
10475
10476 extern __inline __mmask16
10477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10478 _mm512_knot (__mmask16 __A)
10479 {
10480 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10481 }
10482
10483 extern __inline __mmask16
10484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10485 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10486 {
10487 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10488 }
10489
10490 extern __inline __mmask16
10491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10492 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10493 {
10494 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10495 }
10496
10497 #ifdef __OPTIMIZE__
10498 extern __inline __m512i
10499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10500 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10501 const int __imm)
10502 {
10503 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10504 (__v4si) __D,
10505 __imm,
10506 (__v16si)
10507 _mm512_setzero_si512 (),
10508 __B);
10509 }
10510
10511 extern __inline __m512
10512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10513 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10514 const int __imm)
10515 {
10516 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10517 (__v4sf) __D,
10518 __imm,
10519 (__v16sf)
10520 _mm512_setzero_ps (), __B);
10521 }
10522
10523 extern __inline __m512i
10524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10525 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10526 __m128i __D, const int __imm)
10527 {
10528 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10529 (__v4si) __D,
10530 __imm,
10531 (__v16si) __A,
10532 __B);
10533 }
10534
10535 extern __inline __m512
10536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10537 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10538 __m128 __D, const int __imm)
10539 {
10540 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10541 (__v4sf) __D,
10542 __imm,
10543 (__v16sf) __A, __B);
10544 }
10545 #else
10546 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
10547 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10548 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
10549 (__mmask16)(A)))
10550
10551 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
10552 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10553 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
10554 (__mmask16)(A)))
10555
10556 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
10557 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10558 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
10559 (__mmask16)(B)))
10560
10561 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
10562 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10563 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
10564 (__mmask16)(B)))
10565 #endif
10566
10567 extern __inline __m512i
10568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10569 _mm512_max_epi64 (__m512i __A, __m512i __B)
10570 {
10571 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10572 (__v8di) __B,
10573 (__v8di)
10574 _mm512_undefined_epi32 (),
10575 (__mmask8) -1);
10576 }
10577
10578 extern __inline __m512i
10579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10580 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10581 {
10582 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10583 (__v8di) __B,
10584 (__v8di)
10585 _mm512_setzero_si512 (),
10586 __M);
10587 }
10588
10589 extern __inline __m512i
10590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10591 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10592 {
10593 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10594 (__v8di) __B,
10595 (__v8di) __W, __M);
10596 }
10597
10598 extern __inline __m512i
10599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10600 _mm512_min_epi64 (__m512i __A, __m512i __B)
10601 {
10602 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10603 (__v8di) __B,
10604 (__v8di)
10605 _mm512_undefined_epi32 (),
10606 (__mmask8) -1);
10607 }
10608
10609 extern __inline __m512i
10610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10611 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10612 {
10613 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10614 (__v8di) __B,
10615 (__v8di) __W, __M);
10616 }
10617
10618 extern __inline __m512i
10619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10620 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10621 {
10622 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10623 (__v8di) __B,
10624 (__v8di)
10625 _mm512_setzero_si512 (),
10626 __M);
10627 }
10628
10629 extern __inline __m512i
10630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10631 _mm512_max_epu64 (__m512i __A, __m512i __B)
10632 {
10633 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10634 (__v8di) __B,
10635 (__v8di)
10636 _mm512_undefined_epi32 (),
10637 (__mmask8) -1);
10638 }
10639
10640 extern __inline __m512i
10641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10642 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10643 {
10644 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10645 (__v8di) __B,
10646 (__v8di)
10647 _mm512_setzero_si512 (),
10648 __M);
10649 }
10650
10651 extern __inline __m512i
10652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10653 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10654 {
10655 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10656 (__v8di) __B,
10657 (__v8di) __W, __M);
10658 }
10659
10660 extern __inline __m512i
10661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10662 _mm512_min_epu64 (__m512i __A, __m512i __B)
10663 {
10664 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10665 (__v8di) __B,
10666 (__v8di)
10667 _mm512_undefined_epi32 (),
10668 (__mmask8) -1);
10669 }
10670
10671 extern __inline __m512i
10672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10673 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10674 {
10675 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10676 (__v8di) __B,
10677 (__v8di) __W, __M);
10678 }
10679
10680 extern __inline __m512i
10681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10682 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10683 {
10684 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10685 (__v8di) __B,
10686 (__v8di)
10687 _mm512_setzero_si512 (),
10688 __M);
10689 }
10690
10691 extern __inline __m512i
10692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10693 _mm512_max_epi32 (__m512i __A, __m512i __B)
10694 {
10695 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10696 (__v16si) __B,
10697 (__v16si)
10698 _mm512_undefined_epi32 (),
10699 (__mmask16) -1);
10700 }
10701
10702 extern __inline __m512i
10703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10704 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10705 {
10706 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10707 (__v16si) __B,
10708 (__v16si)
10709 _mm512_setzero_si512 (),
10710 __M);
10711 }
10712
10713 extern __inline __m512i
10714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10715 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10716 {
10717 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10718 (__v16si) __B,
10719 (__v16si) __W, __M);
10720 }
10721
10722 extern __inline __m512i
10723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10724 _mm512_min_epi32 (__m512i __A, __m512i __B)
10725 {
10726 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10727 (__v16si) __B,
10728 (__v16si)
10729 _mm512_undefined_epi32 (),
10730 (__mmask16) -1);
10731 }
10732
10733 extern __inline __m512i
10734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10735 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10736 {
10737 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10738 (__v16si) __B,
10739 (__v16si)
10740 _mm512_setzero_si512 (),
10741 __M);
10742 }
10743
10744 extern __inline __m512i
10745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10747 {
10748 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10749 (__v16si) __B,
10750 (__v16si) __W, __M);
10751 }
10752
10753 extern __inline __m512i
10754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10755 _mm512_max_epu32 (__m512i __A, __m512i __B)
10756 {
10757 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10758 (__v16si) __B,
10759 (__v16si)
10760 _mm512_undefined_epi32 (),
10761 (__mmask16) -1);
10762 }
10763
10764 extern __inline __m512i
10765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10766 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10767 {
10768 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10769 (__v16si) __B,
10770 (__v16si)
10771 _mm512_setzero_si512 (),
10772 __M);
10773 }
10774
10775 extern __inline __m512i
10776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10777 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10778 {
10779 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10780 (__v16si) __B,
10781 (__v16si) __W, __M);
10782 }
10783
10784 extern __inline __m512i
10785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10786 _mm512_min_epu32 (__m512i __A, __m512i __B)
10787 {
10788 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10789 (__v16si) __B,
10790 (__v16si)
10791 _mm512_undefined_epi32 (),
10792 (__mmask16) -1);
10793 }
10794
10795 extern __inline __m512i
10796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10797 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10798 {
10799 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10800 (__v16si) __B,
10801 (__v16si)
10802 _mm512_setzero_si512 (),
10803 __M);
10804 }
10805
10806 extern __inline __m512i
10807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10808 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10809 {
10810 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10811 (__v16si) __B,
10812 (__v16si) __W, __M);
10813 }
10814
10815 extern __inline __m512
10816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10817 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10818 {
10819 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10820 (__v16sf) __B,
10821 (__v16sf)
10822 _mm512_undefined_ps (),
10823 (__mmask16) -1);
10824 }
10825
10826 extern __inline __m512
10827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10828 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10829 {
10830 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10831 (__v16sf) __B,
10832 (__v16sf) __W,
10833 (__mmask16) __U);
10834 }
10835
10836 extern __inline __m512
10837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10838 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10839 {
10840 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10841 (__v16sf) __B,
10842 (__v16sf)
10843 _mm512_setzero_ps (),
10844 (__mmask16) __U);
10845 }
10846
10847 #ifdef __OPTIMIZE__
10848 extern __inline __m128d
10849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10850 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10851 {
10852 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10853 (__v2df) __B,
10854 __R);
10855 }
10856
10857 extern __inline __m128d
10858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10859 _mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
10860 __m128d __B, const int __R)
10861 {
10862 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
10863 (__v2df) __B,
10864 (__v2df) __W,
10865 (__mmask8) __U, __R);
10866 }
10867
10868 extern __inline __m128d
10869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10870 _mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
10871 const int __R)
10872 {
10873 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
10874 (__v2df) __B,
10875 (__v2df)
10876 _mm_setzero_pd (),
10877 (__mmask8) __U, __R);
10878 }
10879
10880 extern __inline __m128
10881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10882 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10883 {
10884 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10885 (__v4sf) __B,
10886 __R);
10887 }
10888
10889 extern __inline __m128
10890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10891 _mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
10892 __m128 __B, const int __R)
10893 {
10894 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
10895 (__v4sf) __B,
10896 (__v4sf) __W,
10897 (__mmask8) __U, __R);
10898 }
10899
10900 extern __inline __m128
10901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10902 _mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
10903 const int __R)
10904 {
10905 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
10906 (__v4sf) __B,
10907 (__v4sf)
10908 _mm_setzero_ps (),
10909 (__mmask8) __U, __R);
10910 }
10911
10912 extern __inline __m128d
10913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10914 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10915 {
10916 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10917 (__v2df) __B,
10918 __R);
10919 }
10920
10921 extern __inline __m128d
10922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10923 _mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
10924 __m128d __B, const int __R)
10925 {
10926 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
10927 (__v2df) __B,
10928 (__v2df) __W,
10929 (__mmask8) __U, __R);
10930 }
10931
10932 extern __inline __m128d
10933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10934 _mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
10935 const int __R)
10936 {
10937 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
10938 (__v2df) __B,
10939 (__v2df)
10940 _mm_setzero_pd (),
10941 (__mmask8) __U, __R);
10942 }
10943
10944 extern __inline __m128
10945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10946 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10947 {
10948 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10949 (__v4sf) __B,
10950 __R);
10951 }
10952
10953 extern __inline __m128
10954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10955 _mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
10956 __m128 __B, const int __R)
10957 {
10958 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
10959 (__v4sf) __B,
10960 (__v4sf) __W,
10961 (__mmask8) __U, __R);
10962 }
10963
10964 extern __inline __m128
10965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10966 _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
10967 const int __R)
10968 {
10969 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
10970 (__v4sf) __B,
10971 (__v4sf)
10972 _mm_setzero_ps (),
10973 (__mmask8) __U, __R);
10974 }
10975
10976 #else
10977 #define _mm_max_round_sd(A, B, C) \
10978 (__m128d)__builtin_ia32_maxsd_round(A, B, C)
10979
10980 #define _mm_mask_max_round_sd(W, U, A, B, C) \
10981 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)
10982
10983 #define _mm_maskz_max_round_sd(U, A, B, C) \
10984 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
10985
10986 #define _mm_max_round_ss(A, B, C) \
10987 (__m128)__builtin_ia32_maxss_round(A, B, C)
10988
10989 #define _mm_mask_max_round_ss(W, U, A, B, C) \
10990 (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)
10991
10992 #define _mm_maskz_max_round_ss(U, A, B, C) \
10993 (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
10994
10995 #define _mm_min_round_sd(A, B, C) \
10996 (__m128d)__builtin_ia32_minsd_round(A, B, C)
10997
10998 #define _mm_mask_min_round_sd(W, U, A, B, C) \
10999 (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)
11000
11001 #define _mm_maskz_min_round_sd(U, A, B, C) \
11002 (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
11003
11004 #define _mm_min_round_ss(A, B, C) \
11005 (__m128)__builtin_ia32_minss_round(A, B, C)
11006
11007 #define _mm_mask_min_round_ss(W, U, A, B, C) \
11008 (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)
11009
11010 #define _mm_maskz_min_round_ss(U, A, B, C) \
11011 (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
11012
11013 #endif
11014
11015 extern __inline __m512d
11016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11017 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11018 {
11019 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11020 (__v8df) __W,
11021 (__mmask8) __U);
11022 }
11023
11024 extern __inline __m512
11025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11026 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11027 {
11028 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11029 (__v16sf) __W,
11030 (__mmask16) __U);
11031 }
11032
11033 extern __inline __m512i
11034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11035 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11036 {
11037 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11038 (__v8di) __W,
11039 (__mmask8) __U);
11040 }
11041
11042 extern __inline __m512i
11043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11044 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11045 {
11046 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11047 (__v16si) __W,
11048 (__mmask16) __U);
11049 }
11050
11051 #ifdef __OPTIMIZE__
11052 extern __inline __m128d
11053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11054 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11055 {
11056 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11057 (__v2df) __A,
11058 (__v2df) __B,
11059 __R);
11060 }
11061
11062 extern __inline __m128
11063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11064 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11065 {
11066 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11067 (__v4sf) __A,
11068 (__v4sf) __B,
11069 __R);
11070 }
11071
11072 extern __inline __m128d
11073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11074 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11075 {
11076 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11077 (__v2df) __A,
11078 -(__v2df) __B,
11079 __R);
11080 }
11081
11082 extern __inline __m128
11083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11084 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11085 {
11086 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11087 (__v4sf) __A,
11088 -(__v4sf) __B,
11089 __R);
11090 }
11091
11092 extern __inline __m128d
11093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11094 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11095 {
11096 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11097 -(__v2df) __A,
11098 (__v2df) __B,
11099 __R);
11100 }
11101
11102 extern __inline __m128
11103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11104 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11105 {
11106 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11107 -(__v4sf) __A,
11108 (__v4sf) __B,
11109 __R);
11110 }
11111
11112 extern __inline __m128d
11113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11114 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11115 {
11116 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11117 -(__v2df) __A,
11118 -(__v2df) __B,
11119 __R);
11120 }
11121
11122 extern __inline __m128
11123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11124 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11125 {
11126 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11127 -(__v4sf) __A,
11128 -(__v4sf) __B,
11129 __R);
11130 }
11131 #else
11132 #define _mm_fmadd_round_sd(A, B, C, R) \
11133 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
11134
11135 #define _mm_fmadd_round_ss(A, B, C, R) \
11136 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
11137
11138 #define _mm_fmsub_round_sd(A, B, C, R) \
11139 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
11140
11141 #define _mm_fmsub_round_ss(A, B, C, R) \
11142 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
11143
11144 #define _mm_fnmadd_round_sd(A, B, C, R) \
11145 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
11146
11147 #define _mm_fnmadd_round_ss(A, B, C, R) \
11148 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
11149
11150 #define _mm_fnmsub_round_sd(A, B, C, R) \
11151 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
11152
11153 #define _mm_fnmsub_round_ss(A, B, C, R) \
11154 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
11155 #endif
11156
11157 #ifdef __OPTIMIZE__
11158 extern __inline int
11159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11160 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
11161 {
11162 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
11163 }
11164
11165 extern __inline int
11166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11167 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
11168 {
11169 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
11170 }
11171 #else
11172 #define _mm_comi_round_ss(A, B, C, D)\
11173 __builtin_ia32_vcomiss(A, B, C, D)
11174 #define _mm_comi_round_sd(A, B, C, D)\
11175 __builtin_ia32_vcomisd(A, B, C, D)
11176 #endif
11177
11178 extern __inline __m512d
11179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180 _mm512_sqrt_pd (__m512d __A)
11181 {
11182 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11183 (__v8df)
11184 _mm512_undefined_pd (),
11185 (__mmask8) -1,
11186 _MM_FROUND_CUR_DIRECTION);
11187 }
11188
11189 extern __inline __m512d
11190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11191 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
11192 {
11193 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11194 (__v8df) __W,
11195 (__mmask8) __U,
11196 _MM_FROUND_CUR_DIRECTION);
11197 }
11198
11199 extern __inline __m512d
11200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11201 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
11202 {
11203 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11204 (__v8df)
11205 _mm512_setzero_pd (),
11206 (__mmask8) __U,
11207 _MM_FROUND_CUR_DIRECTION);
11208 }
11209
11210 extern __inline __m512
11211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11212 _mm512_sqrt_ps (__m512 __A)
11213 {
11214 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11215 (__v16sf)
11216 _mm512_undefined_ps (),
11217 (__mmask16) -1,
11218 _MM_FROUND_CUR_DIRECTION);
11219 }
11220
11221 extern __inline __m512
11222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11223 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
11224 {
11225 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11226 (__v16sf) __W,
11227 (__mmask16) __U,
11228 _MM_FROUND_CUR_DIRECTION);
11229 }
11230
11231 extern __inline __m512
11232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11233 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
11234 {
11235 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11236 (__v16sf)
11237 _mm512_setzero_ps (),
11238 (__mmask16) __U,
11239 _MM_FROUND_CUR_DIRECTION);
11240 }
11241
11242 extern __inline __m512d
11243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11244 _mm512_add_pd (__m512d __A, __m512d __B)
11245 {
11246 return (__m512d) ((__v8df)__A + (__v8df)__B);
11247 }
11248
11249 extern __inline __m512d
11250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11251 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11252 {
11253 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11254 (__v8df) __B,
11255 (__v8df) __W,
11256 (__mmask8) __U,
11257 _MM_FROUND_CUR_DIRECTION);
11258 }
11259
11260 extern __inline __m512d
11261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11262 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
11263 {
11264 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11265 (__v8df) __B,
11266 (__v8df)
11267 _mm512_setzero_pd (),
11268 (__mmask8) __U,
11269 _MM_FROUND_CUR_DIRECTION);
11270 }
11271
11272 extern __inline __m512
11273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11274 _mm512_add_ps (__m512 __A, __m512 __B)
11275 {
11276 return (__m512) ((__v16sf)__A + (__v16sf)__B);
11277 }
11278
11279 extern __inline __m512
11280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11281 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11282 {
11283 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11284 (__v16sf) __B,
11285 (__v16sf) __W,
11286 (__mmask16) __U,
11287 _MM_FROUND_CUR_DIRECTION);
11288 }
11289
11290 extern __inline __m512
11291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11292 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
11293 {
11294 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11295 (__v16sf) __B,
11296 (__v16sf)
11297 _mm512_setzero_ps (),
11298 (__mmask16) __U,
11299 _MM_FROUND_CUR_DIRECTION);
11300 }
11301
11302 extern __inline __m128d
11303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11304 _mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11305 {
11306 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11307 (__v2df) __B,
11308 (__v2df) __W,
11309 (__mmask8) __U,
11310 _MM_FROUND_CUR_DIRECTION);
11311 }
11312
11313 extern __inline __m128d
11314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11315 _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
11316 {
11317 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11318 (__v2df) __B,
11319 (__v2df)
11320 _mm_setzero_pd (),
11321 (__mmask8) __U,
11322 _MM_FROUND_CUR_DIRECTION);
11323 }
11324
11325 extern __inline __m128
11326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11327 _mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11328 {
11329 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11330 (__v4sf) __B,
11331 (__v4sf) __W,
11332 (__mmask8) __U,
11333 _MM_FROUND_CUR_DIRECTION);
11334 }
11335
11336 extern __inline __m128
11337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11338 _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
11339 {
11340 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11341 (__v4sf) __B,
11342 (__v4sf)
11343 _mm_setzero_ps (),
11344 (__mmask8) __U,
11345 _MM_FROUND_CUR_DIRECTION);
11346 }
11347
11348 extern __inline __m512d
11349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11350 _mm512_sub_pd (__m512d __A, __m512d __B)
11351 {
11352 return (__m512d) ((__v8df)__A - (__v8df)__B);
11353 }
11354
11355 extern __inline __m512d
11356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11358 {
11359 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11360 (__v8df) __B,
11361 (__v8df) __W,
11362 (__mmask8) __U,
11363 _MM_FROUND_CUR_DIRECTION);
11364 }
11365
11366 extern __inline __m512d
11367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
11369 {
11370 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11371 (__v8df) __B,
11372 (__v8df)
11373 _mm512_setzero_pd (),
11374 (__mmask8) __U,
11375 _MM_FROUND_CUR_DIRECTION);
11376 }
11377
11378 extern __inline __m512
11379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11380 _mm512_sub_ps (__m512 __A, __m512 __B)
11381 {
11382 return (__m512) ((__v16sf)__A - (__v16sf)__B);
11383 }
11384
11385 extern __inline __m512
11386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11387 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11388 {
11389 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11390 (__v16sf) __B,
11391 (__v16sf) __W,
11392 (__mmask16) __U,
11393 _MM_FROUND_CUR_DIRECTION);
11394 }
11395
11396 extern __inline __m512
11397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11398 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
11399 {
11400 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11401 (__v16sf) __B,
11402 (__v16sf)
11403 _mm512_setzero_ps (),
11404 (__mmask16) __U,
11405 _MM_FROUND_CUR_DIRECTION);
11406 }
11407
11408 extern __inline __m128d
11409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11410 _mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11411 {
11412 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11413 (__v2df) __B,
11414 (__v2df) __W,
11415 (__mmask8) __U,
11416 _MM_FROUND_CUR_DIRECTION);
11417 }
11418
11419 extern __inline __m128d
11420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11421 _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
11422 {
11423 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11424 (__v2df) __B,
11425 (__v2df)
11426 _mm_setzero_pd (),
11427 (__mmask8) __U,
11428 _MM_FROUND_CUR_DIRECTION);
11429 }
11430
11431 extern __inline __m128
11432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11433 _mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11434 {
11435 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11436 (__v4sf) __B,
11437 (__v4sf) __W,
11438 (__mmask8) __U,
11439 _MM_FROUND_CUR_DIRECTION);
11440 }
11441
11442 extern __inline __m128
11443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11444 _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
11445 {
11446 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11447 (__v4sf) __B,
11448 (__v4sf)
11449 _mm_setzero_ps (),
11450 (__mmask8) __U,
11451 _MM_FROUND_CUR_DIRECTION);
11452 }
11453
11454 extern __inline __m512d
11455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11456 _mm512_mul_pd (__m512d __A, __m512d __B)
11457 {
11458 return (__m512d) ((__v8df)__A * (__v8df)__B);
11459 }
11460
11461 extern __inline __m512d
11462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11463 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11464 {
11465 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11466 (__v8df) __B,
11467 (__v8df) __W,
11468 (__mmask8) __U,
11469 _MM_FROUND_CUR_DIRECTION);
11470 }
11471
11472 extern __inline __m512d
11473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11474 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
11475 {
11476 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11477 (__v8df) __B,
11478 (__v8df)
11479 _mm512_setzero_pd (),
11480 (__mmask8) __U,
11481 _MM_FROUND_CUR_DIRECTION);
11482 }
11483
11484 extern __inline __m512
11485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11486 _mm512_mul_ps (__m512 __A, __m512 __B)
11487 {
11488 return (__m512) ((__v16sf)__A * (__v16sf)__B);
11489 }
11490
11491 extern __inline __m512
11492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11493 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11494 {
11495 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11496 (__v16sf) __B,
11497 (__v16sf) __W,
11498 (__mmask16) __U,
11499 _MM_FROUND_CUR_DIRECTION);
11500 }
11501
11502 extern __inline __m512
11503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11504 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
11505 {
11506 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11507 (__v16sf) __B,
11508 (__v16sf)
11509 _mm512_setzero_ps (),
11510 (__mmask16) __U,
11511 _MM_FROUND_CUR_DIRECTION);
11512 }
11513
11514 extern __inline __m128d
11515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11516 _mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
11517 __m128d __B)
11518 {
11519 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11520 (__v2df) __B,
11521 (__v2df) __W,
11522 (__mmask8) __U,
11523 _MM_FROUND_CUR_DIRECTION);
11524 }
11525
11526 extern __inline __m128d
11527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11528 _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
11529 {
11530 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11531 (__v2df) __B,
11532 (__v2df)
11533 _mm_setzero_pd (),
11534 (__mmask8) __U,
11535 _MM_FROUND_CUR_DIRECTION);
11536 }
11537
11538 extern __inline __m128
11539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11540 _mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
11541 __m128 __B)
11542 {
11543 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11544 (__v4sf) __B,
11545 (__v4sf) __W,
11546 (__mmask8) __U,
11547 _MM_FROUND_CUR_DIRECTION);
11548 }
11549
11550 extern __inline __m128
11551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11552 _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
11553 {
11554 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11555 (__v4sf) __B,
11556 (__v4sf)
11557 _mm_setzero_ps (),
11558 (__mmask8) __U,
11559 _MM_FROUND_CUR_DIRECTION);
11560 }
11561
11562 extern __inline __m512d
11563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11564 _mm512_div_pd (__m512d __M, __m512d __V)
11565 {
11566 return (__m512d) ((__v8df)__M / (__v8df)__V);
11567 }
11568
11569 extern __inline __m512d
11570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11571 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
11572 {
11573 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11574 (__v8df) __V,
11575 (__v8df) __W,
11576 (__mmask8) __U,
11577 _MM_FROUND_CUR_DIRECTION);
11578 }
11579
11580 extern __inline __m512d
11581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11582 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
11583 {
11584 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11585 (__v8df) __V,
11586 (__v8df)
11587 _mm512_setzero_pd (),
11588 (__mmask8) __U,
11589 _MM_FROUND_CUR_DIRECTION);
11590 }
11591
11592 extern __inline __m512
11593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11594 _mm512_div_ps (__m512 __A, __m512 __B)
11595 {
11596 return (__m512) ((__v16sf)__A / (__v16sf)__B);
11597 }
11598
11599 extern __inline __m512
11600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11601 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11602 {
11603 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11604 (__v16sf) __B,
11605 (__v16sf) __W,
11606 (__mmask16) __U,
11607 _MM_FROUND_CUR_DIRECTION);
11608 }
11609
11610 extern __inline __m512
11611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11612 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11613 {
11614 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11615 (__v16sf) __B,
11616 (__v16sf)
11617 _mm512_setzero_ps (),
11618 (__mmask16) __U,
11619 _MM_FROUND_CUR_DIRECTION);
11620 }
11621
11622 extern __inline __m128d
11623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11624 _mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
11625 __m128d __B)
11626 {
11627 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11628 (__v2df) __B,
11629 (__v2df) __W,
11630 (__mmask8) __U,
11631 _MM_FROUND_CUR_DIRECTION);
11632 }
11633
11634 extern __inline __m128d
11635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11636 _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
11637 {
11638 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11639 (__v2df) __B,
11640 (__v2df)
11641 _mm_setzero_pd (),
11642 (__mmask8) __U,
11643 _MM_FROUND_CUR_DIRECTION);
11644 }
11645
11646 extern __inline __m128
11647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11648 _mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
11649 __m128 __B)
11650 {
11651 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11652 (__v4sf) __B,
11653 (__v4sf) __W,
11654 (__mmask8) __U,
11655 _MM_FROUND_CUR_DIRECTION);
11656 }
11657
11658 extern __inline __m128
11659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11660 _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
11661 {
11662 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11663 (__v4sf) __B,
11664 (__v4sf)
11665 _mm_setzero_ps (),
11666 (__mmask8) __U,
11667 _MM_FROUND_CUR_DIRECTION);
11668 }
11669
11670 extern __inline __m512d
11671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672 _mm512_max_pd (__m512d __A, __m512d __B)
11673 {
11674 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11675 (__v8df) __B,
11676 (__v8df)
11677 _mm512_undefined_pd (),
11678 (__mmask8) -1,
11679 _MM_FROUND_CUR_DIRECTION);
11680 }
11681
11682 extern __inline __m512d
11683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11684 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11685 {
11686 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11687 (__v8df) __B,
11688 (__v8df) __W,
11689 (__mmask8) __U,
11690 _MM_FROUND_CUR_DIRECTION);
11691 }
11692
11693 extern __inline __m512d
11694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11695 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
11696 {
11697 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11698 (__v8df) __B,
11699 (__v8df)
11700 _mm512_setzero_pd (),
11701 (__mmask8) __U,
11702 _MM_FROUND_CUR_DIRECTION);
11703 }
11704
11705 extern __inline __m512
11706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11707 _mm512_max_ps (__m512 __A, __m512 __B)
11708 {
11709 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11710 (__v16sf) __B,
11711 (__v16sf)
11712 _mm512_undefined_ps (),
11713 (__mmask16) -1,
11714 _MM_FROUND_CUR_DIRECTION);
11715 }
11716
11717 extern __inline __m512
11718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11719 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11720 {
11721 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11722 (__v16sf) __B,
11723 (__v16sf) __W,
11724 (__mmask16) __U,
11725 _MM_FROUND_CUR_DIRECTION);
11726 }
11727
11728 extern __inline __m512
11729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11730 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11731 {
11732 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11733 (__v16sf) __B,
11734 (__v16sf)
11735 _mm512_setzero_ps (),
11736 (__mmask16) __U,
11737 _MM_FROUND_CUR_DIRECTION);
11738 }
11739
11740 extern __inline __m128d
11741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11742 _mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11743 {
11744 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11745 (__v2df) __B,
11746 (__v2df) __W,
11747 (__mmask8) __U,
11748 _MM_FROUND_CUR_DIRECTION);
11749 }
11750
11751 extern __inline __m128d
11752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11753 _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
11754 {
11755 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11756 (__v2df) __B,
11757 (__v2df)
11758 _mm_setzero_pd (),
11759 (__mmask8) __U,
11760 _MM_FROUND_CUR_DIRECTION);
11761 }
11762
11763 extern __inline __m128
11764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11765 _mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11766 {
11767 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11768 (__v4sf) __B,
11769 (__v4sf) __W,
11770 (__mmask8) __U,
11771 _MM_FROUND_CUR_DIRECTION);
11772 }
11773
11774 extern __inline __m128
11775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11776 _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
11777 {
11778 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11779 (__v4sf) __B,
11780 (__v4sf)
11781 _mm_setzero_ps (),
11782 (__mmask8) __U,
11783 _MM_FROUND_CUR_DIRECTION);
11784 }
11785
11786 extern __inline __m512d
11787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11788 _mm512_min_pd (__m512d __A, __m512d __B)
11789 {
11790 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11791 (__v8df) __B,
11792 (__v8df)
11793 _mm512_undefined_pd (),
11794 (__mmask8) -1,
11795 _MM_FROUND_CUR_DIRECTION);
11796 }
11797
11798 extern __inline __m512d
11799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11800 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11801 {
11802 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11803 (__v8df) __B,
11804 (__v8df) __W,
11805 (__mmask8) __U,
11806 _MM_FROUND_CUR_DIRECTION);
11807 }
11808
11809 extern __inline __m512d
11810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11811 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11812 {
11813 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11814 (__v8df) __B,
11815 (__v8df)
11816 _mm512_setzero_pd (),
11817 (__mmask8) __U,
11818 _MM_FROUND_CUR_DIRECTION);
11819 }
11820
11821 extern __inline __m512
11822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11823 _mm512_min_ps (__m512 __A, __m512 __B)
11824 {
11825 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11826 (__v16sf) __B,
11827 (__v16sf)
11828 _mm512_undefined_ps (),
11829 (__mmask16) -1,
11830 _MM_FROUND_CUR_DIRECTION);
11831 }
11832
11833 extern __inline __m512
11834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11836 {
11837 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11838 (__v16sf) __B,
11839 (__v16sf) __W,
11840 (__mmask16) __U,
11841 _MM_FROUND_CUR_DIRECTION);
11842 }
11843
11844 extern __inline __m512
11845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11846 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11847 {
11848 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11849 (__v16sf) __B,
11850 (__v16sf)
11851 _mm512_setzero_ps (),
11852 (__mmask16) __U,
11853 _MM_FROUND_CUR_DIRECTION);
11854 }
11855
11856 extern __inline __m128d
11857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11858 _mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11859 {
11860 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11861 (__v2df) __B,
11862 (__v2df) __W,
11863 (__mmask8) __U,
11864 _MM_FROUND_CUR_DIRECTION);
11865 }
11866
11867 extern __inline __m128d
11868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11869 _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
11870 {
11871 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11872 (__v2df) __B,
11873 (__v2df)
11874 _mm_setzero_pd (),
11875 (__mmask8) __U,
11876 _MM_FROUND_CUR_DIRECTION);
11877 }
11878
11879 extern __inline __m128
11880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11881 _mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11882 {
11883 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11884 (__v4sf) __B,
11885 (__v4sf) __W,
11886 (__mmask8) __U,
11887 _MM_FROUND_CUR_DIRECTION);
11888 }
11889
11890 extern __inline __m128
11891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11892 _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
11893 {
11894 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11895 (__v4sf) __B,
11896 (__v4sf)
11897 _mm_setzero_ps (),
11898 (__mmask8) __U,
11899 _MM_FROUND_CUR_DIRECTION);
11900 }
11901
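/* Scale operations (vscalefpd/vscalefps): each element of the result is
   __A * 2^floor(__B), evaluated with the current MXCSR rounding mode.  */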
11902 extern __inline __m512d
11903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11904 _mm512_scalef_pd (__m512d __A, __m512d __B)
11905 {
11906 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11907 (__v8df) __B,
11908 (__v8df)
11909 _mm512_undefined_pd (),
11910 (__mmask8) -1,
11911 _MM_FROUND_CUR_DIRECTION);
11912 }
11913
11914 extern __inline __m512d
11915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11916 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11917 {
11918 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11919 (__v8df) __B,
11920 (__v8df) __W,
11921 (__mmask8) __U,
11922 _MM_FROUND_CUR_DIRECTION);
11923 }
11924
11925 extern __inline __m512d
11926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11927 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11928 {
11929 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11930 (__v8df) __B,
11931 (__v8df)
11932 _mm512_setzero_pd (),
11933 (__mmask8) __U,
11934 _MM_FROUND_CUR_DIRECTION);
11935 }
11936
11937 extern __inline __m512
11938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11939 _mm512_scalef_ps (__m512 __A, __m512 __B)
11940 {
11941 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11942 (__v16sf) __B,
11943 (__v16sf)
11944 _mm512_undefined_ps (),
11945 (__mmask16) -1,
11946 _MM_FROUND_CUR_DIRECTION);
11947 }
11948
11949 extern __inline __m512
11950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11951 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11952 {
11953 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11954 (__v16sf) __B,
11955 (__v16sf) __W,
11956 (__mmask16) __U,
11957 _MM_FROUND_CUR_DIRECTION);
11958 }
11959
11960 extern __inline __m512
11961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11962 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11963 {
11964 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11965 (__v16sf) __B,
11966 (__v16sf)
11967 _mm512_setzero_ps (),
11968 (__mmask16) __U,
11969 _MM_FROUND_CUR_DIRECTION);
11970 }
11971
11972 extern __inline __m128d
11973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11974 _mm_scalef_sd (__m128d __A, __m128d __B)
11975 {
11976 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11977 (__v2df) __B,
11978 _MM_FROUND_CUR_DIRECTION);
11979 }
11980
11981 extern __inline __m128
11982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11983 _mm_scalef_ss (__m128 __A, __m128 __B)
11984 {
11985 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11986 (__v4sf) __B,
11987 _MM_FROUND_CUR_DIRECTION);
11988 }
11989
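/* Fused multiply-add family (vfmadd/vfmsub/vfnmadd/vfnmsub).  The masking
   conventions are the same throughout: the _mask forms merge unselected
   elements from the first operand, the _mask3 forms merge from the addend
   __C, and the _maskz forms zero them.  Illustrative example, with
   __U = 0x0f:
     _mm512_mask_fmadd_pd (a, 0x0f, b, c)  writes a*b+c into lanes 0-3 and
                                            keeps a in lanes 4-7;
     _mm512_mask3_fmadd_pd (a, b, c, 0x0f) keeps c in lanes 4-7 instead;
     _mm512_maskz_fmadd_pd (0x0f, a, b, c) zeroes lanes 4-7.  */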
11990 extern __inline __m512d
11991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11992 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11993 {
11994 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11995 (__v8df) __B,
11996 (__v8df) __C,
11997 (__mmask8) -1,
11998 _MM_FROUND_CUR_DIRECTION);
11999 }
12000
12001 extern __inline __m512d
12002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12004 {
12005 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12006 (__v8df) __B,
12007 (__v8df) __C,
12008 (__mmask8) __U,
12009 _MM_FROUND_CUR_DIRECTION);
12010 }
12011
12012 extern __inline __m512d
12013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12014 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12015 {
12016 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
12017 (__v8df) __B,
12018 (__v8df) __C,
12019 (__mmask8) __U,
12020 _MM_FROUND_CUR_DIRECTION);
12021 }
12022
12023 extern __inline __m512d
12024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12025 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12026 {
12027 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12028 (__v8df) __B,
12029 (__v8df) __C,
12030 (__mmask8) __U,
12031 _MM_FROUND_CUR_DIRECTION);
12032 }
12033
12034 extern __inline __m512
12035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12036 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12037 {
12038 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12039 (__v16sf) __B,
12040 (__v16sf) __C,
12041 (__mmask16) -1,
12042 _MM_FROUND_CUR_DIRECTION);
12043 }
12044
12045 extern __inline __m512
12046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12047 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12048 {
12049 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12050 (__v16sf) __B,
12051 (__v16sf) __C,
12052 (__mmask16) __U,
12053 _MM_FROUND_CUR_DIRECTION);
12054 }
12055
12056 extern __inline __m512
12057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12058 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12059 {
12060 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
12061 (__v16sf) __B,
12062 (__v16sf) __C,
12063 (__mmask16) __U,
12064 _MM_FROUND_CUR_DIRECTION);
12065 }
12066
12067 extern __inline __m512
12068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12069 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12070 {
12071 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12072 (__v16sf) __B,
12073 (__v16sf) __C,
12074 (__mmask16) __U,
12075 _MM_FROUND_CUR_DIRECTION);
12076 }
12077
12078 extern __inline __m512d
12079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12080 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12081 {
12082 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12083 (__v8df) __B,
12084 -(__v8df) __C,
12085 (__mmask8) -1,
12086 _MM_FROUND_CUR_DIRECTION);
12087 }
12088
12089 extern __inline __m512d
12090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12091 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12092 {
12093 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12094 (__v8df) __B,
12095 -(__v8df) __C,
12096 (__mmask8) __U,
12097 _MM_FROUND_CUR_DIRECTION);
12098 }
12099
12100 extern __inline __m512d
12101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12102 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12103 {
12104 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
12105 (__v8df) __B,
12106 (__v8df) __C,
12107 (__mmask8) __U,
12108 _MM_FROUND_CUR_DIRECTION);
12109 }
12110
12111 extern __inline __m512d
12112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12113 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12114 {
12115 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12116 (__v8df) __B,
12117 -(__v8df) __C,
12118 (__mmask8) __U,
12119 _MM_FROUND_CUR_DIRECTION);
12120 }
12121
12122 extern __inline __m512
12123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12124 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12125 {
12126 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12127 (__v16sf) __B,
12128 -(__v16sf) __C,
12129 (__mmask16) -1,
12130 _MM_FROUND_CUR_DIRECTION);
12131 }
12132
12133 extern __inline __m512
12134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12135 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12136 {
12137 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12138 (__v16sf) __B,
12139 -(__v16sf) __C,
12140 (__mmask16) __U,
12141 _MM_FROUND_CUR_DIRECTION);
12142 }
12143
12144 extern __inline __m512
12145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12146 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12147 {
12148 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
12149 (__v16sf) __B,
12150 (__v16sf) __C,
12151 (__mmask16) __U,
12152 _MM_FROUND_CUR_DIRECTION);
12153 }
12154
12155 extern __inline __m512
12156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12157 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12158 {
12159 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12160 (__v16sf) __B,
12161 -(__v16sf) __C,
12162 (__mmask16) __U,
12163 _MM_FROUND_CUR_DIRECTION);
12164 }
12165
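/* vfmaddsub alternates the sign of the addend across lanes: even-indexed
   elements compute __A*__B - __C and odd-indexed elements __A*__B + __C.
   vfmsubadd is the opposite pairing.  */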
12166 extern __inline __m512d
12167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12168 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
12169 {
12170 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12171 (__v8df) __B,
12172 (__v8df) __C,
12173 (__mmask8) -1,
12174 _MM_FROUND_CUR_DIRECTION);
12175 }
12176
12177 extern __inline __m512d
12178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12179 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12180 {
12181 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12182 (__v8df) __B,
12183 (__v8df) __C,
12184 (__mmask8) __U,
12185 _MM_FROUND_CUR_DIRECTION);
12186 }
12187
12188 extern __inline __m512d
12189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12190 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12191 {
12192 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
12193 (__v8df) __B,
12194 (__v8df) __C,
12195 (__mmask8) __U,
12196 _MM_FROUND_CUR_DIRECTION);
12197 }
12198
12199 extern __inline __m512d
12200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12201 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12202 {
12203 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12204 (__v8df) __B,
12205 (__v8df) __C,
12206 (__mmask8) __U,
12207 _MM_FROUND_CUR_DIRECTION);
12208 }
12209
12210 extern __inline __m512
12211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12212 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
12213 {
12214 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12215 (__v16sf) __B,
12216 (__v16sf) __C,
12217 (__mmask16) -1,
12218 _MM_FROUND_CUR_DIRECTION);
12219 }
12220
12221 extern __inline __m512
12222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12223 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12224 {
12225 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12226 (__v16sf) __B,
12227 (__v16sf) __C,
12228 (__mmask16) __U,
12229 _MM_FROUND_CUR_DIRECTION);
12230 }
12231
12232 extern __inline __m512
12233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12234 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12235 {
12236 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
12237 (__v16sf) __B,
12238 (__v16sf) __C,
12239 (__mmask16) __U,
12240 _MM_FROUND_CUR_DIRECTION);
12241 }
12242
12243 extern __inline __m512
12244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12245 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12246 {
12247 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12248 (__v16sf) __B,
12249 (__v16sf) __C,
12250 (__mmask16) __U,
12251 _MM_FROUND_CUR_DIRECTION);
12252 }
12253
12254 extern __inline __m512d
12255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12256 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
12257 {
12258 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12259 (__v8df) __B,
12260 -(__v8df) __C,
12261 (__mmask8) -1,
12262 _MM_FROUND_CUR_DIRECTION);
12263 }
12264
12265 extern __inline __m512d
12266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12267 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12268 {
12269 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12270 (__v8df) __B,
12271 -(__v8df) __C,
12272 (__mmask8) __U,
12273 _MM_FROUND_CUR_DIRECTION);
12274 }
12275
12276 extern __inline __m512d
12277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12278 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12279 {
12280 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
12281 (__v8df) __B,
12282 (__v8df) __C,
12283 (__mmask8) __U,
12284 _MM_FROUND_CUR_DIRECTION);
12285 }
12286
12287 extern __inline __m512d
12288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12289 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12290 {
12291 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12292 (__v8df) __B,
12293 -(__v8df) __C,
12294 (__mmask8) __U,
12295 _MM_FROUND_CUR_DIRECTION);
12296 }
12297
12298 extern __inline __m512
12299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
12301 {
12302 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12303 (__v16sf) __B,
12304 -(__v16sf) __C,
12305 (__mmask16) -1,
12306 _MM_FROUND_CUR_DIRECTION);
12307 }
12308
12309 extern __inline __m512
12310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12311 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12312 {
12313 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12314 (__v16sf) __B,
12315 -(__v16sf) __C,
12316 (__mmask16) __U,
12317 _MM_FROUND_CUR_DIRECTION);
12318 }
12319
12320 extern __inline __m512
12321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12322 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12323 {
12324 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
12325 (__v16sf) __B,
12326 (__v16sf) __C,
12327 (__mmask16) __U,
12328 _MM_FROUND_CUR_DIRECTION);
12329 }
12330
12331 extern __inline __m512
12332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12333 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12334 {
12335 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12336 (__v16sf) __B,
12337 -(__v16sf) __C,
12338 (__mmask16) __U,
12339 _MM_FROUND_CUR_DIRECTION);
12340 }
12341
12342 extern __inline __m512d
12343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12344 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12345 {
12346 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12347 (__v8df) __B,
12348 (__v8df) __C,
12349 (__mmask8) -1,
12350 _MM_FROUND_CUR_DIRECTION);
12351 }
12352
12353 extern __inline __m512d
12354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12355 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12356 {
12357 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
12358 (__v8df) __B,
12359 (__v8df) __C,
12360 (__mmask8) __U,
12361 _MM_FROUND_CUR_DIRECTION);
12362 }
12363
12364 extern __inline __m512d
12365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12366 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12367 {
12368 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
12369 (__v8df) __B,
12370 (__v8df) __C,
12371 (__mmask8) __U,
12372 _MM_FROUND_CUR_DIRECTION);
12373 }
12374
12375 extern __inline __m512d
12376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12378 {
12379 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12380 (__v8df) __B,
12381 (__v8df) __C,
12382 (__mmask8) __U,
12383 _MM_FROUND_CUR_DIRECTION);
12384 }
12385
12386 extern __inline __m512
12387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12388 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12389 {
12390 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12391 (__v16sf) __B,
12392 (__v16sf) __C,
12393 (__mmask16) -1,
12394 _MM_FROUND_CUR_DIRECTION);
12395 }
12396
12397 extern __inline __m512
12398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12399 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12400 {
12401 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
12402 (__v16sf) __B,
12403 (__v16sf) __C,
12404 (__mmask16) __U,
12405 _MM_FROUND_CUR_DIRECTION);
12406 }
12407
12408 extern __inline __m512
12409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12410 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12411 {
12412 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
12413 (__v16sf) __B,
12414 (__v16sf) __C,
12415 (__mmask16) __U,
12416 _MM_FROUND_CUR_DIRECTION);
12417 }
12418
12419 extern __inline __m512
12420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12421 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12422 {
12423 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12424 (__v16sf) __B,
12425 (__v16sf) __C,
12426 (__mmask16) __U,
12427 _MM_FROUND_CUR_DIRECTION);
12428 }
12429
12430 extern __inline __m512d
12431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12432 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12433 {
12434 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12435 (__v8df) __B,
12436 -(__v8df) __C,
12437 (__mmask8) -1,
12438 _MM_FROUND_CUR_DIRECTION);
12439 }
12440
12441 extern __inline __m512d
12442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12443 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12444 {
12445 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
12446 (__v8df) __B,
12447 (__v8df) __C,
12448 (__mmask8) __U,
12449 _MM_FROUND_CUR_DIRECTION);
12450 }
12451
12452 extern __inline __m512d
12453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12454 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12455 {
12456 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
12457 (__v8df) __B,
12458 (__v8df) __C,
12459 (__mmask8) __U,
12460 _MM_FROUND_CUR_DIRECTION);
12461 }
12462
12463 extern __inline __m512d
12464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12465 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12466 {
12467 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12468 (__v8df) __B,
12469 -(__v8df) __C,
12470 (__mmask8) __U,
12471 _MM_FROUND_CUR_DIRECTION);
12472 }
12473
12474 extern __inline __m512
12475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12476 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12477 {
12478 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12479 (__v16sf) __B,
12480 -(__v16sf) __C,
12481 (__mmask16) -1,
12482 _MM_FROUND_CUR_DIRECTION);
12483 }
12484
12485 extern __inline __m512
12486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12487 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12488 {
12489 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
12490 (__v16sf) __B,
12491 (__v16sf) __C,
12492 (__mmask16) __U,
12493 _MM_FROUND_CUR_DIRECTION);
12494 }
12495
12496 extern __inline __m512
12497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12498 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12499 {
12500 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
12501 (__v16sf) __B,
12502 (__v16sf) __C,
12503 (__mmask16) __U,
12504 _MM_FROUND_CUR_DIRECTION);
12505 }
12506
12507 extern __inline __m512
12508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12509 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12510 {
12511 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12512 (__v16sf) __B,
12513 -(__v16sf) __C,
12514 (__mmask16) __U,
12515 _MM_FROUND_CUR_DIRECTION);
12516 }
12517
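/* Conversions to 32-bit integer elements.  The cvtt* forms truncate toward
   zero regardless of MXCSR; the cvt* forms use the current rounding mode
   (_MM_FROUND_CUR_DIRECTION).  The epu32 variants produce unsigned
   results.  */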
12518 extern __inline __m256i
12519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12520 _mm512_cvttpd_epi32 (__m512d __A)
12521 {
12522 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12523 (__v8si)
12524 _mm256_undefined_si256 (),
12525 (__mmask8) -1,
12526 _MM_FROUND_CUR_DIRECTION);
12527 }
12528
12529 extern __inline __m256i
12530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12531 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12532 {
12533 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12534 (__v8si) __W,
12535 (__mmask8) __U,
12536 _MM_FROUND_CUR_DIRECTION);
12537 }
12538
12539 extern __inline __m256i
12540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12541 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
12542 {
12543 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12544 (__v8si)
12545 _mm256_setzero_si256 (),
12546 (__mmask8) __U,
12547 _MM_FROUND_CUR_DIRECTION);
12548 }
12549
12550 extern __inline __m256i
12551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12552 _mm512_cvttpd_epu32 (__m512d __A)
12553 {
12554 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12555 (__v8si)
12556 _mm256_undefined_si256 (),
12557 (__mmask8) -1,
12558 _MM_FROUND_CUR_DIRECTION);
12559 }
12560
12561 extern __inline __m256i
12562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12563 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12564 {
12565 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12566 (__v8si) __W,
12567 (__mmask8) __U,
12568 _MM_FROUND_CUR_DIRECTION);
12569 }
12570
12571 extern __inline __m256i
12572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12573 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
12574 {
12575 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12576 (__v8si)
12577 _mm256_setzero_si256 (),
12578 (__mmask8) __U,
12579 _MM_FROUND_CUR_DIRECTION);
12580 }
12581
12582 extern __inline __m256i
12583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12584 _mm512_cvtpd_epi32 (__m512d __A)
12585 {
12586 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12587 (__v8si)
12588 _mm256_undefined_si256 (),
12589 (__mmask8) -1,
12590 _MM_FROUND_CUR_DIRECTION);
12591 }
12592
12593 extern __inline __m256i
12594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12595 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12596 {
12597 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12598 (__v8si) __W,
12599 (__mmask8) __U,
12600 _MM_FROUND_CUR_DIRECTION);
12601 }
12602
12603 extern __inline __m256i
12604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12605 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
12606 {
12607 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12608 (__v8si)
12609 _mm256_setzero_si256 (),
12610 (__mmask8) __U,
12611 _MM_FROUND_CUR_DIRECTION);
12612 }
12613
12614 extern __inline __m256i
12615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12616 _mm512_cvtpd_epu32 (__m512d __A)
12617 {
12618 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12619 (__v8si)
12620 _mm256_undefined_si256 (),
12621 (__mmask8) -1,
12622 _MM_FROUND_CUR_DIRECTION);
12623 }
12624
12625 extern __inline __m256i
12626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12627 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12628 {
12629 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12630 (__v8si) __W,
12631 (__mmask8) __U,
12632 _MM_FROUND_CUR_DIRECTION);
12633 }
12634
12635 extern __inline __m256i
12636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12637 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
12638 {
12639 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12640 (__v8si)
12641 _mm256_setzero_si256 (),
12642 (__mmask8) __U,
12643 _MM_FROUND_CUR_DIRECTION);
12644 }
12645
12646 extern __inline __m512i
12647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12648 _mm512_cvttps_epi32 (__m512 __A)
12649 {
12650 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12651 (__v16si)
12652 _mm512_undefined_epi32 (),
12653 (__mmask16) -1,
12654 _MM_FROUND_CUR_DIRECTION);
12655 }
12656
12657 extern __inline __m512i
12658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12659 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12660 {
12661 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12662 (__v16si) __W,
12663 (__mmask16) __U,
12664 _MM_FROUND_CUR_DIRECTION);
12665 }
12666
12667 extern __inline __m512i
12668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12669 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
12670 {
12671 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12672 (__v16si)
12673 _mm512_setzero_si512 (),
12674 (__mmask16) __U,
12675 _MM_FROUND_CUR_DIRECTION);
12676 }
12677
12678 extern __inline __m512i
12679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12680 _mm512_cvttps_epu32 (__m512 __A)
12681 {
12682 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12683 (__v16si)
12684 _mm512_undefined_epi32 (),
12685 (__mmask16) -1,
12686 _MM_FROUND_CUR_DIRECTION);
12687 }
12688
12689 extern __inline __m512i
12690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12691 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12692 {
12693 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12694 (__v16si) __W,
12695 (__mmask16) __U,
12696 _MM_FROUND_CUR_DIRECTION);
12697 }
12698
12699 extern __inline __m512i
12700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12701 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
12702 {
12703 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12704 (__v16si)
12705 _mm512_setzero_si512 (),
12706 (__mmask16) __U,
12707 _MM_FROUND_CUR_DIRECTION);
12708 }
12709
12710 extern __inline __m512i
12711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12712 _mm512_cvtps_epi32 (__m512 __A)
12713 {
12714 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12715 (__v16si)
12716 _mm512_undefined_epi32 (),
12717 (__mmask16) -1,
12718 _MM_FROUND_CUR_DIRECTION);
12719 }
12720
12721 extern __inline __m512i
12722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12723 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12724 {
12725 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12726 (__v16si) __W,
12727 (__mmask16) __U,
12728 _MM_FROUND_CUR_DIRECTION);
12729 }
12730
12731 extern __inline __m512i
12732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12733 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
12734 {
12735 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12736 (__v16si)
12737 _mm512_setzero_si512 (),
12738 (__mmask16) __U,
12739 _MM_FROUND_CUR_DIRECTION);
12740 }
12741
12742 extern __inline __m512i
12743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12744 _mm512_cvtps_epu32 (__m512 __A)
12745 {
12746 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12747 (__v16si)
12748 _mm512_undefined_epi32 (),
12749 (__mmask16) -1,
12750 _MM_FROUND_CUR_DIRECTION);
12751 }
12752
12753 extern __inline __m512i
12754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12755 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12756 {
12757 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12758 (__v16si) __W,
12759 (__mmask16) __U,
12760 _MM_FROUND_CUR_DIRECTION);
12761 }
12762
12763 extern __inline __m512i
12764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12765 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
12766 {
12767 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12768 (__v16si)
12769 _mm512_setzero_si512 (),
12770 (__mmask16) __U,
12771 _MM_FROUND_CUR_DIRECTION);
12772 }
12773
12774 extern __inline double
12775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12776 _mm512_cvtsd_f64 (__m512d __A)
12777 {
12778 return __A[0];
12779 }
12780
12781 extern __inline float
12782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12783 _mm512_cvtss_f32 (__m512 __A)
12784 {
12785 return __A[0];
12786 }
12787
12788 #ifdef __x86_64__
12789 extern __inline __m128
12790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12791 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
12792 {
12793 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
12794 _MM_FROUND_CUR_DIRECTION);
12795 }
12796
12797 extern __inline __m128d
12798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12799 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
12800 {
12801 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
12802 _MM_FROUND_CUR_DIRECTION);
12803 }
12804 #endif
12805
12806 extern __inline __m128
12807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12808 _mm_cvtu32_ss (__m128 __A, unsigned __B)
12809 {
12810 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
12811 _MM_FROUND_CUR_DIRECTION);
12812 }
12813
12814 extern __inline __m512
12815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12816 _mm512_cvtepi32_ps (__m512i __A)
12817 {
12818 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12819 (__v16sf)
12820 _mm512_undefined_ps (),
12821 (__mmask16) -1,
12822 _MM_FROUND_CUR_DIRECTION);
12823 }
12824
12825 extern __inline __m512
12826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12827 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12828 {
12829 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12830 (__v16sf) __W,
12831 (__mmask16) __U,
12832 _MM_FROUND_CUR_DIRECTION);
12833 }
12834
12835 extern __inline __m512
12836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12837 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
12838 {
12839 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12840 (__v16sf)
12841 _mm512_setzero_ps (),
12842 (__mmask16) __U,
12843 _MM_FROUND_CUR_DIRECTION);
12844 }
12845
12846 extern __inline __m512
12847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12848 _mm512_cvtepu32_ps (__m512i __A)
12849 {
12850 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12851 (__v16sf)
12852 _mm512_undefined_ps (),
12853 (__mmask16) -1,
12854 _MM_FROUND_CUR_DIRECTION);
12855 }
12856
12857 extern __inline __m512
12858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12859 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12860 {
12861 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12862 (__v16sf) __W,
12863 (__mmask16) __U,
12864 _MM_FROUND_CUR_DIRECTION);
12865 }
12866
12867 extern __inline __m512
12868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12869 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12870 {
12871 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12872 (__v16sf)
12873 _mm512_setzero_ps (),
12874 (__mmask16) __U,
12875 _MM_FROUND_CUR_DIRECTION);
12876 }
12877
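/* The intrinsics below take an immediate operand.  When optimizing, the
   inline definitions are used and the constant propagates into the builtin;
   without __OPTIMIZE__ they are provided as macros so the immediate stays
   a compile-time constant expression.  */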
12878 #ifdef __OPTIMIZE__
12879 extern __inline __m512d
12880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12881 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12882 {
12883 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12884 (__v8df) __B,
12885 (__v8di) __C,
12886 __imm,
12887 (__mmask8) -1,
12888 _MM_FROUND_CUR_DIRECTION);
12889 }
12890
12891 extern __inline __m512d
12892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12893 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12894 __m512i __C, const int __imm)
12895 {
12896 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12897 (__v8df) __B,
12898 (__v8di) __C,
12899 __imm,
12900 (__mmask8) __U,
12901 _MM_FROUND_CUR_DIRECTION);
12902 }
12903
12904 extern __inline __m512d
12905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12906 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12907 __m512i __C, const int __imm)
12908 {
12909 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12910 (__v8df) __B,
12911 (__v8di) __C,
12912 __imm,
12913 (__mmask8) __U,
12914 _MM_FROUND_CUR_DIRECTION);
12915 }
12916
12917 extern __inline __m512
12918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12919 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12920 {
12921 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12922 (__v16sf) __B,
12923 (__v16si) __C,
12924 __imm,
12925 (__mmask16) -1,
12926 _MM_FROUND_CUR_DIRECTION);
12927 }
12928
12929 extern __inline __m512
12930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12931 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12932 __m512i __C, const int __imm)
12933 {
12934 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12935 (__v16sf) __B,
12936 (__v16si) __C,
12937 __imm,
12938 (__mmask16) __U,
12939 _MM_FROUND_CUR_DIRECTION);
12940 }
12941
12942 extern __inline __m512
12943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12944 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12945 __m512i __C, const int __imm)
12946 {
12947 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12948 (__v16sf) __B,
12949 (__v16si) __C,
12950 __imm,
12951 (__mmask16) __U,
12952 _MM_FROUND_CUR_DIRECTION);
12953 }
12954
12955 extern __inline __m128d
12956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12957 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12958 {
12959 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12960 (__v2df) __B,
12961 (__v2di) __C, __imm,
12962 (__mmask8) -1,
12963 _MM_FROUND_CUR_DIRECTION);
12964 }
12965
12966 extern __inline __m128d
12967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12968 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12969 __m128i __C, const int __imm)
12970 {
12971 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12972 (__v2df) __B,
12973 (__v2di) __C, __imm,
12974 (__mmask8) __U,
12975 _MM_FROUND_CUR_DIRECTION);
12976 }
12977
12978 extern __inline __m128d
12979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12980 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12981 __m128i __C, const int __imm)
12982 {
12983 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12984 (__v2df) __B,
12985 (__v2di) __C,
12986 __imm,
12987 (__mmask8) __U,
12988 _MM_FROUND_CUR_DIRECTION);
12989 }
12990
12991 extern __inline __m128
12992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12993 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12994 {
12995 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12996 (__v4sf) __B,
12997 (__v4si) __C, __imm,
12998 (__mmask8) -1,
12999 _MM_FROUND_CUR_DIRECTION);
13000 }
13001
13002 extern __inline __m128
13003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13004 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
13005 __m128i __C, const int __imm)
13006 {
13007 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13008 (__v4sf) __B,
13009 (__v4si) __C, __imm,
13010 (__mmask8) __U,
13011 _MM_FROUND_CUR_DIRECTION);
13012 }
13013
13014 extern __inline __m128
13015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13016 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
13017 __m128i __C, const int __imm)
13018 {
13019 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
13020 (__v4sf) __B,
13021 (__v4si) __C, __imm,
13022 (__mmask8) __U,
13023 _MM_FROUND_CUR_DIRECTION);
13024 }
13025 #else
13026 #define _mm512_fixupimm_pd(X, Y, Z, C) \
13027 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
13028 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13029 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13030
13031 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
13032 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
13033 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13034 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13035
13036 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
13037 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
13038 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13039 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13040
13041 #define _mm512_fixupimm_ps(X, Y, Z, C) \
13042 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
13043 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13044 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
13045
13046 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
13047 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
13048 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13049 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13050
13051 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
13052 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
13053 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13054 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13055
13056 #define _mm_fixupimm_sd(X, Y, Z, C) \
13057 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
13058 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13059 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13060
13061 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
13062 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
13063 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13064 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13065
13066 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
13067 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
13068 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13069 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13070
13071 #define _mm_fixupimm_ss(X, Y, Z, C) \
13072 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
13073 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13074 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13075
13076 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
13077 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
13078 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13079 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13080
13081 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
13082 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
13083 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13084 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13085 #endif
13086
13087 #ifdef __x86_64__
13088 extern __inline unsigned long long
13089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13090 _mm_cvtss_u64 (__m128 __A)
13091 {
13092 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
13093 __A,
13094 _MM_FROUND_CUR_DIRECTION);
13095 }
13096
13097 extern __inline unsigned long long
13098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13099 _mm_cvttss_u64 (__m128 __A)
13100 {
13101 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
13102 __A,
13103 _MM_FROUND_CUR_DIRECTION);
13104 }
13105
13106 extern __inline long long
13107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13108 _mm_cvttss_i64 (__m128 __A)
13109 {
13110 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
13111 _MM_FROUND_CUR_DIRECTION);
13112 }
13113 #endif /* __x86_64__ */
13114
13115 extern __inline unsigned
13116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13117 _mm_cvtss_u32 (__m128 __A)
13118 {
13119 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
13120 _MM_FROUND_CUR_DIRECTION);
13121 }
13122
13123 extern __inline unsigned
13124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13125 _mm_cvttss_u32 (__m128 __A)
13126 {
13127 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
13128 _MM_FROUND_CUR_DIRECTION);
13129 }
13130
13131 extern __inline int
13132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13133 _mm_cvttss_i32 (__m128 __A)
13134 {
13135 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
13136 _MM_FROUND_CUR_DIRECTION);
13137 }
13138
13139 #ifdef __x86_64__
13140 extern __inline unsigned long long
13141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13142 _mm_cvtsd_u64 (__m128d __A)
13143 {
13144 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
13145 __A,
13146 _MM_FROUND_CUR_DIRECTION);
13147 }
13148
13149 extern __inline unsigned long long
13150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13151 _mm_cvttsd_u64 (__m128d __A)
13152 {
13153 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
13154 __A,
13155 _MM_FROUND_CUR_DIRECTION);
13156 }
13157
13158 extern __inline long long
13159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13160 _mm_cvttsd_i64 (__m128d __A)
13161 {
13162 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
13163 _MM_FROUND_CUR_DIRECTION);
13164 }
13165 #endif /* __x86_64__ */
13166
13167 extern __inline unsigned
13168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13169 _mm_cvtsd_u32 (__m128d __A)
13170 {
13171 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
13172 _MM_FROUND_CUR_DIRECTION);
13173 }
13174
13175 extern __inline unsigned
13176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13177 _mm_cvttsd_u32 (__m128d __A)
13178 {
13179 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
13180 _MM_FROUND_CUR_DIRECTION);
13181 }
13182
13183 extern __inline int
13184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13185 _mm_cvttsd_i32 (__m128d __A)
13186 {
13187 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
13188 _MM_FROUND_CUR_DIRECTION);
13189 }
13190
13191 extern __inline __m512d
13192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13193 _mm512_cvtps_pd (__m256 __A)
13194 {
13195 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13196 (__v8df)
13197 _mm512_undefined_pd (),
13198 (__mmask8) -1,
13199 _MM_FROUND_CUR_DIRECTION);
13200 }
13201
13202 extern __inline __m512d
13203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13204 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
13205 {
13206 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13207 (__v8df) __W,
13208 (__mmask8) __U,
13209 _MM_FROUND_CUR_DIRECTION);
13210 }
13211
13212 extern __inline __m512d
13213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13214 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
13215 {
13216 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13217 (__v8df)
13218 _mm512_setzero_pd (),
13219 (__mmask8) __U,
13220 _MM_FROUND_CUR_DIRECTION);
13221 }
13222
13223 extern __inline __m512
13224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13225 _mm512_cvtph_ps (__m256i __A)
13226 {
13227 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13228 (__v16sf)
13229 _mm512_undefined_ps (),
13230 (__mmask16) -1,
13231 _MM_FROUND_CUR_DIRECTION);
13232 }
13233
13234 extern __inline __m512
13235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13236 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
13237 {
13238 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13239 (__v16sf) __W,
13240 (__mmask16) __U,
13241 _MM_FROUND_CUR_DIRECTION);
13242 }
13243
13244 extern __inline __m512
13245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13246 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
13247 {
13248 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13249 (__v16sf)
13250 _mm512_setzero_ps (),
13251 (__mmask16) __U,
13252 _MM_FROUND_CUR_DIRECTION);
13253 }
13254
13255 extern __inline __m256
13256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13257 _mm512_cvtpd_ps (__m512d __A)
13258 {
13259 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13260 (__v8sf)
13261 _mm256_undefined_ps (),
13262 (__mmask8) -1,
13263 _MM_FROUND_CUR_DIRECTION);
13264 }
13265
13266 extern __inline __m256
13267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13268 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
13269 {
13270 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13271 (__v8sf) __W,
13272 (__mmask8) __U,
13273 _MM_FROUND_CUR_DIRECTION);
13274 }
13275
13276 extern __inline __m256
13277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13278 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
13279 {
13280 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13281 (__v8sf)
13282 _mm256_setzero_ps (),
13283 (__mmask8) __U,
13284 _MM_FROUND_CUR_DIRECTION);
13285 }
13286
13287 #ifdef __OPTIMIZE__
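/* vgetexp extracts the unbiased exponent of each element, i.e. it returns
   floor(log2(|x|)) as a floating-point value.  */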
13288 extern __inline __m512
13289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13290 _mm512_getexp_ps (__m512 __A)
13291 {
13292 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13293 (__v16sf)
13294 _mm512_undefined_ps (),
13295 (__mmask16) -1,
13296 _MM_FROUND_CUR_DIRECTION);
13297 }
13298
13299 extern __inline __m512
13300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13301 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
13302 {
13303 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13304 (__v16sf) __W,
13305 (__mmask16) __U,
13306 _MM_FROUND_CUR_DIRECTION);
13307 }
13308
13309 extern __inline __m512
13310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13311 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
13312 {
13313 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13314 (__v16sf)
13315 _mm512_setzero_ps (),
13316 (__mmask16) __U,
13317 _MM_FROUND_CUR_DIRECTION);
13318 }
13319
13320 extern __inline __m512d
13321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13322 _mm512_getexp_pd (__m512d __A)
13323 {
13324 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13325 (__v8df)
13326 _mm512_undefined_pd (),
13327 (__mmask8) -1,
13328 _MM_FROUND_CUR_DIRECTION);
13329 }
13330
13331 extern __inline __m512d
13332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13333 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
13334 {
13335 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13336 (__v8df) __W,
13337 (__mmask8) __U,
13338 _MM_FROUND_CUR_DIRECTION);
13339 }
13340
13341 extern __inline __m512d
13342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13343 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
13344 {
13345 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13346 (__v8df)
13347 _mm512_setzero_pd (),
13348 (__mmask8) __U,
13349 _MM_FROUND_CUR_DIRECTION);
13350 }
13351
13352 extern __inline __m128
13353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13354 _mm_getexp_ss (__m128 __A, __m128 __B)
13355 {
13356 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
13357 (__v4sf) __B,
13358 _MM_FROUND_CUR_DIRECTION);
13359 }
13360
13361 extern __inline __m128d
13362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13363 _mm_getexp_sd (__m128d __A, __m128d __B)
13364 {
13365 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
13366 (__v2df) __B,
13367 _MM_FROUND_CUR_DIRECTION);
13368 }
13369
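/* vgetmant extracts the normalized mantissa.  The immediate is built as
   (__C << 2) | __B: bits 1:0 select the normalization interval
   (_MM_MANTISSA_NORM_ENUM) and bits 3:2 the sign control
   (_MM_MANTISSA_SIGN_ENUM).  */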
13370 extern __inline __m512d
13371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13372 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
13373 _MM_MANTISSA_SIGN_ENUM __C)
13374 {
13375 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13376 (__C << 2) | __B,
13377 _mm512_undefined_pd (),
13378 (__mmask8) -1,
13379 _MM_FROUND_CUR_DIRECTION);
13380 }
13381
13382 extern __inline __m512d
13383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13384 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
13385 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13386 {
13387 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13388 (__C << 2) | __B,
13389 (__v8df) __W, __U,
13390 _MM_FROUND_CUR_DIRECTION);
13391 }
13392
13393 extern __inline __m512d
13394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13395 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
13396 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13397 {
13398 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13399 (__C << 2) | __B,
13400 (__v8df)
13401 _mm512_setzero_pd (),
13402 __U,
13403 _MM_FROUND_CUR_DIRECTION);
13404 }
13405
13406 extern __inline __m512
13407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13408 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
13409 _MM_MANTISSA_SIGN_ENUM __C)
13410 {
13411 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13412 (__C << 2) | __B,
13413 _mm512_undefined_ps (),
13414 (__mmask16) -1,
13415 _MM_FROUND_CUR_DIRECTION);
13416 }
13417
13418 extern __inline __m512
13419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13420 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
13421 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13422 {
13423 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13424 (__C << 2) | __B,
13425 (__v16sf) __W, __U,
13426 _MM_FROUND_CUR_DIRECTION);
13427 }
13428
13429 extern __inline __m512
13430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13431 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
13432 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13433 {
13434 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13435 (__C << 2) | __B,
13436 (__v16sf)
13437 _mm512_setzero_ps (),
13438 __U,
13439 _MM_FROUND_CUR_DIRECTION);
13440 }
13441
13442 extern __inline __m128d
13443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13444 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
13445 _MM_MANTISSA_SIGN_ENUM __D)
13446 {
13447 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
13448 (__v2df) __B,
13449 (__D << 2) | __C,
13450 _MM_FROUND_CUR_DIRECTION);
13451 }
13452
13453 extern __inline __m128
13454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13455 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
13456 _MM_MANTISSA_SIGN_ENUM __D)
13457 {
13458 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
13459 (__v4sf) __B,
13460 (__D << 2) | __C,
13461 _MM_FROUND_CUR_DIRECTION);
13462 }
13463
13464 #else
13465 #define _mm512_getmant_pd(X, B, C) \
13466 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13467 (int)(((C)<<2) | (B)), \
13468 (__v8df)_mm512_undefined_pd(), \
13469 (__mmask8)-1,\
13470 _MM_FROUND_CUR_DIRECTION))
13471
13472 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
13473 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13474 (int)(((C)<<2) | (B)), \
13475 (__v8df)(__m512d)(W), \
13476 (__mmask8)(U),\
13477 _MM_FROUND_CUR_DIRECTION))
13478
13479 #define _mm512_maskz_getmant_pd(U, X, B, C) \
13480 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13481 (int)(((C)<<2) | (B)), \
13482 (__v8df)_mm512_setzero_pd(), \
13483 (__mmask8)(U),\
13484 _MM_FROUND_CUR_DIRECTION))
13485 #define _mm512_getmant_ps(X, B, C) \
13486 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13487 (int)(((C)<<2) | (B)), \
13488 (__v16sf)_mm512_undefined_ps(), \
13489 (__mmask16)-1,\
13490 _MM_FROUND_CUR_DIRECTION))
13491
13492 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
13493 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13494 (int)(((C)<<2) | (B)), \
13495 (__v16sf)(__m512)(W), \
13496 (__mmask16)(U),\
13497 _MM_FROUND_CUR_DIRECTION))
13498
13499 #define _mm512_maskz_getmant_ps(U, X, B, C) \
13500 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13501 (int)(((C)<<2) | (B)), \
13502 (__v16sf)_mm512_setzero_ps(), \
13503 (__mmask16)(U),\
13504 _MM_FROUND_CUR_DIRECTION))
13505 #define _mm_getmant_sd(X, Y, C, D) \
13506 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
13507 (__v2df)(__m128d)(Y), \
13508 (int)(((D)<<2) | (C)), \
13509 _MM_FROUND_CUR_DIRECTION))
13510
13511 #define _mm_getmant_ss(X, Y, C, D) \
13512 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
13513 (__v4sf)(__m128)(Y), \
13514 (int)(((D)<<2) | (C)), \
13515 _MM_FROUND_CUR_DIRECTION))
13516
13517 #define _mm_getexp_ss(A, B) \
13518 ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
13519 _MM_FROUND_CUR_DIRECTION))
13520
13521 #define _mm_getexp_sd(A, B) \
13522 ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
13523 _MM_FROUND_CUR_DIRECTION))
13524
13525 #define _mm512_getexp_ps(A) \
13526 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
13527 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
13528
13529 #define _mm512_mask_getexp_ps(W, U, A) \
13530 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
13531 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13532
13533 #define _mm512_maskz_getexp_ps(U, A) \
13534 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
13535 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13536
13537 #define _mm512_getexp_pd(A) \
13538 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
13539 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
13540
13541 #define _mm512_mask_getexp_pd(W, U, A) \
13542 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
13543 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13544
13545 #define _mm512_maskz_getexp_pd(U, A) \
13546 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
13547 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13548 #endif
13549
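/* Illustrative sketch, not part of the original header: the getmant and
   getexp intrinsics above together give a vectorised frexp-style split.
   The helper name __example_frexp_ps is hypothetical; _MM_MANT_NORM_1_2
   and _MM_MANT_SIGN_src come from the _MM_MANTISSA_*_ENUM definitions
   earlier in this file.  */
static __inline void
__example_frexp_ps (__m512 __x, __m512 *__mant, __m512 *__exp)
{
  /* Mantissa normalised into [1, 2), keeping the sign of the source.  */
  *__mant = _mm512_getmant_ps (__x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
  /* Unbiased exponent as a float, i.e. floor(log2(|x|)).  */
  *__exp = _mm512_getexp_ps (__x);
}
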
13550 #ifdef __OPTIMIZE__
13551 extern __inline __m512
13552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13553 _mm512_roundscale_ps (__m512 __A, const int __imm)
13554 {
13555 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
13556 (__v16sf)
13557 _mm512_undefined_ps (),
13558 -1,
13559 _MM_FROUND_CUR_DIRECTION);
13560 }
13561
13562 extern __inline __m512
13563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13564 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
13565 const int __imm)
13566 {
13567 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
13568 (__v16sf) __A,
13569 (__mmask16) __B,
13570 _MM_FROUND_CUR_DIRECTION);
13571 }
13572
13573 extern __inline __m512
13574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13575 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
13576 {
13577 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
13578 __imm,
13579 (__v16sf)
13580 _mm512_setzero_ps (),
13581 (__mmask16) __A,
13582 _MM_FROUND_CUR_DIRECTION);
13583 }
13584
13585 extern __inline __m512d
13586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13587 _mm512_roundscale_pd (__m512d __A, const int __imm)
13588 {
13589 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
13590 (__v8df)
13591 _mm512_undefined_pd (),
13592 -1,
13593 _MM_FROUND_CUR_DIRECTION);
13594 }
13595
13596 extern __inline __m512d
13597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13598 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
13599 const int __imm)
13600 {
13601 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
13602 (__v8df) __A,
13603 (__mmask8) __B,
13604 _MM_FROUND_CUR_DIRECTION);
13605 }
13606
13607 extern __inline __m512d
13608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13609 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
13610 {
13611 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
13612 __imm,
13613 (__v8df)
13614 _mm512_setzero_pd (),
13615 (__mmask8) __A,
13616 _MM_FROUND_CUR_DIRECTION);
13617 }
13618
13619 extern __inline __m128
13620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13621 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
13622 {
13623 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
13624 (__v4sf) __B, __imm,
13625 _MM_FROUND_CUR_DIRECTION);
13626 }
13627
13628 extern __inline __m128d
13629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13630 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
13631 {
13632 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
13633 (__v2df) __B, __imm,
13634 _MM_FROUND_CUR_DIRECTION);
13635 }
13636
13637 #else
13638 #define _mm512_roundscale_ps(A, B) \
13639 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
13640 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
13641 #define _mm512_mask_roundscale_ps(A, B, C, D) \
13642 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
13643 (int)(D), \
13644 (__v16sf)(__m512)(A), \
13645 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
13646 #define _mm512_maskz_roundscale_ps(A, B, C) \
13647 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
13648 (int)(C), \
13649 (__v16sf)_mm512_setzero_ps(),\
13650 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
13651 #define _mm512_roundscale_pd(A, B) \
13652 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
13653 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13654 #define _mm512_mask_roundscale_pd(A, B, C, D) \
13655 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
13656 (int)(D), \
13657 (__v8df)(__m512d)(A), \
13658 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
13659 #define _mm512_maskz_roundscale_pd(A, B, C) \
13660 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
13661 (int)(C), \
13662 (__v8df)_mm512_setzero_pd(),\
13663 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
13664 #define _mm_roundscale_ss(A, B, C) \
13665 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
13666 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
13667 #define _mm_roundscale_sd(A, B, C) \
13668 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
13669 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
13670 #endif
13671
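/* Illustrative sketch, not part of the original header: the roundscale
   immediate packs the number of fraction bits to keep in bits 7:4 and the
   rounding mode in bits 1:0 (an assumption based on the VRNDSCALEPS
   encoding; __example_round_to_quarter_ps is a hypothetical helper).  */
static __inline __m512
__example_round_to_quarter_ps (__m512 __x)
{
  /* Keep 2 fraction bits (multiples of 0.25), round to nearest even.  */
  return _mm512_roundscale_ps (__x, (2 << 4) | 0x00);
}
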
13672 #ifdef __OPTIMIZE__
13673 extern __inline __mmask8
13674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13675 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
13676 {
13677 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
13678 (__v8df) __Y, __P,
13679 (__mmask8) -1,
13680 _MM_FROUND_CUR_DIRECTION);
13681 }
13682
13683 extern __inline __mmask16
13684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13685 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
13686 {
13687 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
13688 (__v16sf) __Y, __P,
13689 (__mmask16) -1,
13690 _MM_FROUND_CUR_DIRECTION);
13691 }
13692
13693 extern __inline __mmask16
13694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13695 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
13696 {
13697 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
13698 (__v16sf) __Y, __P,
13699 (__mmask16) __U,
13700 _MM_FROUND_CUR_DIRECTION);
13701 }
13702
13703 extern __inline __mmask8
13704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13705 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
13706 {
13707 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
13708 (__v8df) __Y, __P,
13709 (__mmask8) __U,
13710 _MM_FROUND_CUR_DIRECTION);
13711 }
13712
13713 extern __inline __mmask8
13714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13715 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
13716 {
13717 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
13718 (__v2df) __Y, __P,
13719 (__mmask8) -1,
13720 _MM_FROUND_CUR_DIRECTION);
13721 }
13722
13723 extern __inline __mmask8
13724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13725 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
13726 {
13727 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
13728 (__v2df) __Y, __P,
13729 (__mmask8) __M,
13730 _MM_FROUND_CUR_DIRECTION);
13731 }
13732
13733 extern __inline __mmask8
13734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13735 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
13736 {
13737 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
13738 (__v4sf) __Y, __P,
13739 (__mmask8) -1,
13740 _MM_FROUND_CUR_DIRECTION);
13741 }
13742
13743 extern __inline __mmask8
13744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13745 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
13746 {
13747 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
13748 (__v4sf) __Y, __P,
13749 (__mmask8) __M,
13750 _MM_FROUND_CUR_DIRECTION);
13751 }
13752
13753 #else
13754 #define _mm512_cmp_pd_mask(X, Y, P) \
13755 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
13756 (__v8df)(__m512d)(Y), (int)(P),\
13757 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
13758
13759 #define _mm512_cmp_ps_mask(X, Y, P) \
13760 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
13761 (__v16sf)(__m512)(Y), (int)(P),\
13762 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
13763
13764 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
13765 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
13766 (__v8df)(__m512d)(Y), (int)(P),\
13767 						  (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
13768
13769 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
13770 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
13771 (__v16sf)(__m512)(Y), (int)(P),\
13772 						  (__mmask16)(M), _MM_FROUND_CUR_DIRECTION))
13773
13774 #define _mm_cmp_sd_mask(X, Y, P) \
13775 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
13776 (__v2df)(__m128d)(Y), (int)(P),\
13777 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
13778
13779 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \
13780 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
13781 (__v2df)(__m128d)(Y), (int)(P),\
13782 					      (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
13783
13784 #define _mm_cmp_ss_mask(X, Y, P) \
13785 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
13786 (__v4sf)(__m128)(Y), (int)(P), \
13787 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
13788
13789 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \
13790 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
13791 (__v4sf)(__m128)(Y), (int)(P), \
13792 					     (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
13793 #endif
13794
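/* Illustrative sketch, not part of the original header: a typical use of
   the compare-to-mask intrinsics above is to feed the resulting mask into
   a masked move.  _CMP_LT_OS is assumed to be visible because this header
   is only reachable through <immintrin.h>; __example_clamp_below_ps is a
   hypothetical helper.  */
static __inline __m512
__example_clamp_below_ps (__m512 __a, __m512 __threshold, __m512 __fill)
{
  /* Lanes of __a strictly below the threshold are replaced by __fill.  */
  __mmask16 __lt = _mm512_cmp_ps_mask (__a, __threshold, _CMP_LT_OS);
  return _mm512_mask_mov_ps (__a, __lt, __fill);
}
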
13795 extern __inline __mmask16
13796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13797 _mm512_kmov (__mmask16 __A)
13798 {
13799 return __builtin_ia32_kmovw (__A);
13800 }
13801
13802 extern __inline __m512
13803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13804 _mm512_castpd_ps (__m512d __A)
13805 {
13806 return (__m512) (__A);
13807 }
13808
13809 extern __inline __m512i
13810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13811 _mm512_castpd_si512 (__m512d __A)
13812 {
13813 return (__m512i) (__A);
13814 }
13815
13816 extern __inline __m512d
13817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13818 _mm512_castps_pd (__m512 __A)
13819 {
13820 return (__m512d) (__A);
13821 }
13822
13823 extern __inline __m512i
13824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13825 _mm512_castps_si512 (__m512 __A)
13826 {
13827 return (__m512i) (__A);
13828 }
13829
13830 extern __inline __m512
13831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13832 _mm512_castsi512_ps (__m512i __A)
13833 {
13834 return (__m512) (__A);
13835 }
13836
13837 extern __inline __m512d
13838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13839 _mm512_castsi512_pd (__m512i __A)
13840 {
13841 return (__m512d) (__A);
13842 }
13843
13844 extern __inline __m128d
13845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13846 _mm512_castpd512_pd128 (__m512d __A)
13847 {
13848 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
13849 }
13850
13851 extern __inline __m128
13852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13853 _mm512_castps512_ps128 (__m512 __A)
13854 {
13855 return _mm512_extractf32x4_ps(__A, 0);
13856 }
13857
13858 extern __inline __m128i
13859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13860 _mm512_castsi512_si128 (__m512i __A)
13861 {
13862 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13863 }
13864
13865 extern __inline __m256d
13866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13867 _mm512_castpd512_pd256 (__m512d __A)
13868 {
13869 return _mm512_extractf64x4_pd(__A, 0);
13870 }
13871
13872 extern __inline __m256
13873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13874 _mm512_castps512_ps256 (__m512 __A)
13875 {
13876 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13877 }
13878
13879 extern __inline __m256i
13880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13881 _mm512_castsi512_si256 (__m512i __A)
13882 {
13883 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13884 }
13885
13886 extern __inline __m512d
13887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13888 _mm512_castpd128_pd512 (__m128d __A)
13889 {
13890 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13891 }
13892
13893 extern __inline __m512
13894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13895 _mm512_castps128_ps512 (__m128 __A)
13896 {
13897 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13898 }
13899
13900 extern __inline __m512i
13901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13902 _mm512_castsi128_si512 (__m128i __A)
13903 {
13904 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13905 }
13906
13907 extern __inline __m512d
13908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13909 _mm512_castpd256_pd512 (__m256d __A)
13910 {
13911 return __builtin_ia32_pd512_256pd (__A);
13912 }
13913
13914 extern __inline __m512
13915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13916 _mm512_castps256_ps512 (__m256 __A)
13917 {
13918 return __builtin_ia32_ps512_256ps (__A);
13919 }
13920
13921 extern __inline __m512i
13922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13923 _mm512_castsi256_si512 (__m256i __A)
13924 {
13925 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13926 }
13927
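/* Illustrative note, not part of the original header: the narrowing casts
   above are written as extracts of lane 0 (normally free of cost after
   optimisation), while the widening casts leave the upper bits of the
   destination undefined.  __example_low_sum_ps is a hypothetical helper
   mixing 512-bit and 128-bit operands.  */
static __inline __m128
__example_low_sum_ps (__m512 __a, __m128 __b)
{
  /* Add a 128-bit vector to the lowest lane of a 512-bit vector.  */
  return _mm_add_ps (_mm512_castps512_ps128 (__a), __b);
}
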
13928 extern __inline __mmask16
13929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13930 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13931 {
13932 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13933 (__v16si) __B, 0,
13934 (__mmask16) -1);
13935 }
13936
13937 extern __inline __mmask16
13938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13939 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13940 {
13941 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13942 (__v16si) __B, 0, __U);
13943 }
13944
13945 extern __inline __mmask8
13946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13947 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13948 {
13949 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13950 (__v8di) __B, 0, __U);
13951 }
13952
13953 extern __inline __mmask8
13954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13955 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13956 {
13957 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13958 (__v8di) __B, 0,
13959 (__mmask8) -1);
13960 }
13961
13962 extern __inline __mmask16
13963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13964 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13965 {
13966 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13967 (__v16si) __B, 6,
13968 (__mmask16) -1);
13969 }
13970
13971 extern __inline __mmask16
13972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13973 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13974 {
13975 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13976 (__v16si) __B, 6, __U);
13977 }
13978
13979 extern __inline __mmask8
13980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13981 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13982 {
13983 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13984 (__v8di) __B, 6, __U);
13985 }
13986
13987 extern __inline __mmask8
13988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13989 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13990 {
13991 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13992 (__v8di) __B, 6,
13993 (__mmask8) -1);
13994 }
13995
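/* Illustrative sketch, not part of the original header: the unsigned
   compares above encode the predicate in the third builtin argument
   (0 for equal, 6 for greater-than).  __example_count_gt_epu32 is a
   hypothetical helper that counts the lanes satisfying the comparison;
   __builtin_popcount is the GCC population-count built-in.  */
static __inline int
__example_count_gt_epu32 (__m512i __a, __m512i __b)
{
  return __builtin_popcount ((unsigned int)
			     _mm512_cmpgt_epu32_mask (__a, __b));
}
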
13996 #undef __MM512_REDUCE_OP
13997 #define __MM512_REDUCE_OP(op) \
13998 __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
13999 __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
14000 __m256i __T3 = (__m256i) (__T1 op __T2); \
14001 __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1); \
14002 __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0); \
14003 __v4si __T6 = __T4 op __T5; \
14004 __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
14005 __v4si __T8 = __T6 op __T7; \
14006 return __T8[0] op __T8[1]
14007
14008 extern __inline int
14009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14010 _mm512_reduce_add_epi32 (__m512i __A)
14011 {
14012 __MM512_REDUCE_OP (+);
14013 }
14014
14015 extern __inline int
14016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14017 _mm512_reduce_mul_epi32 (__m512i __A)
14018 {
14019 __MM512_REDUCE_OP (*);
14020 }
14021
14022 extern __inline int
14023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14024 _mm512_reduce_and_epi32 (__m512i __A)
14025 {
14026 __MM512_REDUCE_OP (&);
14027 }
14028
14029 extern __inline int
14030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14031 _mm512_reduce_or_epi32 (__m512i __A)
14032 {
14033 __MM512_REDUCE_OP (|);
14034 }
14035
14036 extern __inline int
14037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14038 _mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
14039 {
14040 __A = _mm512_maskz_mov_epi32 (__U, __A);
14041 __MM512_REDUCE_OP (+);
14042 }
14043
14044 extern __inline int
14045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14046 _mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
14047 {
14048 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
14049 __MM512_REDUCE_OP (*);
14050 }
14051
14052 extern __inline int
14053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14054 _mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
14055 {
14056 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14057 __MM512_REDUCE_OP (&);
14058 }
14059
14060 extern __inline int
14061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14062 _mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
14063 {
14064 __A = _mm512_maskz_mov_epi32 (__U, __A);
14065 __MM512_REDUCE_OP (|);
14066 }
14067
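/* Illustrative sketch, not part of the original header: __MM512_REDUCE_OP
   halves the vector (512 -> 256 -> 128 bits, then lane shuffles) and
   combines the halves with the given operator, so _mm512_reduce_add_epi32
   sums all sixteen lanes.  __example_reduce_add_epi32_scalar is a
   hypothetical scalar equivalent shown for reference.  */
static __inline int
__example_reduce_add_epi32_scalar (__m512i __a)
{
  __v16si __v = (__v16si) __a;
  int __sum = 0, __i;
  for (__i = 0; __i < 16; __i++)
    __sum += __v[__i];
  return __sum;
}
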
14068 #undef __MM512_REDUCE_OP
14069 #define __MM512_REDUCE_OP(op) \
14070 __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
14071 __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
14072 __m256i __T3 = _mm256_##op (__T1, __T2); \
14073 __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
14074 __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
14075 __m128i __T6 = _mm_##op (__T4, __T5); \
14076 __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
14077 (__v4si) { 2, 3, 0, 1 }); \
14078 __m128i __T8 = _mm_##op (__T6, __T7); \
14079 __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
14080 (__v4si) { 1, 0, 1, 0 }); \
14081 __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
14082 return __T10[0]
14083
14084 extern __inline int
14085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14086 _mm512_reduce_min_epi32 (__m512i __A)
14087 {
14088 __MM512_REDUCE_OP (min_epi32);
14089 }
14090
14091 extern __inline int
14092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14093 _mm512_reduce_max_epi32 (__m512i __A)
14094 {
14095 __MM512_REDUCE_OP (max_epi32);
14096 }
14097
14098 extern __inline unsigned int
14099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14100 _mm512_reduce_min_epu32 (__m512i __A)
14101 {
14102 __MM512_REDUCE_OP (min_epu32);
14103 }
14104
14105 extern __inline unsigned int
14106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14107 _mm512_reduce_max_epu32 (__m512i __A)
14108 {
14109 __MM512_REDUCE_OP (max_epu32);
14110 }
14111
14112 extern __inline int
14113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14114 _mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
14115 {
14116 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
14117 __MM512_REDUCE_OP (min_epi32);
14118 }
14119
14120 extern __inline int
14121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14122 _mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
14123 {
14124 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
14125 __MM512_REDUCE_OP (max_epi32);
14126 }
14127
14128 extern __inline unsigned int
14129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14130 _mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
14131 {
14132 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14133 __MM512_REDUCE_OP (min_epu32);
14134 }
14135
14136 extern __inline unsigned int
14137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14138 _mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
14139 {
14140 __A = _mm512_maskz_mov_epi32 (__U, __A);
14141 __MM512_REDUCE_OP (max_epu32);
14142 }
14143
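/* Illustrative sketch, not part of the original header: the masked
   reductions above first overwrite the inactive lanes with the identity of
   the operation (__INT_MAX__ for a signed minimum), so only the selected
   lanes influence the result.  __example_min_of_even_lanes is a
   hypothetical helper.  */
static __inline int
__example_min_of_even_lanes (__m512i __a)
{
  /* 0x5555 selects lanes 0, 2, 4, ..., 14.  */
  return _mm512_mask_reduce_min_epi32 ((__mmask16) 0x5555, __a);
}
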
14144 #undef __MM512_REDUCE_OP
14145 #define __MM512_REDUCE_OP(op) \
14146 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
14147 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
14148 __m256 __T3 = __T1 op __T2; \
14149 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
14150 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
14151 __m128 __T6 = __T4 op __T5; \
14152 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
14153 __m128 __T8 = __T6 op __T7; \
14154 return __T8[0] op __T8[1]
14155
14156 extern __inline float
14157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14158 _mm512_reduce_add_ps (__m512 __A)
14159 {
14160 __MM512_REDUCE_OP (+);
14161 }
14162
14163 extern __inline float
14164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14165 _mm512_reduce_mul_ps (__m512 __A)
14166 {
14167 __MM512_REDUCE_OP (*);
14168 }
14169
14170 extern __inline float
14171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14172 _mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
14173 {
14174 __A = _mm512_maskz_mov_ps (__U, __A);
14175 __MM512_REDUCE_OP (+);
14176 }
14177
14178 extern __inline float
14179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14180 _mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
14181 {
14182 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
14183 __MM512_REDUCE_OP (*);
14184 }
14185
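/* Illustrative sketch, not part of the original header: a horizontal mean
   built on _mm512_reduce_add_ps.  The tree-shaped reduction above does not
   add the lanes in source order, so the result may differ from a
   sequential sum in the last bits.  __example_mean_ps is a hypothetical
   helper.  */
static __inline float
__example_mean_ps (__m512 __a)
{
  return _mm512_reduce_add_ps (__a) / 16.0f;
}
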
14186 #undef __MM512_REDUCE_OP
14187 #define __MM512_REDUCE_OP(op) \
14188 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
14189 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
14190 __m256 __T3 = _mm256_##op (__T1, __T2); \
14191 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
14192 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
14193 __m128 __T6 = _mm_##op (__T4, __T5); \
14194 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
14195 __m128 __T8 = _mm_##op (__T6, __T7); \
14196 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
14197 __m128 __T10 = _mm_##op (__T8, __T9); \
14198 return __T10[0]
14199
14200 extern __inline float
14201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14202 _mm512_reduce_min_ps (__m512 __A)
14203 {
14204 __MM512_REDUCE_OP (min_ps);
14205 }
14206
14207 extern __inline float
14208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14209 _mm512_reduce_max_ps (__m512 __A)
14210 {
14211 __MM512_REDUCE_OP (max_ps);
14212 }
14213
14214 extern __inline float
14215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14216 _mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
14217 {
14218 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
14219 __MM512_REDUCE_OP (min_ps);
14220 }
14221
14222 extern __inline float
14223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14224 _mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
14225 {
14226 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
14227 __MM512_REDUCE_OP (max_ps);
14228 }
14229
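/* Illustrative sketch, not part of the original header: the masked min/max
   reductions above seed the inactive lanes with +/-infinity, so an
   all-zero mask yields -inf from _mm512_mask_reduce_max_ps.  A hypothetical
   helper guarding against that case:  */
static __inline float
__example_max_or_default_ps (__mmask16 __m, __m512 __a, float __def)
{
  return __m ? _mm512_mask_reduce_max_ps (__m, __a) : __def;
}
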
14230 #undef __MM512_REDUCE_OP
14231 #define __MM512_REDUCE_OP(op) \
14232 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
14233 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
14234 __m256i __T3 = (__m256i) (__T1 op __T2); \
14235 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
14236 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
14237 __v2di __T6 = __T4 op __T5; \
14238 return __T6[0] op __T6[1]
14239
14240 extern __inline long long
14241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14242 _mm512_reduce_add_epi64 (__m512i __A)
14243 {
14244 __MM512_REDUCE_OP (+);
14245 }
14246
14247 extern __inline long long
14248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14249 _mm512_reduce_mul_epi64 (__m512i __A)
14250 {
14251 __MM512_REDUCE_OP (*);
14252 }
14253
14254 extern __inline long long
14255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14256 _mm512_reduce_and_epi64 (__m512i __A)
14257 {
14258 __MM512_REDUCE_OP (&);
14259 }
14260
14261 extern __inline long long
14262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14263 _mm512_reduce_or_epi64 (__m512i __A)
14264 {
14265 __MM512_REDUCE_OP (|);
14266 }
14267
14268 extern __inline long long
14269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14270 _mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
14271 {
14272 __A = _mm512_maskz_mov_epi64 (__U, __A);
14273 __MM512_REDUCE_OP (+);
14274 }
14275
14276 extern __inline long long
14277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14278 _mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
14279 {
14280 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
14281 __MM512_REDUCE_OP (*);
14282 }
14283
14284 extern __inline long long
14285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14286 _mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
14287 {
14288 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
14289 __MM512_REDUCE_OP (&);
14290 }
14291
14292 extern __inline long long
14293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14294 _mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
14295 {
14296 __A = _mm512_maskz_mov_epi64 (__U, __A);
14297 __MM512_REDUCE_OP (|);
14298 }
14299
14300 #undef __MM512_REDUCE_OP
14301 #define __MM512_REDUCE_OP(op) \
14302 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
14303 __m512i __T2 = _mm512_##op (__A, __T1); \
14304 __m512i __T3 \
14305 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
14306 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
14307 __m512i __T4 = _mm512_##op (__T2, __T3); \
14308 __m512i __T5 \
14309 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
14310 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
14311 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
14312 return __T6[0]
14313
14314 extern __inline long long
14315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14316 _mm512_reduce_min_epi64 (__m512i __A)
14317 {
14318 __MM512_REDUCE_OP (min_epi64);
14319 }
14320
14321 extern __inline long long
14322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14323 _mm512_reduce_max_epi64 (__m512i __A)
14324 {
14325 __MM512_REDUCE_OP (max_epi64);
14326 }
14327
14328 extern __inline long long
14329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14330 _mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
14331 {
14332 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
14333 __U, __A);
14334 __MM512_REDUCE_OP (min_epi64);
14335 }
14336
14337 extern __inline long long
14338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14339 _mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
14340 {
14341 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
14342 __U, __A);
14343 __MM512_REDUCE_OP (max_epi64);
14344 }
14345
14346 extern __inline unsigned long long
14347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14348 _mm512_reduce_min_epu64 (__m512i __A)
14349 {
14350 __MM512_REDUCE_OP (min_epu64);
14351 }
14352
14353 extern __inline unsigned long long
14354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14355 _mm512_reduce_max_epu64 (__m512i __A)
14356 {
14357 __MM512_REDUCE_OP (max_epu64);
14358 }
14359
14360 extern __inline unsigned long long
14361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14362 _mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
14363 {
14364 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
14365 __MM512_REDUCE_OP (min_epu64);
14366 }
14367
14368 extern __inline unsigned long long
14369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14370 _mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
14371 {
14372 __A = _mm512_maskz_mov_epi64 (__U, __A);
14373 __MM512_REDUCE_OP (max_epu64);
14374 }
14375
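/* Illustrative note, not part of the original header: unlike the 32-bit
   reductions, the 64-bit min/max variants above stay in 512-bit registers
   and shuffle with _mm512_shuffle_i64x2, presumably because packed 64-bit
   min/max has no 128/256-bit form in plain AVX512F.  For reference, a
   hypothetical scalar equivalent of _mm512_reduce_max_epu64:  */
static __inline unsigned long long
__example_reduce_max_epu64_scalar (__m512i __a)
{
  __v8du __v = (__v8du) __a;
  unsigned long long __best = __v[0];
  int __i;
  for (__i = 1; __i < 8; __i++)
    if (__v[__i] > __best)
      __best = __v[__i];
  return __best;
}
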
14376 #undef __MM512_REDUCE_OP
14377 #define __MM512_REDUCE_OP(op) \
14378 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
14379 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
14380 __m256d __T3 = __T1 op __T2; \
14381 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
14382 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
14383 __m128d __T6 = __T4 op __T5; \
14384 return __T6[0] op __T6[1]
14385
14386 extern __inline double
14387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14388 _mm512_reduce_add_pd (__m512d __A)
14389 {
14390 __MM512_REDUCE_OP (+);
14391 }
14392
14393 extern __inline double
14394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14395 _mm512_reduce_mul_pd (__m512d __A)
14396 {
14397 __MM512_REDUCE_OP (*);
14398 }
14399
14400 extern __inline double
14401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14402 _mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
14403 {
14404 __A = _mm512_maskz_mov_pd (__U, __A);
14405 __MM512_REDUCE_OP (+);
14406 }
14407
14408 extern __inline double
14409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14410 _mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
14411 {
14412 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
14413 __MM512_REDUCE_OP (*);
14414 }
14415
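/* Illustrative sketch, not part of the original header: combining a
   lane-wise multiply with the horizontal add above gives an 8-element dot
   product.  _mm512_mul_pd is defined earlier in this file;
   __example_dot_pd is a hypothetical helper.  */
static __inline double
__example_dot_pd (__m512d __x, __m512d __y)
{
  return _mm512_reduce_add_pd (_mm512_mul_pd (__x, __y));
}
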
14416 #undef __MM512_REDUCE_OP
14417 #define __MM512_REDUCE_OP(op) \
14418 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
14419 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
14420 __m256d __T3 = _mm256_##op (__T1, __T2); \
14421 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
14422 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
14423 __m128d __T6 = _mm_##op (__T4, __T5); \
14424 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
14425 __m128d __T8 = _mm_##op (__T6, __T7); \
14426 return __T8[0]
14427
14428 extern __inline double
14429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14430 _mm512_reduce_min_pd (__m512d __A)
14431 {
14432 __MM512_REDUCE_OP (min_pd);
14433 }
14434
14435 extern __inline double
14436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14437 _mm512_reduce_max_pd (__m512d __A)
14438 {
14439 __MM512_REDUCE_OP (max_pd);
14440 }
14441
14442 extern __inline double
14443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14444 _mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
14445 {
14446 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
14447 __MM512_REDUCE_OP (min_pd);
14448 }
14449
14450 extern __inline double
14451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14452 _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
14453 {
14454 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
14455 __MM512_REDUCE_OP (max_pd);
14456 }
14457
14458 #undef __MM512_REDUCE_OP
14459
14460 #ifdef __DISABLE_AVX512F__
14461 #undef __DISABLE_AVX512F__
14462 #pragma GCC pop_options
14463 #endif /* __DISABLE_AVX512F__ */
14464
14465 #endif /* _AVX512FINTRIN_H_INCLUDED */