git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/avx512fintrin.h
Add AVX512 k-mask intrinsics.
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
36
/* Internal data types for implementing the intrinsics.  Every one of them
   is a 64-byte GCC vector of the named element type; the intrinsics below
   cast their public __m512* arguments to these before operating on them.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  These are the public
   512-bit types; __may_alias__ is what permits that aliasing.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned version of the same type: identical layout, but declared with
   __aligned__ (1).  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));

/* Mask types used by the masked intrinsics: an 8-bit mask accompanies the
   8-element (64-bit lane) operations and a 16-bit mask the 16-element
   (32-bit lane) operations in this file.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
62
63 extern __inline __m512i
64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65 _mm512_set_epi64 (long long __A, long long __B, long long __C,
66 long long __D, long long __E, long long __F,
67 long long __G, long long __H)
68 {
69 return __extension__ (__m512i) (__v8di)
70 { __H, __G, __F, __E, __D, __C, __B, __A };
71 }
72
73 /* Create the vector [A B C D E F G H I J K L M N O P]. */
74 extern __inline __m512i
75 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
76 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
77 int __E, int __F, int __G, int __H,
78 int __I, int __J, int __K, int __L,
79 int __M, int __N, int __O, int __P)
80 {
81 return __extension__ (__m512i)(__v16si)
82 { __P, __O, __N, __M, __L, __K, __J, __I,
83 __H, __G, __F, __E, __D, __C, __B, __A };
84 }
85
86 extern __inline __m512d
87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88 _mm512_set_pd (double __A, double __B, double __C, double __D,
89 double __E, double __F, double __G, double __H)
90 {
91 return __extension__ (__m512d)
92 { __H, __G, __F, __E, __D, __C, __B, __A };
93 }
94
95 extern __inline __m512
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 _mm512_set_ps (float __A, float __B, float __C, float __D,
98 float __E, float __F, float __G, float __H,
99 float __I, float __J, float __K, float __L,
100 float __M, float __N, float __O, float __P)
101 {
102 return __extension__ (__m512)
103 { __P, __O, __N, __M, __L, __K, __J, __I,
104 __H, __G, __F, __E, __D, __C, __B, __A };
105 }
106
/* "Reversed" setters.  Each one forwards to the matching _mm512_set_*
   function with its arguments in the opposite order, so e0 is passed last
   and therefore ends up in element 0 of the result.  */

/* Eight 64-bit integers, e0 in element 0.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

/* Sixteen 32-bit integers, e0 in element 0.  */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

/* Eight doubles, e0 in element 0.  */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

/* Sixteen floats, e0 in element 0.  */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
119
120 extern __inline __m512
121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
122 _mm512_undefined_ps (void)
123 {
124 __m512 __Y = __Y;
125 return __Y;
126 }
127
128 extern __inline __m512d
129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
130 _mm512_undefined_pd (void)
131 {
132 __m512d __Y = __Y;
133 return __Y;
134 }
135
136 extern __inline __m512i
137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
138 _mm512_undefined_epi32 (void)
139 {
140 __m512i __Y = __Y;
141 return __Y;
142 }
143
/* _mm512_undefined_si512 is simply another name for
   _mm512_undefined_epi32.  */
#define _mm512_undefined_si512 _mm512_undefined_epi32
145
146 extern __inline __m512i
147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
148 _mm512_set1_epi8 (char __A)
149 {
150 return __extension__ (__m512i)(__v64qi)
151 { __A, __A, __A, __A, __A, __A, __A, __A,
152 __A, __A, __A, __A, __A, __A, __A, __A,
153 __A, __A, __A, __A, __A, __A, __A, __A,
154 __A, __A, __A, __A, __A, __A, __A, __A,
155 __A, __A, __A, __A, __A, __A, __A, __A,
156 __A, __A, __A, __A, __A, __A, __A, __A,
157 __A, __A, __A, __A, __A, __A, __A, __A,
158 __A, __A, __A, __A, __A, __A, __A, __A };
159 }
160
161 extern __inline __m512i
162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163 _mm512_set1_epi16 (short __A)
164 {
165 return __extension__ (__m512i)(__v32hi)
166 { __A, __A, __A, __A, __A, __A, __A, __A,
167 __A, __A, __A, __A, __A, __A, __A, __A,
168 __A, __A, __A, __A, __A, __A, __A, __A,
169 __A, __A, __A, __A, __A, __A, __A, __A };
170 }
171
172 extern __inline __m512d
173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 _mm512_set1_pd (double __A)
175 {
176 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
177 (__v2df) { __A, },
178 (__v8df)
179 _mm512_undefined_pd (),
180 (__mmask8) -1);
181 }
182
183 extern __inline __m512
184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
185 _mm512_set1_ps (float __A)
186 {
187 return (__m512) __builtin_ia32_broadcastss512 (__extension__
188 (__v4sf) { __A, },
189 (__v16sf)
190 _mm512_undefined_ps (),
191 (__mmask16) -1);
192 }
193
194 /* Create the vector [A B C D A B C D A B C D A B C D]. */
195 extern __inline __m512i
196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
197 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
198 {
199 return __extension__ (__m512i)(__v16si)
200 { __D, __C, __B, __A, __D, __C, __B, __A,
201 __D, __C, __B, __A, __D, __C, __B, __A };
202 }
203
204 extern __inline __m512i
205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
206 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
207 long long __D)
208 {
209 return __extension__ (__m512i) (__v8di)
210 { __D, __C, __B, __A, __D, __C, __B, __A };
211 }
212
213 extern __inline __m512d
214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
215 _mm512_set4_pd (double __A, double __B, double __C, double __D)
216 {
217 return __extension__ (__m512d)
218 { __D, __C, __B, __A, __D, __C, __B, __A };
219 }
220
221 extern __inline __m512
222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223 _mm512_set4_ps (float __A, float __B, float __C, float __D)
224 {
225 return __extension__ (__m512)
226 { __D, __C, __B, __A, __D, __C, __B, __A,
227 __D, __C, __B, __A, __D, __C, __B, __A };
228 }
229
/* "Reversed" four-value setters.  Each one forwards to the matching
   _mm512_set4_* function with its four arguments in the opposite order, so
   e0 ends up in element 0 of every repeated group.  */

/* 64-bit integer variant.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64(e3,e2,e1,e0)

/* 32-bit integer variant.  */
#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32(e3,e2,e1,e0)

/* Double-precision variant.  */
#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd(e3,e2,e1,e0)

/* Single-precision variant.  */
#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps(e3,e2,e1,e0)
241
242 extern __inline __m512
243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
244 _mm512_setzero_ps (void)
245 {
246 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
247 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
248 }
249
250 extern __inline __m512d
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 _mm512_setzero_pd (void)
253 {
254 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
255 }
256
257 extern __inline __m512i
258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259 _mm512_setzero_epi32 (void)
260 {
261 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
262 }
263
264 extern __inline __m512i
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm512_setzero_si512 (void)
267 {
268 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
269 }
270
271 extern __inline __m512d
272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
273 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
274 {
275 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
276 (__v8df) __W,
277 (__mmask8) __U);
278 }
279
280 extern __inline __m512d
281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
283 {
284 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
285 (__v8df)
286 _mm512_setzero_pd (),
287 (__mmask8) __U);
288 }
289
290 extern __inline __m512
291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
292 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
293 {
294 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
295 (__v16sf) __W,
296 (__mmask16) __U);
297 }
298
299 extern __inline __m512
300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
302 {
303 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
304 (__v16sf)
305 _mm512_setzero_ps (),
306 (__mmask16) __U);
307 }
308
309 extern __inline __m512d
310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311 _mm512_load_pd (void const *__P)
312 {
313 return *(__m512d *) __P;
314 }
315
316 extern __inline __m512d
317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
319 {
320 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
321 (__v8df) __W,
322 (__mmask8) __U);
323 }
324
325 extern __inline __m512d
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
328 {
329 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
330 (__v8df)
331 _mm512_setzero_pd (),
332 (__mmask8) __U);
333 }
334
335 extern __inline void
336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337 _mm512_store_pd (void *__P, __m512d __A)
338 {
339 *(__m512d *) __P = __A;
340 }
341
342 extern __inline void
343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
344 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
345 {
346 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
347 (__mmask8) __U);
348 }
349
350 extern __inline __m512
351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352 _mm512_load_ps (void const *__P)
353 {
354 return *(__m512 *) __P;
355 }
356
357 extern __inline __m512
358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
359 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
360 {
361 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
362 (__v16sf) __W,
363 (__mmask16) __U);
364 }
365
366 extern __inline __m512
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
369 {
370 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
371 (__v16sf)
372 _mm512_setzero_ps (),
373 (__mmask16) __U);
374 }
375
376 extern __inline void
377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378 _mm512_store_ps (void *__P, __m512 __A)
379 {
380 *(__m512 *) __P = __A;
381 }
382
383 extern __inline void
384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
385 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
386 {
387 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
388 (__mmask16) __U);
389 }
390
391 extern __inline __m512i
392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
393 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
394 {
395 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
396 (__v8di) __W,
397 (__mmask8) __U);
398 }
399
400 extern __inline __m512i
401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
403 {
404 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
405 (__v8di)
406 _mm512_setzero_si512 (),
407 (__mmask8) __U);
408 }
409
410 extern __inline __m512i
411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412 _mm512_load_epi64 (void const *__P)
413 {
414 return *(__m512i *) __P;
415 }
416
417 extern __inline __m512i
418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
419 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
420 {
421 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
422 (__v8di) __W,
423 (__mmask8) __U);
424 }
425
426 extern __inline __m512i
427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
429 {
430 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
431 (__v8di)
432 _mm512_setzero_si512 (),
433 (__mmask8) __U);
434 }
435
436 extern __inline void
437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438 _mm512_store_epi64 (void *__P, __m512i __A)
439 {
440 *(__m512i *) __P = __A;
441 }
442
443 extern __inline void
444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
445 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
446 {
447 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
448 (__mmask8) __U);
449 }
450
451 extern __inline __m512i
452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
454 {
455 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
456 (__v16si) __W,
457 (__mmask16) __U);
458 }
459
460 extern __inline __m512i
461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
462 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
463 {
464 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
465 (__v16si)
466 _mm512_setzero_si512 (),
467 (__mmask16) __U);
468 }
469
470 extern __inline __m512i
471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
472 _mm512_load_si512 (void const *__P)
473 {
474 return *(__m512i *) __P;
475 }
476
477 extern __inline __m512i
478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479 _mm512_load_epi32 (void const *__P)
480 {
481 return *(__m512i *) __P;
482 }
483
484 extern __inline __m512i
485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
487 {
488 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
489 (__v16si) __W,
490 (__mmask16) __U);
491 }
492
493 extern __inline __m512i
494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
496 {
497 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
498 (__v16si)
499 _mm512_setzero_si512 (),
500 (__mmask16) __U);
501 }
502
503 extern __inline void
504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505 _mm512_store_si512 (void *__P, __m512i __A)
506 {
507 *(__m512i *) __P = __A;
508 }
509
510 extern __inline void
511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512 _mm512_store_epi32 (void *__P, __m512i __A)
513 {
514 *(__m512i *) __P = __A;
515 }
516
517 extern __inline void
518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
519 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
520 {
521 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
522 (__mmask16) __U);
523 }
524
525 extern __inline __m512i
526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
528 {
529 return (__m512i) ((__v16su) __A * (__v16su) __B);
530 }
531
532 extern __inline __m512i
533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
534 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
535 {
536 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
537 (__v16si) __B,
538 (__v16si)
539 _mm512_setzero_si512 (),
540 __M);
541 }
542
543 extern __inline __m512i
544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
545 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
546 {
547 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
548 (__v16si) __B,
549 (__v16si) __W, __M);
550 }
551
552 extern __inline __m512i
553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
555 {
556 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
557 (__v16si) __Y,
558 (__v16si)
559 _mm512_undefined_epi32 (),
560 (__mmask16) -1);
561 }
562
563 extern __inline __m512i
564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
565 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
566 {
567 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
568 (__v16si) __Y,
569 (__v16si) __W,
570 (__mmask16) __U);
571 }
572
573 extern __inline __m512i
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
576 {
577 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
578 (__v16si) __Y,
579 (__v16si)
580 _mm512_setzero_si512 (),
581 (__mmask16) __U);
582 }
583
584 extern __inline __m512i
585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
586 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
587 {
588 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
589 (__v16si) __Y,
590 (__v16si)
591 _mm512_undefined_epi32 (),
592 (__mmask16) -1);
593 }
594
595 extern __inline __m512i
596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
597 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
598 {
599 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
600 (__v16si) __Y,
601 (__v16si) __W,
602 (__mmask16) __U);
603 }
604
605 extern __inline __m512i
606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
608 {
609 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
610 (__v16si) __Y,
611 (__v16si)
612 _mm512_setzero_si512 (),
613 (__mmask16) __U);
614 }
615
616 extern __inline __m512i
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
619 {
620 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
621 (__v16si) __Y,
622 (__v16si)
623 _mm512_undefined_epi32 (),
624 (__mmask16) -1);
625 }
626
627 extern __inline __m512i
628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
630 {
631 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
632 (__v16si) __Y,
633 (__v16si) __W,
634 (__mmask16) __U);
635 }
636
637 extern __inline __m512i
638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
639 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
640 {
641 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
642 (__v16si) __Y,
643 (__v16si)
644 _mm512_setzero_si512 (),
645 (__mmask16) __U);
646 }
647
648 extern __inline __m512i
649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650 _mm512_add_epi64 (__m512i __A, __m512i __B)
651 {
652 return (__m512i) ((__v8du) __A + (__v8du) __B);
653 }
654
655 extern __inline __m512i
656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
658 {
659 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
660 (__v8di) __B,
661 (__v8di) __W,
662 (__mmask8) __U);
663 }
664
665 extern __inline __m512i
666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
667 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
668 {
669 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
670 (__v8di) __B,
671 (__v8di)
672 _mm512_setzero_si512 (),
673 (__mmask8) __U);
674 }
675
676 extern __inline __m512i
677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678 _mm512_sub_epi64 (__m512i __A, __m512i __B)
679 {
680 return (__m512i) ((__v8du) __A - (__v8du) __B);
681 }
682
683 extern __inline __m512i
684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
686 {
687 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
688 (__v8di) __B,
689 (__v8di) __W,
690 (__mmask8) __U);
691 }
692
693 extern __inline __m512i
694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
695 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
696 {
697 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
698 (__v8di) __B,
699 (__v8di)
700 _mm512_setzero_si512 (),
701 (__mmask8) __U);
702 }
703
704 extern __inline __m512i
705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
706 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
707 {
708 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
709 (__v8di) __Y,
710 (__v8di)
711 _mm512_undefined_pd (),
712 (__mmask8) -1);
713 }
714
715 extern __inline __m512i
716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
717 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
718 {
719 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
720 (__v8di) __Y,
721 (__v8di) __W,
722 (__mmask8) __U);
723 }
724
725 extern __inline __m512i
726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
727 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
728 {
729 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
730 (__v8di) __Y,
731 (__v8di)
732 _mm512_setzero_si512 (),
733 (__mmask8) __U);
734 }
735
736 extern __inline __m512i
737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
738 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
739 {
740 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
741 (__v8di) __Y,
742 (__v8di)
743 _mm512_undefined_epi32 (),
744 (__mmask8) -1);
745 }
746
747 extern __inline __m512i
748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
749 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
750 {
751 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
752 (__v8di) __Y,
753 (__v8di) __W,
754 (__mmask8) __U);
755 }
756
757 extern __inline __m512i
758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
759 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
760 {
761 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
762 (__v8di) __Y,
763 (__v8di)
764 _mm512_setzero_si512 (),
765 (__mmask8) __U);
766 }
767
768 extern __inline __m512i
769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
770 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
771 {
772 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
773 (__v8di) __Y,
774 (__v8di)
775 _mm512_undefined_epi32 (),
776 (__mmask8) -1);
777 }
778
779 extern __inline __m512i
780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
781 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
782 {
783 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
784 (__v8di) __Y,
785 (__v8di) __W,
786 (__mmask8) __U);
787 }
788
789 extern __inline __m512i
790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
791 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
792 {
793 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
794 (__v8di) __Y,
795 (__v8di)
796 _mm512_setzero_si512 (),
797 (__mmask8) __U);
798 }
799
800 extern __inline __m512i
801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802 _mm512_add_epi32 (__m512i __A, __m512i __B)
803 {
804 return (__m512i) ((__v16su) __A + (__v16su) __B);
805 }
806
807 extern __inline __m512i
808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
810 {
811 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
812 (__v16si) __B,
813 (__v16si) __W,
814 (__mmask16) __U);
815 }
816
817 extern __inline __m512i
818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
819 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
820 {
821 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
822 (__v16si) __B,
823 (__v16si)
824 _mm512_setzero_si512 (),
825 (__mmask16) __U);
826 }
827
828 extern __inline __m512i
829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
830 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
831 {
832 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
833 (__v16si) __Y,
834 (__v8di)
835 _mm512_undefined_epi32 (),
836 (__mmask8) -1);
837 }
838
839 extern __inline __m512i
840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
841 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
842 {
843 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
844 (__v16si) __Y,
845 (__v8di) __W, __M);
846 }
847
848 extern __inline __m512i
849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
851 {
852 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
853 (__v16si) __Y,
854 (__v8di)
855 _mm512_setzero_si512 (),
856 __M);
857 }
858
859 extern __inline __m512i
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 _mm512_sub_epi32 (__m512i __A, __m512i __B)
862 {
863 return (__m512i) ((__v16su) __A - (__v16su) __B);
864 }
865
866 extern __inline __m512i
867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
868 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
869 {
870 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
871 (__v16si) __B,
872 (__v16si) __W,
873 (__mmask16) __U);
874 }
875
876 extern __inline __m512i
877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
878 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
879 {
880 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
881 (__v16si) __B,
882 (__v16si)
883 _mm512_setzero_si512 (),
884 (__mmask16) __U);
885 }
886
887 extern __inline __m512i
888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
890 {
891 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
892 (__v16si) __Y,
893 (__v8di)
894 _mm512_undefined_epi32 (),
895 (__mmask8) -1);
896 }
897
898 extern __inline __m512i
899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
901 {
902 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
903 (__v16si) __Y,
904 (__v8di) __W, __M);
905 }
906
907 extern __inline __m512i
908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
909 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
910 {
911 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
912 (__v16si) __Y,
913 (__v8di)
914 _mm512_setzero_si512 (),
915 __M);
916 }
917
/* 64-bit shifts by a scalar count, built on __builtin_ia32_psllqi512_mask.
   When __OPTIMIZE__ is defined they are inline functions; otherwise they
   are macros that hand the count expression to the builtin directly.
   NOTE(review): presumably the builtin needs a compile-time constant
   count, which a function parameter only provides once the call has been
   inlined and optimized -- confirm against the builtin's definition.
   NOTE(review): the functions take the count as unsigned int whereas the
   macros cast it to int.  */
#ifdef __OPTIMIZE__
/* Unmasked form: undefined third operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  (__v8di)
                                                  _mm512_undefined_epi32 (),
                                                  (__mmask8) -1);
}

/* Masked form: __W is the pass-through operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  (__v8di) __W,
                                                  (__mmask8) __U);
}

/* Zero-masking form: all-zero third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) __U);
}
#else
/* Macro equivalents of the three functions above; X is the value and C the
   count.  */
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
964
965 extern __inline __m512i
966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 _mm512_sll_epi64 (__m512i __A, __m128i __B)
968 {
969 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
970 (__v2di) __B,
971 (__v8di)
972 _mm512_undefined_epi32 (),
973 (__mmask8) -1);
974 }
975
976 extern __inline __m512i
977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
979 {
980 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
981 (__v2di) __B,
982 (__v8di) __W,
983 (__mmask8) __U);
984 }
985
986 extern __inline __m512i
987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
988 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
989 {
990 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
991 (__v2di) __B,
992 (__v8di)
993 _mm512_setzero_si512 (),
994 (__mmask8) __U);
995 }
996
997 #ifdef __OPTIMIZE__
998 extern __inline __m512i
999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1000 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
1001 {
1002 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1003 (__v8di)
1004 _mm512_undefined_epi32 (),
1005 (__mmask8) -1);
1006 }
1007
1008 extern __inline __m512i
1009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1010 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1011 __m512i __A, unsigned int __B)
1012 {
1013 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1014 (__v8di) __W,
1015 (__mmask8) __U);
1016 }
1017
1018 extern __inline __m512i
1019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1020 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1021 {
1022 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1023 (__v8di)
1024 _mm512_setzero_si512 (),
1025 (__mmask8) __U);
1026 }
1027 #else
/* Macro forms of _mm512_{,mask_,maskz_}srli_epi64, selected when
   __OPTIMIZE__ is not defined.  Each expands to a single
   __builtin_ia32_psrlqi512_mask call with the count cast to int.  */
#define _mm512_srli_epi64(V, N) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di) (__m512i) (V), (int) (N), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_srli_epi64(S, K, V, N) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di) (__m512i) (V), (int) (N), \
     (__v8di) (__m512i) (S), \
     (__mmask8) (K)))

#define _mm512_maskz_srli_epi64(K, V, N) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di) (__m512i) (V), (int) (N), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (K)))
1042 #endif
1043
1044 extern __inline __m512i
1045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1046 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1047 {
1048 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1049 (__v2di) __B,
1050 (__v8di)
1051 _mm512_undefined_epi32 (),
1052 (__mmask8) -1);
1053 }
1054
1055 extern __inline __m512i
1056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1057 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1058 {
1059 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1060 (__v2di) __B,
1061 (__v8di) __W,
1062 (__mmask8) __U);
1063 }
1064
1065 extern __inline __m512i
1066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1067 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1068 {
1069 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1070 (__v2di) __B,
1071 (__v8di)
1072 _mm512_setzero_si512 (),
1073 (__mmask8) __U);
1074 }
1075
1076 #ifdef __OPTIMIZE__
1077 extern __inline __m512i
1078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1079 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1080 {
1081 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1082 (__v8di)
1083 _mm512_undefined_epi32 (),
1084 (__mmask8) -1);
1085 }
1086
1087 extern __inline __m512i
1088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1089 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1090 unsigned int __B)
1091 {
1092 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1093 (__v8di) __W,
1094 (__mmask8) __U);
1095 }
1096
1097 extern __inline __m512i
1098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1099 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1100 {
1101 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1102 (__v8di)
1103 _mm512_setzero_si512 (),
1104 (__mmask8) __U);
1105 }
1106 #else
/* Macro forms of _mm512_{,mask_,maskz_}srai_epi64, selected when
   __OPTIMIZE__ is not defined.  Each expands to a single
   __builtin_ia32_psraqi512_mask call with the count cast to int.  */
#define _mm512_srai_epi64(V, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di) (__m512i) (V), (int) (N), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_srai_epi64(S, K, V, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di) (__m512i) (V), (int) (N), \
     (__v8di) (__m512i) (S), \
     (__mmask8) (K)))

#define _mm512_maskz_srai_epi64(K, V, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di) (__m512i) (V), (int) (N), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (K)))
1121 #endif
1122
1123 extern __inline __m512i
1124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1126 {
1127 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1128 (__v2di) __B,
1129 (__v8di)
1130 _mm512_undefined_epi32 (),
1131 (__mmask8) -1);
1132 }
1133
1134 extern __inline __m512i
1135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1136 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1137 {
1138 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1139 (__v2di) __B,
1140 (__v8di) __W,
1141 (__mmask8) __U);
1142 }
1143
1144 extern __inline __m512i
1145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1146 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1147 {
1148 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1149 (__v2di) __B,
1150 (__v8di)
1151 _mm512_setzero_si512 (),
1152 (__mmask8) __U);
1153 }
1154
1155 #ifdef __OPTIMIZE__
1156 extern __inline __m512i
1157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1158 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1159 {
1160 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1161 (__v16si)
1162 _mm512_undefined_epi32 (),
1163 (__mmask16) -1);
1164 }
1165
1166 extern __inline __m512i
1167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1169 unsigned int __B)
1170 {
1171 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1172 (__v16si) __W,
1173 (__mmask16) __U);
1174 }
1175
1176 extern __inline __m512i
1177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1178 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1179 {
1180 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1181 (__v16si)
1182 _mm512_setzero_si512 (),
1183 (__mmask16) __U);
1184 }
1185 #else
/* Macro forms of _mm512_{,mask_,maskz_}slli_epi32, selected when
   __OPTIMIZE__ is not defined.  Each expands to a single
   __builtin_ia32_pslldi512_mask call with the count cast to int.  */
#define _mm512_slli_epi32(V, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si) (__m512i) (V), (int) (N), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_slli_epi32(S, K, V, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si) (__m512i) (V), (int) (N), \
     (__v16si) (__m512i) (S), \
     (__mmask16) (K)))

#define _mm512_maskz_slli_epi32(K, V, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si) (__m512i) (V), (int) (N), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (K)))
1200 #endif
1201
1202 extern __inline __m512i
1203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1204 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1205 {
1206 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1207 (__v4si) __B,
1208 (__v16si)
1209 _mm512_undefined_epi32 (),
1210 (__mmask16) -1);
1211 }
1212
1213 extern __inline __m512i
1214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1215 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1216 {
1217 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1218 (__v4si) __B,
1219 (__v16si) __W,
1220 (__mmask16) __U);
1221 }
1222
1223 extern __inline __m512i
1224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1225 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1226 {
1227 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1228 (__v4si) __B,
1229 (__v16si)
1230 _mm512_setzero_si512 (),
1231 (__mmask16) __U);
1232 }
1233
1234 #ifdef __OPTIMIZE__
1235 extern __inline __m512i
1236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1237 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1238 {
1239 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1240 (__v16si)
1241 _mm512_undefined_epi32 (),
1242 (__mmask16) -1);
1243 }
1244
1245 extern __inline __m512i
1246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1247 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1248 __m512i __A, unsigned int __B)
1249 {
1250 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1251 (__v16si) __W,
1252 (__mmask16) __U);
1253 }
1254
1255 extern __inline __m512i
1256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1257 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1258 {
1259 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1260 (__v16si)
1261 _mm512_setzero_si512 (),
1262 (__mmask16) __U);
1263 }
1264 #else
/* Macro forms of _mm512_{,mask_,maskz_}srli_epi32, selected when
   __OPTIMIZE__ is not defined.  Each expands to a single
   __builtin_ia32_psrldi512_mask call with the count cast to int.  */
#define _mm512_srli_epi32(V, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si) (__m512i) (V), (int) (N), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_srli_epi32(S, K, V, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si) (__m512i) (V), (int) (N), \
     (__v16si) (__m512i) (S), \
     (__mmask16) (K)))

#define _mm512_maskz_srli_epi32(K, V, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si) (__m512i) (V), (int) (N), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (K)))
1279 #endif
1280
1281 extern __inline __m512i
1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1284 {
1285 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1286 (__v4si) __B,
1287 (__v16si)
1288 _mm512_undefined_epi32 (),
1289 (__mmask16) -1);
1290 }
1291
1292 extern __inline __m512i
1293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1294 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1295 {
1296 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1297 (__v4si) __B,
1298 (__v16si) __W,
1299 (__mmask16) __U);
1300 }
1301
1302 extern __inline __m512i
1303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1304 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1305 {
1306 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1307 (__v4si) __B,
1308 (__v16si)
1309 _mm512_setzero_si512 (),
1310 (__mmask16) __U);
1311 }
1312
1313 #ifdef __OPTIMIZE__
1314 extern __inline __m512i
1315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1316 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1317 {
1318 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1319 (__v16si)
1320 _mm512_undefined_epi32 (),
1321 (__mmask16) -1);
1322 }
1323
1324 extern __inline __m512i
1325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1327 unsigned int __B)
1328 {
1329 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1330 (__v16si) __W,
1331 (__mmask16) __U);
1332 }
1333
1334 extern __inline __m512i
1335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1336 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1337 {
1338 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1339 (__v16si)
1340 _mm512_setzero_si512 (),
1341 (__mmask16) __U);
1342 }
1343 #else
/* Macro forms of _mm512_{,mask_,maskz_}srai_epi32, selected when
   __OPTIMIZE__ is not defined.  Each expands to a single
   __builtin_ia32_psradi512_mask call with the count cast to int.  */
#define _mm512_srai_epi32(V, N) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si) (__m512i) (V), (int) (N), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_srai_epi32(S, K, V, N) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si) (__m512i) (V), (int) (N), \
     (__v16si) (__m512i) (S), \
     (__mmask16) (K)))

#define _mm512_maskz_srai_epi32(K, V, N) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si) (__m512i) (V), (int) (N), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (K)))
1358 #endif
1359
1360 extern __inline __m512i
1361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1363 {
1364 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1365 (__v4si) __B,
1366 (__v16si)
1367 _mm512_undefined_epi32 (),
1368 (__mmask16) -1);
1369 }
1370
1371 extern __inline __m512i
1372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1373 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1374 {
1375 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1376 (__v4si) __B,
1377 (__v16si) __W,
1378 (__mmask16) __U);
1379 }
1380
1381 extern __inline __m512i
1382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1383 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1384 {
1385 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1386 (__v4si) __B,
1387 (__v16si)
1388 _mm512_setzero_si512 (),
1389 (__mmask16) __U);
1390 }
1391
1392 #ifdef __OPTIMIZE__
1393 extern __inline __m128d
1394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1395 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1396 {
1397 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1398 (__v2df) __B,
1399 __R);
1400 }
1401
1402 extern __inline __m128
1403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1404 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1405 {
1406 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1407 (__v4sf) __B,
1408 __R);
1409 }
1410
1411 extern __inline __m128d
1412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1413 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1414 {
1415 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1416 (__v2df) __B,
1417 __R);
1418 }
1419
1420 extern __inline __m128
1421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1423 {
1424 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1425 (__v4sf) __B,
1426 __R);
1427 }
1428
1429 #else
/* Macro forms of the scalar add/sub-with-rounding intrinsics, selected
   when __OPTIMIZE__ is not defined.  A, B and C are forwarded, in that
   order, to the corresponding builtin.

   Each expansion is enclosed in its own parentheses so the result cast
   applies to the whole builtin call.  Without them a postfix operator
   written after the macro (for example a vector subscript) would bind to
   the builtin call instead of the cast result, and the macro would behave
   differently from the inline-function form.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((A), (B), (C)))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((A), (B), (C)))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((A), (B), (C)))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((A), (B), (C)))
1441 #endif
1442
1443 #ifdef __OPTIMIZE__
1444 extern __inline __m512i
1445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1446 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1447 const int __imm)
1448 {
1449 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1450 (__v8di) __B,
1451 (__v8di) __C, __imm,
1452 (__mmask8) -1);
1453 }
1454
1455 extern __inline __m512i
1456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1457 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1458 __m512i __C, const int __imm)
1459 {
1460 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1461 (__v8di) __B,
1462 (__v8di) __C, __imm,
1463 (__mmask8) __U);
1464 }
1465
1466 extern __inline __m512i
1467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1468 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1469 __m512i __C, const int __imm)
1470 {
1471 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1472 (__v8di) __B,
1473 (__v8di) __C,
1474 __imm, (__mmask8) __U);
1475 }
1476
1477 extern __inline __m512i
1478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1479 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1480 const int __imm)
1481 {
1482 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1483 (__v16si) __B,
1484 (__v16si) __C,
1485 __imm, (__mmask16) -1);
1486 }
1487
1488 extern __inline __m512i
1489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1491 __m512i __C, const int __imm)
1492 {
1493 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1494 (__v16si) __B,
1495 (__v16si) __C,
1496 __imm, (__mmask16) __U);
1497 }
1498
1499 extern __inline __m512i
1500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1502 __m512i __C, const int __imm)
1503 {
1504 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1505 (__v16si) __B,
1506 (__v16si) __C,
1507 __imm, (__mmask16) __U);
1508 }
1509 #else
/* Macro forms of the ternarylogic intrinsics, selected when __OPTIMIZE__
   is not defined.  The three vector arguments are cast through __m512i to
   the element vector type and the immediate is cast to int; the maskz
   variants expand to the *_maskz builtins.  */
#define _mm512_ternarylogic_epi64(X, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di) (__m512i) (X), (__v8di) (__m512i) (Y), \
     (__v8di) (__m512i) (Z), (int) (IMM), (__mmask8) -1))

#define _mm512_mask_ternarylogic_epi64(X, K, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di) (__m512i) (X), (__v8di) (__m512i) (Y), \
     (__v8di) (__m512i) (Z), (int) (IMM), (__mmask8) (K)))

#define _mm512_maskz_ternarylogic_epi64(K, X, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ( \
     (__v8di) (__m512i) (X), (__v8di) (__m512i) (Y), \
     (__v8di) (__m512i) (Z), (int) (IMM), (__mmask8) (K)))

#define _mm512_ternarylogic_epi32(X, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si) (__m512i) (X), (__v16si) (__m512i) (Y), \
     (__v16si) (__m512i) (Z), (int) (IMM), (__mmask16) -1))

#define _mm512_mask_ternarylogic_epi32(X, K, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si) (__m512i) (X), (__v16si) (__m512i) (Y), \
     (__v16si) (__m512i) (Z), (int) (IMM), (__mmask16) (K)))

#define _mm512_maskz_ternarylogic_epi32(K, X, Y, Z, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ( \
     (__v16si) (__m512i) (X), (__v16si) (__m512i) (Y), \
     (__v16si) (__m512i) (Z), (int) (IMM), (__mmask16) (K)))
1531 #endif
1532
1533 extern __inline __m512d
1534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535 _mm512_rcp14_pd (__m512d __A)
1536 {
1537 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1538 (__v8df)
1539 _mm512_undefined_pd (),
1540 (__mmask8) -1);
1541 }
1542
1543 extern __inline __m512d
1544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1546 {
1547 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1548 (__v8df) __W,
1549 (__mmask8) __U);
1550 }
1551
1552 extern __inline __m512d
1553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1554 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1555 {
1556 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1557 (__v8df)
1558 _mm512_setzero_pd (),
1559 (__mmask8) __U);
1560 }
1561
1562 extern __inline __m512
1563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1564 _mm512_rcp14_ps (__m512 __A)
1565 {
1566 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1567 (__v16sf)
1568 _mm512_undefined_ps (),
1569 (__mmask16) -1);
1570 }
1571
1572 extern __inline __m512
1573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1574 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1575 {
1576 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1577 (__v16sf) __W,
1578 (__mmask16) __U);
1579 }
1580
1581 extern __inline __m512
1582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1583 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1584 {
1585 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1586 (__v16sf)
1587 _mm512_setzero_ps (),
1588 (__mmask16) __U);
1589 }
1590
1591 extern __inline __m128d
1592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1593 _mm_rcp14_sd (__m128d __A, __m128d __B)
1594 {
1595 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1596 (__v2df) __A);
1597 }
1598
1599 extern __inline __m128
1600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601 _mm_rcp14_ss (__m128 __A, __m128 __B)
1602 {
1603 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1604 (__v4sf) __A);
1605 }
1606
1607 extern __inline __m512d
1608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1609 _mm512_rsqrt14_pd (__m512d __A)
1610 {
1611 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1612 (__v8df)
1613 _mm512_undefined_pd (),
1614 (__mmask8) -1);
1615 }
1616
1617 extern __inline __m512d
1618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1620 {
1621 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1622 (__v8df) __W,
1623 (__mmask8) __U);
1624 }
1625
1626 extern __inline __m512d
1627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1628 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1629 {
1630 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1631 (__v8df)
1632 _mm512_setzero_pd (),
1633 (__mmask8) __U);
1634 }
1635
1636 extern __inline __m512
1637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1638 _mm512_rsqrt14_ps (__m512 __A)
1639 {
1640 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1641 (__v16sf)
1642 _mm512_undefined_ps (),
1643 (__mmask16) -1);
1644 }
1645
1646 extern __inline __m512
1647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1649 {
1650 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1651 (__v16sf) __W,
1652 (__mmask16) __U);
1653 }
1654
1655 extern __inline __m512
1656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1657 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1658 {
1659 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1660 (__v16sf)
1661 _mm512_setzero_ps (),
1662 (__mmask16) __U);
1663 }
1664
1665 extern __inline __m128d
1666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1667 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1668 {
1669 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1670 (__v2df) __A);
1671 }
1672
1673 extern __inline __m128
1674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1676 {
1677 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1678 (__v4sf) __A);
1679 }
1680
1681 #ifdef __OPTIMIZE__
1682 extern __inline __m512d
1683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1684 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1685 {
1686 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1687 (__v8df)
1688 _mm512_undefined_pd (),
1689 (__mmask8) -1, __R);
1690 }
1691
1692 extern __inline __m512d
1693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1694 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1695 const int __R)
1696 {
1697 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1698 (__v8df) __W,
1699 (__mmask8) __U, __R);
1700 }
1701
1702 extern __inline __m512d
1703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1704 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1705 {
1706 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1707 (__v8df)
1708 _mm512_setzero_pd (),
1709 (__mmask8) __U, __R);
1710 }
1711
1712 extern __inline __m512
1713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1714 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1715 {
1716 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1717 (__v16sf)
1718 _mm512_undefined_ps (),
1719 (__mmask16) -1, __R);
1720 }
1721
1722 extern __inline __m512
1723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1724 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1725 {
1726 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1727 (__v16sf) __W,
1728 (__mmask16) __U, __R);
1729 }
1730
1731 extern __inline __m512
1732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1733 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1734 {
1735 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1736 (__v16sf)
1737 _mm512_setzero_ps (),
1738 (__mmask16) __U, __R);
1739 }
1740
1741 extern __inline __m128d
1742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1744 {
1745 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1746 (__v2df) __A,
1747 __R);
1748 }
1749
1750 extern __inline __m128
1751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1753 {
1754 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1755 (__v4sf) __A,
1756 __R);
1757 }
1758 #else
/* Macro forms of the sqrt-with-rounding intrinsics, selected when
   __OPTIMIZE__ is not defined.  They are kept in step with the
   inline-function forms used under __OPTIMIZE__:

   - The scalar variants hand the builtin B first and A second, exactly as
     the inline functions pass (__B, __A, __R).  Passing (A, B, C) would
     make unoptimized builds use swapped operands.
   - Every expansion is enclosed in its own parentheses so the result cast
     applies to the whole builtin call when the macro is used inside a
     larger expression.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((A), \
     (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((A), (W), (U), (C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((A), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((A), \
     (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((A), (W), (U), (C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((A), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))

/* Operand order is (B, A, C), matching the inline functions.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round ((B), (A), (C)))

#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round ((B), (A), (C)))
1782 #endif
1783
1784 extern __inline __m512i
1785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1786 _mm512_cvtepi8_epi32 (__m128i __A)
1787 {
1788 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1789 (__v16si)
1790 _mm512_undefined_epi32 (),
1791 (__mmask16) -1);
1792 }
1793
1794 extern __inline __m512i
1795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1797 {
1798 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1799 (__v16si) __W,
1800 (__mmask16) __U);
1801 }
1802
1803 extern __inline __m512i
1804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1806 {
1807 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1808 (__v16si)
1809 _mm512_setzero_si512 (),
1810 (__mmask16) __U);
1811 }
1812
1813 extern __inline __m512i
1814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1815 _mm512_cvtepi8_epi64 (__m128i __A)
1816 {
1817 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1818 (__v8di)
1819 _mm512_undefined_epi32 (),
1820 (__mmask8) -1);
1821 }
1822
1823 extern __inline __m512i
1824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1826 {
1827 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1828 (__v8di) __W,
1829 (__mmask8) __U);
1830 }
1831
1832 extern __inline __m512i
1833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1834 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1835 {
1836 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1837 (__v8di)
1838 _mm512_setzero_si512 (),
1839 (__mmask8) __U);
1840 }
1841
1842 extern __inline __m512i
1843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1844 _mm512_cvtepi16_epi32 (__m256i __A)
1845 {
1846 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1847 (__v16si)
1848 _mm512_undefined_epi32 (),
1849 (__mmask16) -1);
1850 }
1851
1852 extern __inline __m512i
1853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1855 {
1856 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1857 (__v16si) __W,
1858 (__mmask16) __U);
1859 }
1860
1861 extern __inline __m512i
1862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1863 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1864 {
1865 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1866 (__v16si)
1867 _mm512_setzero_si512 (),
1868 (__mmask16) __U);
1869 }
1870
1871 extern __inline __m512i
1872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1873 _mm512_cvtepi16_epi64 (__m128i __A)
1874 {
1875 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1876 (__v8di)
1877 _mm512_undefined_epi32 (),
1878 (__mmask8) -1);
1879 }
1880
1881 extern __inline __m512i
1882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1883 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1884 {
1885 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1886 (__v8di) __W,
1887 (__mmask8) __U);
1888 }
1889
1890 extern __inline __m512i
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1893 {
1894 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1895 (__v8di)
1896 _mm512_setzero_si512 (),
1897 (__mmask8) __U);
1898 }
1899
1900 extern __inline __m512i
1901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1902 _mm512_cvtepi32_epi64 (__m256i __X)
1903 {
1904 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1905 (__v8di)
1906 _mm512_undefined_epi32 (),
1907 (__mmask8) -1);
1908 }
1909
1910 extern __inline __m512i
1911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1912 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1913 {
1914 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1915 (__v8di) __W,
1916 (__mmask8) __U);
1917 }
1918
1919 extern __inline __m512i
1920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1921 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1922 {
1923 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1924 (__v8di)
1925 _mm512_setzero_si512 (),
1926 (__mmask8) __U);
1927 }
1928
1929 extern __inline __m512i
1930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931 _mm512_cvtepu8_epi32 (__m128i __A)
1932 {
1933 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1934 (__v16si)
1935 _mm512_undefined_epi32 (),
1936 (__mmask16) -1);
1937 }
1938
1939 extern __inline __m512i
1940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1942 {
1943 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1944 (__v16si) __W,
1945 (__mmask16) __U);
1946 }
1947
1948 extern __inline __m512i
1949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1950 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1951 {
1952 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1953 (__v16si)
1954 _mm512_setzero_si512 (),
1955 (__mmask16) __U);
1956 }
1957
1958 extern __inline __m512i
1959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960 _mm512_cvtepu8_epi64 (__m128i __A)
1961 {
1962 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1963 (__v8di)
1964 _mm512_undefined_epi32 (),
1965 (__mmask8) -1);
1966 }
1967
1968 extern __inline __m512i
1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1971 {
1972 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1973 (__v8di) __W,
1974 (__mmask8) __U);
1975 }
1976
1977 extern __inline __m512i
1978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1979 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1980 {
1981 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1982 (__v8di)
1983 _mm512_setzero_si512 (),
1984 (__mmask8) __U);
1985 }
1986
1987 extern __inline __m512i
1988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1989 _mm512_cvtepu16_epi32 (__m256i __A)
1990 {
1991 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1992 (__v16si)
1993 _mm512_undefined_epi32 (),
1994 (__mmask16) -1);
1995 }
1996
1997 extern __inline __m512i
1998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2000 {
2001 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2002 (__v16si) __W,
2003 (__mmask16) __U);
2004 }
2005
2006 extern __inline __m512i
2007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2009 {
2010 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2011 (__v16si)
2012 _mm512_setzero_si512 (),
2013 (__mmask16) __U);
2014 }
2015
2016 extern __inline __m512i
2017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2018 _mm512_cvtepu16_epi64 (__m128i __A)
2019 {
2020 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2021 (__v8di)
2022 _mm512_undefined_epi32 (),
2023 (__mmask8) -1);
2024 }
2025
2026 extern __inline __m512i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2029 {
2030 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2031 (__v8di) __W,
2032 (__mmask8) __U);
2033 }
2034
2035 extern __inline __m512i
2036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2038 {
2039 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2040 (__v8di)
2041 _mm512_setzero_si512 (),
2042 (__mmask8) __U);
2043 }
2044
2045 extern __inline __m512i
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm512_cvtepu32_epi64 (__m256i __X)
2048 {
2049 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2050 (__v8di)
2051 _mm512_undefined_epi32 (),
2052 (__mmask8) -1);
2053 }
2054
2055 extern __inline __m512i
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2058 {
2059 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2060 (__v8di) __W,
2061 (__mmask8) __U);
2062 }
2063
2064 extern __inline __m512i
2065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2066 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2067 {
2068 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2069 (__v8di)
2070 _mm512_setzero_si512 (),
2071 (__mmask8) __U);
2072 }
2073
2074 #ifdef __OPTIMIZE__
2075 extern __inline __m512d
2076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2077 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2078 {
2079 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2080 (__v8df) __B,
2081 (__v8df)
2082 _mm512_undefined_pd (),
2083 (__mmask8) -1, __R);
2084 }
2085
2086 extern __inline __m512d
2087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2088 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2089 __m512d __B, const int __R)
2090 {
2091 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2092 (__v8df) __B,
2093 (__v8df) __W,
2094 (__mmask8) __U, __R);
2095 }
2096
2097 extern __inline __m512d
2098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2099 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2100 const int __R)
2101 {
2102 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2103 (__v8df) __B,
2104 (__v8df)
2105 _mm512_setzero_pd (),
2106 (__mmask8) __U, __R);
2107 }
2108
2109 extern __inline __m512
2110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2111 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2112 {
2113 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2114 (__v16sf) __B,
2115 (__v16sf)
2116 _mm512_undefined_ps (),
2117 (__mmask16) -1, __R);
2118 }
2119
2120 extern __inline __m512
2121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2122 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2123 __m512 __B, const int __R)
2124 {
2125 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2126 (__v16sf) __B,
2127 (__v16sf) __W,
2128 (__mmask16) __U, __R);
2129 }
2130
2131 extern __inline __m512
2132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2133 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2134 {
2135 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2136 (__v16sf) __B,
2137 (__v16sf)
2138 _mm512_setzero_ps (),
2139 (__mmask16) __U, __R);
2140 }
2141
2142 extern __inline __m512d
2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2145 {
2146 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2147 (__v8df) __B,
2148 (__v8df)
2149 _mm512_undefined_pd (),
2150 (__mmask8) -1, __R);
2151 }
2152
2153 extern __inline __m512d
2154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2155 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2156 __m512d __B, const int __R)
2157 {
2158 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2159 (__v8df) __B,
2160 (__v8df) __W,
2161 (__mmask8) __U, __R);
2162 }
2163
2164 extern __inline __m512d
2165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2166 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2167 const int __R)
2168 {
2169 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2170 (__v8df) __B,
2171 (__v8df)
2172 _mm512_setzero_pd (),
2173 (__mmask8) __U, __R);
2174 }
2175
2176 extern __inline __m512
2177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2178 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2179 {
2180 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2181 (__v16sf) __B,
2182 (__v16sf)
2183 _mm512_undefined_ps (),
2184 (__mmask16) -1, __R);
2185 }
2186
2187 extern __inline __m512
2188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2190 __m512 __B, const int __R)
2191 {
2192 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2193 (__v16sf) __B,
2194 (__v16sf) __W,
2195 (__mmask16) __U, __R);
2196 }
2197
2198 extern __inline __m512
2199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2200 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2201 {
2202 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2203 (__v16sf) __B,
2204 (__v16sf)
2205 _mm512_setzero_ps (),
2206 (__mmask16) __U, __R);
2207 }
2208 #else
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
    (__v8df) _mm512_undefined_pd (), -1, (C)))
2211
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), (W), (U), (C)))
2214
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
    (__v8df) _mm512_setzero_pd (), (U), (C)))
2217
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
    (__v16sf) _mm512_undefined_ps (), -1, (C)))
2220
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), (W), (U), (C)))
2223
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
    (__v16sf) _mm512_setzero_ps (), (U), (C)))
2226
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
    (__v8df) _mm512_undefined_pd (), -1, (C)))
2229
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), (W), (U), (C)))
2232
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
    (__v8df) _mm512_setzero_pd (), (U), (C)))
2235
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
    (__v16sf) _mm512_undefined_ps (), -1, (C)))
2238
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), (W), (U), (C)))
2241
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
    (__v16sf) _mm512_setzero_ps (), (U), (C)))
2244 #endif
2245
2246 #ifdef __OPTIMIZE__
2247 extern __inline __m512d
2248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2249 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2250 {
2251 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2252 (__v8df) __B,
2253 (__v8df)
2254 _mm512_undefined_pd (),
2255 (__mmask8) -1, __R);
2256 }
2257
2258 extern __inline __m512d
2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2261 __m512d __B, const int __R)
2262 {
2263 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2264 (__v8df) __B,
2265 (__v8df) __W,
2266 (__mmask8) __U, __R);
2267 }
2268
2269 extern __inline __m512d
2270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2271 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2272 const int __R)
2273 {
2274 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2275 (__v8df) __B,
2276 (__v8df)
2277 _mm512_setzero_pd (),
2278 (__mmask8) __U, __R);
2279 }
2280
2281 extern __inline __m512
2282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2283 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2284 {
2285 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2286 (__v16sf) __B,
2287 (__v16sf)
2288 _mm512_undefined_ps (),
2289 (__mmask16) -1, __R);
2290 }
2291
2292 extern __inline __m512
2293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2294 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2295 __m512 __B, const int __R)
2296 {
2297 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2298 (__v16sf) __B,
2299 (__v16sf) __W,
2300 (__mmask16) __U, __R);
2301 }
2302
2303 extern __inline __m512
2304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2305 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2306 {
2307 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2308 (__v16sf) __B,
2309 (__v16sf)
2310 _mm512_setzero_ps (),
2311 (__mmask16) __U, __R);
2312 }
2313
2314 extern __inline __m512d
2315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2316 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2317 {
2318 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2319 (__v8df) __V,
2320 (__v8df)
2321 _mm512_undefined_pd (),
2322 (__mmask8) -1, __R);
2323 }
2324
2325 extern __inline __m512d
2326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2327 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2328 __m512d __V, const int __R)
2329 {
2330 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2331 (__v8df) __V,
2332 (__v8df) __W,
2333 (__mmask8) __U, __R);
2334 }
2335
2336 extern __inline __m512d
2337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2338 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2339 const int __R)
2340 {
2341 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2342 (__v8df) __V,
2343 (__v8df)
2344 _mm512_setzero_pd (),
2345 (__mmask8) __U, __R);
2346 }
2347
2348 extern __inline __m512
2349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2350 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2351 {
2352 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2353 (__v16sf) __B,
2354 (__v16sf)
2355 _mm512_undefined_ps (),
2356 (__mmask16) -1, __R);
2357 }
2358
2359 extern __inline __m512
2360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2361 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2362 __m512 __B, const int __R)
2363 {
2364 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2365 (__v16sf) __B,
2366 (__v16sf) __W,
2367 (__mmask16) __U, __R);
2368 }
2369
2370 extern __inline __m512
2371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2372 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2373 {
2374 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2375 (__v16sf) __B,
2376 (__v16sf)
2377 _mm512_setzero_ps (),
2378 (__mmask16) __U, __R);
2379 }
2380
2381 extern __inline __m128d
2382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2383 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2384 {
2385 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2386 (__v2df) __B,
2387 __R);
2388 }
2389
2390 extern __inline __m128
2391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2393 {
2394 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2395 (__v4sf) __B,
2396 __R);
2397 }
2398
2399 extern __inline __m128d
2400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2402 {
2403 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2404 (__v2df) __B,
2405 __R);
2406 }
2407
2408 extern __inline __m128
2409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2410 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2411 {
2412 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2413 (__v4sf) __B,
2414 __R);
2415 }
2416
2417 #else
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
    (__v8df) _mm512_undefined_pd (), -1, (C)))
2420
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), (W), (U), (C)))
2423
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
    (__v8df) _mm512_setzero_pd (), (U), (C)))
2426
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
    (__v16sf) _mm512_undefined_ps (), -1, (C)))
2429
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), (W), (U), (C)))
2432
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
    (__v16sf) _mm512_setzero_ps (), (U), (C)))
2435
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
    (__v8df) _mm512_undefined_pd (), -1, (C)))
2438
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), (W), (U), (C)))
2441
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_div_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
    (__v8df) _mm512_setzero_pd (), (U), (C)))
2444
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_div_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
    (__v16sf) _mm512_undefined_ps (), -1, (C)))
2447
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_div_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), (W), (U), (C)))
2450
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_div_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
    (__v16sf) _mm512_setzero_ps (), (U), (C)))
2453
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm_mul_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_round ((A), (B), (C)))
2456
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm_mul_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_mulss_round ((A), (B), (C)))
2459
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm_div_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_divsd_round ((A), (B), (C)))
2462
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm_div_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_divss_round ((A), (B), (C)))
2465 #endif
2466
2467 #ifdef __OPTIMIZE__
2468 extern __inline __m512d
2469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2471 {
2472 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2473 (__v8df) __B,
2474 (__v8df)
2475 _mm512_undefined_pd (),
2476 (__mmask8) -1, __R);
2477 }
2478
2479 extern __inline __m512d
2480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2481 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2482 __m512d __B, const int __R)
2483 {
2484 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2485 (__v8df) __B,
2486 (__v8df) __W,
2487 (__mmask8) __U, __R);
2488 }
2489
2490 extern __inline __m512d
2491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2493 const int __R)
2494 {
2495 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2496 (__v8df) __B,
2497 (__v8df)
2498 _mm512_setzero_pd (),
2499 (__mmask8) __U, __R);
2500 }
2501
2502 extern __inline __m512
2503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2504 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2505 {
2506 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2507 (__v16sf) __B,
2508 (__v16sf)
2509 _mm512_undefined_ps (),
2510 (__mmask16) -1, __R);
2511 }
2512
2513 extern __inline __m512
2514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2515 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2516 __m512 __B, const int __R)
2517 {
2518 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2519 (__v16sf) __B,
2520 (__v16sf) __W,
2521 (__mmask16) __U, __R);
2522 }
2523
2524 extern __inline __m512
2525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2526 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2527 {
2528 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2529 (__v16sf) __B,
2530 (__v16sf)
2531 _mm512_setzero_ps (),
2532 (__mmask16) __U, __R);
2533 }
2534
2535 extern __inline __m512d
2536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2537 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2538 {
2539 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2540 (__v8df) __B,
2541 (__v8df)
2542 _mm512_undefined_pd (),
2543 (__mmask8) -1, __R);
2544 }
2545
2546 extern __inline __m512d
2547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2548 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2549 __m512d __B, const int __R)
2550 {
2551 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2552 (__v8df) __B,
2553 (__v8df) __W,
2554 (__mmask8) __U, __R);
2555 }
2556
2557 extern __inline __m512d
2558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2559 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2560 const int __R)
2561 {
2562 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2563 (__v8df) __B,
2564 (__v8df)
2565 _mm512_setzero_pd (),
2566 (__mmask8) __U, __R);
2567 }
2568
2569 extern __inline __m512
2570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2572 {
2573 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2574 (__v16sf) __B,
2575 (__v16sf)
2576 _mm512_undefined_ps (),
2577 (__mmask16) -1, __R);
2578 }
2579
2580 extern __inline __m512
2581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2582 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2583 __m512 __B, const int __R)
2584 {
2585 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2586 (__v16sf) __B,
2587 (__v16sf) __W,
2588 (__mmask16) __U, __R);
2589 }
2590
2591 extern __inline __m512
2592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2593 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2594 {
2595 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2596 (__v16sf) __B,
2597 (__v16sf)
2598 _mm512_setzero_ps (),
2599 (__mmask16) __U, __R);
2600 }
2601 #else
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
    (__v8df) _mm512_undefined_pd (), -1, (R)))
2604
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (W), (U), (R)))
2607
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
    (__v8df) _mm512_setzero_pd (), (U), (R)))
2610
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.
   The placeholder operand comes from _mm512_undefined_ps (), the float
   variant, matching the __v16sf cast applied to it.  */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
    (__v16sf) _mm512_undefined_ps (), -1, (R)))
2613
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (W), (U), (R)))
2616
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
    (__v16sf) _mm512_setzero_ps (), (U), (R)))
2619
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
    (__v8df) _mm512_undefined_pd (), -1, (R)))
2622
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (W), (U), (R)))
2625
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
    (__v8df) _mm512_setzero_pd (), (U), (R)))
2628
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
    (__v16sf) _mm512_undefined_ps (), -1, (R)))
2631
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), (W), (U), (R)))
2634
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
    (__v16sf) _mm512_setzero_ps (), (U), (R)))
2637 #endif
2638
2639 #ifdef __OPTIMIZE__
2640 extern __inline __m512d
2641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2643 {
2644 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2645 (__v8df) __B,
2646 (__v8df)
2647 _mm512_undefined_pd (),
2648 (__mmask8) -1, __R);
2649 }
2650
2651 extern __inline __m512d
2652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2653 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2654 __m512d __B, const int __R)
2655 {
2656 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2657 (__v8df) __B,
2658 (__v8df) __W,
2659 (__mmask8) __U, __R);
2660 }
2661
2662 extern __inline __m512d
2663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2664 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2665 const int __R)
2666 {
2667 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2668 (__v8df) __B,
2669 (__v8df)
2670 _mm512_setzero_pd (),
2671 (__mmask8) __U, __R);
2672 }
2673
2674 extern __inline __m512
2675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2676 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2677 {
2678 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2679 (__v16sf) __B,
2680 (__v16sf)
2681 _mm512_undefined_ps (),
2682 (__mmask16) -1, __R);
2683 }
2684
2685 extern __inline __m512
2686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2687 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2688 __m512 __B, const int __R)
2689 {
2690 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2691 (__v16sf) __B,
2692 (__v16sf) __W,
2693 (__mmask16) __U, __R);
2694 }
2695
2696 extern __inline __m512
2697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2698 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2699 const int __R)
2700 {
2701 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2702 (__v16sf) __B,
2703 (__v16sf)
2704 _mm512_setzero_ps (),
2705 (__mmask16) __U, __R);
2706 }
2707
2708 extern __inline __m128d
2709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2711 {
2712 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2713 (__v2df) __B,
2714 __R);
2715 }
2716
2717 extern __inline __m128
2718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2719 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2720 {
2721 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2722 (__v4sf) __B,
2723 __R);
2724 }
2725 #else
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_scalef_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
    (__v8df) _mm512_undefined_pd (), -1, (C)))
2728
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C)))
2731
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
    (__v8df) _mm512_setzero_pd (), (U), (C)))
2734
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_scalef_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
    (__v16sf) _mm512_undefined_ps (), -1, (C)))
2737
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C)))
2740
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
    (__v16sf) _mm512_setzero_ps (), (U), (C)))
2743
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm_scalef_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_scalefsd_round ((A), (B), (C)))
2746
/* Macro form for non-__OPTIMIZE__ builds; fully parenthesized expansion.  */
#define _mm_scalef_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_scalefss_round ((A), (B), (C)))
2749 #endif
2750
2751 #ifdef __OPTIMIZE__
2752 extern __inline __m512d
2753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2754 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2755 {
2756 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2757 (__v8df) __B,
2758 (__v8df) __C,
2759 (__mmask8) -1, __R);
2760 }
2761
2762 extern __inline __m512d
2763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2764 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2765 __m512d __C, const int __R)
2766 {
2767 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2768 (__v8df) __B,
2769 (__v8df) __C,
2770 (__mmask8) __U, __R);
2771 }
2772
2773 extern __inline __m512d
2774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2775 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2776 __mmask8 __U, const int __R)
2777 {
2778 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2779 (__v8df) __B,
2780 (__v8df) __C,
2781 (__mmask8) __U, __R);
2782 }
2783
2784 extern __inline __m512d
2785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2787 __m512d __C, const int __R)
2788 {
2789 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2790 (__v8df) __B,
2791 (__v8df) __C,
2792 (__mmask8) __U, __R);
2793 }
2794
2795 extern __inline __m512
2796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2798 {
2799 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2800 (__v16sf) __B,
2801 (__v16sf) __C,
2802 (__mmask16) -1, __R);
2803 }
2804
2805 extern __inline __m512
2806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2808 __m512 __C, const int __R)
2809 {
2810 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2811 (__v16sf) __B,
2812 (__v16sf) __C,
2813 (__mmask16) __U, __R);
2814 }
2815
2816 extern __inline __m512
2817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2818 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2819 __mmask16 __U, const int __R)
2820 {
2821 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2822 (__v16sf) __B,
2823 (__v16sf) __C,
2824 (__mmask16) __U, __R);
2825 }
2826
2827 extern __inline __m512
2828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2829 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2830 __m512 __C, const int __R)
2831 {
2832 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2833 (__v16sf) __B,
2834 (__v16sf) __C,
2835 (__mmask16) __U, __R);
2836 }
2837
2838 extern __inline __m512d
2839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2840 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2841 {
2842 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2843 (__v8df) __B,
2844 -(__v8df) __C,
2845 (__mmask8) -1, __R);
2846 }
2847
2848 extern __inline __m512d
2849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2850 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2851 __m512d __C, const int __R)
2852 {
2853 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2854 (__v8df) __B,
2855 -(__v8df) __C,
2856 (__mmask8) __U, __R);
2857 }
2858
2859 extern __inline __m512d
2860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2861 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2862 __mmask8 __U, const int __R)
2863 {
2864 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2865 (__v8df) __B,
2866 (__v8df) __C,
2867 (__mmask8) __U, __R);
2868 }
2869
2870 extern __inline __m512d
2871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2872 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2873 __m512d __C, const int __R)
2874 {
2875 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2876 (__v8df) __B,
2877 -(__v8df) __C,
2878 (__mmask8) __U, __R);
2879 }
2880
2881 extern __inline __m512
2882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2884 {
2885 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2886 (__v16sf) __B,
2887 -(__v16sf) __C,
2888 (__mmask16) -1, __R);
2889 }
2890
2891 extern __inline __m512
2892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2893 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2894 __m512 __C, const int __R)
2895 {
2896 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2897 (__v16sf) __B,
2898 -(__v16sf) __C,
2899 (__mmask16) __U, __R);
2900 }
2901
2902 extern __inline __m512
2903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2904 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2905 __mmask16 __U, const int __R)
2906 {
2907 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2908 (__v16sf) __B,
2909 (__v16sf) __C,
2910 (__mmask16) __U, __R);
2911 }
2912
2913 extern __inline __m512
2914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2915 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2916 __m512 __C, const int __R)
2917 {
2918 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2919 (__v16sf) __B,
2920 -(__v16sf) __C,
2921 (__mmask16) __U, __R);
2922 }
2923
2924 extern __inline __m512d
2925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2926 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2927 {
2928 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2929 (__v8df) __B,
2930 (__v8df) __C,
2931 (__mmask8) -1, __R);
2932 }
2933
2934 extern __inline __m512d
2935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2936 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2937 __m512d __C, const int __R)
2938 {
2939 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2940 (__v8df) __B,
2941 (__v8df) __C,
2942 (__mmask8) __U, __R);
2943 }
2944
2945 extern __inline __m512d
2946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2947 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2948 __mmask8 __U, const int __R)
2949 {
2950 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2951 (__v8df) __B,
2952 (__v8df) __C,
2953 (__mmask8) __U, __R);
2954 }
2955
2956 extern __inline __m512d
2957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2958 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2959 __m512d __C, const int __R)
2960 {
2961 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2962 (__v8df) __B,
2963 (__v8df) __C,
2964 (__mmask8) __U, __R);
2965 }
2966
2967 extern __inline __m512
2968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2969 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2970 {
2971 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2972 (__v16sf) __B,
2973 (__v16sf) __C,
2974 (__mmask16) -1, __R);
2975 }
2976
2977 extern __inline __m512
2978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2979 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2980 __m512 __C, const int __R)
2981 {
2982 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2983 (__v16sf) __B,
2984 (__v16sf) __C,
2985 (__mmask16) __U, __R);
2986 }
2987
2988 extern __inline __m512
2989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2990 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2991 __mmask16 __U, const int __R)
2992 {
2993 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2994 (__v16sf) __B,
2995 (__v16sf) __C,
2996 (__mmask16) __U, __R);
2997 }
2998
2999 extern __inline __m512
3000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3001 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3002 __m512 __C, const int __R)
3003 {
3004 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3005 (__v16sf) __B,
3006 (__v16sf) __C,
3007 (__mmask16) __U, __R);
3008 }
3009
3010 extern __inline __m512d
3011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3012 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3013 {
3014 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3015 (__v8df) __B,
3016 -(__v8df) __C,
3017 (__mmask8) -1, __R);
3018 }
3019
3020 extern __inline __m512d
3021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3022 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3023 __m512d __C, const int __R)
3024 {
3025 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3026 (__v8df) __B,
3027 -(__v8df) __C,
3028 (__mmask8) __U, __R);
3029 }
3030
3031 extern __inline __m512d
3032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3033 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3034 __mmask8 __U, const int __R)
3035 {
3036 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3037 (__v8df) __B,
3038 (__v8df) __C,
3039 (__mmask8) __U, __R);
3040 }
3041
3042 extern __inline __m512d
3043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3045 __m512d __C, const int __R)
3046 {
3047 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3048 (__v8df) __B,
3049 -(__v8df) __C,
3050 (__mmask8) __U, __R);
3051 }
3052
3053 extern __inline __m512
3054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3055 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3056 {
3057 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3058 (__v16sf) __B,
3059 -(__v16sf) __C,
3060 (__mmask16) -1, __R);
3061 }
3062
3063 extern __inline __m512
3064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3065 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3066 __m512 __C, const int __R)
3067 {
3068 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3069 (__v16sf) __B,
3070 -(__v16sf) __C,
3071 (__mmask16) __U, __R);
3072 }
3073
3074 extern __inline __m512
3075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3076 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3077 __mmask16 __U, const int __R)
3078 {
3079 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3080 (__v16sf) __B,
3081 (__v16sf) __C,
3082 (__mmask16) __U, __R);
3083 }
3084
3085 extern __inline __m512
3086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3087 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3088 __m512 __C, const int __R)
3089 {
3090 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3091 (__v16sf) __B,
3092 -(__v16sf) __C,
3093 (__mmask16) __U, __R);
3094 }
3095
3096 extern __inline __m512d
3097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3098 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3099 {
3100 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3101 (__v8df) __B,
3102 (__v8df) __C,
3103 (__mmask8) -1, __R);
3104 }
3105
3106 extern __inline __m512d
3107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3108 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3109 __m512d __C, const int __R)
3110 {
3111 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3112 (__v8df) __B,
3113 (__v8df) __C,
3114 (__mmask8) __U, __R);
3115 }
3116
3117 extern __inline __m512d
3118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3119 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3120 __mmask8 __U, const int __R)
3121 {
3122 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3123 (__v8df) __B,
3124 (__v8df) __C,
3125 (__mmask8) __U, __R);
3126 }
3127
3128 extern __inline __m512d
3129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3130 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3131 __m512d __C, const int __R)
3132 {
3133 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3134 (__v8df) __B,
3135 (__v8df) __C,
3136 (__mmask8) __U, __R);
3137 }
3138
3139 extern __inline __m512
3140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3141 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3142 {
3143 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3144 (__v16sf) __B,
3145 (__v16sf) __C,
3146 (__mmask16) -1, __R);
3147 }
3148
3149 extern __inline __m512
3150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3151 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3152 __m512 __C, const int __R)
3153 {
3154 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3155 (__v16sf) __B,
3156 (__v16sf) __C,
3157 (__mmask16) __U, __R);
3158 }
3159
3160 extern __inline __m512
3161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3162 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3163 __mmask16 __U, const int __R)
3164 {
3165 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3166 (__v16sf) __B,
3167 (__v16sf) __C,
3168 (__mmask16) __U, __R);
3169 }
3170
3171 extern __inline __m512
3172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3173 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3174 __m512 __C, const int __R)
3175 {
3176 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3177 (__v16sf) __B,
3178 (__v16sf) __C,
3179 (__mmask16) __U, __R);
3180 }
3181
3182 extern __inline __m512d
3183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3184 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3185 {
3186 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3187 (__v8df) __B,
3188 -(__v8df) __C,
3189 (__mmask8) -1, __R);
3190 }
3191
3192 extern __inline __m512d
3193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3194 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3195 __m512d __C, const int __R)
3196 {
3197 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3198 (__v8df) __B,
3199 (__v8df) __C,
3200 (__mmask8) __U, __R);
3201 }
3202
3203 extern __inline __m512d
3204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3205 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3206 __mmask8 __U, const int __R)
3207 {
3208 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3209 (__v8df) __B,
3210 (__v8df) __C,
3211 (__mmask8) __U, __R);
3212 }
3213
3214 extern __inline __m512d
3215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3216 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3217 __m512d __C, const int __R)
3218 {
3219 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3220 (__v8df) __B,
3221 -(__v8df) __C,
3222 (__mmask8) __U, __R);
3223 }
3224
3225 extern __inline __m512
3226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3227 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3228 {
3229 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3230 (__v16sf) __B,
3231 -(__v16sf) __C,
3232 (__mmask16) -1, __R);
3233 }
3234
3235 extern __inline __m512
3236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3237 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3238 __m512 __C, const int __R)
3239 {
3240 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3241 (__v16sf) __B,
3242 (__v16sf) __C,
3243 (__mmask16) __U, __R);
3244 }
3245
3246 extern __inline __m512
3247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3248 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3249 __mmask16 __U, const int __R)
3250 {
3251 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3252 (__v16sf) __B,
3253 (__v16sf) __C,
3254 (__mmask16) __U, __R);
3255 }
3256
3257 extern __inline __m512
3258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3259 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3260 __m512 __C, const int __R)
3261 {
3262 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3263 (__v16sf) __B,
3264 -(__v16sf) __C,
3265 (__mmask16) __U, __R);
3266 }
3267 #else
/* Macro forms of the four _mm512_*fmadd_round_pd intrinsics.  Each expands
   to a __m512d cast of the matching vfmaddpd512 builtin call with the
   operands reordered into (A, B, C, mask, R); the unmasked form supplies -1
   as the mask.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), -1, (R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), (U), (R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((A), (B), (C), (U), (R))
3279
/* Macro forms of the four _mm512_*fmadd_round_ps intrinsics.  Each expands
   to a __m512 cast of the matching vfmaddps512 builtin call with the
   operands reordered into (A, B, C, mask, R); the unmasked form supplies -1
   as the mask.  */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), (C), -1, (R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), (C), (U), (R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((A), (B), (C), (U), (R))
3291
/* Macro forms of the four _mm512_*fmsub_round_pd intrinsics.  The unmasked,
   mask and maskz forms reuse the fmadd builtins with C negated; the mask3
   form calls the dedicated vfmsubpd512_mask3 builtin with C as given.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), -(C), -1, (R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), -(C), (U), (R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((A), (B), -(C), (U), (R))
3303
/* Macro forms of the four _mm512_*fmsub_round_ps intrinsics.  The unmasked,
   mask and maskz forms reuse the fmadd builtins with C negated; the mask3
   form calls the dedicated vfmsubps512_mask3 builtin with C as given.  */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), -(C), -1, (R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), -(C), (U), (R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubps512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((A), (B), -(C), (U), (R))
3315
/* Macro form of the unmasked _mm512_fmaddsub_round_pd: expands to the
   vfmaddsubpd512_mask builtin with -1 as the mask operand.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), (C), -1, (R))
3318
/* Macro form of _mm512_mask_fmaddsub_round_pd.  It must expand to the
   fmaddsub builtin (vfmaddsubpd512_mask), exactly as the inline-function
   definition of this intrinsic does; expanding to the plain fmadd builtin
   would make the result depend on whether the macro or the function
   definition is the one compiled.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), (C), (U), (R))
3321
/* Macro forms of the mask3 and maskz _mm512_*fmaddsub_round_pd intrinsics:
   each forwards (A, B, C, U, R) to the vfmaddsubpd512 builtin of the same
   masking flavour.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((A), (B), (C), (U), (R))
3327
/* Macro forms of the four _mm512_*fmaddsub_round_ps intrinsics.  Each
   expands to a __m512 cast of the matching vfmaddsubps512 builtin call with
   the operands reordered into (A, B, C, mask, R); the unmasked form supplies
   -1 as the mask.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), (C), -1, (R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), (C), (U), (R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((A), (B), (C), (U), (R))
3339
/* Macro forms of the four _mm512_*fmsubadd_round_pd intrinsics.  The
   unmasked, mask and maskz forms reuse the fmaddsub builtins with C negated;
   the mask3 form calls the dedicated vfmsubaddpd512_mask3 builtin with C as
   given.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), -(C), -1, (R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), -(C), (U), (R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((A), (B), -(C), (U), (R))
3351
/* Macro forms of the four _mm512_*fmsubadd_round_ps intrinsics.  The
   unmasked, mask and maskz forms reuse the fmaddsub builtins with C negated;
   the mask3 form calls the dedicated vfmsubaddps512_mask3 builtin with C as
   given.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), -(C), -1, (R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), -(C), (U), (R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((A), (B), -(C), (U), (R))
3363
/* Macro form of the unmasked _mm512_fnmadd_round_pd: reuses the fmadd
   builtin with A negated and -1 as the mask operand.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), (B), (C), -1, (R))
3366
/* Macro form of _mm512_mask_fnmadd_round_pd.  The vfnmaddpd512_mask builtin
   is the dedicated fnmadd builtin, so A is passed as given, exactly as the
   inline-function definition of this intrinsic does.  Negating A here as
   well would disagree with that definition and apply the negation twice.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmaddpd512_mask ((A), (B), (C), (U), (R))
3369
/* Macro forms of the mask3 and maskz _mm512_*fnmadd_round_pd intrinsics:
   both reuse the fmadd builtin of the same masking flavour with A
   negated.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), (B), (C), (U), (R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), (B), (C), (U), (R))
3375
/* Macro form of the unmasked _mm512_fnmadd_round_ps: reuses the fmadd
   builtin with A negated and -1 as the mask operand.  */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), (B), (C), -1, (R))
3378
/* Macro form of _mm512_mask_fnmadd_round_ps.  The vfnmaddps512_mask builtin
   is the dedicated fnmadd builtin, so A is passed as given, exactly as the
   inline-function definition of this intrinsic does.  Negating A here as
   well would disagree with that definition and apply the negation twice.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmaddps512_mask ((A), (B), (C), (U), (R))
3381
/* Macro forms of the mask3 and maskz _mm512_*fnmadd_round_ps intrinsics:
   both reuse the fmadd builtin of the same masking flavour with A
   negated.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), (B), (C), (U), (R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), (B), (C), (U), (R))
3387
/* Macro forms of the four _mm512_*fnmsub_round_pd intrinsics.  The unmasked
   and maskz forms reuse the fmadd builtins with both A and C negated; the
   mask and mask3 forms call the dedicated vfnmsubpd512 builtins with the
   operands as given.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), (B), -(C), -1, (R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask ((A), (B), (C), (U), (R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), (B), -(C), (U), (R))
3399
/* Macro forms of the four _mm512_*fnmsub_round_ps intrinsics.  The unmasked
   and maskz forms reuse the fmadd builtins with both A and C negated; the
   mask and mask3 forms call the dedicated vfnmsubps512 builtins with the
   operands as given.  */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), (B), -(C), -1, (R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask ((A), (B), (C), (U), (R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask3 ((A), (B), (C), (U), (R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), (B), -(C), (U), (R))
3411 #endif
3412
3413 extern __inline __m512i
3414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3415 _mm512_abs_epi64 (__m512i __A)
3416 {
3417 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3418 (__v8di)
3419 _mm512_undefined_epi32 (),
3420 (__mmask8) -1);
3421 }
3422
3423 extern __inline __m512i
3424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3426 {
3427 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3428 (__v8di) __W,
3429 (__mmask8) __U);
3430 }
3431
3432 extern __inline __m512i
3433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3434 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3435 {
3436 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3437 (__v8di)
3438 _mm512_setzero_si512 (),
3439 (__mmask8) __U);
3440 }
3441
3442 extern __inline __m512i
3443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3444 _mm512_abs_epi32 (__m512i __A)
3445 {
3446 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3447 (__v16si)
3448 _mm512_undefined_epi32 (),
3449 (__mmask16) -1);
3450 }
3451
3452 extern __inline __m512i
3453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3455 {
3456 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3457 (__v16si) __W,
3458 (__mmask16) __U);
3459 }
3460
3461 extern __inline __m512i
3462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3463 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3464 {
3465 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3466 (__v16si)
3467 _mm512_setzero_si512 (),
3468 (__mmask16) __U);
3469 }
3470
3471 extern __inline __m512
3472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3473 _mm512_broadcastss_ps (__m128 __A)
3474 {
3475 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3476 (__v16sf)
3477 _mm512_undefined_ps (),
3478 (__mmask16) -1);
3479 }
3480
3481 extern __inline __m512
3482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3483 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3484 {
3485 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3486 (__v16sf) __O, __M);
3487 }
3488
3489 extern __inline __m512
3490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3491 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3492 {
3493 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3494 (__v16sf)
3495 _mm512_setzero_ps (),
3496 __M);
3497 }
3498
3499 extern __inline __m512d
3500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501 _mm512_broadcastsd_pd (__m128d __A)
3502 {
3503 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3504 (__v8df)
3505 _mm512_undefined_pd (),
3506 (__mmask8) -1);
3507 }
3508
3509 extern __inline __m512d
3510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3511 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3512 {
3513 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3514 (__v8df) __O, __M);
3515 }
3516
3517 extern __inline __m512d
3518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3519 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3520 {
3521 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3522 (__v8df)
3523 _mm512_setzero_pd (),
3524 __M);
3525 }
3526
3527 extern __inline __m512i
3528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3529 _mm512_broadcastd_epi32 (__m128i __A)
3530 {
3531 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3532 (__v16si)
3533 _mm512_undefined_epi32 (),
3534 (__mmask16) -1);
3535 }
3536
3537 extern __inline __m512i
3538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3539 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3540 {
3541 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3542 (__v16si) __O, __M);
3543 }
3544
3545 extern __inline __m512i
3546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3547 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3548 {
3549 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3550 (__v16si)
3551 _mm512_setzero_si512 (),
3552 __M);
3553 }
3554
3555 extern __inline __m512i
3556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3557 _mm512_set1_epi32 (int __A)
3558 {
3559 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3560 (__v16si)
3561 _mm512_undefined_epi32 (),
3562 (__mmask16)(-1));
3563 }
3564
3565 extern __inline __m512i
3566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3568 {
3569 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3570 __M);
3571 }
3572
3573 extern __inline __m512i
3574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3575 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3576 {
3577 return (__m512i)
3578 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3579 (__v16si) _mm512_setzero_si512 (),
3580 __M);
3581 }
3582
3583 extern __inline __m512i
3584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3585 _mm512_broadcastq_epi64 (__m128i __A)
3586 {
3587 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3588 (__v8di)
3589 _mm512_undefined_epi32 (),
3590 (__mmask8) -1);
3591 }
3592
3593 extern __inline __m512i
3594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3595 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3596 {
3597 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3598 (__v8di) __O, __M);
3599 }
3600
3601 extern __inline __m512i
3602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3603 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3604 {
3605 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3606 (__v8di)
3607 _mm512_setzero_si512 (),
3608 __M);
3609 }
3610
3611 extern __inline __m512i
3612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3613 _mm512_set1_epi64 (long long __A)
3614 {
3615 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3616 (__v8di)
3617 _mm512_undefined_epi32 (),
3618 (__mmask8)(-1));
3619 }
3620
3621 extern __inline __m512i
3622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3623 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3624 {
3625 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3626 __M);
3627 }
3628
3629 extern __inline __m512i
3630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3631 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3632 {
3633 return (__m512i)
3634 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3635 (__v8di) _mm512_setzero_si512 (),
3636 __M);
3637 }
3638
3639 extern __inline __m512
3640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641 _mm512_broadcast_f32x4 (__m128 __A)
3642 {
3643 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3644 (__v16sf)
3645 _mm512_undefined_ps (),
3646 (__mmask16) -1);
3647 }
3648
3649 extern __inline __m512
3650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3652 {
3653 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3654 (__v16sf) __O,
3655 __M);
3656 }
3657
3658 extern __inline __m512
3659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3660 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3661 {
3662 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3663 (__v16sf)
3664 _mm512_setzero_ps (),
3665 __M);
3666 }
3667
3668 extern __inline __m512i
3669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3670 _mm512_broadcast_i32x4 (__m128i __A)
3671 {
3672 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3673 (__v16si)
3674 _mm512_undefined_epi32 (),
3675 (__mmask16) -1);
3676 }
3677
3678 extern __inline __m512i
3679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3681 {
3682 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3683 (__v16si) __O,
3684 __M);
3685 }
3686
3687 extern __inline __m512i
3688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3689 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3690 {
3691 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3692 (__v16si)
3693 _mm512_setzero_si512 (),
3694 __M);
3695 }
3696
3697 extern __inline __m512d
3698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3699 _mm512_broadcast_f64x4 (__m256d __A)
3700 {
3701 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3702 (__v8df)
3703 _mm512_undefined_pd (),
3704 (__mmask8) -1);
3705 }
3706
3707 extern __inline __m512d
3708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3710 {
3711 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3712 (__v8df) __O,
3713 __M);
3714 }
3715
3716 extern __inline __m512d
3717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3718 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3719 {
3720 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3721 (__v8df)
3722 _mm512_setzero_pd (),
3723 __M);
3724 }
3725
3726 extern __inline __m512i
3727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728 _mm512_broadcast_i64x4 (__m256i __A)
3729 {
3730 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3731 (__v8di)
3732 _mm512_undefined_epi32 (),
3733 (__mmask8) -1);
3734 }
3735
3736 extern __inline __m512i
3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3739 {
3740 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3741 (__v8di) __O,
3742 __M);
3743 }
3744
3745 extern __inline __m512i
3746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3747 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3748 {
3749 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3750 (__v8di)
3751 _mm512_setzero_si512 (),
3752 __M);
3753 }
3754
3755 typedef enum
3756 {
3757 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3758 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3759 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3760 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3761 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3762 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3763 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3764 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3765 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3766 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3767 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3768 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3769 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3770 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3771 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3772 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3773 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3774 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3775 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3776 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3777 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3778 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3779 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3780 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3781 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3782 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3783 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3784 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3785 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3786 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3787 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3788 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3789 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3790 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3791 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3792 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3793 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3794 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3795 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3796 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3797 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3798 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3799 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3800 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3801 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3802 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3803 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3804 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3805 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3806 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3807 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3808 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3809 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3810 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3811 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3812 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3813 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3814 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3815 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3816 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3817 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3818 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3819 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3820 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3821 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3822 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3823 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3824 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3825 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3826 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3827 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3828 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3829 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3830 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3831 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3832 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3833 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3834 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3835 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3836 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3837 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3838 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3839 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3840 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3841 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3842 _MM_PERM_DDDD = 0xFF
3843 } _MM_PERM_ENUM;
3844
3845 #ifdef __OPTIMIZE__
3846 extern __inline __m512i
3847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3849 {
3850 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3851 __mask,
3852 (__v16si)
3853 _mm512_undefined_epi32 (),
3854 (__mmask16) -1);
3855 }
3856
3857 extern __inline __m512i
3858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3860 _MM_PERM_ENUM __mask)
3861 {
3862 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3863 __mask,
3864 (__v16si) __W,
3865 (__mmask16) __U);
3866 }
3867
3868 extern __inline __m512i
3869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3871 {
3872 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3873 __mask,
3874 (__v16si)
3875 _mm512_setzero_si512 (),
3876 (__mmask16) __U);
3877 }
3878
3879 extern __inline __m512i
3880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3882 {
3883 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3884 (__v8di) __B, __imm,
3885 (__v8di)
3886 _mm512_undefined_epi32 (),
3887 (__mmask8) -1);
3888 }
3889
3890 extern __inline __m512i
3891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3893 __m512i __B, const int __imm)
3894 {
3895 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3896 (__v8di) __B, __imm,
3897 (__v8di) __W,
3898 (__mmask8) __U);
3899 }
3900
3901 extern __inline __m512i
3902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3904 const int __imm)
3905 {
3906 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3907 (__v8di) __B, __imm,
3908 (__v8di)
3909 _mm512_setzero_si512 (),
3910 (__mmask8) __U);
3911 }
3912
3913 extern __inline __m512i
3914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3915 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3916 {
3917 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3918 (__v16si) __B,
3919 __imm,
3920 (__v16si)
3921 _mm512_undefined_epi32 (),
3922 (__mmask16) -1);
3923 }
3924
3925 extern __inline __m512i
3926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3927 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3928 __m512i __B, const int __imm)
3929 {
3930 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3931 (__v16si) __B,
3932 __imm,
3933 (__v16si) __W,
3934 (__mmask16) __U);
3935 }
3936
3937 extern __inline __m512i
3938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3939 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3940 const int __imm)
3941 {
3942 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3943 (__v16si) __B,
3944 __imm,
3945 (__v16si)
3946 _mm512_setzero_si512 (),
3947 (__mmask16) __U);
3948 }
3949
3950 extern __inline __m512d
3951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3952 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3953 {
3954 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3955 (__v8df) __B, __imm,
3956 (__v8df)
3957 _mm512_undefined_pd (),
3958 (__mmask8) -1);
3959 }
3960
3961 extern __inline __m512d
3962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3963 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3964 __m512d __B, const int __imm)
3965 {
3966 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3967 (__v8df) __B, __imm,
3968 (__v8df) __W,
3969 (__mmask8) __U);
3970 }
3971
3972 extern __inline __m512d
3973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3974 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3975 const int __imm)
3976 {
3977 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3978 (__v8df) __B, __imm,
3979 (__v8df)
3980 _mm512_setzero_pd (),
3981 (__mmask8) __U);
3982 }
3983
3984 extern __inline __m512
3985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3986 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3987 {
3988 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3989 (__v16sf) __B, __imm,
3990 (__v16sf)
3991 _mm512_undefined_ps (),
3992 (__mmask16) -1);
3993 }
3994
3995 extern __inline __m512
3996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3997 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3998 __m512 __B, const int __imm)
3999 {
4000 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4001 (__v16sf) __B, __imm,
4002 (__v16sf) __W,
4003 (__mmask16) __U);
4004 }
4005
4006 extern __inline __m512
4007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4008 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4009 const int __imm)
4010 {
4011 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4012 (__v16sf) __B, __imm,
4013 (__v16sf)
4014 _mm512_setzero_ps (),
4015 (__mmask16) __U);
4016 }
4017
4018 #else
/* Macro forms of the pshufd512 wrappers, used when __OPTIMIZE__ is not
   defined.  X is the input vector, C the selector, W the source vector
   and U the write mask.  */
#define _mm512_shuffle_epi32(X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si) (__m512i) (X), \
                                            (int) (C), \
                                            (__v16si) (__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask16) -1))

#define _mm512_mask_shuffle_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si) (__m512i) (X), \
                                            (int) (C), \
                                            (__v16si) (__m512i) (W), \
                                            (__mmask16) (U)))

#define _mm512_maskz_shuffle_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si) (__m512i) (X), \
                                            (int) (C), \
                                            (__v16si) (__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask16) (U)))
4033
/* Macro forms of the shuf_i64x2 wrappers, used when __OPTIMIZE__ is not
   defined.  X and Y are the input vectors, C the selector, W the source
   vector and U the write mask.  */
#define _mm512_shuffle_i64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) (__m512i) (X), \
                                             (__v8di) (__m512i) (Y), \
                                             (int) (C), \
                                             (__v8di) (__m512i) \
                                             _mm512_undefined_epi32 (), \
                                             (__mmask8) -1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) (__m512i) (X), \
                                             (__v8di) (__m512i) (Y), \
                                             (int) (C), \
                                             (__v8di) (__m512i) (W), \
                                             (__mmask8) (U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) (__m512i) (X), \
                                             (__v8di) (__m512i) (Y), \
                                             (int) (C), \
                                             (__v8di) (__m512i) \
                                             _mm512_setzero_si512 (), \
                                             (__mmask8) (U)))
4051
/* Macro forms of the shuf_i32x4 wrappers, used when __OPTIMIZE__ is not
   defined.  X and Y are the input vectors, C the selector, W the source
   vector and U the write mask.  */
#define _mm512_shuffle_i32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) (__m512i) (X), \
                                             (__v16si) (__m512i) (Y), \
                                             (int) (C), \
                                             (__v16si) (__m512i) \
                                             _mm512_undefined_epi32 (), \
                                             (__mmask16) -1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) (__m512i) (X), \
                                             (__v16si) (__m512i) (Y), \
                                             (int) (C), \
                                             (__v16si) (__m512i) (W), \
                                             (__mmask16) (U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) (__m512i) (X), \
                                             (__v16si) (__m512i) (Y), \
                                             (int) (C), \
                                             (__v16si) (__m512i) \
                                             _mm512_setzero_si512 (), \
                                             (__mmask16) (U)))
4069
/* Macro forms of the shuf_f64x2 wrappers, used when __OPTIMIZE__ is not
   defined.  X and Y are the input vectors, C the selector, W the source
   vector and U the write mask.  */
#define _mm512_shuffle_f64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) (__m512d) (X), \
                                             (__v8df) (__m512d) (Y), \
                                             (int) (C), \
                                             (__v8df) (__m512d) \
                                             _mm512_undefined_pd (), \
                                             (__mmask8) -1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) (__m512d) (X), \
                                             (__v8df) (__m512d) (Y), \
                                             (int) (C), \
                                             (__v8df) (__m512d) (W), \
                                             (__mmask8) (U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) (__m512d) (X), \
                                             (__v8df) (__m512d) (Y), \
                                             (int) (C), \
                                             (__v8df) (__m512d) \
                                             _mm512_setzero_pd (), \
                                             (__mmask8) (U)))
4087
/* Macro forms of the shuf_f32x4 wrappers, used when __OPTIMIZE__ is not
   defined.  X and Y are the input vectors, C the selector, W the source
   vector and U the write mask.  */
#define _mm512_shuffle_f32x4(X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) (__m512) (X), \
                                            (__v16sf) (__m512) (Y), \
                                            (int) (C), \
                                            (__v16sf) (__m512) \
                                            _mm512_undefined_ps (), \
                                            (__mmask16) -1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) (__m512) (X), \
                                            (__v16sf) (__m512) (Y), \
                                            (int) (C), \
                                            (__v16sf) (__m512) (W), \
                                            (__mmask16) (U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) (__m512) (X), \
                                            (__v16sf) (__m512) (Y), \
                                            (int) (C), \
                                            (__v16sf) (__m512) \
                                            _mm512_setzero_ps (), \
                                            (__mmask16) (U)))
4105 #endif
4106
4107 extern __inline __m512i
4108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4109 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4110 {
4111 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4112 (__v16si) __B,
4113 (__v16si)
4114 _mm512_undefined_epi32 (),
4115 (__mmask16) -1);
4116 }
4117
4118 extern __inline __m512i
4119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4120 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4121 {
4122 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4123 (__v16si) __B,
4124 (__v16si) __W,
4125 (__mmask16) __U);
4126 }
4127
4128 extern __inline __m512i
4129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4130 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4131 {
4132 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4133 (__v16si) __B,
4134 (__v16si)
4135 _mm512_setzero_si512 (),
4136 (__mmask16) __U);
4137 }
4138
4139 extern __inline __m512i
4140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4141 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4142 {
4143 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4144 (__v16si) __B,
4145 (__v16si)
4146 _mm512_undefined_epi32 (),
4147 (__mmask16) -1);
4148 }
4149
4150 extern __inline __m512i
4151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4152 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4153 {
4154 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4155 (__v16si) __B,
4156 (__v16si) __W,
4157 (__mmask16) __U);
4158 }
4159
4160 extern __inline __m512i
4161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4162 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4163 {
4164 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4165 (__v16si) __B,
4166 (__v16si)
4167 _mm512_setzero_si512 (),
4168 (__mmask16) __U);
4169 }
4170
4171 extern __inline __m512i
4172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4173 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4174 {
4175 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4176 (__v8di) __B,
4177 (__v8di)
4178 _mm512_undefined_epi32 (),
4179 (__mmask8) -1);
4180 }
4181
4182 extern __inline __m512i
4183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4184 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4185 {
4186 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4187 (__v8di) __B,
4188 (__v8di) __W,
4189 (__mmask8) __U);
4190 }
4191
4192 extern __inline __m512i
4193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4194 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4195 {
4196 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4197 (__v8di) __B,
4198 (__v8di)
4199 _mm512_setzero_si512 (),
4200 (__mmask8) __U);
4201 }
4202
4203 extern __inline __m512i
4204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4205 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4206 {
4207 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4208 (__v8di) __B,
4209 (__v8di)
4210 _mm512_undefined_epi32 (),
4211 (__mmask8) -1);
4212 }
4213
4214 extern __inline __m512i
4215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4216 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4217 {
4218 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4219 (__v8di) __B,
4220 (__v8di) __W,
4221 (__mmask8) __U);
4222 }
4223
4224 extern __inline __m512i
4225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4226 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4227 {
4228 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4229 (__v8di) __B,
4230 (__v8di)
4231 _mm512_setzero_si512 (),
4232 (__mmask8) __U);
4233 }
4234
4235 #ifdef __OPTIMIZE__
4236 extern __inline __m256i
4237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4238 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4239 {
4240 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4241 (__v8si)
4242 _mm256_undefined_si256 (),
4243 (__mmask8) -1, __R);
4244 }
4245
4246 extern __inline __m256i
4247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4248 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4249 const int __R)
4250 {
4251 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4252 (__v8si) __W,
4253 (__mmask8) __U, __R);
4254 }
4255
4256 extern __inline __m256i
4257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4258 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4259 {
4260 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4261 (__v8si)
4262 _mm256_setzero_si256 (),
4263 (__mmask8) __U, __R);
4264 }
4265
4266 extern __inline __m256i
4267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4268 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4269 {
4270 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4271 (__v8si)
4272 _mm256_undefined_si256 (),
4273 (__mmask8) -1, __R);
4274 }
4275
4276 extern __inline __m256i
4277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4278 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4279 const int __R)
4280 {
4281 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4282 (__v8si) __W,
4283 (__mmask8) __U, __R);
4284 }
4285
4286 extern __inline __m256i
4287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4288 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4289 {
4290 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4291 (__v8si)
4292 _mm256_setzero_si256 (),
4293 (__mmask8) __U, __R);
4294 }
4295 #else
/* Macro forms of the cvttpd2dq512 / cvttpd2udq512 wrappers, used when
   __OPTIMIZE__ is not defined.  A is the input vector, B the __R
   operand, W the source vector and U the write mask.  Every argument is
   parenthesised and cast exactly as the inline definitions do, so both
   forms accept the same operands.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) (__m512d) (A), \
                                               (__v8si) \
                                               _mm256_undefined_si256 (), \
                                               (__mmask8) -1, (B)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) (__m512d) (A), \
                                               (__v8si) (__m256i) (W), \
                                               (__mmask8) (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) (__m512d) (A), \
                                               (__v8si) \
                                               _mm256_setzero_si256 (), \
                                               (__mmask8) (U), (B)))

#define _mm512_cvtt_roundpd_epu32(A, B) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) (__m512d) (A), \
                                                (__v8si) \
                                                _mm256_undefined_si256 (), \
                                                (__mmask8) -1, (B)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) (__m512d) (A), \
                                                (__v8si) (__m256i) (W), \
                                                (__mmask8) (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) (__m512d) (A), \
                                                (__v8si) \
                                                _mm256_setzero_si256 (), \
                                                (__mmask8) (U), (B)))
4313 #endif
4314
4315 #ifdef __OPTIMIZE__
4316 extern __inline __m256i
4317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4318 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4319 {
4320 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4321 (__v8si)
4322 _mm256_undefined_si256 (),
4323 (__mmask8) -1, __R);
4324 }
4325
4326 extern __inline __m256i
4327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4328 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4329 const int __R)
4330 {
4331 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4332 (__v8si) __W,
4333 (__mmask8) __U, __R);
4334 }
4335
4336 extern __inline __m256i
4337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4339 {
4340 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4341 (__v8si)
4342 _mm256_setzero_si256 (),
4343 (__mmask8) __U, __R);
4344 }
4345
4346 extern __inline __m256i
4347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4348 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4349 {
4350 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4351 (__v8si)
4352 _mm256_undefined_si256 (),
4353 (__mmask8) -1, __R);
4354 }
4355
4356 extern __inline __m256i
4357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4358 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4359 const int __R)
4360 {
4361 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4362 (__v8si) __W,
4363 (__mmask8) __U, __R);
4364 }
4365
4366 extern __inline __m256i
4367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4369 {
4370 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4371 (__v8si)
4372 _mm256_setzero_si256 (),
4373 (__mmask8) __U, __R);
4374 }
4375 #else
/* Macro forms of the cvtpd2dq512 / cvtpd2udq512 wrappers, used when
   __OPTIMIZE__ is not defined.  A is the input vector, B the __R
   operand, W the source vector and U the write mask.  Every argument is
   parenthesised and cast exactly as the inline definitions do, so both
   forms accept the same operands.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) (__m512d) (A), \
                                              (__v8si) \
                                              _mm256_undefined_si256 (), \
                                              (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) (__m512d) (A), \
                                              (__v8si) (__m256i) (W), \
                                              (__mmask8) (U), (B)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) (__m512d) (A), \
                                              (__v8si) \
                                              _mm256_setzero_si256 (), \
                                              (__mmask8) (U), (B)))

#define _mm512_cvt_roundpd_epu32(A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) (__m512d) (A), \
                                               (__v8si) \
                                               _mm256_undefined_si256 (), \
                                               (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) (__m512d) (A), \
                                               (__v8si) (__m256i) (W), \
                                               (__mmask8) (U), (B)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) (__m512d) (A), \
                                               (__v8si) \
                                               _mm256_setzero_si256 (), \
                                               (__mmask8) (U), (B)))
4393 #endif
4394
4395 #ifdef __OPTIMIZE__
4396 extern __inline __m512i
4397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4398 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4399 {
4400 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4401 (__v16si)
4402 _mm512_undefined_epi32 (),
4403 (__mmask16) -1, __R);
4404 }
4405
4406 extern __inline __m512i
4407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4408 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4409 const int __R)
4410 {
4411 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4412 (__v16si) __W,
4413 (__mmask16) __U, __R);
4414 }
4415
4416 extern __inline __m512i
4417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4418 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4419 {
4420 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4421 (__v16si)
4422 _mm512_setzero_si512 (),
4423 (__mmask16) __U, __R);
4424 }
4425
4426 extern __inline __m512i
4427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4428 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4429 {
4430 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4431 (__v16si)
4432 _mm512_undefined_epi32 (),
4433 (__mmask16) -1, __R);
4434 }
4435
4436 extern __inline __m512i
4437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4438 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4439 const int __R)
4440 {
4441 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4442 (__v16si) __W,
4443 (__mmask16) __U, __R);
4444 }
4445
4446 extern __inline __m512i
4447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4448 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4449 {
4450 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4451 (__v16si)
4452 _mm512_setzero_si512 (),
4453 (__mmask16) __U, __R);
4454 }
4455 #else
/* Macro forms of the cvttps2dq512 / cvttps2udq512 wrappers, used when
   __OPTIMIZE__ is not defined.  A is the input vector, B the __R
   operand, W the source vector and U the write mask.  Every argument is
   parenthesised and cast exactly as the inline definitions do, so both
   forms accept the same operands.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) (__m512) (A), \
                                               (__v16si) \
                                               _mm512_undefined_epi32 (), \
                                               (__mmask16) -1, (B)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) (__m512) (A), \
                                               (__v16si) (__m512i) (W), \
                                               (__mmask16) (U), (B)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) (__m512) (A), \
                                               (__v16si) \
                                               _mm512_setzero_si512 (), \
                                               (__mmask16) (U), (B)))

#define _mm512_cvtt_roundps_epu32(A, B) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) (__m512) (A), \
                                                (__v16si) \
                                                _mm512_undefined_epi32 (), \
                                                (__mmask16) -1, (B)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) (__m512) (A), \
                                                (__v16si) (__m512i) (W), \
                                                (__mmask16) (U), (B)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) (__m512) (A), \
                                                (__v16si) \
                                                _mm512_setzero_si512 (), \
                                                (__mmask16) (U), (B)))
4473 #endif
4474
4475 #ifdef __OPTIMIZE__
4476 extern __inline __m512i
4477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4478 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4479 {
4480 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4481 (__v16si)
4482 _mm512_undefined_epi32 (),
4483 (__mmask16) -1, __R);
4484 }
4485
4486 extern __inline __m512i
4487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4489 const int __R)
4490 {
4491 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4492 (__v16si) __W,
4493 (__mmask16) __U, __R);
4494 }
4495
4496 extern __inline __m512i
4497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4498 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4499 {
4500 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4501 (__v16si)
4502 _mm512_setzero_si512 (),
4503 (__mmask16) __U, __R);
4504 }
4505
4506 extern __inline __m512i
4507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4508 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4509 {
4510 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4511 (__v16si)
4512 _mm512_undefined_epi32 (),
4513 (__mmask16) -1, __R);
4514 }
4515
4516 extern __inline __m512i
4517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4518 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4519 const int __R)
4520 {
4521 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4522 (__v16si) __W,
4523 (__mmask16) __U, __R);
4524 }
4525
4526 extern __inline __m512i
4527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4528 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4529 {
4530 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4531 (__v16si)
4532 _mm512_setzero_si512 (),
4533 (__mmask16) __U, __R);
4534 }
4535 #else
/* Macro forms of the cvtps2dq512 / cvtps2udq512 wrappers, used when
   __OPTIMIZE__ is not defined.  A is the input vector, B the __R
   operand, W the source vector and U the write mask.  Every argument is
   parenthesised and cast exactly as the inline definitions do, so both
   forms accept the same operands.  */
#define _mm512_cvt_roundps_epi32(A, B) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) (__m512) (A), \
                                              (__v16si) \
                                              _mm512_undefined_epi32 (), \
                                              (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) (__m512) (A), \
                                              (__v16si) (__m512i) (W), \
                                              (__mmask16) (U), (B)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) (__m512) (A), \
                                              (__v16si) \
                                              _mm512_setzero_si512 (), \
                                              (__mmask16) (U), (B)))

#define _mm512_cvt_roundps_epu32(A, B) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) (__m512) (A), \
                                               (__v16si) \
                                               _mm512_undefined_epi32 (), \
                                               (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) (__m512) (A), \
                                               (__v16si) (__m512i) (W), \
                                               (__mmask16) (U), (B)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) (__m512) (A), \
                                               (__v16si) \
                                               _mm512_setzero_si512 (), \
                                               (__mmask16) (U), (B)))
4553 #endif
4554
4555 extern __inline __m128d
4556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4557 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4558 {
4559 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4560 }
4561
4562 #ifdef __x86_64__
4563 #ifdef __OPTIMIZE__
4564 extern __inline __m128d
4565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4566 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4567 {
4568 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4569 }
4570
4571 extern __inline __m128d
4572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4573 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4574 {
4575 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4576 }
4577
4578 extern __inline __m128d
4579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4581 {
4582 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4583 }
4584 #else
/* Macro forms of the 64-bit integer to double wrappers, used when
   __OPTIMIZE__ is not defined.  The whole expansion is parenthesised so
   that a postfix operator written after the macro call applies to the
   cast result rather than to the builtin call, and A is cast to __v2df
   as in the inline definitions.  */
#define _mm_cvt_roundu64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) (__m128d) (A), (B), (C)))

#define _mm_cvt_roundi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) (__m128d) (A), (B), (C)))

#define _mm_cvt_roundsi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) (__m128d) (A), (B), (C)))
4593 #endif
4594
4595 #endif
4596
4597 #ifdef __OPTIMIZE__
4598 extern __inline __m128
4599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4600 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4601 {
4602 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4603 }
4604
4605 extern __inline __m128
4606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4607 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4608 {
4609 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4610 }
4611
4612 extern __inline __m128
4613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4614 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4615 {
4616 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4617 }
4618 #else
/* Macro forms of the 32-bit integer to float wrappers, used when
   __OPTIMIZE__ is not defined.  The whole expansion is parenthesised so
   that a postfix operator written after the macro call applies to the
   cast result rather than to the builtin call, and A is cast to __v4sf
   as in the inline definitions.  */
#define _mm_cvt_roundu32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) (__m128) (A), (B), (C)))

#define _mm_cvt_roundi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) (__m128) (A), (B), (C)))

#define _mm_cvt_roundsi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) (__m128) (A), (B), (C)))
4627 #endif
4628
4629 #ifdef __x86_64__
4630 #ifdef __OPTIMIZE__
4631 extern __inline __m128
4632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4634 {
4635 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4636 }
4637
4638 extern __inline __m128
4639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4640 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4641 {
4642 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4643 }
4644
4645 extern __inline __m128
4646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4647 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4648 {
4649 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4650 }
4651 #else
/* Macro forms of the 64-bit integer to float wrappers, used when
   __OPTIMIZE__ is not defined.  The whole expansion is parenthesised so
   that a postfix operator written after the macro call applies to the
   cast result rather than to the builtin call, and A is cast to __v4sf
   as in the inline definitions.  */
#define _mm_cvt_roundu64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) (__m128) (A), (B), (C)))

#define _mm_cvt_roundi64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) (__m128) (A), (B), (C)))

#define _mm_cvt_roundsi64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) (__m128) (A), (B), (C)))
4660 #endif
4661
4662 #endif
4663
4664 extern __inline __m128i
4665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4666 _mm512_cvtepi32_epi8 (__m512i __A)
4667 {
4668 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4669 (__v16qi)
4670 _mm_undefined_si128 (),
4671 (__mmask16) -1);
4672 }
4673
4674 extern __inline void
4675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4677 {
4678 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4679 }
4680
4681 extern __inline __m128i
4682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4683 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4684 {
4685 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4686 (__v16qi) __O, __M);
4687 }
4688
4689 extern __inline __m128i
4690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4691 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4692 {
4693 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4694 (__v16qi)
4695 _mm_setzero_si128 (),
4696 __M);
4697 }
4698
4699 extern __inline __m128i
4700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4701 _mm512_cvtsepi32_epi8 (__m512i __A)
4702 {
4703 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4704 (__v16qi)
4705 _mm_undefined_si128 (),
4706 (__mmask16) -1);
4707 }
4708
4709 extern __inline void
4710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4711 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4712 {
4713 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4714 }
4715
4716 extern __inline __m128i
4717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4718 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4719 {
4720 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4721 (__v16qi) __O, __M);
4722 }
4723
4724 extern __inline __m128i
4725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4726 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4727 {
4728 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4729 (__v16qi)
4730 _mm_setzero_si128 (),
4731 __M);
4732 }
4733
4734 extern __inline __m128i
4735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4736 _mm512_cvtusepi32_epi8 (__m512i __A)
4737 {
4738 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4739 (__v16qi)
4740 _mm_undefined_si128 (),
4741 (__mmask16) -1);
4742 }
4743
4744 extern __inline void
4745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4747 {
4748 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4749 }
4750
4751 extern __inline __m128i
4752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4753 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4754 {
4755 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4756 (__v16qi) __O,
4757 __M);
4758 }
4759
4760 extern __inline __m128i
4761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4763 {
4764 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4765 (__v16qi)
4766 _mm_setzero_si128 (),
4767 __M);
4768 }
4769
4770 extern __inline __m256i
4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772 _mm512_cvtepi32_epi16 (__m512i __A)
4773 {
4774 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4775 (__v16hi)
4776 _mm256_undefined_si256 (),
4777 (__mmask16) -1);
4778 }
4779
4780 extern __inline void
4781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4782 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4783 {
4784 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4785 }
4786
4787 extern __inline __m256i
4788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4789 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4790 {
4791 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4792 (__v16hi) __O, __M);
4793 }
4794
4795 extern __inline __m256i
4796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4797 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4798 {
4799 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4800 (__v16hi)
4801 _mm256_setzero_si256 (),
4802 __M);
4803 }
4804
4805 extern __inline __m256i
4806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4807 _mm512_cvtsepi32_epi16 (__m512i __A)
4808 {
4809 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4810 (__v16hi)
4811 _mm256_undefined_si256 (),
4812 (__mmask16) -1);
4813 }
4814
4815 extern __inline void
4816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4817 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4818 {
4819 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4820 }
4821
4822 extern __inline __m256i
4823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4824 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4825 {
4826 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4827 (__v16hi) __O, __M);
4828 }
4829
4830 extern __inline __m256i
4831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4832 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4833 {
4834 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4835 (__v16hi)
4836 _mm256_setzero_si256 (),
4837 __M);
4838 }
4839
4840 extern __inline __m256i
4841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4842 _mm512_cvtusepi32_epi16 (__m512i __A)
4843 {
4844 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4845 (__v16hi)
4846 _mm256_undefined_si256 (),
4847 (__mmask16) -1);
4848 }
4849
4850 extern __inline void
4851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4853 {
4854 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4855 }
4856
4857 extern __inline __m256i
4858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4860 {
4861 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4862 (__v16hi) __O,
4863 __M);
4864 }
4865
4866 extern __inline __m256i
4867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4868 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4869 {
4870 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4871 (__v16hi)
4872 _mm256_setzero_si256 (),
4873 __M);
4874 }
4875
4876 extern __inline __m256i
4877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4878 _mm512_cvtepi64_epi32 (__m512i __A)
4879 {
4880 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4881 (__v8si)
4882 _mm256_undefined_si256 (),
4883 (__mmask8) -1);
4884 }
4885
4886 extern __inline void
4887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4888 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4889 {
4890 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4891 }
4892
4893 extern __inline __m256i
4894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4895 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4896 {
4897 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4898 (__v8si) __O, __M);
4899 }
4900
4901 extern __inline __m256i
4902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4904 {
4905 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4906 (__v8si)
4907 _mm256_setzero_si256 (),
4908 __M);
4909 }
4910
4911 extern __inline __m256i
4912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4913 _mm512_cvtsepi64_epi32 (__m512i __A)
4914 {
4915 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4916 (__v8si)
4917 _mm256_undefined_si256 (),
4918 (__mmask8) -1);
4919 }
4920
4921 extern __inline void
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4924 {
4925 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4926 }
4927
4928 extern __inline __m256i
4929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4931 {
4932 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4933 (__v8si) __O, __M);
4934 }
4935
4936 extern __inline __m256i
4937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4938 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4939 {
4940 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4941 (__v8si)
4942 _mm256_setzero_si256 (),
4943 __M);
4944 }
4945
4946 extern __inline __m256i
4947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4948 _mm512_cvtusepi64_epi32 (__m512i __A)
4949 {
4950 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4951 (__v8si)
4952 _mm256_undefined_si256 (),
4953 (__mmask8) -1);
4954 }
4955
4956 extern __inline void
4957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4959 {
4960 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4961 }
4962
4963 extern __inline __m256i
4964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4965 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4966 {
4967 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4968 (__v8si) __O, __M);
4969 }
4970
4971 extern __inline __m256i
4972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4973 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4974 {
4975 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4976 (__v8si)
4977 _mm256_setzero_si256 (),
4978 __M);
4979 }
4980
4981 extern __inline __m128i
4982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4983 _mm512_cvtepi64_epi16 (__m512i __A)
4984 {
4985 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4986 (__v8hi)
4987 _mm_undefined_si128 (),
4988 (__mmask8) -1);
4989 }
4990
4991 extern __inline void
4992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4993 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4994 {
4995 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4996 }
4997
4998 extern __inline __m128i
4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5001 {
5002 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5003 (__v8hi) __O, __M);
5004 }
5005
5006 extern __inline __m128i
5007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5008 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5009 {
5010 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5011 (__v8hi)
5012 _mm_setzero_si128 (),
5013 __M);
5014 }
5015
5016 extern __inline __m128i
5017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5018 _mm512_cvtsepi64_epi16 (__m512i __A)
5019 {
5020 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5021 (__v8hi)
5022 _mm_undefined_si128 (),
5023 (__mmask8) -1);
5024 }
5025
5026 extern __inline void
5027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5028 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5029 {
5030 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5031 }
5032
5033 extern __inline __m128i
5034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5036 {
5037 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5038 (__v8hi) __O, __M);
5039 }
5040
5041 extern __inline __m128i
5042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5043 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5044 {
5045 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5046 (__v8hi)
5047 _mm_setzero_si128 (),
5048 __M);
5049 }
5050
5051 extern __inline __m128i
5052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5053 _mm512_cvtusepi64_epi16 (__m512i __A)
5054 {
5055 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5056 (__v8hi)
5057 _mm_undefined_si128 (),
5058 (__mmask8) -1);
5059 }
5060
5061 extern __inline void
5062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5063 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5064 {
5065 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5066 }
5067
5068 extern __inline __m128i
5069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5070 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5071 {
5072 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5073 (__v8hi) __O, __M);
5074 }
5075
5076 extern __inline __m128i
5077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5078 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5079 {
5080 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5081 (__v8hi)
5082 _mm_setzero_si128 (),
5083 __M);
5084 }
5085
5086 extern __inline __m128i
5087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5088 _mm512_cvtepi64_epi8 (__m512i __A)
5089 {
5090 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5091 (__v16qi)
5092 _mm_undefined_si128 (),
5093 (__mmask8) -1);
5094 }
5095
5096 extern __inline void
5097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5098 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5099 {
5100 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5101 }
5102
5103 extern __inline __m128i
5104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5106 {
5107 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5108 (__v16qi) __O, __M);
5109 }
5110
5111 extern __inline __m128i
5112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5113 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5114 {
5115 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5116 (__v16qi)
5117 _mm_setzero_si128 (),
5118 __M);
5119 }
5120
5121 extern __inline __m128i
5122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5123 _mm512_cvtsepi64_epi8 (__m512i __A)
5124 {
5125 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5126 (__v16qi)
5127 _mm_undefined_si128 (),
5128 (__mmask8) -1);
5129 }
5130
5131 extern __inline void
5132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5134 {
5135 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5136 }
5137
5138 extern __inline __m128i
5139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5141 {
5142 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5143 (__v16qi) __O, __M);
5144 }
5145
5146 extern __inline __m128i
5147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5148 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5149 {
5150 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5151 (__v16qi)
5152 _mm_setzero_si128 (),
5153 __M);
5154 }
5155
5156 extern __inline __m128i
5157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5158 _mm512_cvtusepi64_epi8 (__m512i __A)
5159 {
5160 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5161 (__v16qi)
5162 _mm_undefined_si128 (),
5163 (__mmask8) -1);
5164 }
5165
5166 extern __inline void
5167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5169 {
5170 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5171 }
5172
5173 extern __inline __m128i
5174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5175 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5176 {
5177 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5178 (__v16qi) __O,
5179 __M);
5180 }
5181
5182 extern __inline __m128i
5183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5185 {
5186 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5187 (__v16qi)
5188 _mm_setzero_si128 (),
5189 __M);
5190 }
5191
5192 extern __inline __m512d
5193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5194 _mm512_cvtepi32_pd (__m256i __A)
5195 {
5196 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5197 (__v8df)
5198 _mm512_undefined_pd (),
5199 (__mmask8) -1);
5200 }
5201
5202 extern __inline __m512d
5203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5204 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5205 {
5206 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5207 (__v8df) __W,
5208 (__mmask8) __U);
5209 }
5210
5211 extern __inline __m512d
5212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5213 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5214 {
5215 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5216 (__v8df)
5217 _mm512_setzero_pd (),
5218 (__mmask8) __U);
5219 }
5220
5221 extern __inline __m512d
5222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5223 _mm512_cvtepu32_pd (__m256i __A)
5224 {
5225 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5226 (__v8df)
5227 _mm512_undefined_pd (),
5228 (__mmask8) -1);
5229 }
5230
5231 extern __inline __m512d
5232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5233 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5234 {
5235 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5236 (__v8df) __W,
5237 (__mmask8) __U);
5238 }
5239
5240 extern __inline __m512d
5241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5242 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5243 {
5244 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5245 (__v8df)
5246 _mm512_setzero_pd (),
5247 (__mmask8) __U);
5248 }
5249
5250 #ifdef __OPTIMIZE__
5251 extern __inline __m512
5252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5253 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5254 {
5255 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5256 (__v16sf)
5257 _mm512_undefined_ps (),
5258 (__mmask16) -1, __R);
5259 }
5260
5261 extern __inline __m512
5262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5263 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5264 const int __R)
5265 {
5266 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5267 (__v16sf) __W,
5268 (__mmask16) __U, __R);
5269 }
5270
5271 extern __inline __m512
5272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5273 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5274 {
5275 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5276 (__v16sf)
5277 _mm512_setzero_ps (),
5278 (__mmask16) __U, __R);
5279 }
5280
5281 extern __inline __m512
5282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5283 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5284 {
5285 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5286 (__v16sf)
5287 _mm512_undefined_ps (),
5288 (__mmask16) -1, __R);
5289 }
5290
5291 extern __inline __m512
5292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5293 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5294 const int __R)
5295 {
5296 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5297 (__v16sf) __W,
5298 (__mmask16) __U, __R);
5299 }
5300
5301 extern __inline __m512
5302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5303 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5304 {
5305 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5306 (__v16sf)
5307 _mm512_setzero_ps (),
5308 (__mmask16) __U, __R);
5309 }
5310
5311 #else
/* Macro form of _mm512_cvt_roundepi32_ps: cvtdq2ps512 mask builtin on A with
   an undefined second operand and an all-ones mask.  The expansion is fully
   parenthesized so the (__m512) cast covers the whole call, and the mask is
   cast to __mmask16 as in the inline definition.  */
#define _mm512_cvt_roundepi32_ps(A, B)   \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),   \
     (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))
5314
/* Macro form of _mm512_mask_cvt_roundepi32_ps: cvtdq2ps512 mask builtin on A
   with W as second operand and U as the mask.  The expansion is fully
   parenthesized so the (__m512) cast covers the whole call, and W and U get
   the same (__v16sf) / (__mmask16) casts as in the inline definition.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)   \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),   \
     (__v16sf)(W), (__mmask16)(U), (B)))
5317
/* Macro form of _mm512_maskz_cvt_roundepi32_ps: cvtdq2ps512 mask builtin on
   A with a zero vector as second operand and U as the mask.  The expansion
   is fully parenthesized so the (__m512) cast covers the whole call, and U
   is cast to __mmask16 as in the inline definition.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)   \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),   \
     (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (B)))
5320
/* Macro form of _mm512_cvt_roundepu32_ps: cvtudq2ps512 mask builtin on A
   with an undefined second operand and an all-ones mask.  The expansion is
   fully parenthesized so the (__m512) cast covers the whole call, and the
   mask is cast to __mmask16 as in the inline definition.  */
#define _mm512_cvt_roundepu32_ps(A, B)   \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),   \
     (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))
5323
/* Macro form of _mm512_mask_cvt_roundepu32_ps: cvtudq2ps512 mask builtin on
   A with W as second operand and U as the mask.  The expansion is fully
   parenthesized so the (__m512) cast covers the whole call, and W and U get
   the same (__v16sf) / (__mmask16) casts as in the inline definition.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)   \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),   \
     (__v16sf)(W), (__mmask16)(U), (B)))
5326
/* Macro form of _mm512_maskz_cvt_roundepu32_ps: cvtudq2ps512 mask builtin on
   A with a zero vector as second operand and U as the mask.  The expansion
   is fully parenthesized so the (__m512) cast covers the whole call, and U
   is cast to __mmask16 as in the inline definition.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)   \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),   \
     (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (B)))
5329 #endif
5330
5331 #ifdef __OPTIMIZE__
5332 extern __inline __m256d
5333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5334 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5335 {
5336 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5337 __imm,
5338 (__v4df)
5339 _mm256_undefined_pd (),
5340 (__mmask8) -1);
5341 }
5342
5343 extern __inline __m256d
5344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5345 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5346 const int __imm)
5347 {
5348 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5349 __imm,
5350 (__v4df) __W,
5351 (__mmask8) __U);
5352 }
5353
5354 extern __inline __m256d
5355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5356 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5357 {
5358 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5359 __imm,
5360 (__v4df)
5361 _mm256_setzero_pd (),
5362 (__mmask8) __U);
5363 }
5364
5365 extern __inline __m128
5366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5367 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5368 {
5369 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5370 __imm,
5371 (__v4sf)
5372 _mm_undefined_ps (),
5373 (__mmask8) -1);
5374 }
5375
5376 extern __inline __m128
5377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5378 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5379 const int __imm)
5380 {
5381 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5382 __imm,
5383 (__v4sf) __W,
5384 (__mmask8) __U);
5385 }
5386
5387 extern __inline __m128
5388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5389 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5390 {
5391 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5392 __imm,
5393 (__v4sf)
5394 _mm_setzero_ps (),
5395 (__mmask8) __U);
5396 }
5397
5398 extern __inline __m256i
5399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5400 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5401 {
5402 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5403 __imm,
5404 (__v4di)
5405 _mm256_undefined_si256 (),
5406 (__mmask8) -1);
5407 }
5408
5409 extern __inline __m256i
5410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5411 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5412 const int __imm)
5413 {
5414 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5415 __imm,
5416 (__v4di) __W,
5417 (__mmask8) __U);
5418 }
5419
5420 extern __inline __m256i
5421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5422 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5423 {
5424 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5425 __imm,
5426 (__v4di)
5427 _mm256_setzero_si256 (),
5428 (__mmask8) __U);
5429 }
5430
5431 extern __inline __m128i
5432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5433 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5434 {
5435 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5436 __imm,
5437 (__v4si)
5438 _mm_undefined_si128 (),
5439 (__mmask8) -1);
5440 }
5441
5442 extern __inline __m128i
5443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5444 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5445 const int __imm)
5446 {
5447 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5448 __imm,
5449 (__v4si) __W,
5450 (__mmask8) __U);
5451 }
5452
5453 extern __inline __m128i
5454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5455 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5456 {
5457 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5458 __imm,
5459 (__v4si)
5460 _mm_setzero_si128 (),
5461 (__mmask8) __U);
5462 }
5463 #else
5464
/* Macro form of _mm512_extractf64x4_pd: extractf64x4 mask builtin on SRC
   with immediate IMM, an undefined __m256d as third operand and an
   all-ones __mmask8.  */
#define _mm512_extractf64x4_pd(SRC, IMM)                              \
  ((__m256d) __builtin_ia32_extractf64x4_mask (                       \
     (__v8df)(__m512d) (SRC), (int) (IMM),                            \
     (__v4df)(__m256d) _mm256_undefined_pd (), (__mmask8) -1))
5470
/* Macro form of _mm512_mask_extractf64x4_pd: extractf64x4 mask builtin on
   SRC with immediate IMM, OLD as third operand and MASK as the mask.  */
#define _mm512_mask_extractf64x4_pd(OLD, MASK, SRC, IMM)              \
  ((__m256d) __builtin_ia32_extractf64x4_mask (                       \
     (__v8df)(__m512d) (SRC), (int) (IMM),                            \
     (__v4df)(__m256d) (OLD), (__mmask8) (MASK)))
5476
/* Macro form of _mm512_maskz_extractf64x4_pd: extractf64x4 mask builtin on
   SRC with immediate IMM, a zero __m256d as third operand and MASK as the
   mask.  */
#define _mm512_maskz_extractf64x4_pd(MASK, SRC, IMM)                  \
  ((__m256d) __builtin_ia32_extractf64x4_mask (                       \
     (__v8df)(__m512d) (SRC), (int) (IMM),                            \
     (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) (MASK)))
5482
/* Macro form of _mm512_extractf32x4_ps: extractf32x4 mask builtin on SRC
   with immediate IMM, an undefined __m128 as third operand and an all-ones
   __mmask8.  */
#define _mm512_extractf32x4_ps(SRC, IMM)                              \
  ((__m128) __builtin_ia32_extractf32x4_mask (                        \
     (__v16sf)(__m512) (SRC), (int) (IMM),                            \
     (__v4sf)(__m128) _mm_undefined_ps (), (__mmask8) -1))
5488
/* Macro form of _mm512_mask_extractf32x4_ps: extractf32x4 mask builtin on
   SRC with immediate IMM, OLD as third operand and MASK as the mask.  */
#define _mm512_mask_extractf32x4_ps(OLD, MASK, SRC, IMM)              \
  ((__m128) __builtin_ia32_extractf32x4_mask (                        \
     (__v16sf)(__m512) (SRC), (int) (IMM),                            \
     (__v4sf)(__m128) (OLD), (__mmask8) (MASK)))
5494
/* Macro form of _mm512_maskz_extractf32x4_ps: extractf32x4 mask builtin on
   SRC with immediate IMM, a zero __m128 as third operand and MASK as the
   mask.  */
#define _mm512_maskz_extractf32x4_ps(MASK, SRC, IMM)                  \
  ((__m128) __builtin_ia32_extractf32x4_mask (                        \
     (__v16sf)(__m512) (SRC), (int) (IMM),                            \
     (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) (MASK)))
5500
/* Macro form of _mm512_extracti64x4_epi64: extracti64x4 mask builtin on SRC
   with immediate IMM, an undefined __m256i as third operand and an
   all-ones __mmask8.  */
#define _mm512_extracti64x4_epi64(SRC, IMM)                           \
  ((__m256i) __builtin_ia32_extracti64x4_mask (                       \
     (__v8di)(__m512i) (SRC), (int) (IMM),                            \
     (__v4di)(__m256i) _mm256_undefined_si256 (), (__mmask8) -1))
5506
/* Macro form of _mm512_mask_extracti64x4_epi64: extracti64x4 mask builtin
   on SRC with immediate IMM, OLD as third operand and MASK as the mask.  */
#define _mm512_mask_extracti64x4_epi64(OLD, MASK, SRC, IMM)           \
  ((__m256i) __builtin_ia32_extracti64x4_mask (                       \
     (__v8di)(__m512i) (SRC), (int) (IMM),                            \
     (__v4di)(__m256i) (OLD), (__mmask8) (MASK)))
5512
/* Macro form of _mm512_maskz_extracti64x4_epi64: extracti64x4 mask builtin
   on SRC with immediate IMM, a zero __m256i as third operand and MASK as
   the mask.  */
#define _mm512_maskz_extracti64x4_epi64(MASK, SRC, IMM)               \
  ((__m256i) __builtin_ia32_extracti64x4_mask (                       \
     (__v8di)(__m512i) (SRC), (int) (IMM),                            \
     (__v4di)(__m256i) _mm256_setzero_si256 (), (__mmask8) (MASK)))
5518
/* Macro form of _mm512_extracti32x4_epi32: extracti32x4 mask builtin on SRC
   with immediate IMM, an undefined __m128i as third operand and an
   all-ones __mmask8.  */
#define _mm512_extracti32x4_epi32(SRC, IMM)                           \
  ((__m128i) __builtin_ia32_extracti32x4_mask (                       \
     (__v16si)(__m512i) (SRC), (int) (IMM),                           \
     (__v4si)(__m128i) _mm_undefined_si128 (), (__mmask8) -1))
5524
/* Macro form of _mm512_mask_extracti32x4_epi32: extracti32x4 mask builtin
   on SRC with immediate IMM, OLD as third operand and MASK as the mask.  */
#define _mm512_mask_extracti32x4_epi32(OLD, MASK, SRC, IMM)           \
  ((__m128i) __builtin_ia32_extracti32x4_mask (                       \
     (__v16si)(__m512i) (SRC), (int) (IMM),                           \
     (__v4si)(__m128i) (OLD), (__mmask8) (MASK)))
5530
/* Macro form of _mm512_maskz_extracti32x4_epi32: extracti32x4 mask builtin
   on SRC with immediate IMM, a zero __m128i as third operand and MASK as
   the mask.  */
#define _mm512_maskz_extracti32x4_epi32(MASK, SRC, IMM)               \
  ((__m128i) __builtin_ia32_extracti32x4_mask (                       \
     (__v16si)(__m512i) (SRC), (int) (IMM),                           \
     (__v4si)(__m128i) _mm_setzero_si128 (), (__mmask8) (MASK)))
5536 #endif
5537
5538 #ifdef __OPTIMIZE__
5539 extern __inline __m512i
5540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5541 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5542 {
5543 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5544 (__v4si) __B,
5545 __imm,
5546 (__v16si) __A, -1);
5547 }
5548
5549 extern __inline __m512
5550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5552 {
5553 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5554 (__v4sf) __B,
5555 __imm,
5556 (__v16sf) __A, -1);
5557 }
5558
5559 extern __inline __m512i
5560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5561 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5562 {
5563 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5564 (__v4di) __B,
5565 __imm,
5566 (__v8di)
5567 _mm512_undefined_epi32 (),
5568 (__mmask8) -1);
5569 }
5570
5571 extern __inline __m512i
5572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5573 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5574 __m256i __B, const int __imm)
5575 {
5576 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5577 (__v4di) __B,
5578 __imm,
5579 (__v8di) __W,
5580 (__mmask8) __U);
5581 }
5582
5583 extern __inline __m512i
5584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5586 const int __imm)
5587 {
5588 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5589 (__v4di) __B,
5590 __imm,
5591 (__v8di)
5592 _mm512_setzero_si512 (),
5593 (__mmask8) __U);
5594 }
5595
5596 extern __inline __m512d
5597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5598 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5599 {
5600 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5601 (__v4df) __B,
5602 __imm,
5603 (__v8df)
5604 _mm512_undefined_pd (),
5605 (__mmask8) -1);
5606 }
5607
5608 extern __inline __m512d
5609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5610 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5611 __m256d __B, const int __imm)
5612 {
5613 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5614 (__v4df) __B,
5615 __imm,
5616 (__v8df) __W,
5617 (__mmask8) __U);
5618 }
5619
5620 extern __inline __m512d
5621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5622 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5623 const int __imm)
5624 {
5625 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5626 (__v4df) __B,
5627 __imm,
5628 (__v8df)
5629 _mm512_setzero_pd (),
5630 (__mmask8) __U);
5631 }
5632 #else
/* Macro form of _mm512_insertf32x4.  X is expanded exactly once: the fourth
   builtin operand is an undefined vector instead of a second copy of X, so
   an argument with side effects is no longer evaluated twice.  The mask is
   still all ones.
   NOTE(review): relies on the all-ones mask making the fourth operand
   irrelevant, as the _mm512_insertf64x4 macro already assumes -- confirm.  */
#define _mm512_insertf32x4(X, Y, C)                                   \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),   \
    (__v4sf)(__m128) (Y), (int) (C),                                  \
    (__v16sf)(__m512) _mm512_undefined_ps (), (__mmask16)(-1)))
5636
/* Macro form of _mm512_inserti32x4.  X is expanded exactly once: the fourth
   builtin operand is an undefined vector instead of a second copy of X, so
   an argument with side effects is no longer evaluated twice.  The mask is
   still all ones.
   NOTE(review): relies on the all-ones mask making the fourth operand
   irrelevant, as the _mm512_inserti64x4 macro already assumes -- confirm.  */
#define _mm512_inserti32x4(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C),                                 \
    (__v16si)(__m512i) _mm512_undefined_epi32 (), (__mmask16)(-1)))
5640
/* Macro form of _mm512_insertf64x4: insertf64x4 mask builtin on BASE and
   PART with immediate IMM, an undefined __m512d as fourth operand and an
   all-ones __mmask8.  */
#define _mm512_insertf64x4(BASE, PART, IMM)                           \
  ((__m512d) __builtin_ia32_insertf64x4_mask (                        \
     (__v8df)(__m512d) (BASE), (__v4df)(__m256d) (PART), (int) (IMM), \
     (__v8df)(__m512d) _mm512_undefined_pd (), (__mmask8) -1))
5646
/* Macro form of _mm512_mask_insertf64x4: insertf64x4 mask builtin on BASE
   and PART with immediate IMM, OLD as fourth operand and MASK as the
   mask.  */
#define _mm512_mask_insertf64x4(OLD, MASK, BASE, PART, IMM)           \
  ((__m512d) __builtin_ia32_insertf64x4_mask (                        \
     (__v8df)(__m512d) (BASE), (__v4df)(__m256d) (PART), (int) (IMM), \
     (__v8df)(__m512d) (OLD), (__mmask8) (MASK)))
5652
/* Macro form of _mm512_maskz_insertf64x4: insertf64x4 mask builtin on BASE
   and PART with immediate IMM, a zero __m512d as fourth operand and MASK as
   the mask.  */
#define _mm512_maskz_insertf64x4(MASK, BASE, PART, IMM)               \
  ((__m512d) __builtin_ia32_insertf64x4_mask (                        \
     (__v8df)(__m512d) (BASE), (__v4df)(__m256d) (PART), (int) (IMM), \
     (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8) (MASK)))
5658
/* Macro form of _mm512_inserti64x4: inserti64x4 mask builtin on BASE and
   PART with immediate IMM, an undefined 512-bit vector as fourth operand
   and an all-ones __mmask8.  */
#define _mm512_inserti64x4(BASE, PART, IMM)                           \
  ((__m512i) __builtin_ia32_inserti64x4_mask (                        \
     (__v8di)(__m512i) (BASE), (__v4di)(__m256i) (PART), (int) (IMM), \
     (__v8di)(__m512i) _mm512_undefined_epi32 (), (__mmask8) -1))
5664
/* Macro form of _mm512_mask_inserti64x4: inserti64x4 mask builtin on BASE
   and PART with immediate IMM, OLD as fourth operand and MASK as the
   mask.  */
#define _mm512_mask_inserti64x4(OLD, MASK, BASE, PART, IMM)           \
  ((__m512i) __builtin_ia32_inserti64x4_mask (                        \
     (__v8di)(__m512i) (BASE), (__v4di)(__m256i) (PART), (int) (IMM), \
     (__v8di)(__m512i) (OLD), (__mmask8) (MASK)))
5670
/* Macro form of _mm512_maskz_inserti64x4: inserti64x4 mask builtin on BASE
   and PART with immediate IMM, a zero 512-bit vector as fourth operand and
   MASK as the mask.  */
#define _mm512_maskz_inserti64x4(MASK, BASE, PART, IMM)               \
  ((__m512i) __builtin_ia32_inserti64x4_mask (                        \
     (__v8di)(__m512i) (BASE), (__v4di)(__m256i) (PART), (int) (IMM), \
     (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8) (MASK)))
5676 #endif
5677
5678 extern __inline __m512d
5679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5680 _mm512_loadu_pd (void const *__P)
5681 {
5682 return *(__m512d_u *)__P;
5683 }
5684
5685 extern __inline __m512d
5686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5688 {
5689 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
5690 (__v8df) __W,
5691 (__mmask8) __U);
5692 }
5693
5694 extern __inline __m512d
5695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5697 {
5698 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
5699 (__v8df)
5700 _mm512_setzero_pd (),
5701 (__mmask8) __U);
5702 }
5703
5704 extern __inline void
5705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5706 _mm512_storeu_pd (void *__P, __m512d __A)
5707 {
5708 *(__m512d_u *)__P = __A;
5709 }
5710
5711 extern __inline void
5712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5713 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5714 {
5715 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
5716 (__mmask8) __U);
5717 }
5718
5719 extern __inline __m512
5720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5721 _mm512_loadu_ps (void const *__P)
5722 {
5723 return *(__m512_u *)__P;
5724 }
5725
5726 extern __inline __m512
5727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5728 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5729 {
5730 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
5731 (__v16sf) __W,
5732 (__mmask16) __U);
5733 }
5734
5735 extern __inline __m512
5736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5738 {
5739 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
5740 (__v16sf)
5741 _mm512_setzero_ps (),
5742 (__mmask16) __U);
5743 }
5744
5745 extern __inline void
5746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747 _mm512_storeu_ps (void *__P, __m512 __A)
5748 {
5749 *(__m512_u *)__P = __A;
5750 }
5751
5752 extern __inline void
5753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5754 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5755 {
5756 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
5757 (__mmask16) __U);
5758 }
5759
5760 extern __inline __m512i
5761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5762 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5763 {
5764 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
5765 (__v8di) __W,
5766 (__mmask8) __U);
5767 }
5768
5769 extern __inline __m512i
5770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5772 {
5773 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
5774 (__v8di)
5775 _mm512_setzero_si512 (),
5776 (__mmask8) __U);
5777 }
5778
5779 extern __inline void
5780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5782 {
5783 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
5784 (__mmask8) __U);
5785 }
5786
5787 extern __inline __m512i
5788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5789 _mm512_loadu_si512 (void const *__P)
5790 {
5791 return *(__m512i_u *)__P;
5792 }
5793
5794 extern __inline __m512i
5795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5796 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5797 {
5798 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
5799 (__v16si) __W,
5800 (__mmask16) __U);
5801 }
5802
5803 extern __inline __m512i
5804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5805 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5806 {
5807 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
5808 (__v16si)
5809 _mm512_setzero_si512 (),
5810 (__mmask16) __U);
5811 }
5812
5813 extern __inline void
5814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5815 _mm512_storeu_si512 (void *__P, __m512i __A)
5816 {
5817 *(__m512i_u *)__P = __A;
5818 }
5819
5820 extern __inline void
5821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5822 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5823 {
5824 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
5825 (__mmask16) __U);
5826 }
5827
5828 extern __inline __m512d
5829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5830 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5831 {
5832 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5833 (__v8di) __C,
5834 (__v8df)
5835 _mm512_undefined_pd (),
5836 (__mmask8) -1);
5837 }
5838
5839 extern __inline __m512d
5840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5841 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5842 {
5843 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5844 (__v8di) __C,
5845 (__v8df) __W,
5846 (__mmask8) __U);
5847 }
5848
5849 extern __inline __m512d
5850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5851 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5852 {
5853 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5854 (__v8di) __C,
5855 (__v8df)
5856 _mm512_setzero_pd (),
5857 (__mmask8) __U);
5858 }
5859
5860 extern __inline __m512
5861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5862 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5863 {
5864 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5865 (__v16si) __C,
5866 (__v16sf)
5867 _mm512_undefined_ps (),
5868 (__mmask16) -1);
5869 }
5870
5871 extern __inline __m512
5872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5873 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5874 {
5875 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5876 (__v16si) __C,
5877 (__v16sf) __W,
5878 (__mmask16) __U);
5879 }
5880
5881 extern __inline __m512
5882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5883 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5884 {
5885 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5886 (__v16si) __C,
5887 (__v16sf)
5888 _mm512_setzero_ps (),
5889 (__mmask16) __U);
5890 }
5891
5892 extern __inline __m512i
5893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5894 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5895 {
5896 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5897 /* idx */ ,
5898 (__v8di) __A,
5899 (__v8di) __B,
5900 (__mmask8) -1);
5901 }
5902
5903 extern __inline __m512i
5904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5906 __m512i __B)
5907 {
5908 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5909 /* idx */ ,
5910 (__v8di) __A,
5911 (__v8di) __B,
5912 (__mmask8) __U);
5913 }
5914
5915 extern __inline __m512i
5916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5917 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5918 __mmask8 __U, __m512i __B)
5919 {
5920 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5921 (__v8di) __I
5922 /* idx */ ,
5923 (__v8di) __B,
5924 (__mmask8) __U);
5925 }
5926
5927 extern __inline __m512i
5928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5929 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5930 __m512i __I, __m512i __B)
5931 {
5932 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5933 /* idx */ ,
5934 (__v8di) __A,
5935 (__v8di) __B,
5936 (__mmask8) __U);
5937 }
5938
5939 extern __inline __m512i
5940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5941 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5942 {
5943 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5944 /* idx */ ,
5945 (__v16si) __A,
5946 (__v16si) __B,
5947 (__mmask16) -1);
5948 }
5949
5950 extern __inline __m512i
5951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5952 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5953 __m512i __I, __m512i __B)
5954 {
5955 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5956 /* idx */ ,
5957 (__v16si) __A,
5958 (__v16si) __B,
5959 (__mmask16) __U);
5960 }
5961
5962 extern __inline __m512i
5963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5964 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5965 __mmask16 __U, __m512i __B)
5966 {
5967 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5968 (__v16si) __I
5969 /* idx */ ,
5970 (__v16si) __B,
5971 (__mmask16) __U);
5972 }
5973
5974 extern __inline __m512i
5975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5976 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5977 __m512i __I, __m512i __B)
5978 {
5979 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5980 /* idx */ ,
5981 (__v16si) __A,
5982 (__v16si) __B,
5983 (__mmask16) __U);
5984 }
5985
5986 extern __inline __m512d
5987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5988 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5989 {
5990 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5991 /* idx */ ,
5992 (__v8df) __A,
5993 (__v8df) __B,
5994 (__mmask8) -1);
5995 }
5996
5997 extern __inline __m512d
5998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5999 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6000 __m512d __B)
6001 {
6002 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6003 /* idx */ ,
6004 (__v8df) __A,
6005 (__v8df) __B,
6006 (__mmask8) __U);
6007 }
6008
6009 extern __inline __m512d
6010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6011 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6012 __m512d __B)
6013 {
6014 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6015 (__v8di) __I
6016 /* idx */ ,
6017 (__v8df) __B,
6018 (__mmask8) __U);
6019 }
6020
6021 extern __inline __m512d
6022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6023 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6024 __m512d __B)
6025 {
6026 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6027 /* idx */ ,
6028 (__v8df) __A,
6029 (__v8df) __B,
6030 (__mmask8) __U);
6031 }
6032
6033 extern __inline __m512
6034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6035 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6036 {
6037 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6038 /* idx */ ,
6039 (__v16sf) __A,
6040 (__v16sf) __B,
6041 (__mmask16) -1);
6042 }
6043
6044 extern __inline __m512
6045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6046 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6047 {
6048 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6049 /* idx */ ,
6050 (__v16sf) __A,
6051 (__v16sf) __B,
6052 (__mmask16) __U);
6053 }
6054
6055 extern __inline __m512
6056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6057 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6058 __m512 __B)
6059 {
6060 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6061 (__v16si) __I
6062 /* idx */ ,
6063 (__v16sf) __B,
6064 (__mmask16) __U);
6065 }
6066
6067 extern __inline __m512
6068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6069 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6070 __m512 __B)
6071 {
6072 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6073 /* idx */ ,
6074 (__v16sf) __A,
6075 (__v16sf) __B,
6076 (__mmask16) __U);
6077 }
6078
6079 #ifdef __OPTIMIZE__
6080 extern __inline __m512d
6081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6082 _mm512_permute_pd (__m512d __X, const int __C)
6083 {
6084 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6085 (__v8df)
6086 _mm512_undefined_pd (),
6087 (__mmask8) -1);
6088 }
6089
6090 extern __inline __m512d
6091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6093 {
6094 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6095 (__v8df) __W,
6096 (__mmask8) __U);
6097 }
6098
6099 extern __inline __m512d
6100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6101 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6102 {
6103 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6104 (__v8df)
6105 _mm512_setzero_pd (),
6106 (__mmask8) __U);
6107 }
6108
6109 extern __inline __m512
6110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6111 _mm512_permute_ps (__m512 __X, const int __C)
6112 {
6113 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6114 (__v16sf)
6115 _mm512_undefined_ps (),
6116 (__mmask16) -1);
6117 }
6118
6119 extern __inline __m512
6120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6121 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6122 {
6123 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6124 (__v16sf) __W,
6125 (__mmask16) __U);
6126 }
6127
6128 extern __inline __m512
6129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6130 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6131 {
6132 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6133 (__v16sf)
6134 _mm512_setzero_ps (),
6135 (__mmask16) __U);
6136 }
6137 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilpd512_mask with _mm512_undefined_pd () and an
   all-ones mask.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)(-1)))
6142
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilpd512_mask with W as third operand and mask U.  */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6147
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilpd512_mask with _mm512_setzero_pd () as third
   operand and mask U.  */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6152
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilps512_mask with _mm512_undefined_ps () and an
   all-ones mask.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)(-1)))
6157
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilps512_mask with W as third operand and mask U.  */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
6162
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilps512_mask with _mm512_setzero_ps () as third
   operand and mask U.  */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
6167 #endif
6168
6169 #ifdef __OPTIMIZE__
6170 extern __inline __m512i
6171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172 _mm512_permutex_epi64 (__m512i __X, const int __I)
6173 {
6174 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6175 (__v8di)
6176 _mm512_undefined_epi32 (),
6177 (__mmask8) (-1));
6178 }
6179
6180 extern __inline __m512i
6181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6183 __m512i __X, const int __I)
6184 {
6185 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6186 (__v8di) __W,
6187 (__mmask8) __M);
6188 }
6189
6190 extern __inline __m512i
6191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6192 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6193 {
6194 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6195 (__v8di)
6196 _mm512_setzero_si512 (),
6197 (__mmask8) __M);
6198 }
6199
6200 extern __inline __m512d
6201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6202 _mm512_permutex_pd (__m512d __X, const int __M)
6203 {
6204 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6205 (__v8df)
6206 _mm512_undefined_pd (),
6207 (__mmask8) -1);
6208 }
6209
6210 extern __inline __m512d
6211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6212 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6213 {
6214 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6215 (__v8df) __W,
6216 (__mmask8) __U);
6217 }
6218
6219 extern __inline __m512d
6220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6221 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6222 {
6223 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6224 (__v8df)
6225 _mm512_setzero_pd (),
6226 (__mmask8) __U);
6227 }
6228 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdf512_mask with _mm512_undefined_pd () and an
   all-ones mask.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))
6233
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdf512_mask with W as third operand and mask U.  */
#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6237
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdf512_mask with _mm512_setzero_pd () as third
   operand and mask U.  */
#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6242
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdi512_mask with _mm512_undefined_epi32 () and an
   all-ones mask.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i) (_mm512_undefined_epi32 ()), \
     (__mmask8)(-1)))
6249
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdi512_mask with _mm512_setzero_si512 () as third
   operand and mask M.  */
#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i) (_mm512_setzero_si512 ()), \
     (__mmask8)(M)))
6256
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdi512_mask with W as third operand and mask M.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(M)))
6262 #endif
6263
6264 extern __inline __m512i
6265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6266 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6267 {
6268 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6269 (__v8di) __X,
6270 (__v8di)
6271 _mm512_setzero_si512 (),
6272 __M);
6273 }
6274
6275 extern __inline __m512i
6276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6277 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6278 {
6279 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6280 (__v8di) __X,
6281 (__v8di)
6282 _mm512_undefined_epi32 (),
6283 (__mmask8) -1);
6284 }
6285
6286 extern __inline __m512i
6287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6288 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6289 __m512i __Y)
6290 {
6291 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6292 (__v8di) __X,
6293 (__v8di) __W,
6294 __M);
6295 }
6296
6297 extern __inline __m512i
6298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6299 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6300 {
6301 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6302 (__v16si) __X,
6303 (__v16si)
6304 _mm512_setzero_si512 (),
6305 __M);
6306 }
6307
6308 extern __inline __m512i
6309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6310 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6311 {
6312 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6313 (__v16si) __X,
6314 (__v16si)
6315 _mm512_undefined_epi32 (),
6316 (__mmask16) -1);
6317 }
6318
6319 extern __inline __m512i
6320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6321 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6322 __m512i __Y)
6323 {
6324 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6325 (__v16si) __X,
6326 (__v16si) __W,
6327 __M);
6328 }
6329
6330 extern __inline __m512d
6331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6332 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6333 {
6334 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6335 (__v8di) __X,
6336 (__v8df)
6337 _mm512_undefined_pd (),
6338 (__mmask8) -1);
6339 }
6340
6341 extern __inline __m512d
6342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6343 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6344 {
6345 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6346 (__v8di) __X,
6347 (__v8df) __W,
6348 (__mmask8) __U);
6349 }
6350
6351 extern __inline __m512d
6352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6353 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6354 {
6355 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6356 (__v8di) __X,
6357 (__v8df)
6358 _mm512_setzero_pd (),
6359 (__mmask8) __U);
6360 }
6361
6362 extern __inline __m512
6363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6365 {
6366 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6367 (__v16si) __X,
6368 (__v16sf)
6369 _mm512_undefined_ps (),
6370 (__mmask16) -1);
6371 }
6372
6373 extern __inline __m512
6374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6375 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6376 {
6377 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6378 (__v16si) __X,
6379 (__v16sf) __W,
6380 (__mmask16) __U);
6381 }
6382
6383 extern __inline __m512
6384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6385 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6386 {
6387 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6388 (__v16si) __X,
6389 (__v16sf)
6390 _mm512_setzero_ps (),
6391 (__mmask16) __U);
6392 }
6393
6394 #ifdef __OPTIMIZE__
6395 extern __inline __m512
6396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6397 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6398 {
6399 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6400 (__v16sf) __V, __imm,
6401 (__v16sf)
6402 _mm512_undefined_ps (),
6403 (__mmask16) -1);
6404 }
6405
6406 extern __inline __m512
6407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6408 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6409 __m512 __V, const int __imm)
6410 {
6411 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6412 (__v16sf) __V, __imm,
6413 (__v16sf) __W,
6414 (__mmask16) __U);
6415 }
6416
6417 extern __inline __m512
6418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6419 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6420 {
6421 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6422 (__v16sf) __V, __imm,
6423 (__v16sf)
6424 _mm512_setzero_ps (),
6425 (__mmask16) __U);
6426 }
6427
6428 extern __inline __m512d
6429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6430 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6431 {
6432 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6433 (__v8df) __V, __imm,
6434 (__v8df)
6435 _mm512_undefined_pd (),
6436 (__mmask8) -1);
6437 }
6438
6439 extern __inline __m512d
6440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6441 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6442 __m512d __V, const int __imm)
6443 {
6444 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6445 (__v8df) __V, __imm,
6446 (__v8df) __W,
6447 (__mmask8) __U);
6448 }
6449
6450 extern __inline __m512d
6451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6452 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6453 const int __imm)
6454 {
6455 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6456 (__v8df) __V, __imm,
6457 (__v8df)
6458 _mm512_setzero_pd (),
6459 (__mmask8) __U);
6460 }
6461
6462 extern __inline __m512d
6463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6464 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6465 const int __imm, const int __R)
6466 {
6467 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6468 (__v8df) __B,
6469 (__v8di) __C,
6470 __imm,
6471 (__mmask8) -1, __R);
6472 }
6473
6474 extern __inline __m512d
6475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6476 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6477 __m512i __C, const int __imm, const int __R)
6478 {
6479 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6480 (__v8df) __B,
6481 (__v8di) __C,
6482 __imm,
6483 (__mmask8) __U, __R);
6484 }
6485
6486 extern __inline __m512d
6487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6488 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6489 __m512i __C, const int __imm, const int __R)
6490 {
6491 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6492 (__v8df) __B,
6493 (__v8di) __C,
6494 __imm,
6495 (__mmask8) __U, __R);
6496 }
6497
6498 extern __inline __m512
6499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6500 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6501 const int __imm, const int __R)
6502 {
6503 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6504 (__v16sf) __B,
6505 (__v16si) __C,
6506 __imm,
6507 (__mmask16) -1, __R);
6508 }
6509
6510 extern __inline __m512
6511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6512 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6513 __m512i __C, const int __imm, const int __R)
6514 {
6515 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6516 (__v16sf) __B,
6517 (__v16si) __C,
6518 __imm,
6519 (__mmask16) __U, __R);
6520 }
6521
6522 extern __inline __m512
6523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6524 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6525 __m512i __C, const int __imm, const int __R)
6526 {
6527 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6528 (__v16sf) __B,
6529 (__v16si) __C,
6530 __imm,
6531 (__mmask16) __U, __R);
6532 }
6533
6534 extern __inline __m128d
6535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6536 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6537 const int __imm, const int __R)
6538 {
6539 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6540 (__v2df) __B,
6541 (__v2di) __C, __imm,
6542 (__mmask8) -1, __R);
6543 }
6544
6545 extern __inline __m128d
6546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6547 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6548 __m128i __C, const int __imm, const int __R)
6549 {
6550 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6551 (__v2df) __B,
6552 (__v2di) __C, __imm,
6553 (__mmask8) __U, __R);
6554 }
6555
6556 extern __inline __m128d
6557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6558 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6559 __m128i __C, const int __imm, const int __R)
6560 {
6561 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6562 (__v2df) __B,
6563 (__v2di) __C,
6564 __imm,
6565 (__mmask8) __U, __R);
6566 }
6567
6568 extern __inline __m128
6569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6570 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6571 const int __imm, const int __R)
6572 {
6573 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6574 (__v4sf) __B,
6575 (__v4si) __C, __imm,
6576 (__mmask8) -1, __R);
6577 }
6578
6579 extern __inline __m128
6580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6582 __m128i __C, const int __imm, const int __R)
6583 {
6584 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6585 (__v4sf) __B,
6586 (__v4si) __C, __imm,
6587 (__mmask8) __U, __R);
6588 }
6589
6590 extern __inline __m128
6591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6592 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6593 __m128i __C, const int __imm, const int __R)
6594 {
6595 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6596 (__v4sf) __B,
6597 (__v4si) __C, __imm,
6598 (__mmask8) __U, __R);
6599 }
6600
6601 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufpd512_mask with _mm512_undefined_pd () and an
   all-ones mask.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))
6607
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufpd512_mask with W as fourth operand and mask U.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6613
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufpd512_mask with _mm512_setzero_pd () as fourth
   operand and mask U.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6619
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufps512_mask with _mm512_undefined_ps () and an
   all-ones mask.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)-1))
6625
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufps512_mask with W as fourth operand and mask U.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
6631
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufps512_mask with _mm512_setzero_ps () as fourth
   operand and mask U.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
6637
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_mask with an all-ones mask and R.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     (R)))
6642
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_mask with mask U and R.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
6647
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_maskz with mask U and R.  */
#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
6652
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmps512_mask with an all-ones mask and R.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(-1), \
     (R)))
6657
6658 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6659 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6660 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6661 (__mmask16)(U), (R)))
6662
6663 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6664 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6665 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6666 (__mmask16)(U), (R)))
6667
/* fixupimm_round_sd macros.  The unmasked and mask forms expand to
   __builtin_ia32_fixupimmsd_mask (all-ones mask and U respectively); the
   maskz form expands to __builtin_ia32_fixupimmsd_maskz.  X and Y are
   passed as __v2df, Z as __v2di, C as int and R unchanged.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(-1), \
      (R)))

#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      (R)))

#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      (R)))
6682
/* fixupimm_round_ss macros.  The unmasked and mask forms expand to
   __builtin_ia32_fixupimmss_mask (all-ones mask and U respectively); the
   maskz form expands to __builtin_ia32_fixupimmss_maskz.  X and Y are
   passed as __v4sf, Z as __v4si, C as int and R unchanged.  */
#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(-1), \
      (R)))

#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      (R)))

#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      (R)))
6697 #endif
6698
6699 extern __inline __m512
6700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6701 _mm512_movehdup_ps (__m512 __A)
6702 {
6703 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6704 (__v16sf)
6705 _mm512_undefined_ps (),
6706 (__mmask16) -1);
6707 }
6708
6709 extern __inline __m512
6710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6711 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6712 {
6713 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6714 (__v16sf) __W,
6715 (__mmask16) __U);
6716 }
6717
6718 extern __inline __m512
6719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6720 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6721 {
6722 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6723 (__v16sf)
6724 _mm512_setzero_ps (),
6725 (__mmask16) __U);
6726 }
6727
6728 extern __inline __m512
6729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6730 _mm512_moveldup_ps (__m512 __A)
6731 {
6732 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6733 (__v16sf)
6734 _mm512_undefined_ps (),
6735 (__mmask16) -1);
6736 }
6737
6738 extern __inline __m512
6739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6740 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6741 {
6742 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6743 (__v16sf) __W,
6744 (__mmask16) __U);
6745 }
6746
6747 extern __inline __m512
6748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6749 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6750 {
6751 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6752 (__v16sf)
6753 _mm512_setzero_ps (),
6754 (__mmask16) __U);
6755 }
6756
6757 extern __inline __m512i
6758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6759 _mm512_or_si512 (__m512i __A, __m512i __B)
6760 {
6761 return (__m512i) ((__v16su) __A | (__v16su) __B);
6762 }
6763
6764 extern __inline __m512i
6765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6766 _mm512_or_epi32 (__m512i __A, __m512i __B)
6767 {
6768 return (__m512i) ((__v16su) __A | (__v16su) __B);
6769 }
6770
6771 extern __inline __m512i
6772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6773 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6774 {
6775 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6776 (__v16si) __B,
6777 (__v16si) __W,
6778 (__mmask16) __U);
6779 }
6780
6781 extern __inline __m512i
6782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6783 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6784 {
6785 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6786 (__v16si) __B,
6787 (__v16si)
6788 _mm512_setzero_si512 (),
6789 (__mmask16) __U);
6790 }
6791
6792 extern __inline __m512i
6793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6794 _mm512_or_epi64 (__m512i __A, __m512i __B)
6795 {
6796 return (__m512i) ((__v8du) __A | (__v8du) __B);
6797 }
6798
6799 extern __inline __m512i
6800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6802 {
6803 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6804 (__v8di) __B,
6805 (__v8di) __W,
6806 (__mmask8) __U);
6807 }
6808
6809 extern __inline __m512i
6810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6811 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6812 {
6813 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6814 (__v8di) __B,
6815 (__v8di)
6816 _mm512_setzero_si512 (),
6817 (__mmask8) __U);
6818 }
6819
6820 extern __inline __m512i
6821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6822 _mm512_xor_si512 (__m512i __A, __m512i __B)
6823 {
6824 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6825 }
6826
6827 extern __inline __m512i
6828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6829 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6830 {
6831 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6832 }
6833
6834 extern __inline __m512i
6835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6836 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6837 {
6838 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6839 (__v16si) __B,
6840 (__v16si) __W,
6841 (__mmask16) __U);
6842 }
6843
6844 extern __inline __m512i
6845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6846 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6847 {
6848 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6849 (__v16si) __B,
6850 (__v16si)
6851 _mm512_setzero_si512 (),
6852 (__mmask16) __U);
6853 }
6854
6855 extern __inline __m512i
6856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6857 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6858 {
6859 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6860 }
6861
6862 extern __inline __m512i
6863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6864 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6865 {
6866 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6867 (__v8di) __B,
6868 (__v8di) __W,
6869 (__mmask8) __U);
6870 }
6871
6872 extern __inline __m512i
6873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6874 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6875 {
6876 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6877 (__v8di) __B,
6878 (__v8di)
6879 _mm512_setzero_si512 (),
6880 (__mmask8) __U);
6881 }
6882
6883 #ifdef __OPTIMIZE__
6884 extern __inline __m512i
6885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6886 _mm512_rol_epi32 (__m512i __A, const int __B)
6887 {
6888 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6889 (__v16si)
6890 _mm512_undefined_epi32 (),
6891 (__mmask16) -1);
6892 }
6893
6894 extern __inline __m512i
6895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6896 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6897 {
6898 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6899 (__v16si) __W,
6900 (__mmask16) __U);
6901 }
6902
6903 extern __inline __m512i
6904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6905 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6906 {
6907 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6908 (__v16si)
6909 _mm512_setzero_si512 (),
6910 (__mmask16) __U);
6911 }
6912
6913 extern __inline __m512i
6914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6915 _mm512_ror_epi32 (__m512i __A, int __B)
6916 {
6917 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6918 (__v16si)
6919 _mm512_undefined_epi32 (),
6920 (__mmask16) -1);
6921 }
6922
6923 extern __inline __m512i
6924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6925 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6926 {
6927 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6928 (__v16si) __W,
6929 (__mmask16) __U);
6930 }
6931
6932 extern __inline __m512i
6933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6934 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6935 {
6936 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6937 (__v16si)
6938 _mm512_setzero_si512 (),
6939 (__mmask16) __U);
6940 }
6941
6942 extern __inline __m512i
6943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944 _mm512_rol_epi64 (__m512i __A, const int __B)
6945 {
6946 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6947 (__v8di)
6948 _mm512_undefined_epi32 (),
6949 (__mmask8) -1);
6950 }
6951
6952 extern __inline __m512i
6953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6954 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6955 {
6956 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6957 (__v8di) __W,
6958 (__mmask8) __U);
6959 }
6960
6961 extern __inline __m512i
6962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6963 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6964 {
6965 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6966 (__v8di)
6967 _mm512_setzero_si512 (),
6968 (__mmask8) __U);
6969 }
6970
6971 extern __inline __m512i
6972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6973 _mm512_ror_epi64 (__m512i __A, int __B)
6974 {
6975 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6976 (__v8di)
6977 _mm512_undefined_epi32 (),
6978 (__mmask8) -1);
6979 }
6980
6981 extern __inline __m512i
6982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6983 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6984 {
6985 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6986 (__v8di) __W,
6987 (__mmask8) __U);
6988 }
6989
6990 extern __inline __m512i
6991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6992 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6993 {
6994 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6995 (__v8di)
6996 _mm512_setzero_si512 (),
6997 (__mmask8) __U);
6998 }
6999
7000 #else
/* Macro forms of the rol_epi32 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to __builtin_ia32_prold512_mask with A as
   __v16si and B as int; the third operand is an undefined vector, W, or a
   zero vector, and the mask is all ones or U.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)_mm512_undefined_epi32 (), \
      (__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)))
#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)_mm512_setzero_si512 (), \
      (__mmask16)(U)))
/* Macro forms of the ror_epi32 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to __builtin_ia32_prord512_mask with A as
   __v16si and B as int; the third operand is an undefined vector, W, or a
   zero vector, and the mask is all ones or U.  */
#define _mm512_ror_epi32(A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)_mm512_undefined_epi32 (), \
      (__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)))
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)_mm512_setzero_si512 (), \
      (__mmask16)(U)))
/* Macro forms of the rol_epi64 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to __builtin_ia32_prolq512_mask with A as
   __v8di and B as int; the third operand is an undefined vector, W, or a
   zero vector, and the mask is all ones or U.  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)_mm512_undefined_epi32 (), \
      (__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)))
#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)_mm512_setzero_si512 (), \
      (__mmask8)(U)))
7046
/* Macro forms of the ror_epi64 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to __builtin_ia32_prorq512_mask with A as
   __v8di and B as int; the third operand is an undefined vector, W, or a
   zero vector, and the mask is all ones or U.  */
#define _mm512_ror_epi64(A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)_mm512_undefined_epi32 (), \
      (__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)))
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)_mm512_setzero_si512 (), \
      (__mmask8)(U)))
7062 #endif
7063
7064 extern __inline __m512i
7065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7066 _mm512_and_si512 (__m512i __A, __m512i __B)
7067 {
7068 return (__m512i) ((__v16su) __A & (__v16su) __B);
7069 }
7070
7071 extern __inline __m512i
7072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7073 _mm512_and_epi32 (__m512i __A, __m512i __B)
7074 {
7075 return (__m512i) ((__v16su) __A & (__v16su) __B);
7076 }
7077
7078 extern __inline __m512i
7079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7080 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7081 {
7082 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7083 (__v16si) __B,
7084 (__v16si) __W,
7085 (__mmask16) __U);
7086 }
7087
7088 extern __inline __m512i
7089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7090 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7091 {
7092 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7093 (__v16si) __B,
7094 (__v16si)
7095 _mm512_setzero_si512 (),
7096 (__mmask16) __U);
7097 }
7098
7099 extern __inline __m512i
7100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7101 _mm512_and_epi64 (__m512i __A, __m512i __B)
7102 {
7103 return (__m512i) ((__v8du) __A & (__v8du) __B);
7104 }
7105
7106 extern __inline __m512i
7107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7108 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7109 {
7110 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7111 (__v8di) __B,
7112 (__v8di) __W, __U);
7113 }
7114
7115 extern __inline __m512i
7116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7117 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7118 {
7119 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7120 (__v8di) __B,
7121 (__v8di)
7122 _mm512_setzero_pd (),
7123 __U);
7124 }
7125
7126 extern __inline __m512i
7127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7128 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7129 {
7130 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7131 (__v16si) __B,
7132 (__v16si)
7133 _mm512_undefined_epi32 (),
7134 (__mmask16) -1);
7135 }
7136
7137 extern __inline __m512i
7138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7139 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7140 {
7141 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7142 (__v16si) __B,
7143 (__v16si)
7144 _mm512_undefined_epi32 (),
7145 (__mmask16) -1);
7146 }
7147
7148 extern __inline __m512i
7149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7150 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7151 {
7152 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7153 (__v16si) __B,
7154 (__v16si) __W,
7155 (__mmask16) __U);
7156 }
7157
7158 extern __inline __m512i
7159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7160 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7161 {
7162 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7163 (__v16si) __B,
7164 (__v16si)
7165 _mm512_setzero_si512 (),
7166 (__mmask16) __U);
7167 }
7168
7169 extern __inline __m512i
7170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7171 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7172 {
7173 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7174 (__v8di) __B,
7175 (__v8di)
7176 _mm512_undefined_epi32 (),
7177 (__mmask8) -1);
7178 }
7179
7180 extern __inline __m512i
7181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7182 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7183 {
7184 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7185 (__v8di) __B,
7186 (__v8di) __W, __U);
7187 }
7188
7189 extern __inline __m512i
7190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7191 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7192 {
7193 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7194 (__v8di) __B,
7195 (__v8di)
7196 _mm512_setzero_pd (),
7197 __U);
7198 }
7199
7200 extern __inline __mmask16
7201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7202 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7203 {
7204 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7205 (__v16si) __B,
7206 (__mmask16) -1);
7207 }
7208
7209 extern __inline __mmask16
7210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7211 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7212 {
7213 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7214 (__v16si) __B, __U);
7215 }
7216
7217 extern __inline __mmask8
7218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7220 {
7221 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7222 (__v8di) __B,
7223 (__mmask8) -1);
7224 }
7225
7226 extern __inline __mmask8
7227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7228 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7229 {
7230 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7231 }
7232
7233 extern __inline __mmask16
7234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7236 {
7237 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7238 (__v16si) __B,
7239 (__mmask16) -1);
7240 }
7241
7242 extern __inline __mmask16
7243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7244 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7245 {
7246 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7247 (__v16si) __B, __U);
7248 }
7249
7250 extern __inline __mmask8
7251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7252 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7253 {
7254 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7255 (__v8di) __B,
7256 (__mmask8) -1);
7257 }
7258
7259 extern __inline __mmask8
7260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7261 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7262 {
7263 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7264 (__v8di) __B, __U);
7265 }
7266
7267 extern __inline __m512i
7268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7269 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7270 {
7271 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7272 (__v16si) __B,
7273 (__v16si)
7274 _mm512_undefined_epi32 (),
7275 (__mmask16) -1);
7276 }
7277
7278 extern __inline __m512i
7279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7280 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7281 __m512i __B)
7282 {
7283 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7284 (__v16si) __B,
7285 (__v16si) __W,
7286 (__mmask16) __U);
7287 }
7288
7289 extern __inline __m512i
7290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7291 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7292 {
7293 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7294 (__v16si) __B,
7295 (__v16si)
7296 _mm512_setzero_si512 (),
7297 (__mmask16) __U);
7298 }
7299
7300 extern __inline __m512i
7301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7302 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7303 {
7304 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7305 (__v8di) __B,
7306 (__v8di)
7307 _mm512_undefined_epi32 (),
7308 (__mmask8) -1);
7309 }
7310
7311 extern __inline __m512i
7312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7313 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7314 {
7315 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7316 (__v8di) __B,
7317 (__v8di) __W,
7318 (__mmask8) __U);
7319 }
7320
7321 extern __inline __m512i
7322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7323 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7324 {
7325 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7326 (__v8di) __B,
7327 (__v8di)
7328 _mm512_setzero_si512 (),
7329 (__mmask8) __U);
7330 }
7331
7332 extern __inline __m512i
7333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7334 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7335 {
7336 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7337 (__v16si) __B,
7338 (__v16si)
7339 _mm512_undefined_epi32 (),
7340 (__mmask16) -1);
7341 }
7342
7343 extern __inline __m512i
7344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7345 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7346 __m512i __B)
7347 {
7348 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7349 (__v16si) __B,
7350 (__v16si) __W,
7351 (__mmask16) __U);
7352 }
7353
7354 extern __inline __m512i
7355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7356 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7357 {
7358 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7359 (__v16si) __B,
7360 (__v16si)
7361 _mm512_setzero_si512 (),
7362 (__mmask16) __U);
7363 }
7364
7365 extern __inline __m512i
7366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7367 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7368 {
7369 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7370 (__v8di) __B,
7371 (__v8di)
7372 _mm512_undefined_epi32 (),
7373 (__mmask8) -1);
7374 }
7375
7376 extern __inline __m512i
7377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7378 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7379 {
7380 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7381 (__v8di) __B,
7382 (__v8di) __W,
7383 (__mmask8) __U);
7384 }
7385
7386 extern __inline __m512i
7387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7388 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7389 {
7390 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7391 (__v8di) __B,
7392 (__v8di)
7393 _mm512_setzero_si512 (),
7394 (__mmask8) __U);
7395 }
7396
7397 #ifdef __x86_64__
7398 #ifdef __OPTIMIZE__
7399 extern __inline unsigned long long
7400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7401 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7402 {
7403 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7404 }
7405
7406 extern __inline long long
7407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7408 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7409 {
7410 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7411 }
7412
7413 extern __inline long long
7414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7415 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7416 {
7417 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7418 }
7419
7420 extern __inline unsigned long long
7421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7422 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7423 {
7424 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7425 }
7426
7427 extern __inline long long
7428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7429 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7430 {
7431 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7432 }
7433
7434 extern __inline long long
7435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7436 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7437 {
7438 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7439 }
7440 #else
/* Macro forms of the roundss 64-bit conversions, used on x86-64 when
   __OPTIMIZE__ is not defined.  Each argument is parenthesized and cast
   (A to __v4sf via __m128, B to int) so the expansions apply the same
   operand types as the inline definitions and follow the convention of the
   other macros in this file.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvtss2usi64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvttss2usi64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))
7458 #endif
7459 #endif
7460
7461 #ifdef __OPTIMIZE__
7462 extern __inline unsigned
7463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7464 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7465 {
7466 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7467 }
7468
7469 extern __inline int
7470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7471 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7472 {
7473 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7474 }
7475
7476 extern __inline int
7477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7478 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7479 {
7480 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7481 }
7482
7483 extern __inline unsigned
7484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7485 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7486 {
7487 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7488 }
7489
7490 extern __inline int
7491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7492 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7493 {
7494 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7495 }
7496
7497 extern __inline int
7498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7499 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7500 {
7501 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7502 }
7503 #else
/* Macro forms of the roundss 32-bit conversions, used when __OPTIMIZE__ is
   not defined.  Each argument is parenthesized and cast (A to __v4sf via
   __m128, B to int) so the expansions apply the same operand types as the
   inline definitions and follow the convention of the other macros in this
   file.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))
7521 #endif
7522
7523 #ifdef __x86_64__
7524 #ifdef __OPTIMIZE__
7525 extern __inline unsigned long long
7526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7527 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7528 {
7529 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7530 }
7531
7532 extern __inline long long
7533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7534 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7535 {
7536 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7537 }
7538
7539 extern __inline long long
7540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7541 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7542 {
7543 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7544 }
7545
7546 extern __inline unsigned long long
7547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7548 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7549 {
7550 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7551 }
7552
7553 extern __inline long long
7554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7555 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7556 {
7557 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7558 }
7559
7560 extern __inline long long
7561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7562 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7563 {
7564 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7565 }
7566 #else
/* Macro forms of the roundsd 64-bit conversions, used on x86-64 when
   __OPTIMIZE__ is not defined.  Each argument is parenthesized and cast
   (A to __v2df via __m128d, B to int) so the expansions apply the same
   operand types as the inline definitions and follow the convention of the
   other macros in this file.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvtsd2usi64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvttsd2usi64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((__v2df)(__m128d)(A), (int)(B)))
7584 #endif
7585 #endif
7586
7587 #ifdef __OPTIMIZE__
7588 extern __inline unsigned
7589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7590 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7591 {
7592 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7593 }
7594
7595 extern __inline int
7596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7598 {
7599 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7600 }
7601
7602 extern __inline int
7603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7604 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7605 {
7606 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7607 }
7608
7609 extern __inline unsigned
7610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7611 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7612 {
7613 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7614 }
7615
7616 extern __inline int
7617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7618 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7619 {
7620 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7621 }
7622
7623 extern __inline int
7624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7625 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7626 {
7627 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7628 }
7629 #else
/* Macro forms of the 32-bit scalar conversions, used when
   __OPTIMIZE__ is not defined.  Operands are forwarded to the builtin
   as written; only the result is cast.  */

/* Non-truncating conversions (vcvtsd2usi32 / vcvtsd2si32).  */
#define _mm_cvt_roundsd_u32(V, R) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 (V, R))

#define _mm_cvt_roundsd_si32(V, R) \
  ((int) __builtin_ia32_vcvtsd2si32 (V, R))

#define _mm_cvt_roundsd_i32(V, R) \
  ((int) __builtin_ia32_vcvtsd2si32 (V, R))

/* "cvtt" conversions (vcvttsd2usi32 / vcvttsd2si32).  */
#define _mm_cvtt_roundsd_u32(V, R) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 (V, R))

#define _mm_cvtt_roundsd_si32(V, R) \
  ((int) __builtin_ia32_vcvttsd2si32 (V, R))

#define _mm_cvtt_roundsd_i32(V, R) \
  ((int) __builtin_ia32_vcvttsd2si32 (V, R))
7647 #endif
7648
7649 extern __inline __m512d
7650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7651 _mm512_movedup_pd (__m512d __A)
7652 {
7653 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7654 (__v8df)
7655 _mm512_undefined_pd (),
7656 (__mmask8) -1);
7657 }
7658
7659 extern __inline __m512d
7660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7661 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7662 {
7663 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7664 (__v8df) __W,
7665 (__mmask8) __U);
7666 }
7667
7668 extern __inline __m512d
7669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7670 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7671 {
7672 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7673 (__v8df)
7674 _mm512_setzero_pd (),
7675 (__mmask8) __U);
7676 }
7677
7678 extern __inline __m512d
7679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7680 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7681 {
7682 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7683 (__v8df) __B,
7684 (__v8df)
7685 _mm512_undefined_pd (),
7686 (__mmask8) -1);
7687 }
7688
7689 extern __inline __m512d
7690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7691 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7692 {
7693 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7694 (__v8df) __B,
7695 (__v8df) __W,
7696 (__mmask8) __U);
7697 }
7698
7699 extern __inline __m512d
7700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7701 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7702 {
7703 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7704 (__v8df) __B,
7705 (__v8df)
7706 _mm512_setzero_pd (),
7707 (__mmask8) __U);
7708 }
7709
7710 extern __inline __m512d
7711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7712 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7713 {
7714 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7715 (__v8df) __B,
7716 (__v8df)
7717 _mm512_undefined_pd (),
7718 (__mmask8) -1);
7719 }
7720
7721 extern __inline __m512d
7722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7723 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7724 {
7725 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7726 (__v8df) __B,
7727 (__v8df) __W,
7728 (__mmask8) __U);
7729 }
7730
7731 extern __inline __m512d
7732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7733 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7734 {
7735 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7736 (__v8df) __B,
7737 (__v8df)
7738 _mm512_setzero_pd (),
7739 (__mmask8) __U);
7740 }
7741
7742 extern __inline __m512
7743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7745 {
7746 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7747 (__v16sf) __B,
7748 (__v16sf)
7749 _mm512_undefined_ps (),
7750 (__mmask16) -1);
7751 }
7752
7753 extern __inline __m512
7754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7755 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7756 {
7757 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7758 (__v16sf) __B,
7759 (__v16sf) __W,
7760 (__mmask16) __U);
7761 }
7762
7763 extern __inline __m512
7764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7765 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7766 {
7767 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7768 (__v16sf) __B,
7769 (__v16sf)
7770 _mm512_setzero_ps (),
7771 (__mmask16) __U);
7772 }
7773
7774 #ifdef __OPTIMIZE__
7775 extern __inline __m512d
7776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7777 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7778 {
7779 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7780 (__v8df)
7781 _mm512_undefined_pd (),
7782 (__mmask8) -1, __R);
7783 }
7784
7785 extern __inline __m512d
7786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7788 const int __R)
7789 {
7790 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7791 (__v8df) __W,
7792 (__mmask8) __U, __R);
7793 }
7794
7795 extern __inline __m512d
7796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7797 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7798 {
7799 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7800 (__v8df)
7801 _mm512_setzero_pd (),
7802 (__mmask8) __U, __R);
7803 }
7804
7805 extern __inline __m512
7806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7807 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7808 {
7809 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7810 (__v16sf)
7811 _mm512_undefined_ps (),
7812 (__mmask16) -1, __R);
7813 }
7814
7815 extern __inline __m512
7816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7817 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7818 const int __R)
7819 {
7820 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7821 (__v16sf) __W,
7822 (__mmask16) __U, __R);
7823 }
7824
7825 extern __inline __m512
7826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7827 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7828 {
7829 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7830 (__v16sf)
7831 _mm512_setzero_ps (),
7832 (__mmask16) __U, __R);
7833 }
7834
7835 extern __inline __m256i
7836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7838 {
7839 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7840 __I,
7841 (__v16hi)
7842 _mm256_undefined_si256 (),
7843 -1);
7844 }
7845
7846 extern __inline __m256i
7847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848 _mm512_cvtps_ph (__m512 __A, const int __I)
7849 {
7850 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7851 __I,
7852 (__v16hi)
7853 _mm256_undefined_si256 (),
7854 -1);
7855 }
7856
7857 extern __inline __m256i
7858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7859 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7860 const int __I)
7861 {
7862 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7863 __I,
7864 (__v16hi) __U,
7865 (__mmask16) __W);
7866 }
7867
7868 extern __inline __m256i
7869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7870 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7871 {
7872 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7873 __I,
7874 (__v16hi) __U,
7875 (__mmask16) __W);
7876 }
7877
7878 extern __inline __m256i
7879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7880 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7881 {
7882 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7883 __I,
7884 (__v16hi)
7885 _mm256_setzero_si256 (),
7886 (__mmask16) __W);
7887 }
7888
7889 extern __inline __m256i
7890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7891 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7892 {
7893 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7894 __I,
7895 (__v16hi)
7896 _mm256_setzero_si256 (),
7897 (__mmask16) __W);
7898 }
7899 #else
/* Macro forms of the ps->pd and ph->ps conversions, used when
   __OPTIMIZE__ is not defined.

   Each replacement list is wrapped in an outer pair of parentheses so
   the leading result cast cannot re-associate with operators that
   follow a macro invocation (e.g. a subscript).  Operands receive the
   same casts the inline-function forms apply: the source is viewed as
   __v8sf / __v16hi, the pass-through as __v8df / __v16sf, and the mask
   as __mmask8 / __mmask16.  */

/* __builtin_ia32_cvtps2pd512_mask (source, pass-through, mask, B).  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A), \
      (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A), \
      (__v8df) _mm512_setzero_pd (), (__mmask8)(U), (B)))

/* __builtin_ia32_vcvtph2ps512_mask (source, pass-through, mask, B).  */
#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A), \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (B)))
7917
/* Macro forms of the ps->ph conversions, used when __OPTIMIZE__ is not
   defined.  All expand to __builtin_ia32_vcvtps2ph512_mask
   (source, I, pass-through, mask); each "cvt_round" spelling and its
   plain "cvt" twin have identical expansions.

   The source argument A is parenthesized before the (__v16sf)(__m512)
   casts are applied.  Without the parentheses the casts would bind
   only to the first operand of a compound argument such as `x + y'
   or `c ? x : y'.

   In the masked forms U is the pass-through vector and W is the mask,
   matching the inline-function parameter naming.  */

/* Unmasked: undefined pass-through, mask -1.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
      (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
      (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

/* Masked: U is the pass-through vector, W the mask.  */
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
      (int) (I), (__v16hi)(__m256i)(U), (__mmask16) (W)))

#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
      (int) (I), (__v16hi)(__m256i)(U), (__mmask16) (W)))

/* Zero-masked: an all-zero vector is the pass-through, W the mask.  */
#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))

#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
7936 #endif
7937
7938 #ifdef __OPTIMIZE__
7939 extern __inline __m256
7940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7941 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7942 {
7943 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7944 (__v8sf)
7945 _mm256_undefined_ps (),
7946 (__mmask8) -1, __R);
7947 }
7948
7949 extern __inline __m256
7950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7951 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7952 const int __R)
7953 {
7954 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7955 (__v8sf) __W,
7956 (__mmask8) __U, __R);
7957 }
7958
7959 extern __inline __m256
7960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7961 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7962 {
7963 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7964 (__v8sf)
7965 _mm256_setzero_ps (),
7966 (__mmask8) __U, __R);
7967 }
7968
7969 extern __inline __m128
7970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7971 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7972 {
7973 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7974 (__v2df) __B,
7975 __R);
7976 }
7977
7978 extern __inline __m128d
7979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7980 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7981 {
7982 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7983 (__v4sf) __B,
7984 __R);
7985 }
7986 #else
/* Macro forms of the pd->ps, sd->ss and ss->sd conversions, used when
   __OPTIMIZE__ is not defined.

   Each replacement list is wrapped in an outer pair of parentheses so
   the leading result cast cannot re-associate with operators that
   follow a macro invocation.  Operands receive the same casts the
   inline-function forms apply.  */

/* __builtin_ia32_cvtpd2ps512_mask (source, pass-through, mask, B).  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A), \
      (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A), \
      (__v8sf)(__m256)(W), (__mmask8)(U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A), \
      (__v8sf) _mm256_setzero_ps (), (__mmask8)(U), (B)))

/* Scalar forms: (first vector, second vector, immediate C).  */
#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), (C)))

#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), (C)))
8001 #endif
8002
8003 extern __inline void
8004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8005 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8006 {
8007 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8008 }
8009
8010 extern __inline void
8011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8012 _mm512_stream_ps (float *__P, __m512 __A)
8013 {
8014 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8015 }
8016
8017 extern __inline void
8018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8019 _mm512_stream_pd (double *__P, __m512d __A)
8020 {
8021 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8022 }
8023
8024 extern __inline __m512i
8025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8026 _mm512_stream_load_si512 (void *__P)
8027 {
8028 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8029 }
8030
/* Constants for mantissa extraction.  */

/* Normalization interval selector.  The getmant wrappers OR this value
   into the low bits of the immediate: (sign << 2) | norm.  */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0,	/* interval [1, 2) */
  _MM_MANT_NORM_p5_2    = 1,	/* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1    = 2,	/* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3	/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8039
/* Sign-control selector for mantissa extraction.  The getmant wrappers
   shift this value left by two before OR-ing it with the interval
   selector.  */
typedef enum
{
  _MM_MANT_SIGN_src  = 0,	/* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,	/* sign = 0 */
  _MM_MANT_SIGN_nan  = 2	/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8046
8047 #ifdef __OPTIMIZE__
8048 extern __inline __m128
8049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8050 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8051 {
8052 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8053 (__v4sf) __B,
8054 __R);
8055 }
8056
8057 extern __inline __m128d
8058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8059 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8060 {
8061 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8062 (__v2df) __B,
8063 __R);
8064 }
8065
8066 extern __inline __m512
8067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8068 _mm512_getexp_round_ps (__m512 __A, const int __R)
8069 {
8070 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8071 (__v16sf)
8072 _mm512_undefined_ps (),
8073 (__mmask16) -1, __R);
8074 }
8075
8076 extern __inline __m512
8077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8078 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8079 const int __R)
8080 {
8081 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8082 (__v16sf) __W,
8083 (__mmask16) __U, __R);
8084 }
8085
8086 extern __inline __m512
8087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8088 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8089 {
8090 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8091 (__v16sf)
8092 _mm512_setzero_ps (),
8093 (__mmask16) __U, __R);
8094 }
8095
8096 extern __inline __m512d
8097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8098 _mm512_getexp_round_pd (__m512d __A, const int __R)
8099 {
8100 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8101 (__v8df)
8102 _mm512_undefined_pd (),
8103 (__mmask8) -1, __R);
8104 }
8105
8106 extern __inline __m512d
8107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8108 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8109 const int __R)
8110 {
8111 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8112 (__v8df) __W,
8113 (__mmask8) __U, __R);
8114 }
8115
8116 extern __inline __m512d
8117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8118 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8119 {
8120 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8121 (__v8df)
8122 _mm512_setzero_pd (),
8123 (__mmask8) __U, __R);
8124 }
8125
8126 extern __inline __m512d
8127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8128 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8129 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8130 {
8131 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8132 (__C << 2) | __B,
8133 _mm512_undefined_pd (),
8134 (__mmask8) -1, __R);
8135 }
8136
8137 extern __inline __m512d
8138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8139 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8140 _MM_MANTISSA_NORM_ENUM __B,
8141 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8142 {
8143 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8144 (__C << 2) | __B,
8145 (__v8df) __W, __U,
8146 __R);
8147 }
8148
8149 extern __inline __m512d
8150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8151 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8152 _MM_MANTISSA_NORM_ENUM __B,
8153 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8154 {
8155 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8156 (__C << 2) | __B,
8157 (__v8df)
8158 _mm512_setzero_pd (),
8159 __U, __R);
8160 }
8161
8162 extern __inline __m512
8163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8164 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8165 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8166 {
8167 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8168 (__C << 2) | __B,
8169 _mm512_undefined_ps (),
8170 (__mmask16) -1, __R);
8171 }
8172
8173 extern __inline __m512
8174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8175 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8176 _MM_MANTISSA_NORM_ENUM __B,
8177 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8178 {
8179 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8180 (__C << 2) | __B,
8181 (__v16sf) __W, __U,
8182 __R);
8183 }
8184
8185 extern __inline __m512
8186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8187 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8188 _MM_MANTISSA_NORM_ENUM __B,
8189 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8190 {
8191 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8192 (__C << 2) | __B,
8193 (__v16sf)
8194 _mm512_setzero_ps (),
8195 __U, __R);
8196 }
8197
8198 extern __inline __m128d
8199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8200 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8201 _MM_MANTISSA_NORM_ENUM __C,
8202 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8203 {
8204 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8205 (__v2df) __B,
8206 (__D << 2) | __C,
8207 __R);
8208 }
8209
8210 extern __inline __m128
8211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8212 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8213 _MM_MANTISSA_NORM_ENUM __C,
8214 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8215 {
8216 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8217 (__v4sf) __B,
8218 (__D << 2) | __C,
8219 __R);
8220 }
8221
8222 #else
/* Macro forms of the getmant wrappers, used when __OPTIMIZE__ is not
   defined.  NORM is the interval selector and SIGN the sign control;
   they are packed as (int)((SIGN << 2) | NORM).  SRC (and SRC2 for the
   scalar forms) are the vector operands, W the pass-through vector,
   U the mask and R the trailing immediate.  */

/* 512-bit double precision.  */
#define _mm512_getmant_round_pd(SRC, NORM, SIGN, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(SRC), \
					      (int)(((SIGN)<<2) | (NORM)), \
					      (__v8df)(__m512d)_mm512_undefined_pd(), \
					      (__mmask8)-1, \
					      (R)))

#define _mm512_mask_getmant_round_pd(W, U, SRC, NORM, SIGN, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(SRC), \
					      (int)(((SIGN)<<2) | (NORM)), \
					      (__v8df)(__m512d)(W), \
					      (__mmask8)(U), \
					      (R)))

#define _mm512_maskz_getmant_round_pd(U, SRC, NORM, SIGN, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(SRC), \
					      (int)(((SIGN)<<2) | (NORM)), \
					      (__v8df)(__m512d)_mm512_setzero_pd(), \
					      (__mmask8)(U), \
					      (R)))

/* 512-bit single precision.  */
#define _mm512_getmant_round_ps(SRC, NORM, SIGN, R) \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(SRC), \
					     (int)(((SIGN)<<2) | (NORM)), \
					     (__v16sf)(__m512)_mm512_undefined_ps(), \
					     (__mmask16)-1, \
					     (R)))

#define _mm512_mask_getmant_round_ps(W, U, SRC, NORM, SIGN, R) \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(SRC), \
					     (int)(((SIGN)<<2) | (NORM)), \
					     (__v16sf)(__m512)(W), \
					     (__mmask16)(U), \
					     (R)))

#define _mm512_maskz_getmant_round_ps(U, SRC, NORM, SIGN, R) \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(SRC), \
					     (int)(((SIGN)<<2) | (NORM)), \
					     (__v16sf)(__m512)_mm512_setzero_ps(), \
					     (__mmask16)(U), \
					     (R)))

/* Scalar forms.  */
#define _mm_getmant_round_sd(SRC, SRC2, NORM, SIGN, R) \
  ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(SRC), \
					    (__v2df)(__m128d)(SRC2), \
					    (int)(((SIGN)<<2) | (NORM)), \
					    (R)))

#define _mm_getmant_round_ss(SRC, SRC2, NORM, SIGN, R) \
  ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(SRC), \
					   (__v4sf)(__m128)(SRC2), \
					   (int)(((SIGN)<<2) | (NORM)), \
					   (R)))
8274
/* Macro forms of the getexp wrappers, used when __OPTIMIZE__ is not
   defined.  SRC / SRC2 are the vector operands, W the pass-through
   vector, U the mask; R is forwarded to the builtin as written.  */

/* Scalar forms.  */
#define _mm_getexp_round_ss(SRC, SRC2, R) \
  ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(SRC), \
					    (__v4sf)(__m128)(SRC2), R))

#define _mm_getexp_round_sd(SRC, SRC2, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(SRC), \
					     (__v2df)(__m128d)(SRC2), R))

/* 512-bit single precision.  */
#define _mm512_getexp_round_ps(SRC, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(SRC), \
					   (__v16sf)_mm512_undefined_ps(), \
					   (__mmask16)-1, R))

#define _mm512_mask_getexp_round_ps(W, U, SRC, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(SRC), \
					   (__v16sf)(__m512)(W), \
					   (__mmask16)(U), R))

#define _mm512_maskz_getexp_round_ps(U, SRC, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(SRC), \
					   (__v16sf)_mm512_setzero_ps(), \
					   (__mmask16)(U), R))

/* 512-bit double precision.  */
#define _mm512_getexp_round_pd(SRC, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(SRC), \
					    (__v8df)_mm512_undefined_pd(), \
					    (__mmask8)-1, R))

#define _mm512_mask_getexp_round_pd(W, U, SRC, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(SRC), \
					    (__v8df)(__m512d)(W), \
					    (__mmask8)(U), R))

#define _mm512_maskz_getexp_round_pd(U, SRC, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(SRC), \
					    (__v8df)_mm512_setzero_pd(), \
					    (__mmask8)(U), R))
8304 #endif
8305
8306 #ifdef __OPTIMIZE__
8307 extern __inline __m512
8308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8309 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8310 {
8311 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8312 (__v16sf)
8313 _mm512_undefined_ps (),
8314 -1, __R);
8315 }
8316
8317 extern __inline __m512
8318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8319 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8320 const int __imm, const int __R)
8321 {
8322 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8323 (__v16sf) __A,
8324 (__mmask16) __B, __R);
8325 }
8326
8327 extern __inline __m512
8328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8329 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8330 const int __imm, const int __R)
8331 {
8332 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8333 __imm,
8334 (__v16sf)
8335 _mm512_setzero_ps (),
8336 (__mmask16) __A, __R);
8337 }
8338
8339 extern __inline __m512d
8340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8341 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8342 {
8343 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8344 (__v8df)
8345 _mm512_undefined_pd (),
8346 -1, __R);
8347 }
8348
8349 extern __inline __m512d
8350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8351 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8352 __m512d __C, const int __imm, const int __R)
8353 {
8354 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8355 (__v8df) __A,
8356 (__mmask8) __B, __R);
8357 }
8358
8359 extern __inline __m512d
8360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8361 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8362 const int __imm, const int __R)
8363 {
8364 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8365 __imm,
8366 (__v8df)
8367 _mm512_setzero_pd (),
8368 (__mmask8) __A, __R);
8369 }
8370
8371 extern __inline __m128
8372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8373 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8374 {
8375 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8376 (__v4sf) __B, __imm, __R);
8377 }
8378
8379 extern __inline __m128d
8380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8381 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8382 const int __R)
8383 {
8384 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8385 (__v2df) __B, __imm, __R);
8386 }
8387
8388 #else
/* Macro forms of the roundscale wrappers, used when __OPTIMIZE__ is
   not defined.  SRC / SRC2 are the vector operands, IMM the first
   immediate (cast to int), W the pass-through vector, U the mask; R is
   forwarded to the builtin as written.  */

/* 512-bit single precision.  */
#define _mm512_roundscale_round_ps(SRC, IMM, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(SRC), \
					    (int)(IMM), \
					    (__v16sf)_mm512_undefined_ps(), \
					    (__mmask16)(-1), R))

#define _mm512_mask_roundscale_round_ps(W, U, SRC, IMM, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(SRC), \
					    (int)(IMM), \
					    (__v16sf)(__m512)(W), \
					    (__mmask16)(U), R))

#define _mm512_maskz_roundscale_round_ps(U, SRC, IMM, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(SRC), \
					    (int)(IMM), \
					    (__v16sf)_mm512_setzero_ps(), \
					    (__mmask16)(U), R))

/* 512-bit double precision.  */
#define _mm512_roundscale_round_pd(SRC, IMM, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(SRC), \
					     (int)(IMM), \
					     (__v8df)_mm512_undefined_pd(), \
					     (__mmask8)(-1), R))

#define _mm512_mask_roundscale_round_pd(W, U, SRC, IMM, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(SRC), \
					     (int)(IMM), \
					     (__v8df)(__m512d)(W), \
					     (__mmask8)(U), R))

#define _mm512_maskz_roundscale_round_pd(U, SRC, IMM, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(SRC), \
					     (int)(IMM), \
					     (__v8df)_mm512_setzero_pd(), \
					     (__mmask8)(U), R))

/* Scalar forms.  */
#define _mm_roundscale_round_ss(SRC, SRC2, IMM, R) \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(SRC), \
					     (__v4sf)(__m128)(SRC2), \
					     (int)(IMM), R))

#define _mm_roundscale_round_sd(SRC, SRC2, IMM, R) \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(SRC), \
					      (__v2df)(__m128d)(SRC2), \
					      (int)(IMM), R))
8421 #endif
8422
8423 extern __inline __m512
8424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8425 _mm512_floor_ps (__m512 __A)
8426 {
8427 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8428 _MM_FROUND_FLOOR,
8429 (__v16sf) __A, -1,
8430 _MM_FROUND_CUR_DIRECTION);
8431 }
8432
8433 extern __inline __m512d
8434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8435 _mm512_floor_pd (__m512d __A)
8436 {
8437 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8438 _MM_FROUND_FLOOR,
8439 (__v8df) __A, -1,
8440 _MM_FROUND_CUR_DIRECTION);
8441 }
8442
8443 extern __inline __m512
8444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8445 _mm512_ceil_ps (__m512 __A)
8446 {
8447 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8448 _MM_FROUND_CEIL,
8449 (__v16sf) __A, -1,
8450 _MM_FROUND_CUR_DIRECTION);
8451 }
8452
8453 extern __inline __m512d
8454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8455 _mm512_ceil_pd (__m512d __A)
8456 {
8457 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8458 _MM_FROUND_CEIL,
8459 (__v8df) __A, -1,
8460 _MM_FROUND_CUR_DIRECTION);
8461 }
8462
8463 extern __inline __m512
8464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8465 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8466 {
8467 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8468 _MM_FROUND_FLOOR,
8469 (__v16sf) __W, __U,
8470 _MM_FROUND_CUR_DIRECTION);
8471 }
8472
8473 extern __inline __m512d
8474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8475 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8476 {
8477 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8478 _MM_FROUND_FLOOR,
8479 (__v8df) __W, __U,
8480 _MM_FROUND_CUR_DIRECTION);
8481 }
8482
8483 extern __inline __m512
8484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8485 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8486 {
8487 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8488 _MM_FROUND_CEIL,
8489 (__v16sf) __W, __U,
8490 _MM_FROUND_CUR_DIRECTION);
8491 }
8492
8493 extern __inline __m512d
8494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8495 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8496 {
8497 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8498 _MM_FROUND_CEIL,
8499 (__v8df) __W, __U,
8500 _MM_FROUND_CUR_DIRECTION);
8501 }
8502
8503 #ifdef __OPTIMIZE__
8504 extern __inline __m512i
8505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8506 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8507 {
8508 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8509 (__v16si) __B, __imm,
8510 (__v16si)
8511 _mm512_undefined_epi32 (),
8512 (__mmask16) -1);
8513 }
8514
8515 extern __inline __m512i
8516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8517 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8518 __m512i __B, const int __imm)
8519 {
8520 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8521 (__v16si) __B, __imm,
8522 (__v16si) __W,
8523 (__mmask16) __U);
8524 }
8525
8526 extern __inline __m512i
8527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8528 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8529 const int __imm)
8530 {
8531 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8532 (__v16si) __B, __imm,
8533 (__v16si)
8534 _mm512_setzero_si512 (),
8535 (__mmask16) __U);
8536 }
8537
8538 extern __inline __m512i
8539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8540 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8541 {
8542 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8543 (__v8di) __B, __imm,
8544 (__v8di)
8545 _mm512_undefined_epi32 (),
8546 (__mmask8) -1);
8547 }
8548
8549 extern __inline __m512i
8550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8551 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8552 __m512i __B, const int __imm)
8553 {
8554 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8555 (__v8di) __B, __imm,
8556 (__v8di) __W,
8557 (__mmask8) __U);
8558 }
8559
8560 extern __inline __m512i
8561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8562 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8563 const int __imm)
8564 {
8565 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8566 (__v8di) __B, __imm,
8567 (__v8di)
8568 _mm512_setzero_si512 (),
8569 (__mmask8) __U);
8570 }
8571 #else
/* Macro spelling used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, mask argument (__mmask16) -1.  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) -1))
8576
/* Macro spelling used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the mask.  */
#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
8581
/* Macro spelling used when __OPTIMIZE__ is not defined: zero vector as
   pass-through, U as the mask.  */
#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
8586
/* Macro spelling used when __OPTIMIZE__ is not defined: undefined
   pass-through vector, mask argument (__mmask8) -1.  */
#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) -1))
8591
/* Macro spelling used when __OPTIMIZE__ is not defined: W is the
   pass-through vector, U the mask.  */
#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
8595
/* Macro spelling used when __OPTIMIZE__ is not defined: zero vector as
   pass-through, U as the mask.  */
#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
8600 #endif
8601
8602 extern __inline __mmask16
8603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8604 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8605 {
8606 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8607 (__v16si) __B,
8608 (__mmask16) -1);
8609 }
8610
8611 extern __inline __mmask16
8612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8613 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8614 {
8615 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8616 (__v16si) __B, __U);
8617 }
8618
8619 extern __inline __mmask8
8620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8621 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8622 {
8623 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8624 (__v8di) __B, __U);
8625 }
8626
8627 extern __inline __mmask8
8628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8629 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8630 {
8631 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8632 (__v8di) __B,
8633 (__mmask8) -1);
8634 }
8635
8636 extern __inline __mmask16
8637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8639 {
8640 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8641 (__v16si) __B,
8642 (__mmask16) -1);
8643 }
8644
8645 extern __inline __mmask16
8646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8647 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8648 {
8649 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8650 (__v16si) __B, __U);
8651 }
8652
8653 extern __inline __mmask8
8654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8655 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8656 {
8657 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8658 (__v8di) __B, __U);
8659 }
8660
8661 extern __inline __mmask8
8662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8663 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8664 {
8665 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8666 (__v8di) __B,
8667 (__mmask8) -1);
8668 }
8669
8670 extern __inline __mmask16
8671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8673 {
8674 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8675 (__v16si) __Y, 5,
8676 (__mmask16) -1);
8677 }
8678
8679 extern __inline __mmask16
8680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8682 {
8683 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8684 (__v16si) __Y, 5,
8685 (__mmask16) __M);
8686 }
8687
8688 extern __inline __mmask16
8689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8690 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8691 {
8692 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8693 (__v16si) __Y, 5,
8694 (__mmask16) __M);
8695 }
8696
8697 extern __inline __mmask16
8698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8699 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8700 {
8701 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8702 (__v16si) __Y, 5,
8703 (__mmask16) -1);
8704 }
8705
8706 extern __inline __mmask8
8707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8708 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8709 {
8710 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8711 (__v8di) __Y, 5,
8712 (__mmask8) __M);
8713 }
8714
8715 extern __inline __mmask8
8716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8717 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8718 {
8719 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8720 (__v8di) __Y, 5,
8721 (__mmask8) -1);
8722 }
8723
8724 extern __inline __mmask8
8725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8726 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8727 {
8728 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8729 (__v8di) __Y, 5,
8730 (__mmask8) __M);
8731 }
8732
8733 extern __inline __mmask8
8734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8735 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8736 {
8737 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8738 (__v8di) __Y, 5,
8739 (__mmask8) -1);
8740 }
8741
8742 extern __inline __mmask16
8743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8744 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8745 {
8746 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8747 (__v16si) __Y, 2,
8748 (__mmask16) __M);
8749 }
8750
8751 extern __inline __mmask16
8752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8753 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8754 {
8755 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8756 (__v16si) __Y, 2,
8757 (__mmask16) -1);
8758 }
8759
8760 extern __inline __mmask16
8761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8762 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8763 {
8764 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8765 (__v16si) __Y, 2,
8766 (__mmask16) __M);
8767 }
8768
8769 extern __inline __mmask16
8770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8771 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8772 {
8773 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8774 (__v16si) __Y, 2,
8775 (__mmask16) -1);
8776 }
8777
8778 extern __inline __mmask8
8779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8781 {
8782 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8783 (__v8di) __Y, 2,
8784 (__mmask8) __M);
8785 }
8786
8787 extern __inline __mmask8
8788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8789 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8790 {
8791 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8792 (__v8di) __Y, 2,
8793 (__mmask8) -1);
8794 }
8795
8796 extern __inline __mmask8
8797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8798 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8799 {
8800 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8801 (__v8di) __Y, 2,
8802 (__mmask8) __M);
8803 }
8804
8805 extern __inline __mmask8
8806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8807 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8808 {
8809 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8810 (__v8di) __Y, 2,
8811 (__mmask8) -1);
8812 }
8813
8814 extern __inline __mmask16
8815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8816 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8817 {
8818 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8819 (__v16si) __Y, 1,
8820 (__mmask16) __M);
8821 }
8822
8823 extern __inline __mmask16
8824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8825 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8826 {
8827 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8828 (__v16si) __Y, 1,
8829 (__mmask16) -1);
8830 }
8831
8832 extern __inline __mmask16
8833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8834 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8835 {
8836 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8837 (__v16si) __Y, 1,
8838 (__mmask16) __M);
8839 }
8840
8841 extern __inline __mmask16
8842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8843 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8844 {
8845 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8846 (__v16si) __Y, 1,
8847 (__mmask16) -1);
8848 }
8849
8850 extern __inline __mmask8
8851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8852 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8853 {
8854 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8855 (__v8di) __Y, 1,
8856 (__mmask8) __M);
8857 }
8858
8859 extern __inline __mmask8
8860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8861 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8862 {
8863 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8864 (__v8di) __Y, 1,
8865 (__mmask8) -1);
8866 }
8867
8868 extern __inline __mmask8
8869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8870 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8871 {
8872 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8873 (__v8di) __Y, 1,
8874 (__mmask8) __M);
8875 }
8876
8877 extern __inline __mmask8
8878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8879 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8880 {
8881 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8882 (__v8di) __Y, 1,
8883 (__mmask8) -1);
8884 }
8885
8886 extern __inline __mmask16
8887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8888 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8889 {
8890 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8891 (__v16si) __Y, 4,
8892 (__mmask16) -1);
8893 }
8894
8895 extern __inline __mmask16
8896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8897 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8898 {
8899 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8900 (__v16si) __Y, 4,
8901 (__mmask16) __M);
8902 }
8903
8904 extern __inline __mmask16
8905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8906 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8907 {
8908 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8909 (__v16si) __Y, 4,
8910 (__mmask16) __M);
8911 }
8912
8913 extern __inline __mmask16
8914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8915 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8916 {
8917 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8918 (__v16si) __Y, 4,
8919 (__mmask16) -1);
8920 }
8921
8922 extern __inline __mmask8
8923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8924 _mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8925 {
8926 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8927 (__v8di) __Y, 4,
8928 (__mmask8) __M);
8929 }
8930
8931 extern __inline __mmask8
8932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8933 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8934 {
8935 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8936 (__v8di) __Y, 4,
8937 (__mmask8) -1);
8938 }
8939
8940 extern __inline __mmask8
8941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8942 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8943 {
8944 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8945 (__v8di) __Y, 4,
8946 (__mmask8) __M);
8947 }
8948
8949 extern __inline __mmask8
8950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8951 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8952 {
8953 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8954 (__v8di) __Y, 4,
8955 (__mmask8) -1);
8956 }
8957
/* Named predicate immediates for the integer compare intrinsics.  The
   values 1, 2, 4 and 5 are the literals hard-coded in the
   _mm512_cmp{lt,le,neq,ge}_* wrappers above.  GE and GT are second names
   for the same values as NLT and NLE.  */
#define _MM_CMPINT_EQ 0x0
#define _MM_CMPINT_LT 0x1
#define _MM_CMPINT_LE 0x2
#define _MM_CMPINT_UNUSED 0x3
#define _MM_CMPINT_NE 0x4
#define _MM_CMPINT_NLT 0x5
#define _MM_CMPINT_GE 0x5 /* Same value as _MM_CMPINT_NLT.  */
#define _MM_CMPINT_NLE 0x6
#define _MM_CMPINT_GT 0x6 /* Same value as _MM_CMPINT_NLE.  */
8967
8968 #ifdef __OPTIMIZE__
8969 extern __inline __mmask16
8970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8971 _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
8972 {
8973 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
8974 (__mmask8) __B);
8975 }
8976
8977 extern __inline __mmask16
8978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8979 _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
8980 {
8981 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
8982 (__mmask8) __B);
8983 }
8984
8985 extern __inline __mmask8
8986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8987 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8988 {
8989 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8990 (__v8di) __Y, __P,
8991 (__mmask8) -1);
8992 }
8993
8994 extern __inline __mmask16
8995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8996 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8997 {
8998 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8999 (__v16si) __Y, __P,
9000 (__mmask16) -1);
9001 }
9002
9003 extern __inline __mmask8
9004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9005 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9006 {
9007 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9008 (__v8di) __Y, __P,
9009 (__mmask8) -1);
9010 }
9011
9012 extern __inline __mmask16
9013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9014 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9015 {
9016 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9017 (__v16si) __Y, __P,
9018 (__mmask16) -1);
9019 }
9020
9021 extern __inline __mmask8
9022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9023 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9024 const int __R)
9025 {
9026 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9027 (__v8df) __Y, __P,
9028 (__mmask8) -1, __R);
9029 }
9030
9031 extern __inline __mmask16
9032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9033 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9034 {
9035 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9036 (__v16sf) __Y, __P,
9037 (__mmask16) -1, __R);
9038 }
9039
9040 extern __inline __mmask8
9041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9042 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9043 const int __P)
9044 {
9045 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9046 (__v8di) __Y, __P,
9047 (__mmask8) __U);
9048 }
9049
9050 extern __inline __mmask16
9051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9052 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9053 const int __P)
9054 {
9055 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9056 (__v16si) __Y, __P,
9057 (__mmask16) __U);
9058 }
9059
9060 extern __inline __mmask8
9061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9062 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9063 const int __P)
9064 {
9065 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9066 (__v8di) __Y, __P,
9067 (__mmask8) __U);
9068 }
9069
9070 extern __inline __mmask16
9071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9072 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9073 const int __P)
9074 {
9075 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9076 (__v16si) __Y, __P,
9077 (__mmask16) __U);
9078 }
9079
9080 extern __inline __mmask8
9081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9082 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9083 const int __P, const int __R)
9084 {
9085 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9086 (__v8df) __Y, __P,
9087 (__mmask8) __U, __R);
9088 }
9089
9090 extern __inline __mmask16
9091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9092 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9093 const int __P, const int __R)
9094 {
9095 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9096 (__v16sf) __Y, __P,
9097 (__mmask16) __U, __R);
9098 }
9099
9100 extern __inline __mmask8
9101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9102 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9103 {
9104 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9105 (__v2df) __Y, __P,
9106 (__mmask8) -1, __R);
9107 }
9108
9109 extern __inline __mmask8
9110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9111 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9112 const int __P, const int __R)
9113 {
9114 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9115 (__v2df) __Y, __P,
9116 (__mmask8) __M, __R);
9117 }
9118
9119 extern __inline __mmask8
9120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9121 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9122 {
9123 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9124 (__v4sf) __Y, __P,
9125 (__mmask8) -1, __R);
9126 }
9127
9128 extern __inline __mmask8
9129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9130 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9131 const int __P, const int __R)
9132 {
9133 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9134 (__v4sf) __Y, __P,
9135 (__mmask8) __M, __R);
9136 }
9137
9138 #else
/* Macro spelling used when __OPTIMIZE__ is not defined.  */
#define _kshiftli_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))
9141
/* Macro spelling used when __OPTIMIZE__ is not defined.  */
#define _kshiftri_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))
9144
/* Macro spelling used when __OPTIMIZE__ is not defined; mask argument is
   (__mmask8) -1.  */
#define _mm512_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) -1))
9149
/* Macro spelling used when __OPTIMIZE__ is not defined; mask argument is
   (__mmask16) -1.  */
#define _mm512_cmp_epi32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (P), \
     (__mmask16) -1))
9154
/* Macro spelling used when __OPTIMIZE__ is not defined; mask argument is
   (__mmask8) -1.  */
#define _mm512_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) -1))
9159
/* Macro spelling used when __OPTIMIZE__ is not defined; mask argument is
   (__mmask16) -1.  */
#define _mm512_cmp_epu32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (P), \
     (__mmask16) -1))
9164
/* Macro spelling used when __OPTIMIZE__ is not defined; mask argument is
   (__mmask8) -1 and R is passed through as the last argument.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (P), \
     (__mmask8) -1, R))
9169
/* Macro spelling used when __OPTIMIZE__ is not defined; mask argument is
   (__mmask16) -1 and R is passed through as the last argument.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (P), \
     (__mmask16) -1, R))
9174
/* Macro spelling used when __OPTIMIZE__ is not defined.  M is wrapped in
   parentheses before the cast so that an expression argument such as
   `m >> 8' or `c ? a : b' is converted as a whole rather than having only
   its first operand cast.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
                                           (__v8di)(__m512i)(Y), (int)(P), \
                                           (__mmask8)(M)))
9179
/* Macro spelling used when __OPTIMIZE__ is not defined.  M is wrapped in
   parentheses before the cast so that an expression argument such as
   `m >> 8' or `c ? a : b' is converted as a whole rather than having only
   its first operand cast.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)(M)))
9184
/* Macro spelling used when __OPTIMIZE__ is not defined.  M is wrapped in
   parentheses before the cast so that an expression argument such as
   `m >> 8' or `c ? a : b' is converted as a whole rather than having only
   its first operand cast.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
                                            (__v8di)(__m512i)(Y), (int)(P), \
                                            (__mmask8)(M)))
9189
/* Macro spelling used when __OPTIMIZE__ is not defined.  M is wrapped in
   parentheses before the cast so that an expression argument such as
   `m >> 8' or `c ? a : b' is converted as a whole rather than having only
   its first operand cast.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)(M)))
9194
/* Macro spelling used when __OPTIMIZE__ is not defined.  M is wrapped in
   parentheses before the cast so that an expression argument such as
   `m >> 8' or `c ? a : b' is converted as a whole rather than having only
   its first operand cast.  R is passed through as the last argument.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), (int)(P), \
                                            (__mmask8)(M), R))
9199
/* Macro spelling used when __OPTIMIZE__ is not defined.  M is wrapped in
   parentheses before the cast so that an expression argument such as
   `m >> 8' or `c ? a : b' is converted as a whole rather than having only
   its first operand cast.  R is passed through as the last argument.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), (int)(P), \
                                             (__mmask16)(M), R))
9204
/* Macro spelling used when __OPTIMIZE__ is not defined; mask argument is
   (__mmask8) -1 and R is passed through as the last argument.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (__mmask8) -1, R))
9209
/* Macro spelling used when __OPTIMIZE__ is not defined; M is handed to the
   builtin without an explicit cast, R is passed through unchanged.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (M), R))
9214
/* Macro spelling used when __OPTIMIZE__ is not defined; mask argument is
   (__mmask8) -1 and R is passed through as the last argument.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (__mmask8) -1, R))
9219
/* Macro spelling used when __OPTIMIZE__ is not defined; M is handed to the
   builtin without an explicit cast, R is passed through unchanged.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (M), R))
9224 #endif
9225
9226 #ifdef __OPTIMIZE__
9227 extern __inline __m512
9228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9229 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
9230 {
9231 __m512 __v1_old = _mm512_undefined_ps ();
9232 __mmask16 __mask = 0xFFFF;
9233
9234 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9235 __addr,
9236 (__v16si) __index,
9237 __mask, __scale);
9238 }
9239
9240 extern __inline __m512
9241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9242 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
9243 __m512i __index, void const *__addr, int __scale)
9244 {
9245 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9246 __addr,
9247 (__v16si) __index,
9248 __mask, __scale);
9249 }
9250
9251 extern __inline __m512d
9252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9253 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
9254 {
9255 __m512d __v1_old = _mm512_undefined_pd ();
9256 __mmask8 __mask = 0xFF;
9257
9258 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9259 __addr,
9260 (__v8si) __index, __mask,
9261 __scale);
9262 }
9263
9264 extern __inline __m512d
9265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9266 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9267 __m256i __index, void const *__addr, int __scale)
9268 {
9269 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9270 __addr,
9271 (__v8si) __index,
9272 __mask, __scale);
9273 }
9274
9275 extern __inline __m256
9276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9277 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
9278 {
9279 __m256 __v1_old = _mm256_undefined_ps ();
9280 __mmask8 __mask = 0xFF;
9281
9282 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9283 __addr,
9284 (__v8di) __index, __mask,
9285 __scale);
9286 }
9287
9288 extern __inline __m256
9289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9290 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9291 __m512i __index, void const *__addr, int __scale)
9292 {
9293 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9294 __addr,
9295 (__v8di) __index,
9296 __mask, __scale);
9297 }
9298
9299 extern __inline __m512d
9300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9301 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
9302 {
9303 __m512d __v1_old = _mm512_undefined_pd ();
9304 __mmask8 __mask = 0xFF;
9305
9306 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9307 __addr,
9308 (__v8di) __index, __mask,
9309 __scale);
9310 }
9311
9312 extern __inline __m512d
9313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9314 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9315 __m512i __index, void const *__addr, int __scale)
9316 {
9317 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9318 __addr,
9319 (__v8di) __index,
9320 __mask, __scale);
9321 }
9322
9323 extern __inline __m512i
9324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9325 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
9326 {
9327 __m512i __v1_old = _mm512_undefined_epi32 ();
9328 __mmask16 __mask = 0xFFFF;
9329
9330 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9331 __addr,
9332 (__v16si) __index,
9333 __mask, __scale);
9334 }
9335
9336 extern __inline __m512i
9337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9338 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9339 __m512i __index, void const *__addr, int __scale)
9340 {
9341 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9342 __addr,
9343 (__v16si) __index,
9344 __mask, __scale);
9345 }
9346
9347 extern __inline __m512i
9348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9349 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
9350 {
9351 __m512i __v1_old = _mm512_undefined_epi32 ();
9352 __mmask8 __mask = 0xFF;
9353
9354 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9355 __addr,
9356 (__v8si) __index, __mask,
9357 __scale);
9358 }
9359
9360 extern __inline __m512i
9361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9362 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9363 __m256i __index, void const *__addr,
9364 int __scale)
9365 {
9366 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9367 __addr,
9368 (__v8si) __index,
9369 __mask, __scale);
9370 }
9371
9372 extern __inline __m256i
9373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9374 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
9375 {
9376 __m256i __v1_old = _mm256_undefined_si256 ();
9377 __mmask8 __mask = 0xFF;
9378
9379 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9380 __addr,
9381 (__v8di) __index,
9382 __mask, __scale);
9383 }
9384
9385 extern __inline __m256i
9386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9387 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9388 __m512i __index, void const *__addr, int __scale)
9389 {
9390 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9391 __addr,
9392 (__v8di) __index,
9393 __mask, __scale);
9394 }
9395
9396 extern __inline __m512i
9397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9398 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
9399 {
9400 __m512i __v1_old = _mm512_undefined_epi32 ();
9401 __mmask8 __mask = 0xFF;
9402
9403 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9404 __addr,
9405 (__v8di) __index, __mask,
9406 __scale);
9407 }
9408
9409 extern __inline __m512i
9410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9411 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9412 __m512i __index, void const *__addr,
9413 int __scale)
9414 {
9415 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9416 __addr,
9417 (__v8di) __index,
9418 __mask, __scale);
9419 }
9420
9421 extern __inline void
9422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9423 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
9424 {
9425 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9426 (__v16si) __index, (__v16sf) __v1, __scale);
9427 }
9428
9429 extern __inline void
9430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9431 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
9432 __m512i __index, __m512 __v1, int __scale)
9433 {
9434 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9435 (__v16sf) __v1, __scale);
9436 }
9437
9438 extern __inline void
9439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9440 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
9441 int __scale)
9442 {
9443 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9444 (__v8si) __index, (__v8df) __v1, __scale);
9445 }
9446
9447 extern __inline void
9448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9449 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
9450 __m256i __index, __m512d __v1, int __scale)
9451 {
9452 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9453 (__v8df) __v1, __scale);
9454 }
9455
9456 extern __inline void
9457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9458 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
9459 {
9460 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9461 (__v8di) __index, (__v8sf) __v1, __scale);
9462 }
9463
9464 extern __inline void
9465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9466 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
9467 __m512i __index, __m256 __v1, int __scale)
9468 {
9469 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9470 (__v8sf) __v1, __scale);
9471 }
9472
9473 extern __inline void
9474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
9476 int __scale)
9477 {
9478 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9479 (__v8di) __index, (__v8df) __v1, __scale);
9480 }
9481
9482 extern __inline void
9483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
9485 __m512i __index, __m512d __v1, int __scale)
9486 {
9487 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9488 (__v8df) __v1, __scale);
9489 }
9490
9491 extern __inline void
9492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493 _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
9494 __m512i __v1, int __scale)
9495 {
9496 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9497 (__v16si) __index, (__v16si) __v1, __scale);
9498 }
9499
9500 extern __inline void
9501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9502 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
9503 __m512i __index, __m512i __v1, int __scale)
9504 {
9505 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9506 (__v16si) __v1, __scale);
9507 }
9508
9509 extern __inline void
9510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511 _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
9512 __m512i __v1, int __scale)
9513 {
9514 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9515 (__v8si) __index, (__v8di) __v1, __scale);
9516 }
9517
9518 extern __inline void
9519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9520 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
9521 __m256i __index, __m512i __v1, int __scale)
9522 {
9523 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9524 (__v8di) __v1, __scale);
9525 }
9526
9527 extern __inline void
9528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9529 _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
9530 __m256i __v1, int __scale)
9531 {
9532 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9533 (__v8di) __index, (__v8si) __v1, __scale);
9534 }
9535
9536 extern __inline void
9537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9538 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
9539 __m512i __index, __m256i __v1, int __scale)
9540 {
9541 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9542 (__v8si) __v1, __scale);
9543 }
9544
9545 extern __inline void
9546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9547 _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
9548 __m512i __v1, int __scale)
9549 {
9550 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9551 (__v8di) __index, (__v8di) __v1, __scale);
9552 }
9553
9554 extern __inline void
9555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9556 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
9557 __m512i __index, __m512i __v1, int __scale)
9558 {
9559 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9560 (__v8di) __v1, __scale);
9561 }
9562 #else
/* Macro forms of the gather intrinsics.  Each one forwards to the matching
   builtin; the unmasked variants pass an undefined source vector and an
   all-ones mask, the masked variants pass V1OLD and MASK.

   Every macro argument is parenthesized before it is cast, so that an
   expression argument such as `base + 1' or `k1 | k2' is converted as a
   whole instead of having the cast bind to its first operand only.  The
   complete expansion is parenthesized as well so it can be used safely
   inside a larger expression.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(), \
                                          (void const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16)0xFFFF, (int) (SCALE)))

#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16) (MASK), (int) (SCALE)))

#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))

#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))

#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))

#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16)0xFFFF, (int) (SCALE)))

#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) (MASK), (int) (SCALE)))

#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))

#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8)0xFF, (int) (SCALE)))

#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))

#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9658
/* Macro forms of the scatter intrinsics.  Each one forwards to the matching
   builtin; the unmasked variants pass an all-ones mask.

   Every macro argument is parenthesized before it is cast, so that an
   expression argument such as `base + 1' or `k1 | k2' is converted as a
   whole.  _mm512_mask_i64scatter_ps casts MASK to __mmask8, matching the
   eight-lane mask used by its unmasked counterpart and by the other
   64-bit-index scatter macros.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))

#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))

#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))

#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))

#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))

#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))

#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))

#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))

#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))

#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))

#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))

#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9738 #endif
9739
9740 extern __inline __m512d
9741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9742 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9743 {
9744 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9745 (__v8df) __W,
9746 (__mmask8) __U);
9747 }
9748
9749 extern __inline __m512d
9750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9751 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9752 {
9753 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9754 (__v8df)
9755 _mm512_setzero_pd (),
9756 (__mmask8) __U);
9757 }
9758
9759 extern __inline void
9760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9761 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9762 {
9763 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9764 (__mmask8) __U);
9765 }
9766
9767 extern __inline __m512
9768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9770 {
9771 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9772 (__v16sf) __W,
9773 (__mmask16) __U);
9774 }
9775
9776 extern __inline __m512
9777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9779 {
9780 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9781 (__v16sf)
9782 _mm512_setzero_ps (),
9783 (__mmask16) __U);
9784 }
9785
9786 extern __inline void
9787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9788 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9789 {
9790 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9791 (__mmask16) __U);
9792 }
9793
9794 extern __inline __m512i
9795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9797 {
9798 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9799 (__v8di) __W,
9800 (__mmask8) __U);
9801 }
9802
9803 extern __inline __m512i
9804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9806 {
9807 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9808 (__v8di)
9809 _mm512_setzero_si512 (),
9810 (__mmask8) __U);
9811 }
9812
9813 extern __inline void
9814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9815 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9816 {
9817 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9818 (__mmask8) __U);
9819 }
9820
9821 extern __inline __m512i
9822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9823 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9824 {
9825 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9826 (__v16si) __W,
9827 (__mmask16) __U);
9828 }
9829
9830 extern __inline __m512i
9831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9832 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9833 {
9834 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9835 (__v16si)
9836 _mm512_setzero_si512 (),
9837 (__mmask16) __U);
9838 }
9839
9840 extern __inline void
9841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9842 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9843 {
9844 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9845 (__mmask16) __U);
9846 }
9847
9848 extern __inline __m512d
9849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9850 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9851 {
9852 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9853 (__v8df) __W,
9854 (__mmask8) __U);
9855 }
9856
9857 extern __inline __m512d
9858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9859 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9860 {
9861 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9862 (__v8df)
9863 _mm512_setzero_pd (),
9864 (__mmask8) __U);
9865 }
9866
9867 extern __inline __m512d
9868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9869 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9870 {
9871 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9872 (__v8df) __W,
9873 (__mmask8) __U);
9874 }
9875
9876 extern __inline __m512d
9877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9878 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9879 {
9880 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9881 (__v8df)
9882 _mm512_setzero_pd (),
9883 (__mmask8) __U);
9884 }
9885
9886 extern __inline __m512
9887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9888 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9889 {
9890 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9891 (__v16sf) __W,
9892 (__mmask16) __U);
9893 }
9894
9895 extern __inline __m512
9896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9897 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9898 {
9899 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9900 (__v16sf)
9901 _mm512_setzero_ps (),
9902 (__mmask16) __U);
9903 }
9904
9905 extern __inline __m512
9906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9907 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9908 {
9909 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9910 (__v16sf) __W,
9911 (__mmask16) __U);
9912 }
9913
9914 extern __inline __m512
9915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9916 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9917 {
9918 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9919 (__v16sf)
9920 _mm512_setzero_ps (),
9921 (__mmask16) __U);
9922 }
9923
9924 extern __inline __m512i
9925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9927 {
9928 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9929 (__v8di) __W,
9930 (__mmask8) __U);
9931 }
9932
9933 extern __inline __m512i
9934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9935 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9936 {
9937 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9938 (__v8di)
9939 _mm512_setzero_si512 (),
9940 (__mmask8) __U);
9941 }
9942
9943 extern __inline __m512i
9944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9945 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9946 {
9947 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9948 (__v8di) __W,
9949 (__mmask8) __U);
9950 }
9951
9952 extern __inline __m512i
9953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9954 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9955 {
9956 return (__m512i)
9957 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9958 (__v8di)
9959 _mm512_setzero_si512 (),
9960 (__mmask8) __U);
9961 }
9962
9963 extern __inline __m512i
9964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9965 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9966 {
9967 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9968 (__v16si) __W,
9969 (__mmask16) __U);
9970 }
9971
9972 extern __inline __m512i
9973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9974 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9975 {
9976 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9977 (__v16si)
9978 _mm512_setzero_si512 (),
9979 (__mmask16) __U);
9980 }
9981
9982 extern __inline __m512i
9983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9984 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9985 {
9986 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9987 (__v16si) __W,
9988 (__mmask16) __U);
9989 }
9990
9991 extern __inline __m512i
9992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9994 {
9995 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9996 (__v16si)
9997 _mm512_setzero_si512
9998 (), (__mmask16) __U);
9999 }
10000
10001 /* Mask arithmetic operations */
10002 #define _kand_mask16 _mm512_kand
10003 #define _kandn_mask16 _mm512_kandn
10004 #define _knot_mask16 _mm512_knot
10005 #define _kor_mask16 _mm512_kor
10006 #define _kxnor_mask16 _mm512_kxnor
10007 #define _kxor_mask16 _mm512_kxor
10008
10009 extern __inline __mmask16
10010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10011 _kadd_mask16 (__mmask16 __A, __mmask16 __B)
10012 {
10013 return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
10014 }
10015
10016 extern __inline unsigned int
10017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10018 _cvtmask16_u32 (__mmask16 __A)
10019 {
10020 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
10021 }
10022
10023 extern __inline __mmask16
10024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10025 _cvtu32_mask16 (unsigned int __A)
10026 {
10027 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10028 }
10029
10030 extern __inline __mmask16
10031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10032 _load_mask16 (__mmask16 *__A)
10033 {
10034 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10035 }
10036
10037 extern __inline void
10038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10039 _store_mask16 (__mmask16 *__A, __mmask16 __B)
10040 {
10041 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10042 }
10043
10044 extern __inline __mmask16
10045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10046 _mm512_kand (__mmask16 __A, __mmask16 __B)
10047 {
10048 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10049 }
10050
10051 extern __inline __mmask16
10052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10053 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10054 {
10055 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10056 (__mmask16) __B);
10057 }
10058
10059 extern __inline __mmask16
10060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10061 _mm512_kor (__mmask16 __A, __mmask16 __B)
10062 {
10063 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10064 }
10065
10066 extern __inline int
10067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10068 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10069 {
10070 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10071 (__mmask16) __B);
10072 }
10073
10074 extern __inline int
10075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10076 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10077 {
10078 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10079 (__mmask16) __B);
10080 }
10081
10082 extern __inline __mmask16
10083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10084 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10085 {
10086 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10087 }
10088
10089 extern __inline __mmask16
10090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10091 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10092 {
10093 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10094 }
10095
10096 extern __inline __mmask16
10097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10098 _mm512_knot (__mmask16 __A)
10099 {
10100 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10101 }
10102
10103 extern __inline __mmask16
10104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10106 {
10107 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10108 }
10109
10110 extern __inline __mmask16
10111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10112 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10113 {
10114 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10115 }
10116
10117 #ifdef __OPTIMIZE__
10118 extern __inline __m512i
10119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10120 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10121 const int __imm)
10122 {
10123 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10124 (__v4si) __D,
10125 __imm,
10126 (__v16si)
10127 _mm512_setzero_si512 (),
10128 __B);
10129 }
10130
10131 extern __inline __m512
10132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10133 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10134 const int __imm)
10135 {
10136 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10137 (__v4sf) __D,
10138 __imm,
10139 (__v16sf)
10140 _mm512_setzero_ps (), __B);
10141 }
10142
10143 extern __inline __m512i
10144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10145 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10146 __m128i __D, const int __imm)
10147 {
10148 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10149 (__v4si) __D,
10150 __imm,
10151 (__v16si) __A,
10152 __B);
10153 }
10154
10155 extern __inline __m512
10156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10157 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10158 __m128 __D, const int __imm)
10159 {
10160 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10161 (__v4sf) __D,
10162 __imm,
10163 (__v16sf) __A, __B);
10164 }
10165 #else
/* Macro forms of the masked 32x4 insert intrinsics, used when
   __OPTIMIZE__ is not defined.  The write mask covers sixteen 32-bit
   lanes, so it is cast to __mmask16 exactly as the inline versions
   declare it; casting to __mmask8 would silently drop the mask bits for
   the upper eight lanes.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(A)))

#define _mm512_maskz_inserti32x4(A, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))

#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
    (__mmask16)(B)))

#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
    (__mmask16)(B)))
10185 #endif
10186
10187 extern __inline __m512i
10188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10189 _mm512_max_epi64 (__m512i __A, __m512i __B)
10190 {
10191 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10192 (__v8di) __B,
10193 (__v8di)
10194 _mm512_undefined_epi32 (),
10195 (__mmask8) -1);
10196 }
10197
10198 extern __inline __m512i
10199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10200 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10201 {
10202 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10203 (__v8di) __B,
10204 (__v8di)
10205 _mm512_setzero_si512 (),
10206 __M);
10207 }
10208
10209 extern __inline __m512i
10210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10211 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10212 {
10213 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10214 (__v8di) __B,
10215 (__v8di) __W, __M);
10216 }
10217
10218 extern __inline __m512i
10219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10220 _mm512_min_epi64 (__m512i __A, __m512i __B)
10221 {
10222 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10223 (__v8di) __B,
10224 (__v8di)
10225 _mm512_undefined_epi32 (),
10226 (__mmask8) -1);
10227 }
10228
10229 extern __inline __m512i
10230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10231 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10232 {
10233 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10234 (__v8di) __B,
10235 (__v8di) __W, __M);
10236 }
10237
10238 extern __inline __m512i
10239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10240 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10241 {
10242 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10243 (__v8di) __B,
10244 (__v8di)
10245 _mm512_setzero_si512 (),
10246 __M);
10247 }
10248
10249 extern __inline __m512i
10250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10251 _mm512_max_epu64 (__m512i __A, __m512i __B)
10252 {
10253 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10254 (__v8di) __B,
10255 (__v8di)
10256 _mm512_undefined_epi32 (),
10257 (__mmask8) -1);
10258 }
10259
10260 extern __inline __m512i
10261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10262 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10263 {
10264 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10265 (__v8di) __B,
10266 (__v8di)
10267 _mm512_setzero_si512 (),
10268 __M);
10269 }
10270
10271 extern __inline __m512i
10272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10273 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10274 {
10275 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10276 (__v8di) __B,
10277 (__v8di) __W, __M);
10278 }
10279
10280 extern __inline __m512i
10281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10282 _mm512_min_epu64 (__m512i __A, __m512i __B)
10283 {
10284 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10285 (__v8di) __B,
10286 (__v8di)
10287 _mm512_undefined_epi32 (),
10288 (__mmask8) -1);
10289 }
10290
10291 extern __inline __m512i
10292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10293 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10294 {
10295 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10296 (__v8di) __B,
10297 (__v8di) __W, __M);
10298 }
10299
10300 extern __inline __m512i
10301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10302 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10303 {
10304 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10305 (__v8di) __B,
10306 (__v8di)
10307 _mm512_setzero_si512 (),
10308 __M);
10309 }
10310
10311 extern __inline __m512i
10312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10313 _mm512_max_epi32 (__m512i __A, __m512i __B)
10314 {
10315 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10316 (__v16si) __B,
10317 (__v16si)
10318 _mm512_undefined_epi32 (),
10319 (__mmask16) -1);
10320 }
10321
10322 extern __inline __m512i
10323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10324 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10325 {
10326 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10327 (__v16si) __B,
10328 (__v16si)
10329 _mm512_setzero_si512 (),
10330 __M);
10331 }
10332
10333 extern __inline __m512i
10334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10335 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10336 {
10337 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10338 (__v16si) __B,
10339 (__v16si) __W, __M);
10340 }
10341
10342 extern __inline __m512i
10343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10344 _mm512_min_epi32 (__m512i __A, __m512i __B)
10345 {
10346 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10347 (__v16si) __B,
10348 (__v16si)
10349 _mm512_undefined_epi32 (),
10350 (__mmask16) -1);
10351 }
10352
10353 extern __inline __m512i
10354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10355 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10356 {
10357 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10358 (__v16si) __B,
10359 (__v16si)
10360 _mm512_setzero_si512 (),
10361 __M);
10362 }
10363
10364 extern __inline __m512i
10365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10366 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10367 {
10368 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10369 (__v16si) __B,
10370 (__v16si) __W, __M);
10371 }
10372
10373 extern __inline __m512i
10374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10375 _mm512_max_epu32 (__m512i __A, __m512i __B)
10376 {
10377 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10378 (__v16si) __B,
10379 (__v16si)
10380 _mm512_undefined_epi32 (),
10381 (__mmask16) -1);
10382 }
10383
10384 extern __inline __m512i
10385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10386 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10387 {
10388 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10389 (__v16si) __B,
10390 (__v16si)
10391 _mm512_setzero_si512 (),
10392 __M);
10393 }
10394
10395 extern __inline __m512i
10396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10397 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10398 {
10399 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10400 (__v16si) __B,
10401 (__v16si) __W, __M);
10402 }
10403
10404 extern __inline __m512i
10405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10406 _mm512_min_epu32 (__m512i __A, __m512i __B)
10407 {
10408 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10409 (__v16si) __B,
10410 (__v16si)
10411 _mm512_undefined_epi32 (),
10412 (__mmask16) -1);
10413 }
10414
10415 extern __inline __m512i
10416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10417 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10418 {
10419 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10420 (__v16si) __B,
10421 (__v16si)
10422 _mm512_setzero_si512 (),
10423 __M);
10424 }
10425
10426 extern __inline __m512i
10427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10428 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10429 {
10430 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10431 (__v16si) __B,
10432 (__v16si) __W, __M);
10433 }
10434
10435 extern __inline __m512
10436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10437 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10438 {
10439 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10440 (__v16sf) __B,
10441 (__v16sf)
10442 _mm512_undefined_ps (),
10443 (__mmask16) -1);
10444 }
10445
10446 extern __inline __m512
10447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10448 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10449 {
10450 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10451 (__v16sf) __B,
10452 (__v16sf) __W,
10453 (__mmask16) __U);
10454 }
10455
10456 extern __inline __m512
10457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10458 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10459 {
10460 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10461 (__v16sf) __B,
10462 (__v16sf)
10463 _mm512_setzero_ps (),
10464 (__mmask16) __U);
10465 }
10466
10467 #ifdef __OPTIMIZE__
10468 extern __inline __m128d
10469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10470 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10471 {
10472 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10473 (__v2df) __B,
10474 __R);
10475 }
10476
10477 extern __inline __m128
10478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10479 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10480 {
10481 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10482 (__v4sf) __B,
10483 __R);
10484 }
10485
10486 extern __inline __m128d
10487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10488 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10489 {
10490 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10491 (__v2df) __B,
10492 __R);
10493 }
10494
10495 extern __inline __m128
10496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10497 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10498 {
10499 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10500 (__v4sf) __B,
10501 __R);
10502 }
10503
10504 #else
/* Macro form of _mm_max_round_sd for builds without __OPTIMIZE__.
   A and B are the two vector operands, C the rounding argument.
   It must expand to the same builtin as the inline definition in the
   __OPTIMIZE__ branch (__builtin_ia32_maxsd_round), not the scalar-add
   builtin.  The expansion is parenthesized so the cast and the call
   always group as one expression.  */
#define _mm_max_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_round (A, B, C))
10507
/* Macro form of _mm_max_round_ss for builds without __OPTIMIZE__.
   A and B are the two vector operands, C the rounding argument.
   It must expand to the same builtin as the inline definition in the
   __OPTIMIZE__ branch (__builtin_ia32_maxss_round), not the scalar-add
   builtin.  The expansion is parenthesized so the cast and the call
   always group as one expression.  */
#define _mm_max_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_maxss_round (A, B, C))
10510
/* Macro form of _mm_min_round_sd for builds without __OPTIMIZE__.
   A and B are the two vector operands, C the rounding argument.
   It must expand to the same builtin as the inline definition in the
   __OPTIMIZE__ branch (__builtin_ia32_minsd_round), not the
   scalar-subtract builtin.  The expansion is parenthesized so the cast
   and the call always group as one expression.  */
#define _mm_min_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_minsd_round (A, B, C))
10513
/* Macro form of _mm_min_round_ss for builds without __OPTIMIZE__.
   A and B are the two vector operands, C the rounding argument.
   It must expand to the same builtin as the inline definition in the
   __OPTIMIZE__ branch (__builtin_ia32_minss_round), not the
   scalar-subtract builtin.  The expansion is parenthesized so the cast
   and the call always group as one expression.  */
#define _mm_min_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_minss_round (A, B, C))
10516 #endif
10517
10518 extern __inline __m512d
10519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10520 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10521 {
10522 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10523 (__v8df) __W,
10524 (__mmask8) __U);
10525 }
10526
10527 extern __inline __m512
10528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10529 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10530 {
10531 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10532 (__v16sf) __W,
10533 (__mmask16) __U);
10534 }
10535
10536 extern __inline __m512i
10537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10538 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10539 {
10540 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10541 (__v8di) __W,
10542 (__mmask8) __U);
10543 }
10544
10545 extern __inline __m512i
10546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10547 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10548 {
10549 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10550 (__v16si) __W,
10551 (__mmask16) __U);
10552 }
10553
10554 #ifdef __OPTIMIZE__
10555 extern __inline __m128d
10556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10557 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10558 {
10559 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10560 (__v2df) __A,
10561 (__v2df) __B,
10562 __R);
10563 }
10564
10565 extern __inline __m128
10566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10567 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10568 {
10569 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10570 (__v4sf) __A,
10571 (__v4sf) __B,
10572 __R);
10573 }
10574
10575 extern __inline __m128d
10576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10577 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10578 {
10579 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10580 (__v2df) __A,
10581 -(__v2df) __B,
10582 __R);
10583 }
10584
10585 extern __inline __m128
10586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10587 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10588 {
10589 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10590 (__v4sf) __A,
10591 -(__v4sf) __B,
10592 __R);
10593 }
10594
10595 extern __inline __m128d
10596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10597 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10598 {
10599 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10600 -(__v2df) __A,
10601 (__v2df) __B,
10602 __R);
10603 }
10604
10605 extern __inline __m128
10606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10607 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10608 {
10609 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10610 -(__v4sf) __A,
10611 (__v4sf) __B,
10612 __R);
10613 }
10614
10615 extern __inline __m128d
10616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10617 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10618 {
10619 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10620 -(__v2df) __A,
10621 -(__v2df) __B,
10622 __R);
10623 }
10624
10625 extern __inline __m128
10626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10627 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10628 {
10629 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10630 -(__v4sf) __A,
10631 -(__v4sf) __B,
10632 __R);
10633 }
10634 #else
/* Macro form of _mm_fmadd_round_sd for builds without __OPTIMIZE__:
   forwards A, B, C and the rounding argument R to
   __builtin_ia32_vfmaddsd3_round.  The whole expansion is parenthesized
   so that a postfix operator written after the macro call applies to
   the cast result rather than to the builtin call.  */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))
10637
/* Macro form of _mm_fmadd_round_ss for builds without __OPTIMIZE__:
   forwards A, B, C and the rounding argument R to
   __builtin_ia32_vfmaddss3_round.  The whole expansion is parenthesized
   so that a postfix operator written after the macro call applies to
   the cast result rather than to the builtin call.  */
#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))
10640
/* Macro form of _mm_fmsub_round_sd for builds without __OPTIMIZE__:
   calls __builtin_ia32_vfmaddsd3_round with A, B, the negation of C,
   and the rounding argument R.  The whole expansion is parenthesized so
   that a postfix operator written after the macro call applies to the
   cast result rather than to the builtin call.  */
#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))
10643
/* Macro form of _mm_fmsub_round_ss for builds without __OPTIMIZE__:
   calls __builtin_ia32_vfmaddss3_round with A, B, the negation of C,
   and the rounding argument R.  The whole expansion is parenthesized so
   that a postfix operator written after the macro call applies to the
   cast result rather than to the builtin call.  */
#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))
10646
/* Macro form of _mm_fnmadd_round_sd for builds without __OPTIMIZE__:
   calls __builtin_ia32_vfmaddsd3_round with A, the negation of B, C,
   and the rounding argument R.  The whole expansion is parenthesized so
   that a postfix operator written after the macro call applies to the
   cast result rather than to the builtin call.  */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))
10649
/* Macro form of _mm_fnmadd_round_ss for builds without __OPTIMIZE__:
   calls __builtin_ia32_vfmaddss3_round with A, the negation of B, C,
   and the rounding argument R.  The whole expansion is parenthesized so
   that a postfix operator written after the macro call applies to the
   cast result rather than to the builtin call.  */
#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))
10652
/* Macro form of _mm_fnmsub_round_sd for builds without __OPTIMIZE__:
   calls __builtin_ia32_vfmaddsd3_round with A, the negation of B, the
   negation of C, and the rounding argument R.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result rather than to the builtin call.  */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))
10655
/* Macro form of _mm_fnmsub_round_ss for builds without __OPTIMIZE__:
   calls __builtin_ia32_vfmaddss3_round with A, the negation of B, the
   negation of C, and the rounding argument R.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result rather than to the builtin call.  */
#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10658 #endif
10659
10660 #ifdef __OPTIMIZE__
10661 extern __inline int
10662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10663 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10664 {
10665 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10666 }
10667
10668 extern __inline int
10669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10670 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10671 {
10672 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10673 }
10674 #else
/* Macro form of _mm_comi_round_ss for builds without __OPTIMIZE__:
   forwards all four arguments, in order, to __builtin_ia32_vcomiss.
   P and R correspond to __P and __R of the inline definition.  */
#define _mm_comi_round_ss(A, B, P, R) \
  __builtin_ia32_vcomiss (A, B, P, R)
/* Macro form of _mm_comi_round_sd for builds without __OPTIMIZE__:
   forwards all four arguments, in order, to __builtin_ia32_vcomisd.
   P and R correspond to __P and __R of the inline definition.  */
#define _mm_comi_round_sd(A, B, P, R) \
  __builtin_ia32_vcomisd (A, B, P, R)
10679 #endif
10680
10681 extern __inline __m512d
10682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10683 _mm512_sqrt_pd (__m512d __A)
10684 {
10685 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10686 (__v8df)
10687 _mm512_undefined_pd (),
10688 (__mmask8) -1,
10689 _MM_FROUND_CUR_DIRECTION);
10690 }
10691
10692 extern __inline __m512d
10693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10694 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10695 {
10696 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10697 (__v8df) __W,
10698 (__mmask8) __U,
10699 _MM_FROUND_CUR_DIRECTION);
10700 }
10701
10702 extern __inline __m512d
10703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10704 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10705 {
10706 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10707 (__v8df)
10708 _mm512_setzero_pd (),
10709 (__mmask8) __U,
10710 _MM_FROUND_CUR_DIRECTION);
10711 }
10712
10713 extern __inline __m512
10714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10715 _mm512_sqrt_ps (__m512 __A)
10716 {
10717 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10718 (__v16sf)
10719 _mm512_undefined_ps (),
10720 (__mmask16) -1,
10721 _MM_FROUND_CUR_DIRECTION);
10722 }
10723
10724 extern __inline __m512
10725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10726 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10727 {
10728 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10729 (__v16sf) __W,
10730 (__mmask16) __U,
10731 _MM_FROUND_CUR_DIRECTION);
10732 }
10733
10734 extern __inline __m512
10735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10736 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10737 {
10738 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10739 (__v16sf)
10740 _mm512_setzero_ps (),
10741 (__mmask16) __U,
10742 _MM_FROUND_CUR_DIRECTION);
10743 }
10744
10745 extern __inline __m512d
10746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10747 _mm512_add_pd (__m512d __A, __m512d __B)
10748 {
10749 return (__m512d) ((__v8df)__A + (__v8df)__B);
10750 }
10751
10752 extern __inline __m512d
10753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10754 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10755 {
10756 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10757 (__v8df) __B,
10758 (__v8df) __W,
10759 (__mmask8) __U,
10760 _MM_FROUND_CUR_DIRECTION);
10761 }
10762
10763 extern __inline __m512d
10764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10765 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10766 {
10767 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10768 (__v8df) __B,
10769 (__v8df)
10770 _mm512_setzero_pd (),
10771 (__mmask8) __U,
10772 _MM_FROUND_CUR_DIRECTION);
10773 }
10774
10775 extern __inline __m512
10776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10777 _mm512_add_ps (__m512 __A, __m512 __B)
10778 {
10779 return (__m512) ((__v16sf)__A + (__v16sf)__B);
10780 }
10781
10782 extern __inline __m512
10783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10784 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10785 {
10786 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10787 (__v16sf) __B,
10788 (__v16sf) __W,
10789 (__mmask16) __U,
10790 _MM_FROUND_CUR_DIRECTION);
10791 }
10792
10793 extern __inline __m512
10794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10795 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10796 {
10797 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10798 (__v16sf) __B,
10799 (__v16sf)
10800 _mm512_setzero_ps (),
10801 (__mmask16) __U,
10802 _MM_FROUND_CUR_DIRECTION);
10803 }
10804
10805 extern __inline __m512d
10806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10807 _mm512_sub_pd (__m512d __A, __m512d __B)
10808 {
10809 return (__m512d) ((__v8df)__A - (__v8df)__B);
10810 }
10811
10812 extern __inline __m512d
10813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10814 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10815 {
10816 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10817 (__v8df) __B,
10818 (__v8df) __W,
10819 (__mmask8) __U,
10820 _MM_FROUND_CUR_DIRECTION);
10821 }
10822
10823 extern __inline __m512d
10824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10825 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10826 {
10827 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10828 (__v8df) __B,
10829 (__v8df)
10830 _mm512_setzero_pd (),
10831 (__mmask8) __U,
10832 _MM_FROUND_CUR_DIRECTION);
10833 }
10834
10835 extern __inline __m512
10836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10837 _mm512_sub_ps (__m512 __A, __m512 __B)
10838 {
10839 return (__m512) ((__v16sf)__A - (__v16sf)__B);
10840 }
10841
10842 extern __inline __m512
10843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10844 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10845 {
10846 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10847 (__v16sf) __B,
10848 (__v16sf) __W,
10849 (__mmask16) __U,
10850 _MM_FROUND_CUR_DIRECTION);
10851 }
10852
10853 extern __inline __m512
10854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10855 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10856 {
10857 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10858 (__v16sf) __B,
10859 (__v16sf)
10860 _mm512_setzero_ps (),
10861 (__mmask16) __U,
10862 _MM_FROUND_CUR_DIRECTION);
10863 }
10864
10865 extern __inline __m512d
10866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10867 _mm512_mul_pd (__m512d __A, __m512d __B)
10868 {
10869 return (__m512d) ((__v8df)__A * (__v8df)__B);
10870 }
10871
10872 extern __inline __m512d
10873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10874 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10875 {
10876 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10877 (__v8df) __B,
10878 (__v8df) __W,
10879 (__mmask8) __U,
10880 _MM_FROUND_CUR_DIRECTION);
10881 }
10882
10883 extern __inline __m512d
10884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10885 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10886 {
10887 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10888 (__v8df) __B,
10889 (__v8df)
10890 _mm512_setzero_pd (),
10891 (__mmask8) __U,
10892 _MM_FROUND_CUR_DIRECTION);
10893 }
10894
10895 extern __inline __m512
10896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10897 _mm512_mul_ps (__m512 __A, __m512 __B)
10898 {
10899 return (__m512) ((__v16sf)__A * (__v16sf)__B);
10900 }
10901
10902 extern __inline __m512
10903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10904 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10905 {
10906 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10907 (__v16sf) __B,
10908 (__v16sf) __W,
10909 (__mmask16) __U,
10910 _MM_FROUND_CUR_DIRECTION);
10911 }
10912
10913 extern __inline __m512
10914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10915 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10916 {
10917 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10918 (__v16sf) __B,
10919 (__v16sf)
10920 _mm512_setzero_ps (),
10921 (__mmask16) __U,
10922 _MM_FROUND_CUR_DIRECTION);
10923 }
10924
10925 extern __inline __m512d
10926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10927 _mm512_div_pd (__m512d __M, __m512d __V)
10928 {
10929 return (__m512d) ((__v8df)__M / (__v8df)__V);
10930 }
10931
10932 extern __inline __m512d
10933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10934 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10935 {
10936 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10937 (__v8df) __V,
10938 (__v8df) __W,
10939 (__mmask8) __U,
10940 _MM_FROUND_CUR_DIRECTION);
10941 }
10942
10943 extern __inline __m512d
10944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10945 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10946 {
10947 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10948 (__v8df) __V,
10949 (__v8df)
10950 _mm512_setzero_pd (),
10951 (__mmask8) __U,
10952 _MM_FROUND_CUR_DIRECTION);
10953 }
10954
10955 extern __inline __m512
10956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10957 _mm512_div_ps (__m512 __A, __m512 __B)
10958 {
10959 return (__m512) ((__v16sf)__A / (__v16sf)__B);
10960 }
10961
10962 extern __inline __m512
10963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10964 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10965 {
10966 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10967 (__v16sf) __B,
10968 (__v16sf) __W,
10969 (__mmask16) __U,
10970 _MM_FROUND_CUR_DIRECTION);
10971 }
10972
10973 extern __inline __m512
10974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10975 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10976 {
10977 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10978 (__v16sf) __B,
10979 (__v16sf)
10980 _mm512_setzero_ps (),
10981 (__mmask16) __U,
10982 _MM_FROUND_CUR_DIRECTION);
10983 }
10984
10985 extern __inline __m512d
10986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10987 _mm512_max_pd (__m512d __A, __m512d __B)
10988 {
10989 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10990 (__v8df) __B,
10991 (__v8df)
10992 _mm512_undefined_pd (),
10993 (__mmask8) -1,
10994 _MM_FROUND_CUR_DIRECTION);
10995 }
10996
10997 extern __inline __m512d
10998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10999 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11000 {
11001 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11002 (__v8df) __B,
11003 (__v8df) __W,
11004 (__mmask8) __U,
11005 _MM_FROUND_CUR_DIRECTION);
11006 }
11007
11008 extern __inline __m512d
11009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11010 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
11011 {
11012 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11013 (__v8df) __B,
11014 (__v8df)
11015 _mm512_setzero_pd (),
11016 (__mmask8) __U,
11017 _MM_FROUND_CUR_DIRECTION);
11018 }
11019
11020 extern __inline __m512
11021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11022 _mm512_max_ps (__m512 __A, __m512 __B)
11023 {
11024 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11025 (__v16sf) __B,
11026 (__v16sf)
11027 _mm512_undefined_ps (),
11028 (__mmask16) -1,
11029 _MM_FROUND_CUR_DIRECTION);
11030 }
11031
11032 extern __inline __m512
11033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11034 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11035 {
11036 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11037 (__v16sf) __B,
11038 (__v16sf) __W,
11039 (__mmask16) __U,
11040 _MM_FROUND_CUR_DIRECTION);
11041 }
11042
11043 extern __inline __m512
11044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11045 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11046 {
11047 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11048 (__v16sf) __B,
11049 (__v16sf)
11050 _mm512_setzero_ps (),
11051 (__mmask16) __U,
11052 _MM_FROUND_CUR_DIRECTION);
11053 }
11054
11055 extern __inline __m512d
11056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057 _mm512_min_pd (__m512d __A, __m512d __B)
11058 {
11059 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11060 (__v8df) __B,
11061 (__v8df)
11062 _mm512_undefined_pd (),
11063 (__mmask8) -1,
11064 _MM_FROUND_CUR_DIRECTION);
11065 }
11066
11067 extern __inline __m512d
11068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11069 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11070 {
11071 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11072 (__v8df) __B,
11073 (__v8df) __W,
11074 (__mmask8) __U,
11075 _MM_FROUND_CUR_DIRECTION);
11076 }
11077
11078 extern __inline __m512d
11079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11080 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11081 {
11082 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11083 (__v8df) __B,
11084 (__v8df)
11085 _mm512_setzero_pd (),
11086 (__mmask8) __U,
11087 _MM_FROUND_CUR_DIRECTION);
11088 }
11089
11090 extern __inline __m512
11091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11092 _mm512_min_ps (__m512 __A, __m512 __B)
11093 {
11094 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11095 (__v16sf) __B,
11096 (__v16sf)
11097 _mm512_undefined_ps (),
11098 (__mmask16) -1,
11099 _MM_FROUND_CUR_DIRECTION);
11100 }
11101
11102 extern __inline __m512
11103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11104 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11105 {
11106 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11107 (__v16sf) __B,
11108 (__v16sf) __W,
11109 (__mmask16) __U,
11110 _MM_FROUND_CUR_DIRECTION);
11111 }
11112
11113 extern __inline __m512
11114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11115 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11116 {
11117 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11118 (__v16sf) __B,
11119 (__v16sf)
11120 _mm512_setzero_ps (),
11121 (__mmask16) __U,
11122 _MM_FROUND_CUR_DIRECTION);
11123 }
11124
11125 extern __inline __m512d
11126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11127 _mm512_scalef_pd (__m512d __A, __m512d __B)
11128 {
11129 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11130 (__v8df) __B,
11131 (__v8df)
11132 _mm512_undefined_pd (),
11133 (__mmask8) -1,
11134 _MM_FROUND_CUR_DIRECTION);
11135 }
11136
11137 extern __inline __m512d
11138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11139 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11140 {
11141 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11142 (__v8df) __B,
11143 (__v8df) __W,
11144 (__mmask8) __U,
11145 _MM_FROUND_CUR_DIRECTION);
11146 }
11147
11148 extern __inline __m512d
11149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11150 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11151 {
11152 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11153 (__v8df) __B,
11154 (__v8df)
11155 _mm512_setzero_pd (),
11156 (__mmask8) __U,
11157 _MM_FROUND_CUR_DIRECTION);
11158 }
11159
11160 extern __inline __m512
11161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11162 _mm512_scalef_ps (__m512 __A, __m512 __B)
11163 {
11164 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11165 (__v16sf) __B,
11166 (__v16sf)
11167 _mm512_undefined_ps (),
11168 (__mmask16) -1,
11169 _MM_FROUND_CUR_DIRECTION);
11170 }
11171
11172 extern __inline __m512
11173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11174 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11175 {
11176 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11177 (__v16sf) __B,
11178 (__v16sf) __W,
11179 (__mmask16) __U,
11180 _MM_FROUND_CUR_DIRECTION);
11181 }
11182
11183 extern __inline __m512
11184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11185 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11186 {
11187 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11188 (__v16sf) __B,
11189 (__v16sf)
11190 _mm512_setzero_ps (),
11191 (__mmask16) __U,
11192 _MM_FROUND_CUR_DIRECTION);
11193 }
11194
11195 extern __inline __m128d
11196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11197 _mm_scalef_sd (__m128d __A, __m128d __B)
11198 {
11199 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11200 (__v2df) __B,
11201 _MM_FROUND_CUR_DIRECTION);
11202 }
11203
11204 extern __inline __m128
11205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11206 _mm_scalef_ss (__m128 __A, __m128 __B)
11207 {
11208 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11209 (__v4sf) __B,
11210 _MM_FROUND_CUR_DIRECTION);
11211 }
11212
11213 extern __inline __m512d
11214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11215 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11216 {
11217 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11218 (__v8df) __B,
11219 (__v8df) __C,
11220 (__mmask8) -1,
11221 _MM_FROUND_CUR_DIRECTION);
11222 }
11223
11224 extern __inline __m512d
11225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11226 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11227 {
11228 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11229 (__v8df) __B,
11230 (__v8df) __C,
11231 (__mmask8) __U,
11232 _MM_FROUND_CUR_DIRECTION);
11233 }
11234
11235 extern __inline __m512d
11236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11237 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11238 {
11239 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11240 (__v8df) __B,
11241 (__v8df) __C,
11242 (__mmask8) __U,
11243 _MM_FROUND_CUR_DIRECTION);
11244 }
11245
11246 extern __inline __m512d
11247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11248 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11249 {
11250 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11251 (__v8df) __B,
11252 (__v8df) __C,
11253 (__mmask8) __U,
11254 _MM_FROUND_CUR_DIRECTION);
11255 }
11256
11257 extern __inline __m512
11258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11259 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11260 {
11261 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11262 (__v16sf) __B,
11263 (__v16sf) __C,
11264 (__mmask16) -1,
11265 _MM_FROUND_CUR_DIRECTION);
11266 }
11267
11268 extern __inline __m512
11269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11270 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11271 {
11272 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11273 (__v16sf) __B,
11274 (__v16sf) __C,
11275 (__mmask16) __U,
11276 _MM_FROUND_CUR_DIRECTION);
11277 }
11278
11279 extern __inline __m512
11280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11281 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11282 {
11283 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11284 (__v16sf) __B,
11285 (__v16sf) __C,
11286 (__mmask16) __U,
11287 _MM_FROUND_CUR_DIRECTION);
11288 }
11289
11290 extern __inline __m512
11291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11292 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11293 {
11294 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11295 (__v16sf) __B,
11296 (__v16sf) __C,
11297 (__mmask16) __U,
11298 _MM_FROUND_CUR_DIRECTION);
11299 }
11300
11301 extern __inline __m512d
11302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11303 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11304 {
11305 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11306 (__v8df) __B,
11307 -(__v8df) __C,
11308 (__mmask8) -1,
11309 _MM_FROUND_CUR_DIRECTION);
11310 }
11311
11312 extern __inline __m512d
11313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11314 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11315 {
11316 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11317 (__v8df) __B,
11318 -(__v8df) __C,
11319 (__mmask8) __U,
11320 _MM_FROUND_CUR_DIRECTION);
11321 }
11322
11323 extern __inline __m512d
11324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11325 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11326 {
11327 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11328 (__v8df) __B,
11329 (__v8df) __C,
11330 (__mmask8) __U,
11331 _MM_FROUND_CUR_DIRECTION);
11332 }
11333
11334 extern __inline __m512d
11335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11336 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11337 {
11338 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11339 (__v8df) __B,
11340 -(__v8df) __C,
11341 (__mmask8) __U,
11342 _MM_FROUND_CUR_DIRECTION);
11343 }
11344
11345 extern __inline __m512
11346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11347 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11348 {
11349 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11350 (__v16sf) __B,
11351 -(__v16sf) __C,
11352 (__mmask16) -1,
11353 _MM_FROUND_CUR_DIRECTION);
11354 }
11355
11356 extern __inline __m512
11357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11358 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11359 {
11360 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11361 (__v16sf) __B,
11362 -(__v16sf) __C,
11363 (__mmask16) __U,
11364 _MM_FROUND_CUR_DIRECTION);
11365 }
11366
11367 extern __inline __m512
11368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11369 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11370 {
11371 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11372 (__v16sf) __B,
11373 (__v16sf) __C,
11374 (__mmask16) __U,
11375 _MM_FROUND_CUR_DIRECTION);
11376 }
11377
/* maskz single-precision fmsub: calls __builtin_ia32_vfmaddps512_maskz with
   __A, __B and the negated __C, mask __U and _MM_FROUND_CUR_DIRECTION.  */
11378 extern __inline __m512
11379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11380 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11381 {
11382 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11383 (__v16sf) __B,
11384 -(__v16sf) __C,
11385 (__mmask16) __U,
11386 _MM_FROUND_CUR_DIRECTION);
11387 }
11388
/* Double-precision fmaddsub: calls __builtin_ia32_vfmaddsubpd512_mask with
   __A, __B, __C unmodified, an all-ones mask ((__mmask8) -1) and
   _MM_FROUND_CUR_DIRECTION as the rounding argument.  */
11389 extern __inline __m512d
11390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11391 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11392 {
11393 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11394 (__v8df) __B,
11395 (__v8df) __C,
11396 (__mmask8) -1,
11397 _MM_FROUND_CUR_DIRECTION);
11398 }
11399
/* Masked double-precision fmaddsub: same builtin as _mm512_fmaddsub_pd,
   with the caller's __U forwarded as the mask.  */
11400 extern __inline __m512d
11401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11402 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11403 {
11404 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11405 (__v8df) __B,
11406 (__v8df) __C,
11407 (__mmask8) __U,
11408 _MM_FROUND_CUR_DIRECTION);
11409 }
11410
/* mask3 double-precision fmaddsub: calls
   __builtin_ia32_vfmaddsubpd512_mask3 with __A, __B, __C and mask __U
   (note the mask is the last parameter of this intrinsic).  */
11411 extern __inline __m512d
11412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11413 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11414 {
11415 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11416 (__v8df) __B,
11417 (__v8df) __C,
11418 (__mmask8) __U,
11419 _MM_FROUND_CUR_DIRECTION);
11420 }
11421
/* maskz double-precision fmaddsub: calls
   __builtin_ia32_vfmaddsubpd512_maskz with __A, __B, __C and mask __U
   (the mask is the first parameter of this intrinsic).  */
11422 extern __inline __m512d
11423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11424 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11425 {
11426 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11427 (__v8df) __B,
11428 (__v8df) __C,
11429 (__mmask8) __U,
11430 _MM_FROUND_CUR_DIRECTION);
11431 }
11432
/* Single-precision fmaddsub: calls __builtin_ia32_vfmaddsubps512_mask with
   __A, __B, __C unmodified, an all-ones mask ((__mmask16) -1) and
   _MM_FROUND_CUR_DIRECTION as the rounding argument.  */
11433 extern __inline __m512
11434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11435 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11436 {
11437 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11438 (__v16sf) __B,
11439 (__v16sf) __C,
11440 (__mmask16) -1,
11441 _MM_FROUND_CUR_DIRECTION);
11442 }
11443
/* Masked single-precision fmaddsub: same builtin as _mm512_fmaddsub_ps,
   with the caller's __U forwarded as the mask.  */
11444 extern __inline __m512
11445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11446 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11447 {
11448 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11449 (__v16sf) __B,
11450 (__v16sf) __C,
11451 (__mmask16) __U,
11452 _MM_FROUND_CUR_DIRECTION);
11453 }
11454
/* mask3 single-precision fmaddsub: calls
   __builtin_ia32_vfmaddsubps512_mask3 with __A, __B, __C and mask __U.  */
11455 extern __inline __m512
11456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11457 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11458 {
11459 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11460 (__v16sf) __B,
11461 (__v16sf) __C,
11462 (__mmask16) __U,
11463 _MM_FROUND_CUR_DIRECTION);
11464 }
11465
/* maskz single-precision fmaddsub: calls
   __builtin_ia32_vfmaddsubps512_maskz with __A, __B, __C and mask __U.  */
11466 extern __inline __m512
11467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11468 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11469 {
11470 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11471 (__v16sf) __B,
11472 (__v16sf) __C,
11473 (__mmask16) __U,
11474 _MM_FROUND_CUR_DIRECTION);
11475 }
11476
/* Double-precision fmsubadd expressed through the fmaddsub builtin: calls
   __builtin_ia32_vfmaddsubpd512_mask with __A, __B and the negated __C,
   an all-ones mask ((__mmask8) -1) and _MM_FROUND_CUR_DIRECTION.  */
11477 extern __inline __m512d
11478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11479 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11480 {
11481 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11482 (__v8df) __B,
11483 -(__v8df) __C,
11484 (__mmask8) -1,
11485 _MM_FROUND_CUR_DIRECTION);
11486 }
11487
/* Masked double-precision fmsubadd: fmaddsub builtin with __C negated,
   with the caller's __U forwarded as the mask.  */
11488 extern __inline __m512d
11489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11490 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11491 {
11492 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11493 (__v8df) __B,
11494 -(__v8df) __C,
11495 (__mmask8) __U,
11496 _MM_FROUND_CUR_DIRECTION);
11497 }
11498
/* mask3 double-precision fmsubadd: calls the dedicated
   __builtin_ia32_vfmsubaddpd512_mask3 with __A, __B, __C unmodified and
   mask __U.  NOTE(review): __C is not negated here, presumably because it
   also supplies the masked-off lanes -- confirm.  */
11499 extern __inline __m512d
11500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11501 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11502 {
11503 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11504 (__v8df) __B,
11505 (__v8df) __C,
11506 (__mmask8) __U,
11507 _MM_FROUND_CUR_DIRECTION);
11508 }
11509
/* maskz double-precision fmsubadd: calls
   __builtin_ia32_vfmaddsubpd512_maskz with __A, __B and the negated __C,
   mask __U and _MM_FROUND_CUR_DIRECTION.  */
11510 extern __inline __m512d
11511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11512 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11513 {
11514 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11515 (__v8df) __B,
11516 -(__v8df) __C,
11517 (__mmask8) __U,
11518 _MM_FROUND_CUR_DIRECTION);
11519 }
11520
/* Single-precision fmsubadd expressed through the fmaddsub builtin: calls
   __builtin_ia32_vfmaddsubps512_mask with __A, __B and the negated __C,
   an all-ones mask ((__mmask16) -1) and _MM_FROUND_CUR_DIRECTION.  */
11521 extern __inline __m512
11522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11523 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11524 {
11525 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11526 (__v16sf) __B,
11527 -(__v16sf) __C,
11528 (__mmask16) -1,
11529 _MM_FROUND_CUR_DIRECTION);
11530 }
11531
/* Masked single-precision fmsubadd: fmaddsub builtin with __C negated,
   with the caller's __U forwarded as the mask.  */
11532 extern __inline __m512
11533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11534 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11535 {
11536 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11537 (__v16sf) __B,
11538 -(__v16sf) __C,
11539 (__mmask16) __U,
11540 _MM_FROUND_CUR_DIRECTION);
11541 }
11542
/* mask3 single-precision fmsubadd: calls the dedicated
   __builtin_ia32_vfmsubaddps512_mask3 with __A, __B, __C unmodified and
   mask __U.  NOTE(review): __C is not negated here, presumably because it
   also supplies the masked-off lanes -- confirm.  */
11543 extern __inline __m512
11544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11545 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11546 {
11547 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11548 (__v16sf) __B,
11549 (__v16sf) __C,
11550 (__mmask16) __U,
11551 _MM_FROUND_CUR_DIRECTION);
11552 }
11553
/* maskz single-precision fmsubadd: calls
   __builtin_ia32_vfmaddsubps512_maskz with __A, __B and the negated __C,
   mask __U and _MM_FROUND_CUR_DIRECTION.  */
11554 extern __inline __m512
11555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11556 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11557 {
11558 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11559 (__v16sf) __B,
11560 -(__v16sf) __C,
11561 (__mmask16) __U,
11562 _MM_FROUND_CUR_DIRECTION);
11563 }
11564
/* Double-precision fnmadd expressed through the fmadd builtin: calls
   __builtin_ia32_vfmaddpd512_mask with the negated __A, __B and __C,
   an all-ones mask ((__mmask8) -1) and _MM_FROUND_CUR_DIRECTION.  */
11565 extern __inline __m512d
11566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11567 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11568 {
11569 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11570 (__v8df) __B,
11571 (__v8df) __C,
11572 (__mmask8) -1,
11573 _MM_FROUND_CUR_DIRECTION);
11574 }
11575
/* Masked double-precision fnmadd: calls the dedicated
   __builtin_ia32_vfnmaddpd512_mask with __A, __B, __C unmodified and mask
   __U.  NOTE(review): unlike the unmasked form __A is not negated here,
   presumably because __A also supplies the masked-off lanes -- confirm.  */
11576 extern __inline __m512d
11577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11578 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11579 {
11580 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11581 (__v8df) __B,
11582 (__v8df) __C,
11583 (__mmask8) __U,
11584 _MM_FROUND_CUR_DIRECTION);
11585 }
11586
/* mask3 double-precision fnmadd: calls __builtin_ia32_vfmaddpd512_mask3
   with the negated __A, __B and __C, mask __U and
   _MM_FROUND_CUR_DIRECTION.  */
11587 extern __inline __m512d
11588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11589 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11590 {
11591 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11592 (__v8df) __B,
11593 (__v8df) __C,
11594 (__mmask8) __U,
11595 _MM_FROUND_CUR_DIRECTION);
11596 }
11597
/* maskz double-precision fnmadd: calls __builtin_ia32_vfmaddpd512_maskz
   with the negated __A, __B and __C, mask __U and
   _MM_FROUND_CUR_DIRECTION.  */
11598 extern __inline __m512d
11599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11600 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11601 {
11602 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11603 (__v8df) __B,
11604 (__v8df) __C,
11605 (__mmask8) __U,
11606 _MM_FROUND_CUR_DIRECTION);
11607 }
11608
/* Single-precision fnmadd expressed through the fmadd builtin: calls
   __builtin_ia32_vfmaddps512_mask with the negated __A, __B and __C,
   an all-ones mask ((__mmask16) -1) and _MM_FROUND_CUR_DIRECTION.  */
11609 extern __inline __m512
11610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11611 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11612 {
11613 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11614 (__v16sf) __B,
11615 (__v16sf) __C,
11616 (__mmask16) -1,
11617 _MM_FROUND_CUR_DIRECTION);
11618 }
11619
/* Masked single-precision fnmadd: calls the dedicated
   __builtin_ia32_vfnmaddps512_mask with __A, __B, __C unmodified and mask
   __U.  NOTE(review): __A is not negated here, presumably because it also
   supplies the masked-off lanes -- confirm.  */
11620 extern __inline __m512
11621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11622 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11623 {
11624 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11625 (__v16sf) __B,
11626 (__v16sf) __C,
11627 (__mmask16) __U,
11628 _MM_FROUND_CUR_DIRECTION);
11629 }
11630
/* mask3 single-precision fnmadd: calls __builtin_ia32_vfmaddps512_mask3
   with the negated __A, __B and __C, mask __U and
   _MM_FROUND_CUR_DIRECTION.  */
11631 extern __inline __m512
11632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11634 {
11635 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11636 (__v16sf) __B,
11637 (__v16sf) __C,
11638 (__mmask16) __U,
11639 _MM_FROUND_CUR_DIRECTION);
11640 }
11641
/* maskz single-precision fnmadd: calls __builtin_ia32_vfmaddps512_maskz
   with the negated __A, __B and __C, mask __U and
   _MM_FROUND_CUR_DIRECTION.  */
11642 extern __inline __m512
11643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11644 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11645 {
11646 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11647 (__v16sf) __B,
11648 (__v16sf) __C,
11649 (__mmask16) __U,
11650 _MM_FROUND_CUR_DIRECTION);
11651 }
11652
/* Double-precision fnmsub expressed through the fmadd builtin: calls
   __builtin_ia32_vfmaddpd512_mask with both __A and __C negated, __B
   unmodified, an all-ones mask ((__mmask8) -1) and
   _MM_FROUND_CUR_DIRECTION.  */
11653 extern __inline __m512d
11654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11655 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11656 {
11657 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11658 (__v8df) __B,
11659 -(__v8df) __C,
11660 (__mmask8) -1,
11661 _MM_FROUND_CUR_DIRECTION);
11662 }
11663
/* Masked double-precision fnmsub: calls the dedicated
   __builtin_ia32_vfnmsubpd512_mask with __A, __B, __C unmodified and mask
   __U.  NOTE(review): no operand is negated here, presumably because __A
   also supplies the masked-off lanes -- confirm.  */
11664 extern __inline __m512d
11665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11666 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11667 {
11668 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11669 (__v8df) __B,
11670 (__v8df) __C,
11671 (__mmask8) __U,
11672 _MM_FROUND_CUR_DIRECTION);
11673 }
11674
/* mask3 double-precision fnmsub: calls the dedicated
   __builtin_ia32_vfnmsubpd512_mask3 with __A, __B, __C unmodified and mask
   __U.  NOTE(review): no operand is negated here, presumably because __C
   also supplies the masked-off lanes -- confirm.  */
11675 extern __inline __m512d
11676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11677 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11678 {
11679 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11680 (__v8df) __B,
11681 (__v8df) __C,
11682 (__mmask8) __U,
11683 _MM_FROUND_CUR_DIRECTION);
11684 }
11685
/* maskz double-precision fnmsub: calls __builtin_ia32_vfmaddpd512_maskz
   with both __A and __C negated, __B unmodified, mask __U and
   _MM_FROUND_CUR_DIRECTION.  */
11686 extern __inline __m512d
11687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11688 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11689 {
11690 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11691 (__v8df) __B,
11692 -(__v8df) __C,
11693 (__mmask8) __U,
11694 _MM_FROUND_CUR_DIRECTION);
11695 }
11696
/* Single-precision fnmsub expressed through the fmadd builtin: calls
   __builtin_ia32_vfmaddps512_mask with both __A and __C negated, __B
   unmodified, an all-ones mask ((__mmask16) -1) and
   _MM_FROUND_CUR_DIRECTION.  */
11697 extern __inline __m512
11698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11700 {
11701 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11702 (__v16sf) __B,
11703 -(__v16sf) __C,
11704 (__mmask16) -1,
11705 _MM_FROUND_CUR_DIRECTION);
11706 }
11707
/* Masked single-precision fnmsub: calls the dedicated
   __builtin_ia32_vfnmsubps512_mask with __A, __B, __C unmodified and mask
   __U.  NOTE(review): no operand is negated here, presumably because __A
   also supplies the masked-off lanes -- confirm.  */
11708 extern __inline __m512
11709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11710 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11711 {
11712 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11713 (__v16sf) __B,
11714 (__v16sf) __C,
11715 (__mmask16) __U,
11716 _MM_FROUND_CUR_DIRECTION);
11717 }
11718
/* mask3 single-precision fnmsub: calls the dedicated
   __builtin_ia32_vfnmsubps512_mask3 with __A, __B, __C unmodified and mask
   __U.  NOTE(review): no operand is negated here, presumably because __C
   also supplies the masked-off lanes -- confirm.  */
11719 extern __inline __m512
11720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11721 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11722 {
11723 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11724 (__v16sf) __B,
11725 (__v16sf) __C,
11726 (__mmask16) __U,
11727 _MM_FROUND_CUR_DIRECTION);
11728 }
11729
/* maskz single-precision fnmsub: calls __builtin_ia32_vfmaddps512_maskz
   with both __A and __C negated, __B unmodified, mask __U and
   _MM_FROUND_CUR_DIRECTION.  */
11730 extern __inline __m512
11731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11732 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11733 {
11734 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11735 (__v16sf) __B,
11736 -(__v16sf) __C,
11737 (__mmask16) __U,
11738 _MM_FROUND_CUR_DIRECTION);
11739 }
11740
/* Converts the __m512d __A to a __m256i via
   __builtin_ia32_cvttpd2dq512_mask, passing _mm256_undefined_si256 () as
   the second operand and an all-ones mask ((__mmask8) -1).
   NOTE(review): the "tt" builtin is presumably the truncating form --
   confirm against the instruction reference.  */
11741 extern __inline __m256i
11742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11743 _mm512_cvttpd_epi32 (__m512d __A)
11744 {
11745 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11746 (__v8si)
11747 _mm256_undefined_si256 (),
11748 (__mmask8) -1,
11749 _MM_FROUND_CUR_DIRECTION);
11750 }
11751
/* Masked form of _mm512_cvttpd_epi32: same builtin, with __W as the second
   operand and __U as the mask.  NOTE(review): __W presumably supplies the
   lanes whose mask bit is clear -- confirm.  */
11752 extern __inline __m256i
11753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11754 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11755 {
11756 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11757 (__v8si) __W,
11758 (__mmask8) __U,
11759 _MM_FROUND_CUR_DIRECTION);
11760 }
11761
/* maskz form of _mm512_cvttpd_epi32: same builtin, with a zero vector
   (_mm256_setzero_si256 ()) as the second operand and __U as the mask.  */
11762 extern __inline __m256i
11763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11764 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11765 {
11766 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11767 (__v8si)
11768 _mm256_setzero_si256 (),
11769 (__mmask8) __U,
11770 _MM_FROUND_CUR_DIRECTION);
11771 }
11772
/* Converts the __m512d __A to a __m256i via the unsigned-destination
   builtin __builtin_ia32_cvttpd2udq512_mask, passing
   _mm256_undefined_si256 () as the second operand and an all-ones mask.  */
11773 extern __inline __m256i
11774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11775 _mm512_cvttpd_epu32 (__m512d __A)
11776 {
11777 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11778 (__v8si)
11779 _mm256_undefined_si256 (),
11780 (__mmask8) -1,
11781 _MM_FROUND_CUR_DIRECTION);
11782 }
11783
/* Masked form of _mm512_cvttpd_epu32: same builtin, with __W as the second
   operand and __U as the mask.  */
11784 extern __inline __m256i
11785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11786 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11787 {
11788 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11789 (__v8si) __W,
11790 (__mmask8) __U,
11791 _MM_FROUND_CUR_DIRECTION);
11792 }
11793
/* maskz form of _mm512_cvttpd_epu32: same builtin, with a zero vector
   (_mm256_setzero_si256 ()) as the second operand and __U as the mask.  */
11794 extern __inline __m256i
11795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11796 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11797 {
11798 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11799 (__v8si)
11800 _mm256_setzero_si256 (),
11801 (__mmask8) __U,
11802 _MM_FROUND_CUR_DIRECTION);
11803 }
11804
/* Converts the __m512d __A to a __m256i via
   __builtin_ia32_cvtpd2dq512_mask, passing _mm256_undefined_si256 () as
   the second operand, an all-ones mask and _MM_FROUND_CUR_DIRECTION as the
   rounding argument.  */
11805 extern __inline __m256i
11806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11807 _mm512_cvtpd_epi32 (__m512d __A)
11808 {
11809 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11810 (__v8si)
11811 _mm256_undefined_si256 (),
11812 (__mmask8) -1,
11813 _MM_FROUND_CUR_DIRECTION);
11814 }
11815
/* Masked form of _mm512_cvtpd_epi32: same builtin, with __W as the second
   operand and __U as the mask.  */
11816 extern __inline __m256i
11817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11818 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11819 {
11820 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11821 (__v8si) __W,
11822 (__mmask8) __U,
11823 _MM_FROUND_CUR_DIRECTION);
11824 }
11825
/* maskz form of _mm512_cvtpd_epi32: same builtin, with a zero vector
   (_mm256_setzero_si256 ()) as the second operand and __U as the mask.  */
11826 extern __inline __m256i
11827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11828 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11829 {
11830 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11831 (__v8si)
11832 _mm256_setzero_si256 (),
11833 (__mmask8) __U,
11834 _MM_FROUND_CUR_DIRECTION);
11835 }
11836
/* Converts the __m512d __A to a __m256i via the unsigned-destination
   builtin __builtin_ia32_cvtpd2udq512_mask, passing
   _mm256_undefined_si256 () as the second operand and an all-ones mask.  */
11837 extern __inline __m256i
11838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11839 _mm512_cvtpd_epu32 (__m512d __A)
11840 {
11841 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11842 (__v8si)
11843 _mm256_undefined_si256 (),
11844 (__mmask8) -1,
11845 _MM_FROUND_CUR_DIRECTION);
11846 }
11847
/* Masked form of _mm512_cvtpd_epu32: same builtin, with __W as the second
   operand and __U as the mask.  */
11848 extern __inline __m256i
11849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11850 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11851 {
11852 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11853 (__v8si) __W,
11854 (__mmask8) __U,
11855 _MM_FROUND_CUR_DIRECTION);
11856 }
11857
/* maskz form of _mm512_cvtpd_epu32: same builtin, with a zero vector
   (_mm256_setzero_si256 ()) as the second operand and __U as the mask.  */
11858 extern __inline __m256i
11859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11860 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11861 {
11862 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11863 (__v8si)
11864 _mm256_setzero_si256 (),
11865 (__mmask8) __U,
11866 _MM_FROUND_CUR_DIRECTION);
11867 }
11868
/* Converts the __m512 __A to a __m512i via
   __builtin_ia32_cvttps2dq512_mask, passing _mm512_undefined_epi32 () as
   the second operand and an all-ones mask ((__mmask16) -1).
   NOTE(review): the "tt" builtin is presumably the truncating form --
   confirm against the instruction reference.  */
11869 extern __inline __m512i
11870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11871 _mm512_cvttps_epi32 (__m512 __A)
11872 {
11873 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11874 (__v16si)
11875 _mm512_undefined_epi32 (),
11876 (__mmask16) -1,
11877 _MM_FROUND_CUR_DIRECTION);
11878 }
11879
/* Masked form of _mm512_cvttps_epi32: same builtin, with __W as the second
   operand and __U as the mask.  */
11880 extern __inline __m512i
11881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11882 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11883 {
11884 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11885 (__v16si) __W,
11886 (__mmask16) __U,
11887 _MM_FROUND_CUR_DIRECTION);
11888 }
11889
/* maskz form of _mm512_cvttps_epi32: same builtin, with a zero vector
   (_mm512_setzero_si512 ()) as the second operand and __U as the mask.  */
11890 extern __inline __m512i
11891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11892 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11893 {
11894 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11895 (__v16si)
11896 _mm512_setzero_si512 (),
11897 (__mmask16) __U,
11898 _MM_FROUND_CUR_DIRECTION);
11899 }
11900
/* Converts the __m512 __A to a __m512i via the unsigned-destination builtin
   __builtin_ia32_cvttps2udq512_mask, passing _mm512_undefined_epi32 () as
   the second operand and an all-ones mask.  */
11901 extern __inline __m512i
11902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11903 _mm512_cvttps_epu32 (__m512 __A)
11904 {
11905 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11906 (__v16si)
11907 _mm512_undefined_epi32 (),
11908 (__mmask16) -1,
11909 _MM_FROUND_CUR_DIRECTION);
11910 }
11911
/* Masked form of _mm512_cvttps_epu32: same builtin, with __W as the second
   operand and __U as the mask.  */
11912 extern __inline __m512i
11913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11914 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11915 {
11916 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11917 (__v16si) __W,
11918 (__mmask16) __U,
11919 _MM_FROUND_CUR_DIRECTION);
11920 }
11921
/* maskz form of _mm512_cvttps_epu32: same builtin, with a zero vector
   (_mm512_setzero_si512 ()) as the second operand and __U as the mask.  */
11922 extern __inline __m512i
11923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11924 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11925 {
11926 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11927 (__v16si)
11928 _mm512_setzero_si512 (),
11929 (__mmask16) __U,
11930 _MM_FROUND_CUR_DIRECTION);
11931 }
11932
/* Converts the __m512 __A to a __m512i via __builtin_ia32_cvtps2dq512_mask,
   passing _mm512_undefined_epi32 () as the second operand, an all-ones
   mask and _MM_FROUND_CUR_DIRECTION as the rounding argument.  */
11933 extern __inline __m512i
11934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11935 _mm512_cvtps_epi32 (__m512 __A)
11936 {
11937 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11938 (__v16si)
11939 _mm512_undefined_epi32 (),
11940 (__mmask16) -1,
11941 _MM_FROUND_CUR_DIRECTION);
11942 }
11943
/* Masked form of _mm512_cvtps_epi32: same builtin, with __W as the second
   operand and __U as the mask.  */
11944 extern __inline __m512i
11945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11946 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11947 {
11948 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11949 (__v16si) __W,
11950 (__mmask16) __U,
11951 _MM_FROUND_CUR_DIRECTION);
11952 }
11953
/* maskz form of _mm512_cvtps_epi32: same builtin, with a zero vector
   (_mm512_setzero_si512 ()) as the second operand and __U as the mask.  */
11954 extern __inline __m512i
11955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11956 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11957 {
11958 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11959 (__v16si)
11960 _mm512_setzero_si512 (),
11961 (__mmask16) __U,
11962 _MM_FROUND_CUR_DIRECTION);
11963 }
11964
/* Converts the __m512 __A to a __m512i via the unsigned-destination builtin
   __builtin_ia32_cvtps2udq512_mask, passing _mm512_undefined_epi32 () as
   the second operand and an all-ones mask.  */
11965 extern __inline __m512i
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967 _mm512_cvtps_epu32 (__m512 __A)
11968 {
11969 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11970 (__v16si)
11971 _mm512_undefined_epi32 (),
11972 (__mmask16) -1,
11973 _MM_FROUND_CUR_DIRECTION);
11974 }
11975
/* Masked form of _mm512_cvtps_epu32: same builtin, with __W as the second
   operand and __U as the mask.  */
11976 extern __inline __m512i
11977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11978 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11979 {
11980 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11981 (__v16si) __W,
11982 (__mmask16) __U,
11983 _MM_FROUND_CUR_DIRECTION);
11984 }
11985
/* maskz form of _mm512_cvtps_epu32: same builtin, with a zero vector
   (_mm512_setzero_si512 ()) as the second operand and __U as the mask.  */
11986 extern __inline __m512i
11987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11988 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11989 {
11990 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11991 (__v16si)
11992 _mm512_setzero_si512 (),
11993 (__mmask16) __U,
11994 _MM_FROUND_CUR_DIRECTION);
11995 }
11996
11997 #ifdef __x86_64__
/* Passes __A and the unsigned 64-bit __B to __builtin_ia32_cvtusi2ss64 with
   _MM_FROUND_CUR_DIRECTION and returns the result as __m128.  Only defined
   when __x86_64__ is set (see the enclosing #ifdef).  */
11998 extern __inline __m128
11999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12000 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
12001 {
12002 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
12003 _MM_FROUND_CUR_DIRECTION);
12004 }
12005
/* Passes __A and the unsigned 64-bit __B to __builtin_ia32_cvtusi2sd64 with
   _MM_FROUND_CUR_DIRECTION and returns the result as __m128d.  Only
   defined when __x86_64__ is set (see the enclosing #ifdef).  */
12006 extern __inline __m128d
12007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12008 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
12009 {
12010 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
12011 _MM_FROUND_CUR_DIRECTION);
12012 }
12013 #endif
12014
/* Passes __A and the unsigned __B to __builtin_ia32_cvtusi2ss32 with
   _MM_FROUND_CUR_DIRECTION and returns the result as __m128.  */
12015 extern __inline __m128
12016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12017 _mm_cvtu32_ss (__m128 __A, unsigned __B)
12018 {
12019 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
12020 _MM_FROUND_CUR_DIRECTION);
12021 }
12022
/* Converts the __m512i __A to a __m512 via __builtin_ia32_cvtdq2ps512_mask,
   passing _mm512_undefined_ps () as the second operand, an all-ones mask
   ((__mmask16) -1) and _MM_FROUND_CUR_DIRECTION.  */
12023 extern __inline __m512
12024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12025 _mm512_cvtepi32_ps (__m512i __A)
12026 {
12027 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12028 (__v16sf)
12029 _mm512_undefined_ps (),
12030 (__mmask16) -1,
12031 _MM_FROUND_CUR_DIRECTION);
12032 }
12033
/* Masked form of _mm512_cvtepi32_ps: same builtin, with __W as the second
   operand and __U as the mask.  */
12034 extern __inline __m512
12035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12036 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12037 {
12038 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12039 (__v16sf) __W,
12040 (__mmask16) __U,
12041 _MM_FROUND_CUR_DIRECTION);
12042 }
12043
/* maskz form of _mm512_cvtepi32_ps: same builtin, with a zero vector
   (_mm512_setzero_ps ()) as the second operand and __U as the mask.  */
12044 extern __inline __m512
12045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
12047 {
12048 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12049 (__v16sf)
12050 _mm512_setzero_ps (),
12051 (__mmask16) __U,
12052 _MM_FROUND_CUR_DIRECTION);
12053 }
12054
/* Converts the __m512i __A to a __m512 via the unsigned-source builtin
   __builtin_ia32_cvtudq2ps512_mask, passing _mm512_undefined_ps () as the
   second operand and an all-ones mask ((__mmask16) -1).  */
12055 extern __inline __m512
12056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12057 _mm512_cvtepu32_ps (__m512i __A)
12058 {
12059 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12060 (__v16sf)
12061 _mm512_undefined_ps (),
12062 (__mmask16) -1,
12063 _MM_FROUND_CUR_DIRECTION);
12064 }
12065
/* Masked form of _mm512_cvtepu32_ps: same builtin, with __W as the second
   operand and __U as the mask.  */
12066 extern __inline __m512
12067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12068 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12069 {
12070 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12071 (__v16sf) __W,
12072 (__mmask16) __U,
12073 _MM_FROUND_CUR_DIRECTION);
12074 }
12075
/* maskz form of _mm512_cvtepu32_ps: same builtin, with a zero vector
   (_mm512_setzero_ps ()) as the second operand and __U as the mask.  */
12076 extern __inline __m512
12077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12078 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12079 {
12080 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12081 (__v16sf)
12082 _mm512_setzero_ps (),
12083 (__mmask16) __U,
12084 _MM_FROUND_CUR_DIRECTION);
12085 }
12086
12087 #ifdef __OPTIMIZE__
/* Inline-function form, compiled only when __OPTIMIZE__ is defined (a macro
   of the same name is used otherwise).  Calls
   __builtin_ia32_fixupimmpd512_mask with __A, __B, the integer table __C
   (cast to __v8di), the immediate __imm and an all-ones mask.  */
12088 extern __inline __m512d
12089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12090 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12091 {
12092 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12093 (__v8df) __B,
12094 (__v8di) __C,
12095 __imm,
12096 (__mmask8) -1,
12097 _MM_FROUND_CUR_DIRECTION);
12098 }
12099
/* Masked inline-function form (__OPTIMIZE__ only): same builtin as
   _mm512_fixupimm_pd, with the caller's __U forwarded as the mask.  */
12100 extern __inline __m512d
12101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12102 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12103 __m512i __C, const int __imm)
12104 {
12105 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12106 (__v8df) __B,
12107 (__v8di) __C,
12108 __imm,
12109 (__mmask8) __U,
12110 _MM_FROUND_CUR_DIRECTION);
12111 }
12112
/* maskz inline-function form (__OPTIMIZE__ only): calls
   __builtin_ia32_fixupimmpd512_maskz with __A, __B, __C, the immediate
   __imm and mask __U.  */
12113 extern __inline __m512d
12114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12115 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12116 __m512i __C, const int __imm)
12117 {
12118 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12119 (__v8df) __B,
12120 (__v8di) __C,
12121 __imm,
12122 (__mmask8) __U,
12123 _MM_FROUND_CUR_DIRECTION);
12124 }
12125
/* Inline-function form, compiled only when __OPTIMIZE__ is defined (a macro
   of the same name is used otherwise).  Calls
   __builtin_ia32_fixupimmps512_mask with __A, __B, the integer table __C
   (cast to __v16si), the immediate __imm and an all-ones mask.  */
12126 extern __inline __m512
12127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12128 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12129 {
12130 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12131 (__v16sf) __B,
12132 (__v16si) __C,
12133 __imm,
12134 (__mmask16) -1,
12135 _MM_FROUND_CUR_DIRECTION);
12136 }
12137
/* Masked inline-function form (__OPTIMIZE__ only): same builtin as
   _mm512_fixupimm_ps, with the caller's __U forwarded as the mask.  */
12138 extern __inline __m512
12139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12140 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12141 __m512i __C, const int __imm)
12142 {
12143 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12144 (__v16sf) __B,
12145 (__v16si) __C,
12146 __imm,
12147 (__mmask16) __U,
12148 _MM_FROUND_CUR_DIRECTION);
12149 }
12150
/* maskz inline-function form (__OPTIMIZE__ only): calls
   __builtin_ia32_fixupimmps512_maskz with __A, __B, __C, the immediate
   __imm and mask __U.  */
12151 extern __inline __m512
12152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12153 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12154 __m512i __C, const int __imm)
12155 {
12156 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12157 (__v16sf) __B,
12158 (__v16si) __C,
12159 __imm,
12160 (__mmask16) __U,
12161 _MM_FROUND_CUR_DIRECTION);
12162 }
12163
/* 128-bit double inline-function form (__OPTIMIZE__ only): calls
   __builtin_ia32_fixupimmsd_mask with __A, __B, __C (cast to __v2di), the
   immediate __imm and an all-ones mask ((__mmask8) -1).  */
12164 extern __inline __m128d
12165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12166 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12167 {
12168 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12169 (__v2df) __B,
12170 (__v2di) __C, __imm,
12171 (__mmask8) -1,
12172 _MM_FROUND_CUR_DIRECTION);
12173 }
12174
/* Masked 128-bit double form (__OPTIMIZE__ only): same builtin as
   _mm_fixupimm_sd, with the caller's __U forwarded as the mask.  */
12175 extern __inline __m128d
12176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12177 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12178 __m128i __C, const int __imm)
12179 {
12180 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12181 (__v2df) __B,
12182 (__v2di) __C, __imm,
12183 (__mmask8) __U,
12184 _MM_FROUND_CUR_DIRECTION);
12185 }
12186
/* maskz 128-bit double form (__OPTIMIZE__ only): calls
   __builtin_ia32_fixupimmsd_maskz with __A, __B, __C, the immediate __imm
   and mask __U.  */
12187 extern __inline __m128d
12188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12189 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12190 __m128i __C, const int __imm)
12191 {
12192 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12193 (__v2df) __B,
12194 (__v2di) __C,
12195 __imm,
12196 (__mmask8) __U,
12197 _MM_FROUND_CUR_DIRECTION);
12198 }
12199
/* 128-bit float inline-function form (__OPTIMIZE__ only): calls
   __builtin_ia32_fixupimmss_mask with __A, __B, __C (cast to __v4si), the
   immediate __imm and an all-ones mask ((__mmask8) -1).  */
12200 extern __inline __m128
12201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12202 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12203 {
12204 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12205 (__v4sf) __B,
12206 (__v4si) __C, __imm,
12207 (__mmask8) -1,
12208 _MM_FROUND_CUR_DIRECTION);
12209 }
12210
/* Masked 128-bit float form (__OPTIMIZE__ only): same builtin as
   _mm_fixupimm_ss, with the caller's __U forwarded as the mask.  */
12211 extern __inline __m128
12212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12213 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12214 __m128i __C, const int __imm)
12215 {
12216 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12217 (__v4sf) __B,
12218 (__v4si) __C, __imm,
12219 (__mmask8) __U,
12220 _MM_FROUND_CUR_DIRECTION);
12221 }
12222
/* maskz 128-bit float form (__OPTIMIZE__ only): calls
   __builtin_ia32_fixupimmss_maskz with __A, __B, __C, the immediate __imm
   and mask __U.  */
12223 extern __inline __m128
12224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12225 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12226 __m128i __C, const int __imm)
12227 {
12228 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12229 (__v4sf) __B,
12230 (__v4si) __C, __imm,
12231 (__mmask8) __U,
12232 _MM_FROUND_CUR_DIRECTION);
12233 }
12234 #else
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z and C and
   calls __builtin_ia32_fixupimmpd512_mask with an all-ones mask.
   NOTE(review): presumably a macro so that C reaches the builtin as a
   constant expression without relying on inlining -- confirm.  */
12235 #define _mm512_fixupimm_pd(X, Y, Z, C) \
12236 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12237 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12238 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12239
/* Macro form (no __OPTIMIZE__) of the masked variant: same builtin call as
   the unmasked macro, with U cast to __mmask8 as the mask.  */
12240 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
12241 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12242 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12243 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12244
/* Macro form (no __OPTIMIZE__) of the maskz variant: calls
   __builtin_ia32_fixupimmpd512_maskz with X, Y, Z, C and mask U.  */
12245 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
12246 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
12247 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12248 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12249
/* Macro form (no __OPTIMIZE__): casts X, Y, Z and C and calls
   __builtin_ia32_fixupimmps512_mask with an all-ones __mmask16.  */
12250 #define _mm512_fixupimm_ps(X, Y, Z, C) \
12251 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12252 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12253 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
12254
/* Macro form (no __OPTIMIZE__) of the masked variant: same builtin call as
   the unmasked macro, with U cast to __mmask16 as the mask.  */
12255 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
12256 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12257 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12258 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12259
/* Macro form (no __OPTIMIZE__) of the maskz variant: calls
   __builtin_ia32_fixupimmps512_maskz with X, Y, Z, C and mask U.  */
12260 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
12261 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
12262 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12263 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12264
/* Macro form (no __OPTIMIZE__): casts X, Y, Z and C and calls
   __builtin_ia32_fixupimmsd_mask with an all-ones __mmask8.  */
12265 #define _mm_fixupimm_sd(X, Y, Z, C) \
12266 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12267 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12268 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12269
/* Macro form (no __OPTIMIZE__) of the masked variant: same builtin call as
   _mm_fixupimm_sd, with U cast to __mmask8 as the mask.  */
12270 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
12271 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12272 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12273 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12274
/* Macro form (no __OPTIMIZE__) of the maskz variant: calls
   __builtin_ia32_fixupimmsd_maskz with X, Y, Z, C and mask U.  */
12275 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
12276 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
12277 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12278 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12279
/* Macro form (no __OPTIMIZE__): casts X, Y, Z and C and calls
   __builtin_ia32_fixupimmss_mask with an all-ones __mmask8.  */
12280 #define _mm_fixupimm_ss(X, Y, Z, C) \
12281 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12282 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12283 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12284
/* Macro form (no __OPTIMIZE__) of the masked variant: same builtin call as
   _mm_fixupimm_ss, with U cast to __mmask8 as the mask.  */
12285 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
12286 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12287 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12288 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12289
/* Macro form (no __OPTIMIZE__) of the maskz variant: calls
   __builtin_ia32_fixupimmss_maskz with X, Y, Z, C and mask U.  */
12290 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
12291 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
12292 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12293 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12294 #endif
12295
12296 #ifdef __x86_64__
/* Returns __builtin_ia32_vcvtss2usi64 applied to __A (as __v4sf) with
   _MM_FROUND_CUR_DIRECTION, cast to unsigned long long.  Only defined when
   __x86_64__ is set (see the enclosing #ifdef).  */
12297 extern __inline unsigned long long
12298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12299 _mm_cvtss_u64 (__m128 __A)
12300 {
12301 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12302 __A,
12303 _MM_FROUND_CUR_DIRECTION);
12304 }
12305
/* Returns __builtin_ia32_vcvttss2usi64 applied to __A (as __v4sf) with
   _MM_FROUND_CUR_DIRECTION, cast to unsigned long long (x86-64 only).
   NOTE(review): the "tt" builtin is presumably the truncating form --
   confirm.  */
12306 extern __inline unsigned long long
12307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12308 _mm_cvttss_u64 (__m128 __A)
12309 {
12310 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12311 __A,
12312 _MM_FROUND_CUR_DIRECTION);
12313 }
12314
/* Returns __builtin_ia32_vcvttss2si64 applied to __A (as __v4sf) with
   _MM_FROUND_CUR_DIRECTION, cast to long long (x86-64 only).  */
12315 extern __inline long long
12316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12317 _mm_cvttss_i64 (__m128 __A)
12318 {
12319 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12320 _MM_FROUND_CUR_DIRECTION);
12321 }
12322 #endif /* __x86_64__ */
12323
/* Returns __builtin_ia32_vcvtss2usi32 applied to __A (as __v4sf) with
   _MM_FROUND_CUR_DIRECTION, cast to unsigned.  */
12324 extern __inline unsigned
12325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12326 _mm_cvtss_u32 (__m128 __A)
12327 {
12328 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12329 _MM_FROUND_CUR_DIRECTION);
12330 }
12331
12332 extern __inline unsigned
12333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12334 _mm_cvttss_u32 (__m128 __A)
12335 {
12336 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12337 _MM_FROUND_CUR_DIRECTION);
12338 }
12339
12340 extern __inline int
12341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12342 _mm_cvttss_i32 (__m128 __A)
12343 {
12344 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12345 _MM_FROUND_CUR_DIRECTION);
12346 }
12347
12348 #ifdef __x86_64__
12349 extern __inline unsigned long long
12350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12351 _mm_cvtsd_u64 (__m128d __A)
12352 {
12353 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12354 __A,
12355 _MM_FROUND_CUR_DIRECTION);
12356 }
12357
12358 extern __inline unsigned long long
12359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12360 _mm_cvttsd_u64 (__m128d __A)
12361 {
12362 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12363 __A,
12364 _MM_FROUND_CUR_DIRECTION);
12365 }
12366
12367 extern __inline long long
12368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12369 _mm_cvttsd_i64 (__m128d __A)
12370 {
12371 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12372 _MM_FROUND_CUR_DIRECTION);
12373 }
12374 #endif /* __x86_64__ */
12375
12376 extern __inline unsigned
12377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12378 _mm_cvtsd_u32 (__m128d __A)
12379 {
12380 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12381 _MM_FROUND_CUR_DIRECTION);
12382 }
12383
12384 extern __inline unsigned
12385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12386 _mm_cvttsd_u32 (__m128d __A)
12387 {
12388 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12389 _MM_FROUND_CUR_DIRECTION);
12390 }
12391
12392 extern __inline int
12393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12394 _mm_cvttsd_i32 (__m128d __A)
12395 {
12396 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12397 _MM_FROUND_CUR_DIRECTION);
12398 }
12399
12400 extern __inline __m512d
12401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12402 _mm512_cvtps_pd (__m256 __A)
12403 {
12404 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12405 (__v8df)
12406 _mm512_undefined_pd (),
12407 (__mmask8) -1,
12408 _MM_FROUND_CUR_DIRECTION);
12409 }
12410
12411 extern __inline __m512d
12412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12413 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12414 {
12415 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12416 (__v8df) __W,
12417 (__mmask8) __U,
12418 _MM_FROUND_CUR_DIRECTION);
12419 }
12420
12421 extern __inline __m512d
12422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12423 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12424 {
12425 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12426 (__v8df)
12427 _mm512_setzero_pd (),
12428 (__mmask8) __U,
12429 _MM_FROUND_CUR_DIRECTION);
12430 }
12431
12432 extern __inline __m512
12433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12434 _mm512_cvtph_ps (__m256i __A)
12435 {
12436 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12437 (__v16sf)
12438 _mm512_undefined_ps (),
12439 (__mmask16) -1,
12440 _MM_FROUND_CUR_DIRECTION);
12441 }
12442
12443 extern __inline __m512
12444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12445 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12446 {
12447 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12448 (__v16sf) __W,
12449 (__mmask16) __U,
12450 _MM_FROUND_CUR_DIRECTION);
12451 }
12452
12453 extern __inline __m512
12454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12455 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12456 {
12457 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12458 (__v16sf)
12459 _mm512_setzero_ps (),
12460 (__mmask16) __U,
12461 _MM_FROUND_CUR_DIRECTION);
12462 }
12463
12464 extern __inline __m256
12465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12466 _mm512_cvtpd_ps (__m512d __A)
12467 {
12468 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12469 (__v8sf)
12470 _mm256_undefined_ps (),
12471 (__mmask8) -1,
12472 _MM_FROUND_CUR_DIRECTION);
12473 }
12474
12475 extern __inline __m256
12476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12477 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12478 {
12479 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12480 (__v8sf) __W,
12481 (__mmask8) __U,
12482 _MM_FROUND_CUR_DIRECTION);
12483 }
12484
12485 extern __inline __m256
12486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12487 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12488 {
12489 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12490 (__v8sf)
12491 _mm256_setzero_ps (),
12492 (__mmask8) __U,
12493 _MM_FROUND_CUR_DIRECTION);
12494 }
12495
12496 #ifdef __OPTIMIZE__
12497 extern __inline __m512
12498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12499 _mm512_getexp_ps (__m512 __A)
12500 {
12501 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12502 (__v16sf)
12503 _mm512_undefined_ps (),
12504 (__mmask16) -1,
12505 _MM_FROUND_CUR_DIRECTION);
12506 }
12507
12508 extern __inline __m512
12509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12510 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12511 {
12512 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12513 (__v16sf) __W,
12514 (__mmask16) __U,
12515 _MM_FROUND_CUR_DIRECTION);
12516 }
12517
12518 extern __inline __m512
12519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12520 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12521 {
12522 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12523 (__v16sf)
12524 _mm512_setzero_ps (),
12525 (__mmask16) __U,
12526 _MM_FROUND_CUR_DIRECTION);
12527 }
12528
12529 extern __inline __m512d
12530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12531 _mm512_getexp_pd (__m512d __A)
12532 {
12533 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12534 (__v8df)
12535 _mm512_undefined_pd (),
12536 (__mmask8) -1,
12537 _MM_FROUND_CUR_DIRECTION);
12538 }
12539
12540 extern __inline __m512d
12541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12542 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12543 {
12544 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12545 (__v8df) __W,
12546 (__mmask8) __U,
12547 _MM_FROUND_CUR_DIRECTION);
12548 }
12549
12550 extern __inline __m512d
12551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12552 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12553 {
12554 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12555 (__v8df)
12556 _mm512_setzero_pd (),
12557 (__mmask8) __U,
12558 _MM_FROUND_CUR_DIRECTION);
12559 }
12560
12561 extern __inline __m128
12562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12563 _mm_getexp_ss (__m128 __A, __m128 __B)
12564 {
12565 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12566 (__v4sf) __B,
12567 _MM_FROUND_CUR_DIRECTION);
12568 }
12569
12570 extern __inline __m128d
12571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12572 _mm_getexp_sd (__m128d __A, __m128d __B)
12573 {
12574 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12575 (__v2df) __B,
12576 _MM_FROUND_CUR_DIRECTION);
12577 }
12578
12579 extern __inline __m512d
12580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12581 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12582 _MM_MANTISSA_SIGN_ENUM __C)
12583 {
12584 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12585 (__C << 2) | __B,
12586 _mm512_undefined_pd (),
12587 (__mmask8) -1,
12588 _MM_FROUND_CUR_DIRECTION);
12589 }
12590
12591 extern __inline __m512d
12592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12593 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12594 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12595 {
12596 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12597 (__C << 2) | __B,
12598 (__v8df) __W, __U,
12599 _MM_FROUND_CUR_DIRECTION);
12600 }
12601
12602 extern __inline __m512d
12603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12604 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12605 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12606 {
12607 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12608 (__C << 2) | __B,
12609 (__v8df)
12610 _mm512_setzero_pd (),
12611 __U,
12612 _MM_FROUND_CUR_DIRECTION);
12613 }
12614
12615 extern __inline __m512
12616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12617 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12618 _MM_MANTISSA_SIGN_ENUM __C)
12619 {
12620 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12621 (__C << 2) | __B,
12622 _mm512_undefined_ps (),
12623 (__mmask16) -1,
12624 _MM_FROUND_CUR_DIRECTION);
12625 }
12626
12627 extern __inline __m512
12628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12629 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12630 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12631 {
12632 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12633 (__C << 2) | __B,
12634 (__v16sf) __W, __U,
12635 _MM_FROUND_CUR_DIRECTION);
12636 }
12637
12638 extern __inline __m512
12639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12640 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12641 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12642 {
12643 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12644 (__C << 2) | __B,
12645 (__v16sf)
12646 _mm512_setzero_ps (),
12647 __U,
12648 _MM_FROUND_CUR_DIRECTION);
12649 }
12650
12651 extern __inline __m128d
12652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12653 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12654 _MM_MANTISSA_SIGN_ENUM __D)
12655 {
12656 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12657 (__v2df) __B,
12658 (__D << 2) | __C,
12659 _MM_FROUND_CUR_DIRECTION);
12660 }
12661
12662 extern __inline __m128
12663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12664 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12665 _MM_MANTISSA_SIGN_ENUM __D)
12666 {
12667 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12668 (__v4sf) __B,
12669 (__D << 2) | __C,
12670 _MM_FROUND_CUR_DIRECTION);
12671 }
12672
12673 #else
/* Macro forms of the packed double-precision getmant intrinsics, used when
   __OPTIMIZE__ is not defined.  X is the source, B and C are combined into
   the control immediate ((C)<<2) | (B).  */

/* Unmasked: undefined destination operand, all-ones mask.  */
#define _mm512_getmant_pd(X, B, C)                                        \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),      \
                                               (int)(((C)<<2) | (B)),     \
                                               (__v8df)_mm512_undefined_pd(), \
                                               (__mmask8)-1,              \
                                               _MM_FROUND_CUR_DIRECTION))

/* Masked: W is the destination operand and U the mask.  */
#define _mm512_mask_getmant_pd(W, U, X, B, C)                             \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),      \
                                               (int)(((C)<<2) | (B)),     \
                                               (__v8df)(__m512d)(W),      \
                                               (__mmask8)(U),             \
                                               _MM_FROUND_CUR_DIRECTION))

/* maskz: an all-zero vector is the destination operand, U the mask.  */
#define _mm512_maskz_getmant_pd(U, X, B, C)                               \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),      \
                                               (int)(((C)<<2) | (B)),     \
                                               (__v8df)_mm512_setzero_pd(), \
                                               (__mmask8)(U),             \
                                               _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the packed single-precision getmant intrinsics, used when
   __OPTIMIZE__ is not defined.  X is the source, B and C are combined into
   the control immediate ((C)<<2) | (B).  */

/* Unmasked: undefined destination operand, all-ones mask.  */
#define _mm512_getmant_ps(X, B, C)                                        \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),       \
                                              (int)(((C)<<2) | (B)),      \
                                              (__v16sf)_mm512_undefined_ps(), \
                                              (__mmask16)-1,              \
                                              _MM_FROUND_CUR_DIRECTION))

/* Masked: W is the destination operand and U the mask.  */
#define _mm512_mask_getmant_ps(W, U, X, B, C)                             \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),       \
                                              (int)(((C)<<2) | (B)),      \
                                              (__v16sf)(__m512)(W),       \
                                              (__mmask16)(U),             \
                                              _MM_FROUND_CUR_DIRECTION))

/* maskz: an all-zero vector is the destination operand, U the mask.  */
#define _mm512_maskz_getmant_ps(U, X, B, C)                               \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),       \
                                              (int)(((C)<<2) | (B)),      \
                                              (__v16sf)_mm512_setzero_ps(), \
                                              (__mmask16)(U),             \
                                              _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the scalar getmant intrinsics, used when __OPTIMIZE__ is
   not defined.  X and Y are the two vector operands; C and D are combined
   into the control immediate ((D)<<2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D)                                        \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),        \
                                             (__v2df)(__m128d)(Y),        \
                                             (int)(((D)<<2) | (C)),       \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_ss(X, Y, C, D)                                        \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf)(__m128)(X),          \
                                            (__v4sf)(__m128)(Y),          \
                                            (int)(((D)<<2) | (C)),        \
                                            _MM_FROUND_CUR_DIRECTION))
12725
/* Macro forms of the scalar getexp intrinsics, used when __OPTIMIZE__ is
   not defined.  They take the same two vector operands plus
   _MM_FROUND_CUR_DIRECTION as the inline definitions of _mm_getexp_ss and
   _mm_getexp_sd, so they must call the same three-operand builtins
   (__builtin_ia32_getexpss128_round / __builtin_ia32_getexpsd128_round).
   The earlier "_mask" spellings disagreed with the inline definitions and
   made the non-optimizing build reference a different builtin name.  */
#define _mm_getexp_ss(A, B)                                               \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A),        \
                                              (__v4sf)(__m128)(B),        \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm_getexp_sd(A, B)                                               \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A),      \
                                               (__v2df)(__m128d)(B),      \
                                               _MM_FROUND_CUR_DIRECTION))
12733
/* Macro forms of the packed single-precision getexp intrinsics, used when
   __OPTIMIZE__ is not defined.  Each calls __builtin_ia32_getexpps512_mask
   on A with _MM_FROUND_CUR_DIRECTION.  */

/* Unmasked: undefined destination operand, all-ones mask.  */
#define _mm512_getexp_ps(A)                                               \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A),        \
                                             (__v16sf)_mm512_undefined_ps(), \
                                             (__mmask16)-1,               \
                                             _MM_FROUND_CUR_DIRECTION))

/* Masked: W is the destination operand and U the mask.  */
#define _mm512_mask_getexp_ps(W, U, A)                                    \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A),        \
                                             (__v16sf)(__m512)(W),        \
                                             (__mmask16)(U),              \
                                             _MM_FROUND_CUR_DIRECTION))

/* maskz: an all-zero vector is the destination operand, U the mask.  */
#define _mm512_maskz_getexp_ps(U, A)                                      \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A),        \
                                             (__v16sf)_mm512_setzero_ps(), \
                                             (__mmask16)(U),              \
                                             _MM_FROUND_CUR_DIRECTION))
12745
/* Macro forms of the packed double-precision getexp intrinsics, used when
   __OPTIMIZE__ is not defined.  Each calls __builtin_ia32_getexppd512_mask
   on A with _MM_FROUND_CUR_DIRECTION.  */

/* Unmasked: undefined destination operand, all-ones mask.  */
#define _mm512_getexp_pd(A)                                               \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A),       \
                                              (__v8df)_mm512_undefined_pd(), \
                                              (__mmask8)-1,               \
                                              _MM_FROUND_CUR_DIRECTION))

/* Masked: W is the destination operand and U the mask.  */
#define _mm512_mask_getexp_pd(W, U, A)                                    \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A),       \
                                              (__v8df)(__m512d)(W),       \
                                              (__mmask8)(U),              \
                                              _MM_FROUND_CUR_DIRECTION))

/* maskz: an all-zero vector is the destination operand, U the mask.  */
#define _mm512_maskz_getexp_pd(U, A)                                      \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A),       \
                                              (__v8df)_mm512_setzero_pd(), \
                                              (__mmask8)(U),              \
                                              _MM_FROUND_CUR_DIRECTION))
12757 #endif
12758
12759 #ifdef __OPTIMIZE__
12760 extern __inline __m512
12761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12762 _mm512_roundscale_ps (__m512 __A, const int __imm)
12763 {
12764 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12765 (__v16sf)
12766 _mm512_undefined_ps (),
12767 -1,
12768 _MM_FROUND_CUR_DIRECTION);
12769 }
12770
12771 extern __inline __m512
12772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12773 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12774 const int __imm)
12775 {
12776 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12777 (__v16sf) __A,
12778 (__mmask16) __B,
12779 _MM_FROUND_CUR_DIRECTION);
12780 }
12781
12782 extern __inline __m512
12783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12784 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12785 {
12786 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12787 __imm,
12788 (__v16sf)
12789 _mm512_setzero_ps (),
12790 (__mmask16) __A,
12791 _MM_FROUND_CUR_DIRECTION);
12792 }
12793
12794 extern __inline __m512d
12795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12796 _mm512_roundscale_pd (__m512d __A, const int __imm)
12797 {
12798 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12799 (__v8df)
12800 _mm512_undefined_pd (),
12801 -1,
12802 _MM_FROUND_CUR_DIRECTION);
12803 }
12804
12805 extern __inline __m512d
12806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12807 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12808 const int __imm)
12809 {
12810 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12811 (__v8df) __A,
12812 (__mmask8) __B,
12813 _MM_FROUND_CUR_DIRECTION);
12814 }
12815
12816 extern __inline __m512d
12817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12818 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12819 {
12820 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12821 __imm,
12822 (__v8df)
12823 _mm512_setzero_pd (),
12824 (__mmask8) __A,
12825 _MM_FROUND_CUR_DIRECTION);
12826 }
12827
12828 extern __inline __m128
12829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12830 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12831 {
12832 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12833 (__v4sf) __B, __imm,
12834 _MM_FROUND_CUR_DIRECTION);
12835 }
12836
12837 extern __inline __m128d
12838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12839 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12840 {
12841 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12842 (__v2df) __B, __imm,
12843 _MM_FROUND_CUR_DIRECTION);
12844 }
12845
12846 #else
/* Macro forms of the packed single-precision roundscale intrinsics, used
   when __OPTIMIZE__ is not defined.  */

/* Unmasked: A is the source, B the immediate.  */
#define _mm512_roundscale_ps(A, B)                                        \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A),         \
                                            (int)(B),                     \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)(-1),              \
                                            _MM_FROUND_CUR_DIRECTION))

/* Masked: C is the source, D the immediate, A the destination operand and
   B the mask.  */
#define _mm512_mask_roundscale_ps(A, B, C, D)                             \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),         \
                                            (int)(D),                     \
                                            (__v16sf)(__m512)(A),         \
                                            (__mmask16)(B),               \
                                            _MM_FROUND_CUR_DIRECTION))

/* maskz: B is the source, C the immediate, A the mask; the destination
   operand is an all-zero vector.  */
#define _mm512_maskz_roundscale_ps(A, B, C)                               \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),         \
                                            (int)(C),                     \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(A),               \
                                            _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the packed double-precision roundscale intrinsics, used
   when __OPTIMIZE__ is not defined.  */

/* Unmasked: A is the source, B the immediate.  */
#define _mm512_roundscale_pd(A, B)                                        \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A),        \
                                             (int)(B),                    \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)(-1),              \
                                             _MM_FROUND_CUR_DIRECTION))

/* Masked: C is the source, D the immediate, A the destination operand and
   B the mask.  */
#define _mm512_mask_roundscale_pd(A, B, C, D)                             \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),        \
                                             (int)(D),                    \
                                             (__v8df)(__m512d)(A),        \
                                             (__mmask8)(B),               \
                                             _MM_FROUND_CUR_DIRECTION))

/* maskz: B is the source, C the immediate, A the mask; the destination
   operand is an all-zero vector.  */
#define _mm512_maskz_roundscale_pd(A, B, C)                               \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),        \
                                             (int)(C),                    \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(A),               \
                                             _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the scalar roundscale intrinsics, used when __OPTIMIZE__
   is not defined.  A and B are the vector operands, C the immediate.  */
#define _mm_roundscale_ss(A, B, C)                                        \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),         \
                                             (__v4sf)(__m128)(B),         \
                                             (int)(C),                    \
                                             _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_sd(A, B, C)                                        \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),       \
                                              (__v2df)(__m128d)(B),       \
                                              (int)(C),                   \
                                              _MM_FROUND_CUR_DIRECTION))
12879 #endif
12880
12881 #ifdef __OPTIMIZE__
12882 extern __inline __mmask8
12883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12884 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12885 {
12886 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12887 (__v8df) __Y, __P,
12888 (__mmask8) -1,
12889 _MM_FROUND_CUR_DIRECTION);
12890 }
12891
12892 extern __inline __mmask16
12893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12894 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12895 {
12896 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12897 (__v16sf) __Y, __P,
12898 (__mmask16) -1,
12899 _MM_FROUND_CUR_DIRECTION);
12900 }
12901
12902 extern __inline __mmask16
12903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12904 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12905 {
12906 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12907 (__v16sf) __Y, __P,
12908 (__mmask16) __U,
12909 _MM_FROUND_CUR_DIRECTION);
12910 }
12911
12912 extern __inline __mmask8
12913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12914 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12915 {
12916 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12917 (__v8df) __Y, __P,
12918 (__mmask8) __U,
12919 _MM_FROUND_CUR_DIRECTION);
12920 }
12921
12922 extern __inline __mmask8
12923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12924 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12925 {
12926 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12927 (__v2df) __Y, __P,
12928 (__mmask8) -1,
12929 _MM_FROUND_CUR_DIRECTION);
12930 }
12931
12932 extern __inline __mmask8
12933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12934 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12935 {
12936 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12937 (__v2df) __Y, __P,
12938 (__mmask8) __M,
12939 _MM_FROUND_CUR_DIRECTION);
12940 }
12941
12942 extern __inline __mmask8
12943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12944 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12945 {
12946 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12947 (__v4sf) __Y, __P,
12948 (__mmask8) -1,
12949 _MM_FROUND_CUR_DIRECTION);
12950 }
12951
12952 extern __inline __mmask8
12953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12954 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12955 {
12956 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12957 (__v4sf) __Y, __P,
12958 (__mmask8) __M,
12959 _MM_FROUND_CUR_DIRECTION);
12960 }
12961
12962 #else
/* Macro forms of the unmasked packed compares, used when __OPTIMIZE__ is
   not defined.  X and Y are the operands, P the predicate immediate; the
   input mask is all ones.  */
#define _mm512_cmp_pd_mask(X, Y, P)                                       \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),         \
                                            (__v8df)(__m512d)(Y),         \
                                            (int)(P),                     \
                                            (__mmask8)-1,                 \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P)                                       \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),        \
                                             (__v16sf)(__m512)(Y),        \
                                             (int)(P),                    \
                                             (__mmask16)-1,               \
                                             _MM_FROUND_CUR_DIRECTION))
12972
/* Macro forms of the masked packed compares, used when __OPTIMIZE__ is not
   defined.  M is the input mask, X and Y the operands, P the predicate
   immediate.  M is parenthesized before the cast so that an expression
   argument such as (a | b) is converted as a whole rather than having the
   cast bind to its first operand only.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P)                               \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),         \
                                            (__v8df)(__m512d)(Y),         \
                                            (int)(P),                     \
                                            (__mmask8)(M),                \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P)                               \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),        \
                                             (__v16sf)(__m512)(Y),        \
                                             (int)(P),                    \
                                             (__mmask16)(M),              \
                                             _MM_FROUND_CUR_DIRECTION))
12982
/* Macro form of the unmasked scalar double compare, used when __OPTIMIZE__
   is not defined.  X and Y are the operands, P the predicate immediate;
   the input mask is all ones.  */
#define _mm_cmp_sd_mask(X, Y, P)                                          \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),            \
                                         (__v2df)(__m128d)(Y),            \
                                         (int)(P),                        \
                                         (__mmask8)-1,                    \
                                         _MM_FROUND_CUR_DIRECTION))
12987
/* Macro form of the masked scalar double compare, used when __OPTIMIZE__
   is not defined.  M is the input mask, X and Y the operands, P the
   predicate immediate.  M is parenthesized and converted to __mmask8, as
   the inline definition does with its mask argument, instead of being
   pasted bare into the argument list.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),            \
                                         (__v2df)(__m128d)(Y),            \
                                         (int)(P),                        \
                                         (__mmask8)(M),                   \
                                         _MM_FROUND_CUR_DIRECTION))
12992
/* Macro form of the unmasked scalar single compare, used when __OPTIMIZE__
   is not defined.  X and Y are the operands, P the predicate immediate;
   the input mask is all ones.  */
#define _mm_cmp_ss_mask(X, Y, P)                                          \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),             \
                                         (__v4sf)(__m128)(Y),             \
                                         (int)(P),                        \
                                         (__mmask8)-1,                    \
                                         _MM_FROUND_CUR_DIRECTION))
12997
/* Macro form of the masked scalar single compare, used when __OPTIMIZE__
   is not defined.  M is the input mask, X and Y the operands, P the
   predicate immediate.  M is parenthesized and converted to __mmask8, as
   the inline definition does with its mask argument, instead of being
   pasted bare into the argument list.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),             \
                                         (__v4sf)(__m128)(Y),             \
                                         (int)(P),                        \
                                         (__mmask8)(M),                   \
                                         _MM_FROUND_CUR_DIRECTION))
13002 #endif
13003
13004 extern __inline __mmask16
13005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13006 _mm512_kmov (__mmask16 __A)
13007 {
13008 return __builtin_ia32_kmovw (__A);
13009 }
13010
13011 extern __inline __m512
13012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13013 _mm512_castpd_ps (__m512d __A)
13014 {
13015 return (__m512) (__A);
13016 }
13017
13018 extern __inline __m512i
13019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13020 _mm512_castpd_si512 (__m512d __A)
13021 {
13022 return (__m512i) (__A);
13023 }
13024
13025 extern __inline __m512d
13026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13027 _mm512_castps_pd (__m512 __A)
13028 {
13029 return (__m512d) (__A);
13030 }
13031
13032 extern __inline __m512i
13033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13034 _mm512_castps_si512 (__m512 __A)
13035 {
13036 return (__m512i) (__A);
13037 }
13038
13039 extern __inline __m512
13040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13041 _mm512_castsi512_ps (__m512i __A)
13042 {
13043 return (__m512) (__A);
13044 }
13045
13046 extern __inline __m512d
13047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13048 _mm512_castsi512_pd (__m512i __A)
13049 {
13050 return (__m512d) (__A);
13051 }
13052
13053 extern __inline __m128d
13054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13055 _mm512_castpd512_pd128 (__m512d __A)
13056 {
13057 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
13058 }
13059
13060 extern __inline __m128
13061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13062 _mm512_castps512_ps128 (__m512 __A)
13063 {
13064 return _mm512_extractf32x4_ps(__A, 0);
13065 }
13066
13067 extern __inline __m128i
13068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13069 _mm512_castsi512_si128 (__m512i __A)
13070 {
13071 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13072 }
13073
13074 extern __inline __m256d
13075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13076 _mm512_castpd512_pd256 (__m512d __A)
13077 {
13078 return _mm512_extractf64x4_pd(__A, 0);
13079 }
13080
13081 extern __inline __m256
13082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13083 _mm512_castps512_ps256 (__m512 __A)
13084 {
13085 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13086 }
13087
13088 extern __inline __m256i
13089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13090 _mm512_castsi512_si256 (__m512i __A)
13091 {
13092 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13093 }
13094
13095 extern __inline __m512d
13096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13097 _mm512_castpd128_pd512 (__m128d __A)
13098 {
13099 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13100 }
13101
13102 extern __inline __m512
13103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13104 _mm512_castps128_ps512 (__m128 __A)
13105 {
13106 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13107 }
13108
13109 extern __inline __m512i
13110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13111 _mm512_castsi128_si512 (__m128i __A)
13112 {
13113 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13114 }
13115
13116 extern __inline __m512d
13117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13118 _mm512_castpd256_pd512 (__m256d __A)
13119 {
13120 return __builtin_ia32_pd512_256pd (__A);
13121 }
13122
13123 extern __inline __m512
13124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13125 _mm512_castps256_ps512 (__m256 __A)
13126 {
13127 return __builtin_ia32_ps512_256ps (__A);
13128 }
13129
13130 extern __inline __m512i
13131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13132 _mm512_castsi256_si512 (__m256i __A)
13133 {
13134 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13135 }
13136
13137 extern __inline __mmask16
13138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13139 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13140 {
13141 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13142 (__v16si) __B, 0,
13143 (__mmask16) -1);
13144 }
13145
13146 extern __inline __mmask16
13147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13148 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13149 {
13150 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13151 (__v16si) __B, 0, __U);
13152 }
13153
13154 extern __inline __mmask8
13155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13156 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13157 {
13158 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13159 (__v8di) __B, 0, __U);
13160 }
13161
13162 extern __inline __mmask8
13163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13164 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13165 {
13166 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13167 (__v8di) __B, 0,
13168 (__mmask8) -1);
13169 }
13170
13171 extern __inline __mmask16
13172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13173 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13174 {
13175 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13176 (__v16si) __B, 6,
13177 (__mmask16) -1);
13178 }
13179
13180 extern __inline __mmask16
13181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13182 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13183 {
13184 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13185 (__v16si) __B, 6, __U);
13186 }
13187
13188 extern __inline __mmask8
13189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13190 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13191 {
13192 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13193 (__v8di) __B, 6, __U);
13194 }
13195
13196 extern __inline __mmask8
13197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13198 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13199 {
13200 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13201 (__v8di) __B, 6,
13202 (__mmask8) -1);
13203 }
13204
13205 #ifdef __DISABLE_AVX512F__
13206 #undef __DISABLE_AVX512F__
13207 #pragma GCC pop_options
13208 #endif /* __DISABLE_AVX512F__ */
13209
13210 #endif /* _AVX512FINTRIN_H_INCLUDED */