]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/avx512fintrin.h
re PR target/76731 ([AVX512] _mm512_i32gather_epi32 and other scatter/gather routines...
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
36
/* Internal vector types used to implement the intrinsics.  Each one is
   64 bytes wide; they differ only in element type.  */

/* Floating-point elements.  */
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef double __v8df __attribute__ ((__vector_size__ (64)));

/* char, short, int and long long elements.  */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));

/* Explicitly unsigned counterparts.  */
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
48
/* Public 512-bit vector types.  The Intel API lets these alias other
   vector types as well as their scalar components, hence the
   __may_alias__ attribute on each of them.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
54
/* Variants of the public vector types declared with __aligned__ (1), for
   accessing data that is not 64-byte aligned.  */
typedef float __m512_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
/* Mask types passed alongside vector operands: unsigned char for the
   8-element operations and unsigned short for the 16-element ones.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
62
63 extern __inline __m512i
64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65 _mm512_set_epi64 (long long __A, long long __B, long long __C,
66 long long __D, long long __E, long long __F,
67 long long __G, long long __H)
68 {
69 return __extension__ (__m512i) (__v8di)
70 { __H, __G, __F, __E, __D, __C, __B, __A };
71 }
72
73 /* Create the vector [A B C D E F G H I J K L M N O P]. */
74 extern __inline __m512i
75 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
76 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
77 int __E, int __F, int __G, int __H,
78 int __I, int __J, int __K, int __L,
79 int __M, int __N, int __O, int __P)
80 {
81 return __extension__ (__m512i)(__v16si)
82 { __P, __O, __N, __M, __L, __K, __J, __I,
83 __H, __G, __F, __E, __D, __C, __B, __A };
84 }
85
86 extern __inline __m512d
87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88 _mm512_set_pd (double __A, double __B, double __C, double __D,
89 double __E, double __F, double __G, double __H)
90 {
91 return __extension__ (__m512d)
92 { __H, __G, __F, __E, __D, __C, __B, __A };
93 }
94
95 extern __inline __m512
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 _mm512_set_ps (float __A, float __B, float __C, float __D,
98 float __E, float __F, float __G, float __H,
99 float __I, float __J, float __K, float __L,
100 float __M, float __N, float __O, float __P)
101 {
102 return __extension__ (__m512)
103 { __P, __O, __N, __M, __L, __K, __J, __I,
104 __H, __G, __F, __E, __D, __C, __B, __A };
105 }
106
/* "Reversed" setters.  Each macro hands its arguments to the matching
   _mm512_set_* function in the opposite order, so the first macro
   argument is passed last.  */
#define _mm512_setr_epi64(__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7) \
  _mm512_set_epi64 (__e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)

#define _mm512_setr_epi32(__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, \
                          __e8, __e9, __e10, __e11, __e12, __e13, __e14, \
                          __e15) \
  _mm512_set_epi32 (__e15, __e14, __e13, __e12, __e11, __e10, __e9, __e8, \
                    __e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)

#define _mm512_setr_pd(__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7) \
  _mm512_set_pd (__e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)

#define _mm512_setr_ps(__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, \
                       __e8, __e9, __e10, __e11, __e12, __e13, __e14, \
                       __e15) \
  _mm512_set_ps (__e15, __e14, __e13, __e12, __e11, __e10, __e9, __e8, \
                 __e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)
119
120 extern __inline __m512
121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
122 _mm512_undefined_ps (void)
123 {
124 __m512 __Y = __Y;
125 return __Y;
126 }
127
128 extern __inline __m512d
129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
130 _mm512_undefined_pd (void)
131 {
132 __m512d __Y = __Y;
133 return __Y;
134 }
135
136 extern __inline __m512i
137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
138 _mm512_undefined_epi32 (void)
139 {
140 __m512i __Y = __Y;
141 return __Y;
142 }
143
144 #define _mm512_undefined_si512 _mm512_undefined_epi32
145
146 extern __inline __m512i
147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
148 _mm512_set1_epi8 (char __A)
149 {
150 return __extension__ (__m512i)(__v64qi)
151 { __A, __A, __A, __A, __A, __A, __A, __A,
152 __A, __A, __A, __A, __A, __A, __A, __A,
153 __A, __A, __A, __A, __A, __A, __A, __A,
154 __A, __A, __A, __A, __A, __A, __A, __A,
155 __A, __A, __A, __A, __A, __A, __A, __A,
156 __A, __A, __A, __A, __A, __A, __A, __A,
157 __A, __A, __A, __A, __A, __A, __A, __A,
158 __A, __A, __A, __A, __A, __A, __A, __A };
159 }
160
161 extern __inline __m512i
162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163 _mm512_set1_epi16 (short __A)
164 {
165 return __extension__ (__m512i)(__v32hi)
166 { __A, __A, __A, __A, __A, __A, __A, __A,
167 __A, __A, __A, __A, __A, __A, __A, __A,
168 __A, __A, __A, __A, __A, __A, __A, __A,
169 __A, __A, __A, __A, __A, __A, __A, __A };
170 }
171
172 extern __inline __m512d
173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 _mm512_set1_pd (double __A)
175 {
176 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
177 (__v2df) { __A, },
178 (__v8df)
179 _mm512_undefined_pd (),
180 (__mmask8) -1);
181 }
182
183 extern __inline __m512
184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
185 _mm512_set1_ps (float __A)
186 {
187 return (__m512) __builtin_ia32_broadcastss512 (__extension__
188 (__v4sf) { __A, },
189 (__v16sf)
190 _mm512_undefined_ps (),
191 (__mmask16) -1);
192 }
193
194 /* Create the vector [A B C D A B C D A B C D A B C D]. */
195 extern __inline __m512i
196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
197 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
198 {
199 return __extension__ (__m512i)(__v16si)
200 { __D, __C, __B, __A, __D, __C, __B, __A,
201 __D, __C, __B, __A, __D, __C, __B, __A };
202 }
203
204 extern __inline __m512i
205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
206 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
207 long long __D)
208 {
209 return __extension__ (__m512i) (__v8di)
210 { __D, __C, __B, __A, __D, __C, __B, __A };
211 }
212
213 extern __inline __m512d
214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
215 _mm512_set4_pd (double __A, double __B, double __C, double __D)
216 {
217 return __extension__ (__m512d)
218 { __D, __C, __B, __A, __D, __C, __B, __A };
219 }
220
221 extern __inline __m512
222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223 _mm512_set4_ps (float __A, float __B, float __C, float __D)
224 {
225 return __extension__ (__m512)
226 { __D, __C, __B, __A, __D, __C, __B, __A,
227 __D, __C, __B, __A, __D, __C, __B, __A };
228 }
229
/* "Reversed" forms of the _mm512_set4_* functions: the four arguments
   are forwarded in the opposite order.  */
#define _mm512_setr4_epi64(__e0, __e1, __e2, __e3) \
  _mm512_set4_epi64 (__e3, __e2, __e1, __e0)

#define _mm512_setr4_epi32(__e0, __e1, __e2, __e3) \
  _mm512_set4_epi32 (__e3, __e2, __e1, __e0)

#define _mm512_setr4_pd(__e0, __e1, __e2, __e3) \
  _mm512_set4_pd (__e3, __e2, __e1, __e0)

#define _mm512_setr4_ps(__e0, __e1, __e2, __e3) \
  _mm512_set4_ps (__e3, __e2, __e1, __e0)
241
242 extern __inline __m512
243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
244 _mm512_setzero_ps (void)
245 {
246 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
247 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
248 }
249
250 extern __inline __m512d
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 _mm512_setzero_pd (void)
253 {
254 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
255 }
256
257 extern __inline __m512i
258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259 _mm512_setzero_epi32 (void)
260 {
261 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
262 }
263
264 extern __inline __m512i
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm512_setzero_si512 (void)
267 {
268 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
269 }
270
271 extern __inline __m512d
272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
273 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
274 {
275 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
276 (__v8df) __W,
277 (__mmask8) __U);
278 }
279
280 extern __inline __m512d
281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
283 {
284 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
285 (__v8df)
286 _mm512_setzero_pd (),
287 (__mmask8) __U);
288 }
289
290 extern __inline __m512
291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
292 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
293 {
294 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
295 (__v16sf) __W,
296 (__mmask16) __U);
297 }
298
299 extern __inline __m512
300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
302 {
303 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
304 (__v16sf)
305 _mm512_setzero_ps (),
306 (__mmask16) __U);
307 }
308
309 extern __inline __m512d
310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311 _mm512_load_pd (void const *__P)
312 {
313 return *(__m512d *) __P;
314 }
315
316 extern __inline __m512d
317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
319 {
320 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
321 (__v8df) __W,
322 (__mmask8) __U);
323 }
324
325 extern __inline __m512d
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
328 {
329 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
330 (__v8df)
331 _mm512_setzero_pd (),
332 (__mmask8) __U);
333 }
334
335 extern __inline void
336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337 _mm512_store_pd (void *__P, __m512d __A)
338 {
339 *(__m512d *) __P = __A;
340 }
341
342 extern __inline void
343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
344 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
345 {
346 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
347 (__mmask8) __U);
348 }
349
350 extern __inline __m512
351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352 _mm512_load_ps (void const *__P)
353 {
354 return *(__m512 *) __P;
355 }
356
357 extern __inline __m512
358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
359 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
360 {
361 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
362 (__v16sf) __W,
363 (__mmask16) __U);
364 }
365
366 extern __inline __m512
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
369 {
370 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
371 (__v16sf)
372 _mm512_setzero_ps (),
373 (__mmask16) __U);
374 }
375
376 extern __inline void
377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378 _mm512_store_ps (void *__P, __m512 __A)
379 {
380 *(__m512 *) __P = __A;
381 }
382
383 extern __inline void
384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
385 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
386 {
387 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
388 (__mmask16) __U);
389 }
390
391 extern __inline __m512i
392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
393 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
394 {
395 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
396 (__v8di) __W,
397 (__mmask8) __U);
398 }
399
400 extern __inline __m512i
401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
403 {
404 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
405 (__v8di)
406 _mm512_setzero_si512 (),
407 (__mmask8) __U);
408 }
409
410 extern __inline __m512i
411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412 _mm512_load_epi64 (void const *__P)
413 {
414 return *(__m512i *) __P;
415 }
416
417 extern __inline __m512i
418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
419 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
420 {
421 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
422 (__v8di) __W,
423 (__mmask8) __U);
424 }
425
426 extern __inline __m512i
427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
429 {
430 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
431 (__v8di)
432 _mm512_setzero_si512 (),
433 (__mmask8) __U);
434 }
435
436 extern __inline void
437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438 _mm512_store_epi64 (void *__P, __m512i __A)
439 {
440 *(__m512i *) __P = __A;
441 }
442
443 extern __inline void
444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
445 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
446 {
447 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
448 (__mmask8) __U);
449 }
450
451 extern __inline __m512i
452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
454 {
455 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
456 (__v16si) __W,
457 (__mmask16) __U);
458 }
459
460 extern __inline __m512i
461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
462 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
463 {
464 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
465 (__v16si)
466 _mm512_setzero_si512 (),
467 (__mmask16) __U);
468 }
469
470 extern __inline __m512i
471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
472 _mm512_load_si512 (void const *__P)
473 {
474 return *(__m512i *) __P;
475 }
476
477 extern __inline __m512i
478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479 _mm512_load_epi32 (void const *__P)
480 {
481 return *(__m512i *) __P;
482 }
483
484 extern __inline __m512i
485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
487 {
488 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
489 (__v16si) __W,
490 (__mmask16) __U);
491 }
492
493 extern __inline __m512i
494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
496 {
497 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
498 (__v16si)
499 _mm512_setzero_si512 (),
500 (__mmask16) __U);
501 }
502
503 extern __inline void
504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505 _mm512_store_si512 (void *__P, __m512i __A)
506 {
507 *(__m512i *) __P = __A;
508 }
509
510 extern __inline void
511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512 _mm512_store_epi32 (void *__P, __m512i __A)
513 {
514 *(__m512i *) __P = __A;
515 }
516
517 extern __inline void
518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
519 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
520 {
521 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
522 (__mmask16) __U);
523 }
524
525 extern __inline __m512i
526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
528 {
529 return (__m512i) ((__v16su) __A * (__v16su) __B);
530 }
531
532 extern __inline __m512i
533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
534 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
535 {
536 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
537 (__v16si) __B,
538 (__v16si)
539 _mm512_setzero_si512 (),
540 __M);
541 }
542
543 extern __inline __m512i
544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
545 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
546 {
547 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
548 (__v16si) __B,
549 (__v16si) __W, __M);
550 }
551
552 extern __inline __m512i
553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
555 {
556 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
557 (__v16si) __Y,
558 (__v16si)
559 _mm512_undefined_epi32 (),
560 (__mmask16) -1);
561 }
562
563 extern __inline __m512i
564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
565 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
566 {
567 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
568 (__v16si) __Y,
569 (__v16si) __W,
570 (__mmask16) __U);
571 }
572
573 extern __inline __m512i
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
576 {
577 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
578 (__v16si) __Y,
579 (__v16si)
580 _mm512_setzero_si512 (),
581 (__mmask16) __U);
582 }
583
584 extern __inline __m512i
585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
586 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
587 {
588 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
589 (__v16si) __Y,
590 (__v16si)
591 _mm512_undefined_epi32 (),
592 (__mmask16) -1);
593 }
594
595 extern __inline __m512i
596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
597 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
598 {
599 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
600 (__v16si) __Y,
601 (__v16si) __W,
602 (__mmask16) __U);
603 }
604
605 extern __inline __m512i
606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
608 {
609 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
610 (__v16si) __Y,
611 (__v16si)
612 _mm512_setzero_si512 (),
613 (__mmask16) __U);
614 }
615
616 extern __inline __m512i
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
619 {
620 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
621 (__v16si) __Y,
622 (__v16si)
623 _mm512_undefined_epi32 (),
624 (__mmask16) -1);
625 }
626
627 extern __inline __m512i
628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
630 {
631 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
632 (__v16si) __Y,
633 (__v16si) __W,
634 (__mmask16) __U);
635 }
636
637 extern __inline __m512i
638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
639 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
640 {
641 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
642 (__v16si) __Y,
643 (__v16si)
644 _mm512_setzero_si512 (),
645 (__mmask16) __U);
646 }
647
648 extern __inline __m512i
649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650 _mm512_add_epi64 (__m512i __A, __m512i __B)
651 {
652 return (__m512i) ((__v8du) __A + (__v8du) __B);
653 }
654
655 extern __inline __m512i
656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
658 {
659 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
660 (__v8di) __B,
661 (__v8di) __W,
662 (__mmask8) __U);
663 }
664
665 extern __inline __m512i
666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
667 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
668 {
669 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
670 (__v8di) __B,
671 (__v8di)
672 _mm512_setzero_si512 (),
673 (__mmask8) __U);
674 }
675
676 extern __inline __m512i
677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678 _mm512_sub_epi64 (__m512i __A, __m512i __B)
679 {
680 return (__m512i) ((__v8du) __A - (__v8du) __B);
681 }
682
683 extern __inline __m512i
684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
686 {
687 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
688 (__v8di) __B,
689 (__v8di) __W,
690 (__mmask8) __U);
691 }
692
693 extern __inline __m512i
694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
695 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
696 {
697 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
698 (__v8di) __B,
699 (__v8di)
700 _mm512_setzero_si512 (),
701 (__mmask8) __U);
702 }
703
704 extern __inline __m512i
705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
706 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
707 {
708 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
709 (__v8di) __Y,
710 (__v8di)
711 _mm512_undefined_pd (),
712 (__mmask8) -1);
713 }
714
715 extern __inline __m512i
716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
717 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
718 {
719 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
720 (__v8di) __Y,
721 (__v8di) __W,
722 (__mmask8) __U);
723 }
724
725 extern __inline __m512i
726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
727 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
728 {
729 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
730 (__v8di) __Y,
731 (__v8di)
732 _mm512_setzero_si512 (),
733 (__mmask8) __U);
734 }
735
736 extern __inline __m512i
737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
738 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
739 {
740 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
741 (__v8di) __Y,
742 (__v8di)
743 _mm512_undefined_epi32 (),
744 (__mmask8) -1);
745 }
746
747 extern __inline __m512i
748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
749 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
750 {
751 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
752 (__v8di) __Y,
753 (__v8di) __W,
754 (__mmask8) __U);
755 }
756
757 extern __inline __m512i
758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
759 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
760 {
761 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
762 (__v8di) __Y,
763 (__v8di)
764 _mm512_setzero_si512 (),
765 (__mmask8) __U);
766 }
767
768 extern __inline __m512i
769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
770 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
771 {
772 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
773 (__v8di) __Y,
774 (__v8di)
775 _mm512_undefined_epi32 (),
776 (__mmask8) -1);
777 }
778
779 extern __inline __m512i
780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
781 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
782 {
783 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
784 (__v8di) __Y,
785 (__v8di) __W,
786 (__mmask8) __U);
787 }
788
789 extern __inline __m512i
790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
791 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
792 {
793 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
794 (__v8di) __Y,
795 (__v8di)
796 _mm512_setzero_si512 (),
797 (__mmask8) __U);
798 }
799
800 extern __inline __m512i
801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802 _mm512_add_epi32 (__m512i __A, __m512i __B)
803 {
804 return (__m512i) ((__v16su) __A + (__v16su) __B);
805 }
806
807 extern __inline __m512i
808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
810 {
811 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
812 (__v16si) __B,
813 (__v16si) __W,
814 (__mmask16) __U);
815 }
816
817 extern __inline __m512i
818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
819 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
820 {
821 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
822 (__v16si) __B,
823 (__v16si)
824 _mm512_setzero_si512 (),
825 (__mmask16) __U);
826 }
827
828 extern __inline __m512i
829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
830 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
831 {
832 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
833 (__v16si) __Y,
834 (__v8di)
835 _mm512_undefined_epi32 (),
836 (__mmask8) -1);
837 }
838
839 extern __inline __m512i
840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
841 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
842 {
843 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
844 (__v16si) __Y,
845 (__v8di) __W, __M);
846 }
847
848 extern __inline __m512i
849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
851 {
852 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
853 (__v16si) __Y,
854 (__v8di)
855 _mm512_setzero_si512 (),
856 __M);
857 }
858
859 extern __inline __m512i
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 _mm512_sub_epi32 (__m512i __A, __m512i __B)
862 {
863 return (__m512i) ((__v16su) __A - (__v16su) __B);
864 }
865
866 extern __inline __m512i
867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
868 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
869 {
870 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
871 (__v16si) __B,
872 (__v16si) __W,
873 (__mmask16) __U);
874 }
875
876 extern __inline __m512i
877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
878 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
879 {
880 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
881 (__v16si) __B,
882 (__v16si)
883 _mm512_setzero_si512 (),
884 (__mmask16) __U);
885 }
886
887 extern __inline __m512i
888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
890 {
891 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
892 (__v16si) __Y,
893 (__v8di)
894 _mm512_undefined_epi32 (),
895 (__mmask8) -1);
896 }
897
898 extern __inline __m512i
899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
901 {
902 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
903 (__v16si) __Y,
904 (__v8di) __W, __M);
905 }
906
907 extern __inline __m512i
908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
909 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
910 {
911 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
912 (__v16si) __Y,
913 (__v8di)
914 _mm512_setzero_si512 (),
915 __M);
916 }
917
918 #ifdef __OPTIMIZE__
919 extern __inline __m512i
920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
921 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
922 {
923 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
924 (__v8di)
925 _mm512_undefined_epi32 (),
926 (__mmask8) -1);
927 }
928
929 extern __inline __m512i
930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
931 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
932 unsigned int __B)
933 {
934 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
935 (__v8di) __W,
936 (__mmask8) __U);
937 }
938
939 extern __inline __m512i
940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
941 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
942 {
943 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
944 (__v8di)
945 _mm512_setzero_si512 (),
946 (__mmask8) __U);
947 }
948 #else
/* Macro forms of the 64-bit shift-left-by-count intrinsics, selected
   when __OPTIMIZE__ is not defined.  Each expands to a direct call of
   __builtin_ia32_psllqi512_mask with the count cast to int (presumably
   so the count reaches the builtin as a constant expression without
   relying on inlining).  */
#define _mm512_slli_epi64(A, B) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(A), \
                                            (int)(B), \
                                            (__v8di)(__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(A), \
                                            (int)(B), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(A), \
                                            (int)(B), \
                                            (__v8di)(__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask8)(U)))
963 #endif
964
965 extern __inline __m512i
966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 _mm512_sll_epi64 (__m512i __A, __m128i __B)
968 {
969 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
970 (__v2di) __B,
971 (__v8di)
972 _mm512_undefined_epi32 (),
973 (__mmask8) -1);
974 }
975
976 extern __inline __m512i
977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
979 {
980 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
981 (__v2di) __B,
982 (__v8di) __W,
983 (__mmask8) __U);
984 }
985
986 extern __inline __m512i
987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
988 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
989 {
990 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
991 (__v2di) __B,
992 (__v8di)
993 _mm512_setzero_si512 (),
994 (__mmask8) __U);
995 }
996
997 #ifdef __OPTIMIZE__
998 extern __inline __m512i
999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1000 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
1001 {
1002 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1003 (__v8di)
1004 _mm512_undefined_epi32 (),
1005 (__mmask8) -1);
1006 }
1007
1008 extern __inline __m512i
1009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1010 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1011 __m512i __A, unsigned int __B)
1012 {
1013 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1014 (__v8di) __W,
1015 (__mmask8) __U);
1016 }
1017
1018 extern __inline __m512i
1019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1020 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1021 {
1022 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1023 (__v8di)
1024 _mm512_setzero_si512 (),
1025 (__mmask8) __U);
1026 }
1027 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psrlqi512_mask with an undefined third operand and an
   all-ones mask.  */
#define _mm512_srli_epi64(A, B) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(A), \
    (int)(B), (__v8di)(__m512i)_mm512_undefined_epi32 (), (__mmask8)-1))
1032
/* Macro form used when __OPTIMIZE__ is not defined: W is the third vector
   operand and U the mask of __builtin_ia32_psrlqi512_mask.  */
#define _mm512_mask_srli_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(A), \
    (int)(B), (__v8di)(__m512i)(W), (__mmask8)(U)))
1037
/* Macro form used when __OPTIMIZE__ is not defined: an all-zero vector is
   the third operand of __builtin_ia32_psrlqi512_mask, U the mask.  */
#define _mm512_maskz_srli_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(A), \
    (int)(B), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(U)))
1042 #endif
1043
1044 extern __inline __m512i
1045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1046 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1047 {
1048 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1049 (__v2di) __B,
1050 (__v8di)
1051 _mm512_undefined_epi32 (),
1052 (__mmask8) -1);
1053 }
1054
1055 extern __inline __m512i
1056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1057 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1058 {
1059 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1060 (__v2di) __B,
1061 (__v8di) __W,
1062 (__mmask8) __U);
1063 }
1064
1065 extern __inline __m512i
1066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1067 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1068 {
1069 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1070 (__v2di) __B,
1071 (__v8di)
1072 _mm512_setzero_si512 (),
1073 (__mmask8) __U);
1074 }
1075
1076 #ifdef __OPTIMIZE__
1077 extern __inline __m512i
1078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1079 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1080 {
1081 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1082 (__v8di)
1083 _mm512_undefined_epi32 (),
1084 (__mmask8) -1);
1085 }
1086
1087 extern __inline __m512i
1088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1089 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1090 unsigned int __B)
1091 {
1092 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1093 (__v8di) __W,
1094 (__mmask8) __U);
1095 }
1096
1097 extern __inline __m512i
1098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1099 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1100 {
1101 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1102 (__v8di)
1103 _mm512_setzero_si512 (),
1104 (__mmask8) __U);
1105 }
1106 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psraqi512_mask with an undefined third operand and an
   all-ones mask.  */
#define _mm512_srai_epi64(A, B) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(A), \
    (int)(B), (__v8di)(__m512i)_mm512_undefined_epi32 (), (__mmask8)-1))
1111
/* Macro form used when __OPTIMIZE__ is not defined: W is the third vector
   operand and U the mask of __builtin_ia32_psraqi512_mask.  */
#define _mm512_mask_srai_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(A), \
    (int)(B), (__v8di)(__m512i)(W), (__mmask8)(U)))
1116
/* Macro form used when __OPTIMIZE__ is not defined: an all-zero vector is
   the third operand of __builtin_ia32_psraqi512_mask, U the mask.  */
#define _mm512_maskz_srai_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(A), \
    (int)(B), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(U)))
1121 #endif
1122
1123 extern __inline __m512i
1124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1126 {
1127 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1128 (__v2di) __B,
1129 (__v8di)
1130 _mm512_undefined_epi32 (),
1131 (__mmask8) -1);
1132 }
1133
1134 extern __inline __m512i
1135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1136 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1137 {
1138 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1139 (__v2di) __B,
1140 (__v8di) __W,
1141 (__mmask8) __U);
1142 }
1143
1144 extern __inline __m512i
1145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1146 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1147 {
1148 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1149 (__v2di) __B,
1150 (__v8di)
1151 _mm512_setzero_si512 (),
1152 (__mmask8) __U);
1153 }
1154
1155 #ifdef __OPTIMIZE__
1156 extern __inline __m512i
1157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1158 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1159 {
1160 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1161 (__v16si)
1162 _mm512_undefined_epi32 (),
1163 (__mmask16) -1);
1164 }
1165
1166 extern __inline __m512i
1167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1169 unsigned int __B)
1170 {
1171 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1172 (__v16si) __W,
1173 (__mmask16) __U);
1174 }
1175
1176 extern __inline __m512i
1177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1178 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1179 {
1180 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1181 (__v16si)
1182 _mm512_setzero_si512 (),
1183 (__mmask16) __U);
1184 }
1185 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pslldi512_mask with an undefined third operand and an
   all-ones mask.  */
#define _mm512_slli_epi32(A, B) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(A), \
    (int)(B), (__v16si)(__m512i)_mm512_undefined_epi32 (), (__mmask16)-1))
1190
/* Macro form used when __OPTIMIZE__ is not defined: W is the third vector
   operand and U the mask of __builtin_ia32_pslldi512_mask.  */
#define _mm512_mask_slli_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(A), \
    (int)(B), (__v16si)(__m512i)(W), (__mmask16)(U)))
1195
/* Macro form used when __OPTIMIZE__ is not defined: an all-zero vector is
   the third operand of __builtin_ia32_pslldi512_mask, U the mask.  */
#define _mm512_maskz_slli_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(A), \
    (int)(B), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
1200 #endif
1201
1202 extern __inline __m512i
1203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1204 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1205 {
1206 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1207 (__v4si) __B,
1208 (__v16si)
1209 _mm512_undefined_epi32 (),
1210 (__mmask16) -1);
1211 }
1212
1213 extern __inline __m512i
1214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1215 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1216 {
1217 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1218 (__v4si) __B,
1219 (__v16si) __W,
1220 (__mmask16) __U);
1221 }
1222
1223 extern __inline __m512i
1224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1225 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1226 {
1227 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1228 (__v4si) __B,
1229 (__v16si)
1230 _mm512_setzero_si512 (),
1231 (__mmask16) __U);
1232 }
1233
1234 #ifdef __OPTIMIZE__
1235 extern __inline __m512i
1236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1237 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1238 {
1239 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1240 (__v16si)
1241 _mm512_undefined_epi32 (),
1242 (__mmask16) -1);
1243 }
1244
1245 extern __inline __m512i
1246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1247 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1248 __m512i __A, unsigned int __B)
1249 {
1250 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1251 (__v16si) __W,
1252 (__mmask16) __U);
1253 }
1254
1255 extern __inline __m512i
1256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1257 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1258 {
1259 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1260 (__v16si)
1261 _mm512_setzero_si512 (),
1262 (__mmask16) __U);
1263 }
1264 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psrldi512_mask with an undefined third operand and an
   all-ones mask.  */
#define _mm512_srli_epi32(A, B) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(A), \
    (int)(B), (__v16si)(__m512i)_mm512_undefined_epi32 (), (__mmask16)-1))
1269
/* Macro form used when __OPTIMIZE__ is not defined: W is the third vector
   operand and U the mask of __builtin_ia32_psrldi512_mask.  */
#define _mm512_mask_srli_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(A), \
    (int)(B), (__v16si)(__m512i)(W), (__mmask16)(U)))
1274
/* Macro form used when __OPTIMIZE__ is not defined: an all-zero vector is
   the third operand of __builtin_ia32_psrldi512_mask, U the mask.  */
#define _mm512_maskz_srli_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(A), \
    (int)(B), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
1279 #endif
1280
1281 extern __inline __m512i
1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1284 {
1285 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1286 (__v4si) __B,
1287 (__v16si)
1288 _mm512_undefined_epi32 (),
1289 (__mmask16) -1);
1290 }
1291
1292 extern __inline __m512i
1293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1294 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1295 {
1296 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1297 (__v4si) __B,
1298 (__v16si) __W,
1299 (__mmask16) __U);
1300 }
1301
1302 extern __inline __m512i
1303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1304 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1305 {
1306 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1307 (__v4si) __B,
1308 (__v16si)
1309 _mm512_setzero_si512 (),
1310 (__mmask16) __U);
1311 }
1312
1313 #ifdef __OPTIMIZE__
1314 extern __inline __m512i
1315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1316 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1317 {
1318 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1319 (__v16si)
1320 _mm512_undefined_epi32 (),
1321 (__mmask16) -1);
1322 }
1323
1324 extern __inline __m512i
1325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1327 unsigned int __B)
1328 {
1329 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1330 (__v16si) __W,
1331 (__mmask16) __U);
1332 }
1333
1334 extern __inline __m512i
1335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1336 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1337 {
1338 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1339 (__v16si)
1340 _mm512_setzero_si512 (),
1341 (__mmask16) __U);
1342 }
1343 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psradi512_mask with an undefined third operand and an
   all-ones mask.  */
#define _mm512_srai_epi32(A, B) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(A), \
    (int)(B), (__v16si)(__m512i)_mm512_undefined_epi32 (), (__mmask16)-1))
1348
/* Macro form used when __OPTIMIZE__ is not defined: W is the third vector
   operand and U the mask of __builtin_ia32_psradi512_mask.  */
#define _mm512_mask_srai_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(A), \
    (int)(B), (__v16si)(__m512i)(W), (__mmask16)(U)))
1353
/* Macro form used when __OPTIMIZE__ is not defined: an all-zero vector is
   the third operand of __builtin_ia32_psradi512_mask, U the mask.  */
#define _mm512_maskz_srai_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(A), \
    (int)(B), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
1358 #endif
1359
1360 extern __inline __m512i
1361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1363 {
1364 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1365 (__v4si) __B,
1366 (__v16si)
1367 _mm512_undefined_epi32 (),
1368 (__mmask16) -1);
1369 }
1370
1371 extern __inline __m512i
1372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1373 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1374 {
1375 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1376 (__v4si) __B,
1377 (__v16si) __W,
1378 (__mmask16) __U);
1379 }
1380
1381 extern __inline __m512i
1382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1383 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1384 {
1385 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1386 (__v4si) __B,
1387 (__v16si)
1388 _mm512_setzero_si512 (),
1389 (__mmask16) __U);
1390 }
1391
1392 #ifdef __OPTIMIZE__
1393 extern __inline __m128d
1394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1395 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1396 {
1397 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1398 (__v2df) __B,
1399 __R);
1400 }
1401
1402 extern __inline __m128
1403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1404 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1405 {
1406 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1407 (__v4sf) __B,
1408 __R);
1409 }
1410
1411 extern __inline __m128d
1412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1413 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1414 {
1415 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1416 (__v2df) __B,
1417 __R);
1418 }
1419
1420 extern __inline __m128
1421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1423 {
1424 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1425 (__v4sf) __B,
1426 __R);
1427 }
1428
1429 #else
/* Macro form of _mm_add_round_sd used when __OPTIMIZE__ is not defined.
   The expansion is fully parenthesized so a postfix operator at the use
   site applies to the cast result, and the operands are cast to __v2df
   exactly as the inline definition does.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), (C)))
1432
/* Macro form of _mm_add_round_ss used when __OPTIMIZE__ is not defined.
   The expansion is fully parenthesized so a postfix operator at the use
   site applies to the cast result, and the operands are cast to __v4sf
   exactly as the inline definition does.  */
#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), (C)))
1435
/* Macro form of _mm_sub_round_sd used when __OPTIMIZE__ is not defined.
   The expansion is fully parenthesized so a postfix operator at the use
   site applies to the cast result, and the operands are cast to __v2df
   exactly as the inline definition does.  */
#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), (C)))
1438
/* Macro form of _mm_sub_round_ss used when __OPTIMIZE__ is not defined.
   The expansion is fully parenthesized so a postfix operator at the use
   site applies to the cast result, and the operands are cast to __v4sf
   exactly as the inline definition does.  */
#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), (C)))
1441 #endif
1442
1443 #ifdef __OPTIMIZE__
1444 extern __inline __m512i
1445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1446 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1447 const int __imm)
1448 {
1449 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1450 (__v8di) __B,
1451 (__v8di) __C, __imm,
1452 (__mmask8) -1);
1453 }
1454
1455 extern __inline __m512i
1456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1457 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1458 __m512i __C, const int __imm)
1459 {
1460 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1461 (__v8di) __B,
1462 (__v8di) __C, __imm,
1463 (__mmask8) __U);
1464 }
1465
1466 extern __inline __m512i
1467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1468 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1469 __m512i __C, const int __imm)
1470 {
1471 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1472 (__v8di) __B,
1473 (__v8di) __C,
1474 __imm, (__mmask8) __U);
1475 }
1476
1477 extern __inline __m512i
1478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1479 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1480 const int __imm)
1481 {
1482 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1483 (__v16si) __B,
1484 (__v16si) __C,
1485 __imm, (__mmask16) -1);
1486 }
1487
1488 extern __inline __m512i
1489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1491 __m512i __C, const int __imm)
1492 {
1493 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1494 (__v16si) __B,
1495 (__v16si) __C,
1496 __imm, (__mmask16) __U);
1497 }
1498
1499 extern __inline __m512i
1500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1502 __m512i __C, const int __imm)
1503 {
1504 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1505 (__v16si) __B,
1506 (__v16si) __C,
1507 __imm, (__mmask16) __U);
1508 }
1509 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogq512_mask with an all-ones mask.  */
#define _mm512_ternarylogic_epi64(A, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
    (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
    (int)(IMM), (__mmask8)-1))
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogq512_mask with U as the mask.  */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
    (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
    (int)(IMM), (__mmask8)(U)))
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogq512_maskz with U as the mask.  */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ( \
    (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
    (int)(IMM), (__mmask8)(U)))
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogd512_mask with an all-ones mask.  */
#define _mm512_ternarylogic_epi32(A, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
    (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
    (int)(IMM), (__mmask16)-1))
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogd512_mask with U as the mask.  */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
    (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
    (int)(IMM), (__mmask16)(U)))
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogd512_maskz with U as the mask.  */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ( \
    (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
    (int)(IMM), (__mmask16)(U)))
1531 #endif
1532
1533 extern __inline __m512d
1534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535 _mm512_rcp14_pd (__m512d __A)
1536 {
1537 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1538 (__v8df)
1539 _mm512_undefined_pd (),
1540 (__mmask8) -1);
1541 }
1542
1543 extern __inline __m512d
1544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1546 {
1547 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1548 (__v8df) __W,
1549 (__mmask8) __U);
1550 }
1551
1552 extern __inline __m512d
1553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1554 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1555 {
1556 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1557 (__v8df)
1558 _mm512_setzero_pd (),
1559 (__mmask8) __U);
1560 }
1561
1562 extern __inline __m512
1563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1564 _mm512_rcp14_ps (__m512 __A)
1565 {
1566 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1567 (__v16sf)
1568 _mm512_undefined_ps (),
1569 (__mmask16) -1);
1570 }
1571
1572 extern __inline __m512
1573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1574 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1575 {
1576 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1577 (__v16sf) __W,
1578 (__mmask16) __U);
1579 }
1580
1581 extern __inline __m512
1582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1583 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1584 {
1585 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1586 (__v16sf)
1587 _mm512_setzero_ps (),
1588 (__mmask16) __U);
1589 }
1590
1591 extern __inline __m128d
1592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1593 _mm_rcp14_sd (__m128d __A, __m128d __B)
1594 {
1595 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1596 (__v2df) __A);
1597 }
1598
1599 extern __inline __m128
1600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601 _mm_rcp14_ss (__m128 __A, __m128 __B)
1602 {
1603 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1604 (__v4sf) __A);
1605 }
1606
1607 extern __inline __m512d
1608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1609 _mm512_rsqrt14_pd (__m512d __A)
1610 {
1611 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1612 (__v8df)
1613 _mm512_undefined_pd (),
1614 (__mmask8) -1);
1615 }
1616
1617 extern __inline __m512d
1618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1620 {
1621 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1622 (__v8df) __W,
1623 (__mmask8) __U);
1624 }
1625
1626 extern __inline __m512d
1627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1628 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1629 {
1630 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1631 (__v8df)
1632 _mm512_setzero_pd (),
1633 (__mmask8) __U);
1634 }
1635
1636 extern __inline __m512
1637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1638 _mm512_rsqrt14_ps (__m512 __A)
1639 {
1640 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1641 (__v16sf)
1642 _mm512_undefined_ps (),
1643 (__mmask16) -1);
1644 }
1645
1646 extern __inline __m512
1647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1649 {
1650 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1651 (__v16sf) __W,
1652 (__mmask16) __U);
1653 }
1654
1655 extern __inline __m512
1656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1657 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1658 {
1659 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1660 (__v16sf)
1661 _mm512_setzero_ps (),
1662 (__mmask16) __U);
1663 }
1664
1665 extern __inline __m128d
1666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1667 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1668 {
1669 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1670 (__v2df) __A);
1671 }
1672
1673 extern __inline __m128
1674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1676 {
1677 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1678 (__v4sf) __A);
1679 }
1680
1681 #ifdef __OPTIMIZE__
1682 extern __inline __m512d
1683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1684 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1685 {
1686 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1687 (__v8df)
1688 _mm512_undefined_pd (),
1689 (__mmask8) -1, __R);
1690 }
1691
1692 extern __inline __m512d
1693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1694 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1695 const int __R)
1696 {
1697 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1698 (__v8df) __W,
1699 (__mmask8) __U, __R);
1700 }
1701
1702 extern __inline __m512d
1703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1704 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1705 {
1706 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1707 (__v8df)
1708 _mm512_setzero_pd (),
1709 (__mmask8) __U, __R);
1710 }
1711
1712 extern __inline __m512
1713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1714 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1715 {
1716 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1717 (__v16sf)
1718 _mm512_undefined_ps (),
1719 (__mmask16) -1, __R);
1720 }
1721
1722 extern __inline __m512
1723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1724 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1725 {
1726 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1727 (__v16sf) __W,
1728 (__mmask16) __U, __R);
1729 }
1730
1731 extern __inline __m512
1732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1733 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1734 {
1735 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1736 (__v16sf)
1737 _mm512_setzero_ps (),
1738 (__mmask16) __U, __R);
1739 }
1740
1741 extern __inline __m128d
1742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1744 {
1745 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1746 (__v2df) __A,
1747 __R);
1748 }
1749
1750 extern __inline __m128
1751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1753 {
1754 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1755 (__v4sf) __A,
1756 __R);
1757 }
1758 #else
/* Macro form of _mm512_sqrt_round_pd used when __OPTIMIZE__ is not
   defined.  The expansion is fully parenthesized so it behaves as a single
   expression at the use site; the operand and the all-ones mask carry the
   same casts as the inline definition.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)_mm512_undefined_pd (), (__mmask8)-1, (C)))
1761
/* Macro form of _mm512_mask_sqrt_round_pd used when __OPTIMIZE__ is not
   defined.  The expansion is fully parenthesized, and A, W and U carry the
   same casts as the inline definition.  */
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(W), (__mmask8)(U), (C)))
1764
/* Macro form of _mm512_maskz_sqrt_round_pd used when __OPTIMIZE__ is not
   defined.  The expansion is fully parenthesized, and A and U carry the
   same casts as the inline definition; an all-zero vector is the second
   operand.  */
#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)_mm512_setzero_pd (), (__mmask8)(U), (C)))
1767
/* Macro form of _mm512_sqrt_round_ps used when __OPTIMIZE__ is not
   defined.  The expansion is fully parenthesized so it behaves as a single
   expression at the use site; the operand and the all-ones mask carry the
   same casts as the inline definition.  */
#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)_mm512_undefined_ps (), (__mmask16)-1, (C)))
1770
/* Macro form of _mm512_mask_sqrt_round_ps used when __OPTIMIZE__ is not
   defined.  The expansion is fully parenthesized, and A, W and U carry the
   same casts as the inline definition.  */
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(W), (__mmask16)(U), (C)))
1773
/* Macro form of _mm512_maskz_sqrt_round_ps used when __OPTIMIZE__ is not
   defined.  The expansion is fully parenthesized, and A and U carry the
   same casts as the inline definition; an all-zero vector is the second
   operand.  */
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)_mm512_setzero_ps (), (__mmask16)(U), (C)))
1776
/* Macro form of _mm_sqrt_round_sd used when __OPTIMIZE__ is not defined.
   The builtin is given B first and A second, the same operand order the
   inline definition of _mm_sqrt_round_sd uses, so both build modes agree.
   The expansion is fully parenthesized and the operands are cast to
   __v2df as in the inline form.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round ((__v2df)(__m128d)(B), \
    (__v2df)(__m128d)(A), (C)))
1779
/* Macro form of _mm_sqrt_round_ss used when __OPTIMIZE__ is not defined.
   The builtin is given B first and A second, the same operand order the
   inline definition of _mm_sqrt_round_ss uses, so both build modes agree.
   The expansion is fully parenthesized and the operands are cast to
   __v4sf as in the inline form.  */
#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round ((__v4sf)(__m128)(B), \
    (__v4sf)(__m128)(A), (C)))
1782 #endif
1783
1784 extern __inline __m512i
1785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1786 _mm512_cvtepi8_epi32 (__m128i __A)
1787 {
1788 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1789 (__v16si)
1790 _mm512_undefined_epi32 (),
1791 (__mmask16) -1);
1792 }
1793
1794 extern __inline __m512i
1795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1797 {
1798 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1799 (__v16si) __W,
1800 (__mmask16) __U);
1801 }
1802
1803 extern __inline __m512i
1804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1806 {
1807 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1808 (__v16si)
1809 _mm512_setzero_si512 (),
1810 (__mmask16) __U);
1811 }
1812
1813 extern __inline __m512i
1814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1815 _mm512_cvtepi8_epi64 (__m128i __A)
1816 {
1817 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1818 (__v8di)
1819 _mm512_undefined_epi32 (),
1820 (__mmask8) -1);
1821 }
1822
1823 extern __inline __m512i
1824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1826 {
1827 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1828 (__v8di) __W,
1829 (__mmask8) __U);
1830 }
1831
1832 extern __inline __m512i
1833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1834 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1835 {
1836 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1837 (__v8di)
1838 _mm512_setzero_si512 (),
1839 (__mmask8) __U);
1840 }
1841
1842 extern __inline __m512i
1843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1844 _mm512_cvtepi16_epi32 (__m256i __A)
1845 {
1846 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1847 (__v16si)
1848 _mm512_undefined_epi32 (),
1849 (__mmask16) -1);
1850 }
1851
1852 extern __inline __m512i
1853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1855 {
1856 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1857 (__v16si) __W,
1858 (__mmask16) __U);
1859 }
1860
1861 extern __inline __m512i
1862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1863 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1864 {
1865 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1866 (__v16si)
1867 _mm512_setzero_si512 (),
1868 (__mmask16) __U);
1869 }
1870
1871 extern __inline __m512i
1872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1873 _mm512_cvtepi16_epi64 (__m128i __A)
1874 {
1875 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1876 (__v8di)
1877 _mm512_undefined_epi32 (),
1878 (__mmask8) -1);
1879 }
1880
1881 extern __inline __m512i
1882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1883 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1884 {
1885 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1886 (__v8di) __W,
1887 (__mmask8) __U);
1888 }
1889
1890 extern __inline __m512i
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1893 {
1894 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1895 (__v8di)
1896 _mm512_setzero_si512 (),
1897 (__mmask8) __U);
1898 }
1899
1900 extern __inline __m512i
1901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1902 _mm512_cvtepi32_epi64 (__m256i __X)
1903 {
1904 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1905 (__v8di)
1906 _mm512_undefined_epi32 (),
1907 (__mmask8) -1);
1908 }
1909
1910 extern __inline __m512i
1911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1912 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1913 {
1914 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1915 (__v8di) __W,
1916 (__mmask8) __U);
1917 }
1918
1919 extern __inline __m512i
1920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1921 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1922 {
1923 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1924 (__v8di)
1925 _mm512_setzero_si512 (),
1926 (__mmask8) __U);
1927 }
1928
1929 extern __inline __m512i
1930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931 _mm512_cvtepu8_epi32 (__m128i __A)
1932 {
1933 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1934 (__v16si)
1935 _mm512_undefined_epi32 (),
1936 (__mmask16) -1);
1937 }
1938
1939 extern __inline __m512i
1940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1942 {
1943 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1944 (__v16si) __W,
1945 (__mmask16) __U);
1946 }
1947
1948 extern __inline __m512i
1949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1950 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1951 {
1952 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1953 (__v16si)
1954 _mm512_setzero_si512 (),
1955 (__mmask16) __U);
1956 }
1957
1958 extern __inline __m512i
1959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960 _mm512_cvtepu8_epi64 (__m128i __A)
1961 {
1962 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1963 (__v8di)
1964 _mm512_undefined_epi32 (),
1965 (__mmask8) -1);
1966 }
1967
1968 extern __inline __m512i
1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1971 {
1972 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1973 (__v8di) __W,
1974 (__mmask8) __U);
1975 }
1976
1977 extern __inline __m512i
1978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1979 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1980 {
1981 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1982 (__v8di)
1983 _mm512_setzero_si512 (),
1984 (__mmask8) __U);
1985 }
1986
1987 extern __inline __m512i
1988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1989 _mm512_cvtepu16_epi32 (__m256i __A)
1990 {
1991 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1992 (__v16si)
1993 _mm512_undefined_epi32 (),
1994 (__mmask16) -1);
1995 }
1996
1997 extern __inline __m512i
1998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2000 {
2001 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2002 (__v16si) __W,
2003 (__mmask16) __U);
2004 }
2005
2006 extern __inline __m512i
2007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2009 {
2010 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2011 (__v16si)
2012 _mm512_setzero_si512 (),
2013 (__mmask16) __U);
2014 }
2015
2016 extern __inline __m512i
2017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2018 _mm512_cvtepu16_epi64 (__m128i __A)
2019 {
2020 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2021 (__v8di)
2022 _mm512_undefined_epi32 (),
2023 (__mmask8) -1);
2024 }
2025
2026 extern __inline __m512i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2029 {
2030 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2031 (__v8di) __W,
2032 (__mmask8) __U);
2033 }
2034
2035 extern __inline __m512i
2036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2038 {
2039 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2040 (__v8di)
2041 _mm512_setzero_si512 (),
2042 (__mmask8) __U);
2043 }
2044
2045 extern __inline __m512i
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm512_cvtepu32_epi64 (__m256i __X)
2048 {
2049 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2050 (__v8di)
2051 _mm512_undefined_epi32 (),
2052 (__mmask8) -1);
2053 }
2054
2055 extern __inline __m512i
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2058 {
2059 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2060 (__v8di) __W,
2061 (__mmask8) __U);
2062 }
2063
2064 extern __inline __m512i
2065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2066 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2067 {
2068 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2069 (__v8di)
2070 _mm512_setzero_si512 (),
2071 (__mmask8) __U);
2072 }
2073
2074 #ifdef __OPTIMIZE__
2075 extern __inline __m512d
2076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2077 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2078 {
2079 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2080 (__v8df) __B,
2081 (__v8df)
2082 _mm512_undefined_pd (),
2083 (__mmask8) -1, __R);
2084 }
2085
2086 extern __inline __m512d
2087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2088 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2089 __m512d __B, const int __R)
2090 {
2091 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2092 (__v8df) __B,
2093 (__v8df) __W,
2094 (__mmask8) __U, __R);
2095 }
2096
2097 extern __inline __m512d
2098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2099 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2100 const int __R)
2101 {
2102 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2103 (__v8df) __B,
2104 (__v8df)
2105 _mm512_setzero_pd (),
2106 (__mmask8) __U, __R);
2107 }
2108
2109 extern __inline __m512
2110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2111 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2112 {
2113 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2114 (__v16sf) __B,
2115 (__v16sf)
2116 _mm512_undefined_ps (),
2117 (__mmask16) -1, __R);
2118 }
2119
2120 extern __inline __m512
2121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2122 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2123 __m512 __B, const int __R)
2124 {
2125 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2126 (__v16sf) __B,
2127 (__v16sf) __W,
2128 (__mmask16) __U, __R);
2129 }
2130
2131 extern __inline __m512
2132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2133 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2134 {
2135 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2136 (__v16sf) __B,
2137 (__v16sf)
2138 _mm512_setzero_ps (),
2139 (__mmask16) __U, __R);
2140 }
2141
2142 extern __inline __m512d
2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2145 {
2146 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2147 (__v8df) __B,
2148 (__v8df)
2149 _mm512_undefined_pd (),
2150 (__mmask8) -1, __R);
2151 }
2152
2153 extern __inline __m512d
2154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2155 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2156 __m512d __B, const int __R)
2157 {
2158 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2159 (__v8df) __B,
2160 (__v8df) __W,
2161 (__mmask8) __U, __R);
2162 }
2163
2164 extern __inline __m512d
2165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2166 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2167 const int __R)
2168 {
2169 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2170 (__v8df) __B,
2171 (__v8df)
2172 _mm512_setzero_pd (),
2173 (__mmask8) __U, __R);
2174 }
2175
2176 extern __inline __m512
2177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2178 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2179 {
2180 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2181 (__v16sf) __B,
2182 (__v16sf)
2183 _mm512_undefined_ps (),
2184 (__mmask16) -1, __R);
2185 }
2186
2187 extern __inline __m512
2188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2190 __m512 __B, const int __R)
2191 {
2192 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2193 (__v16sf) __B,
2194 (__v16sf) __W,
2195 (__mmask16) __U, __R);
2196 }
2197
2198 extern __inline __m512
2199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2200 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2201 {
2202 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2203 (__v16sf) __B,
2204 (__v16sf)
2205 _mm512_setzero_ps (),
2206 (__mmask16) __U, __R);
2207 }
2208 #else
/* Macro forms of the add-with-rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses so that a postfix operator written after the invocation
   applies to the cast result rather than to the builtin call.  */
#define _mm512_add_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))

#define _mm512_add_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2226
/* Macro forms of the subtract-with-rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses so that a postfix operator written after the invocation
   applies to the cast result rather than to the builtin call.  */
#define _mm512_sub_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))

#define _mm512_sub_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2244 #endif
2245
2246 #ifdef __OPTIMIZE__
2247 extern __inline __m512d
2248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2249 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2250 {
2251 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2252 (__v8df) __B,
2253 (__v8df)
2254 _mm512_undefined_pd (),
2255 (__mmask8) -1, __R);
2256 }
2257
2258 extern __inline __m512d
2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2261 __m512d __B, const int __R)
2262 {
2263 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2264 (__v8df) __B,
2265 (__v8df) __W,
2266 (__mmask8) __U, __R);
2267 }
2268
2269 extern __inline __m512d
2270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2271 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2272 const int __R)
2273 {
2274 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2275 (__v8df) __B,
2276 (__v8df)
2277 _mm512_setzero_pd (),
2278 (__mmask8) __U, __R);
2279 }
2280
2281 extern __inline __m512
2282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2283 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2284 {
2285 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2286 (__v16sf) __B,
2287 (__v16sf)
2288 _mm512_undefined_ps (),
2289 (__mmask16) -1, __R);
2290 }
2291
2292 extern __inline __m512
2293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2294 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2295 __m512 __B, const int __R)
2296 {
2297 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2298 (__v16sf) __B,
2299 (__v16sf) __W,
2300 (__mmask16) __U, __R);
2301 }
2302
2303 extern __inline __m512
2304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2305 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2306 {
2307 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2308 (__v16sf) __B,
2309 (__v16sf)
2310 _mm512_setzero_ps (),
2311 (__mmask16) __U, __R);
2312 }
2313
2314 extern __inline __m512d
2315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2316 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2317 {
2318 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2319 (__v8df) __V,
2320 (__v8df)
2321 _mm512_undefined_pd (),
2322 (__mmask8) -1, __R);
2323 }
2324
2325 extern __inline __m512d
2326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2327 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2328 __m512d __V, const int __R)
2329 {
2330 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2331 (__v8df) __V,
2332 (__v8df) __W,
2333 (__mmask8) __U, __R);
2334 }
2335
2336 extern __inline __m512d
2337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2338 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2339 const int __R)
2340 {
2341 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2342 (__v8df) __V,
2343 (__v8df)
2344 _mm512_setzero_pd (),
2345 (__mmask8) __U, __R);
2346 }
2347
2348 extern __inline __m512
2349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2350 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2351 {
2352 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2353 (__v16sf) __B,
2354 (__v16sf)
2355 _mm512_undefined_ps (),
2356 (__mmask16) -1, __R);
2357 }
2358
2359 extern __inline __m512
2360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2361 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2362 __m512 __B, const int __R)
2363 {
2364 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2365 (__v16sf) __B,
2366 (__v16sf) __W,
2367 (__mmask16) __U, __R);
2368 }
2369
2370 extern __inline __m512
2371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2372 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2373 {
2374 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2375 (__v16sf) __B,
2376 (__v16sf)
2377 _mm512_setzero_ps (),
2378 (__mmask16) __U, __R);
2379 }
2380
2381 extern __inline __m128d
2382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2383 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2384 {
2385 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2386 (__v2df) __B,
2387 __R);
2388 }
2389
2390 extern __inline __m128
2391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2393 {
2394 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2395 (__v4sf) __B,
2396 __R);
2397 }
2398
2399 extern __inline __m128d
2400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2402 {
2403 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2404 (__v2df) __B,
2405 __R);
2406 }
2407
2408 extern __inline __m128
2409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2410 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2411 {
2412 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2413 (__v4sf) __B,
2414 __R);
2415 }
2416
2417 #else
/* Macro forms of the multiply-with-rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses so that a postfix operator written after the invocation
   applies to the cast result rather than to the builtin call.  */
#define _mm512_mul_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))

#define _mm512_mul_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2435
/* Macro forms of the divide-with-rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses so that a postfix operator written after the invocation
   applies to the cast result rather than to the builtin call.  */
#define _mm512_div_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))

#define _mm512_div_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2453
/* Macro forms of the scalar multiply/divide-with-rounding intrinsics,
   used when __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses so that a postfix operator written after the invocation
   applies to the cast result rather than to the builtin call.  */
#define _mm_mul_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_mulsd_round(A, B, C))

#define _mm_mul_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_mulss_round(A, B, C))

#define _mm_div_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_divsd_round(A, B, C))

#define _mm_div_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_divss_round(A, B, C))
2465 #endif
2466
2467 #ifdef __OPTIMIZE__
2468 extern __inline __m512d
2469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2471 {
2472 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2473 (__v8df) __B,
2474 (__v8df)
2475 _mm512_undefined_pd (),
2476 (__mmask8) -1, __R);
2477 }
2478
2479 extern __inline __m512d
2480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2481 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2482 __m512d __B, const int __R)
2483 {
2484 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2485 (__v8df) __B,
2486 (__v8df) __W,
2487 (__mmask8) __U, __R);
2488 }
2489
2490 extern __inline __m512d
2491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2493 const int __R)
2494 {
2495 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2496 (__v8df) __B,
2497 (__v8df)
2498 _mm512_setzero_pd (),
2499 (__mmask8) __U, __R);
2500 }
2501
2502 extern __inline __m512
2503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2504 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2505 {
2506 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2507 (__v16sf) __B,
2508 (__v16sf)
2509 _mm512_undefined_ps (),
2510 (__mmask16) -1, __R);
2511 }
2512
2513 extern __inline __m512
2514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2515 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2516 __m512 __B, const int __R)
2517 {
2518 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2519 (__v16sf) __B,
2520 (__v16sf) __W,
2521 (__mmask16) __U, __R);
2522 }
2523
2524 extern __inline __m512
2525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2526 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2527 {
2528 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2529 (__v16sf) __B,
2530 (__v16sf)
2531 _mm512_setzero_ps (),
2532 (__mmask16) __U, __R);
2533 }
2534
2535 extern __inline __m512d
2536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2537 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2538 {
2539 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2540 (__v8df) __B,
2541 (__v8df)
2542 _mm512_undefined_pd (),
2543 (__mmask8) -1, __R);
2544 }
2545
2546 extern __inline __m512d
2547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2548 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2549 __m512d __B, const int __R)
2550 {
2551 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2552 (__v8df) __B,
2553 (__v8df) __W,
2554 (__mmask8) __U, __R);
2555 }
2556
2557 extern __inline __m512d
2558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2559 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2560 const int __R)
2561 {
2562 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2563 (__v8df) __B,
2564 (__v8df)
2565 _mm512_setzero_pd (),
2566 (__mmask8) __U, __R);
2567 }
2568
2569 extern __inline __m512
2570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2572 {
2573 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2574 (__v16sf) __B,
2575 (__v16sf)
2576 _mm512_undefined_ps (),
2577 (__mmask16) -1, __R);
2578 }
2579
2580 extern __inline __m512
2581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2582 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2583 __m512 __B, const int __R)
2584 {
2585 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2586 (__v16sf) __B,
2587 (__v16sf) __W,
2588 (__mmask16) __U, __R);
2589 }
2590
2591 extern __inline __m512
2592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2593 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2594 {
2595 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2596 (__v16sf) __B,
2597 (__v16sf)
2598 _mm512_setzero_ps (),
2599 (__mmask16) __U, __R);
2600 }
2601 #else
/* Macro forms of the double-precision max-with-rounding intrinsics,
   used when __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses so that a postfix operator written after the invocation
   applies to the cast result rather than to the builtin call.  */
#define _mm512_max_round_pd(A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2610
/* Unmasked single-precision max with rounding (macro form).  The
   undefined third operand comes from _mm512_undefined_ps so that it
   matches the __v16sf element type of the builtin; the expansion is
   parenthesized so postfix operators apply to the cast result.  */
#define _mm512_max_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
2613
/* Masked macro forms of single-precision max with rounding.  Each
   expansion is wrapped in parentheses so that a postfix operator
   written after the invocation applies to the cast result rather than
   to the builtin call.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2619
/* Macro forms of the min-with-rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses so that a postfix operator written after the invocation
   applies to the cast result rather than to the builtin call.  */
#define _mm512_min_round_pd(A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))

#define _mm512_min_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2637 #endif
2638
2639 #ifdef __OPTIMIZE__
2640 extern __inline __m512d
2641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2643 {
2644 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2645 (__v8df) __B,
2646 (__v8df)
2647 _mm512_undefined_pd (),
2648 (__mmask8) -1, __R);
2649 }
2650
2651 extern __inline __m512d
2652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2653 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2654 __m512d __B, const int __R)
2655 {
2656 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2657 (__v8df) __B,
2658 (__v8df) __W,
2659 (__mmask8) __U, __R);
2660 }
2661
2662 extern __inline __m512d
2663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2664 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2665 const int __R)
2666 {
2667 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2668 (__v8df) __B,
2669 (__v8df)
2670 _mm512_setzero_pd (),
2671 (__mmask8) __U, __R);
2672 }
2673
2674 extern __inline __m512
2675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2676 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2677 {
2678 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2679 (__v16sf) __B,
2680 (__v16sf)
2681 _mm512_undefined_ps (),
2682 (__mmask16) -1, __R);
2683 }
2684
2685 extern __inline __m512
2686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2687 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2688 __m512 __B, const int __R)
2689 {
2690 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2691 (__v16sf) __B,
2692 (__v16sf) __W,
2693 (__mmask16) __U, __R);
2694 }
2695
2696 extern __inline __m512
2697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2698 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2699 const int __R)
2700 {
2701 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2702 (__v16sf) __B,
2703 (__v16sf)
2704 _mm512_setzero_ps (),
2705 (__mmask16) __U, __R);
2706 }
2707
2708 extern __inline __m128d
2709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2711 {
2712 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2713 (__v2df) __B,
2714 __R);
2715 }
2716
2717 extern __inline __m128
2718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2719 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2720 {
2721 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2722 (__v4sf) __B,
2723 __R);
2724 }
2725 #else
/* Macro forms of the 512-bit scalef-with-rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses so that a postfix operator written after the invocation
   applies to the cast result rather than to the builtin call.  */
#define _mm512_scalef_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))

#define _mm512_scalef_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2743
/* Macro forms of the scalar scalef-with-rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses so that a postfix operator written after the invocation
   applies to the cast result rather than to the builtin call.  */
#define _mm_scalef_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))

#define _mm_scalef_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_scalefss_round(A, B, C))
2749 #endif
2750
2751 #ifdef __OPTIMIZE__
2752 extern __inline __m512d
2753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2754 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2755 {
2756 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2757 (__v8df) __B,
2758 (__v8df) __C,
2759 (__mmask8) -1, __R);
2760 }
2761
2762 extern __inline __m512d
2763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2764 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2765 __m512d __C, const int __R)
2766 {
2767 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2768 (__v8df) __B,
2769 (__v8df) __C,
2770 (__mmask8) __U, __R);
2771 }
2772
2773 extern __inline __m512d
2774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2775 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2776 __mmask8 __U, const int __R)
2777 {
2778 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2779 (__v8df) __B,
2780 (__v8df) __C,
2781 (__mmask8) __U, __R);
2782 }
2783
2784 extern __inline __m512d
2785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2787 __m512d __C, const int __R)
2788 {
2789 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2790 (__v8df) __B,
2791 (__v8df) __C,
2792 (__mmask8) __U, __R);
2793 }
2794
2795 extern __inline __m512
2796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2798 {
2799 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2800 (__v16sf) __B,
2801 (__v16sf) __C,
2802 (__mmask16) -1, __R);
2803 }
2804
2805 extern __inline __m512
2806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2808 __m512 __C, const int __R)
2809 {
2810 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2811 (__v16sf) __B,
2812 (__v16sf) __C,
2813 (__mmask16) __U, __R);
2814 }
2815
2816 extern __inline __m512
2817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2818 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2819 __mmask16 __U, const int __R)
2820 {
2821 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2822 (__v16sf) __B,
2823 (__v16sf) __C,
2824 (__mmask16) __U, __R);
2825 }
2826
2827 extern __inline __m512
2828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2829 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2830 __m512 __C, const int __R)
2831 {
2832 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2833 (__v16sf) __B,
2834 (__v16sf) __C,
2835 (__mmask16) __U, __R);
2836 }
2837
2838 extern __inline __m512d
2839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2840 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2841 {
2842 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2843 (__v8df) __B,
2844 -(__v8df) __C,
2845 (__mmask8) -1, __R);
2846 }
2847
2848 extern __inline __m512d
2849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2850 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2851 __m512d __C, const int __R)
2852 {
2853 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2854 (__v8df) __B,
2855 -(__v8df) __C,
2856 (__mmask8) __U, __R);
2857 }
2858
2859 extern __inline __m512d
2860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2861 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2862 __mmask8 __U, const int __R)
2863 {
2864 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2865 (__v8df) __B,
2866 (__v8df) __C,
2867 (__mmask8) __U, __R);
2868 }
2869
2870 extern __inline __m512d
2871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2872 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2873 __m512d __C, const int __R)
2874 {
2875 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2876 (__v8df) __B,
2877 -(__v8df) __C,
2878 (__mmask8) __U, __R);
2879 }
2880
2881 extern __inline __m512
2882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2884 {
2885 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2886 (__v16sf) __B,
2887 -(__v16sf) __C,
2888 (__mmask16) -1, __R);
2889 }
2890
2891 extern __inline __m512
2892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2893 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2894 __m512 __C, const int __R)
2895 {
2896 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2897 (__v16sf) __B,
2898 -(__v16sf) __C,
2899 (__mmask16) __U, __R);
2900 }
2901
2902 extern __inline __m512
2903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2904 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2905 __mmask16 __U, const int __R)
2906 {
2907 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2908 (__v16sf) __B,
2909 (__v16sf) __C,
2910 (__mmask16) __U, __R);
2911 }
2912
2913 extern __inline __m512
2914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2915 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2916 __m512 __C, const int __R)
2917 {
2918 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2919 (__v16sf) __B,
2920 -(__v16sf) __C,
2921 (__mmask16) __U, __R);
2922 }
2923
2924 extern __inline __m512d
2925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2926 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2927 {
2928 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2929 (__v8df) __B,
2930 (__v8df) __C,
2931 (__mmask8) -1, __R);
2932 }
2933
2934 extern __inline __m512d
2935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2936 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2937 __m512d __C, const int __R)
2938 {
2939 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2940 (__v8df) __B,
2941 (__v8df) __C,
2942 (__mmask8) __U, __R);
2943 }
2944
2945 extern __inline __m512d
2946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2947 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2948 __mmask8 __U, const int __R)
2949 {
2950 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2951 (__v8df) __B,
2952 (__v8df) __C,
2953 (__mmask8) __U, __R);
2954 }
2955
2956 extern __inline __m512d
2957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2958 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2959 __m512d __C, const int __R)
2960 {
2961 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2962 (__v8df) __B,
2963 (__v8df) __C,
2964 (__mmask8) __U, __R);
2965 }
2966
2967 extern __inline __m512
2968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2969 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2970 {
2971 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2972 (__v16sf) __B,
2973 (__v16sf) __C,
2974 (__mmask16) -1, __R);
2975 }
2976
2977 extern __inline __m512
2978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2979 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2980 __m512 __C, const int __R)
2981 {
2982 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2983 (__v16sf) __B,
2984 (__v16sf) __C,
2985 (__mmask16) __U, __R);
2986 }
2987
2988 extern __inline __m512
2989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2990 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2991 __mmask16 __U, const int __R)
2992 {
2993 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2994 (__v16sf) __B,
2995 (__v16sf) __C,
2996 (__mmask16) __U, __R);
2997 }
2998
2999 extern __inline __m512
3000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3001 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3002 __m512 __C, const int __R)
3003 {
3004 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3005 (__v16sf) __B,
3006 (__v16sf) __C,
3007 (__mmask16) __U, __R);
3008 }
3009
3010 extern __inline __m512d
3011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3012 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3013 {
3014 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3015 (__v8df) __B,
3016 -(__v8df) __C,
3017 (__mmask8) -1, __R);
3018 }
3019
3020 extern __inline __m512d
3021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3022 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3023 __m512d __C, const int __R)
3024 {
3025 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3026 (__v8df) __B,
3027 -(__v8df) __C,
3028 (__mmask8) __U, __R);
3029 }
3030
3031 extern __inline __m512d
3032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3033 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3034 __mmask8 __U, const int __R)
3035 {
3036 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3037 (__v8df) __B,
3038 (__v8df) __C,
3039 (__mmask8) __U, __R);
3040 }
3041
3042 extern __inline __m512d
3043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3045 __m512d __C, const int __R)
3046 {
3047 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3048 (__v8df) __B,
3049 -(__v8df) __C,
3050 (__mmask8) __U, __R);
3051 }
3052
3053 extern __inline __m512
3054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3055 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3056 {
3057 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3058 (__v16sf) __B,
3059 -(__v16sf) __C,
3060 (__mmask16) -1, __R);
3061 }
3062
3063 extern __inline __m512
3064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3065 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3066 __m512 __C, const int __R)
3067 {
3068 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3069 (__v16sf) __B,
3070 -(__v16sf) __C,
3071 (__mmask16) __U, __R);
3072 }
3073
3074 extern __inline __m512
3075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3076 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3077 __mmask16 __U, const int __R)
3078 {
3079 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3080 (__v16sf) __B,
3081 (__v16sf) __C,
3082 (__mmask16) __U, __R);
3083 }
3084
3085 extern __inline __m512
3086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3087 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3088 __m512 __C, const int __R)
3089 {
3090 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3091 (__v16sf) __B,
3092 -(__v16sf) __C,
3093 (__mmask16) __U, __R);
3094 }
3095
3096 extern __inline __m512d
3097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3098 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3099 {
3100 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3101 (__v8df) __B,
3102 (__v8df) __C,
3103 (__mmask8) -1, __R);
3104 }
3105
3106 extern __inline __m512d
3107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3108 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3109 __m512d __C, const int __R)
3110 {
3111 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3112 (__v8df) __B,
3113 (__v8df) __C,
3114 (__mmask8) __U, __R);
3115 }
3116
3117 extern __inline __m512d
3118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3119 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3120 __mmask8 __U, const int __R)
3121 {
3122 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3123 (__v8df) __B,
3124 (__v8df) __C,
3125 (__mmask8) __U, __R);
3126 }
3127
3128 extern __inline __m512d
3129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3130 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3131 __m512d __C, const int __R)
3132 {
3133 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3134 (__v8df) __B,
3135 (__v8df) __C,
3136 (__mmask8) __U, __R);
3137 }
3138
3139 extern __inline __m512
3140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3141 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3142 {
3143 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3144 (__v16sf) __B,
3145 (__v16sf) __C,
3146 (__mmask16) -1, __R);
3147 }
3148
3149 extern __inline __m512
3150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3151 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3152 __m512 __C, const int __R)
3153 {
3154 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3155 (__v16sf) __B,
3156 (__v16sf) __C,
3157 (__mmask16) __U, __R);
3158 }
3159
3160 extern __inline __m512
3161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3162 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3163 __mmask16 __U, const int __R)
3164 {
3165 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3166 (__v16sf) __B,
3167 (__v16sf) __C,
3168 (__mmask16) __U, __R);
3169 }
3170
3171 extern __inline __m512
3172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3173 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3174 __m512 __C, const int __R)
3175 {
3176 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3177 (__v16sf) __B,
3178 (__v16sf) __C,
3179 (__mmask16) __U, __R);
3180 }
3181
3182 extern __inline __m512d
3183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3184 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3185 {
3186 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3187 (__v8df) __B,
3188 -(__v8df) __C,
3189 (__mmask8) -1, __R);
3190 }
3191
3192 extern __inline __m512d
3193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3194 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3195 __m512d __C, const int __R)
3196 {
3197 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3198 (__v8df) __B,
3199 (__v8df) __C,
3200 (__mmask8) __U, __R);
3201 }
3202
3203 extern __inline __m512d
3204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3205 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3206 __mmask8 __U, const int __R)
3207 {
3208 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3209 (__v8df) __B,
3210 (__v8df) __C,
3211 (__mmask8) __U, __R);
3212 }
3213
3214 extern __inline __m512d
3215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3216 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3217 __m512d __C, const int __R)
3218 {
3219 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3220 (__v8df) __B,
3221 -(__v8df) __C,
3222 (__mmask8) __U, __R);
3223 }
3224
3225 extern __inline __m512
3226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3227 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3228 {
3229 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3230 (__v16sf) __B,
3231 -(__v16sf) __C,
3232 (__mmask16) -1, __R);
3233 }
3234
3235 extern __inline __m512
3236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3237 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3238 __m512 __C, const int __R)
3239 {
3240 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3241 (__v16sf) __B,
3242 (__v16sf) __C,
3243 (__mmask16) __U, __R);
3244 }
3245
3246 extern __inline __m512
3247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3248 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3249 __mmask16 __U, const int __R)
3250 {
3251 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3252 (__v16sf) __B,
3253 (__v16sf) __C,
3254 (__mmask16) __U, __R);
3255 }
3256
3257 extern __inline __m512
3258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3259 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3260 __m512 __C, const int __R)
3261 {
3262 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3263 (__v16sf) __B,
3264 -(__v16sf) __C,
3265 (__mmask16) __U, __R);
3266 }
3267 #else
/* Macro forms of the fmadd_round_pd intrinsics.  Each expansion is fully
   parenthesized so the leading cast cannot be split from the builtin call
   by a neighbouring operator (sizeof, a vector subscript, ...).  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R))
3279
/* Macro forms of the fmadd_round_ps intrinsics.  Each expansion is fully
   parenthesized so the leading cast cannot be split from the builtin call
   by a neighbouring operator (sizeof, a vector subscript, ...).  */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R))
3291
/* Macro forms of the fmsub_round_pd intrinsics.  Except for the mask3 form,
   which has its own vfmsub builtin, C is negated and passed to a vfmadd
   builtin.  Each expansion is fully parenthesized so the leading cast
   cannot be split from the builtin call by a neighbouring operator.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, -(C), U, R))
3303
/* Macro forms of the fmsub_round_ps intrinsics.  Except for the mask3 form,
   which has its own vfmsub builtin, C is negated and passed to a vfmadd
   builtin.  Each expansion is fully parenthesized so the leading cast
   cannot be split from the builtin call by a neighbouring operator.  */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, -(C), U, R))
3315
/* Macro forms of the fmaddsub_round_pd intrinsics.  Each expansion is fully
   parenthesized so the leading cast cannot be split from the builtin call
   by a neighbouring operator (sizeof, a vector subscript, ...).  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R))

/* Must call the vfmaddsub builtin, as the inline definition of this
   intrinsic does; the plain vfmadd builtin is a different operation.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R))
3327
/* Macro forms of the fmaddsub_round_ps intrinsics.  Each expansion is fully
   parenthesized so the leading cast cannot be split from the builtin call
   by a neighbouring operator (sizeof, a vector subscript, ...).  */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R))
3339
/* Macro forms of the fmsubadd_round_pd intrinsics.  Except for the mask3
   form, which has its own vfmsubadd builtin, C is negated and passed to a
   vfmaddsub builtin.  Each expansion is fully parenthesized so the leading
   cast cannot be split from the builtin call by a neighbouring operator.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R))
3351
/* Macro forms of the fmsubadd_round_ps intrinsics.  Except for the mask3
   form, which has its own vfmsubadd builtin, C is negated and passed to a
   vfmaddsub builtin.  Each expansion is fully parenthesized so the leading
   cast cannot be split from the builtin call by a neighbouring operator.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R))
3363
/* Macro forms of the fnmadd_round_pd intrinsics.  Each expansion is fully
   parenthesized so the leading cast cannot be split from the builtin call
   by a neighbouring operator (sizeof, a vector subscript, ...).  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, C, -1, R))

/* A is passed un-negated here: this form calls the dedicated vfnmadd
   builtin, exactly as the inline definition of this intrinsic does.
   Negating A as well would cancel the builtin's own negation.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, C, U, R))
3375
/* Macro forms of the fnmadd_round_ps intrinsics.  Each expansion is fully
   parenthesized so the leading cast cannot be split from the builtin call
   by a neighbouring operator (sizeof, a vector subscript, ...).  */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, C, -1, R))

/* A is passed un-negated here: this form calls the dedicated vfnmadd
   builtin, exactly as the inline definition of this intrinsic does.
   Negating A as well would cancel the builtin's own negation.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, C, U, R))
3387
/* Macro forms of the fnmsub_round_pd intrinsics.  The unmasked and maskz
   forms negate A and C around a vfmadd builtin; the mask and mask3 forms
   call the dedicated vfnmsub builtins with the operands unchanged.  Each
   expansion is fully parenthesized so the leading cast cannot be split
   from the builtin call by a neighbouring operator.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, -(C), U, R))
3399
/* Macro forms of the fnmsub_round_ps intrinsics.  The unmasked and maskz
   forms negate A and C around a vfmadd builtin; the mask and mask3 forms
   call the dedicated vfnmsub builtins with the operands unchanged.  Each
   expansion is fully parenthesized so the leading cast cannot be split
   from the builtin call by a neighbouring operator.  */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, -(C), U, R))
3411 #endif
3412
3413 extern __inline __m512i
3414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3415 _mm512_abs_epi64 (__m512i __A)
3416 {
3417 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3418 (__v8di)
3419 _mm512_undefined_epi32 (),
3420 (__mmask8) -1);
3421 }
3422
3423 extern __inline __m512i
3424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3426 {
3427 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3428 (__v8di) __W,
3429 (__mmask8) __U);
3430 }
3431
3432 extern __inline __m512i
3433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3434 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3435 {
3436 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3437 (__v8di)
3438 _mm512_setzero_si512 (),
3439 (__mmask8) __U);
3440 }
3441
3442 extern __inline __m512i
3443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3444 _mm512_abs_epi32 (__m512i __A)
3445 {
3446 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3447 (__v16si)
3448 _mm512_undefined_epi32 (),
3449 (__mmask16) -1);
3450 }
3451
3452 extern __inline __m512i
3453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3455 {
3456 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3457 (__v16si) __W,
3458 (__mmask16) __U);
3459 }
3460
3461 extern __inline __m512i
3462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3463 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3464 {
3465 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3466 (__v16si)
3467 _mm512_setzero_si512 (),
3468 (__mmask16) __U);
3469 }
3470
3471 extern __inline __m512
3472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3473 _mm512_broadcastss_ps (__m128 __A)
3474 {
3475 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3476 (__v16sf)
3477 _mm512_undefined_ps (),
3478 (__mmask16) -1);
3479 }
3480
3481 extern __inline __m512
3482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3483 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3484 {
3485 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3486 (__v16sf) __O, __M);
3487 }
3488
3489 extern __inline __m512
3490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3491 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3492 {
3493 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3494 (__v16sf)
3495 _mm512_setzero_ps (),
3496 __M);
3497 }
3498
3499 extern __inline __m512d
3500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501 _mm512_broadcastsd_pd (__m128d __A)
3502 {
3503 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3504 (__v8df)
3505 _mm512_undefined_pd (),
3506 (__mmask8) -1);
3507 }
3508
3509 extern __inline __m512d
3510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3511 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3512 {
3513 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3514 (__v8df) __O, __M);
3515 }
3516
3517 extern __inline __m512d
3518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3519 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3520 {
3521 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3522 (__v8df)
3523 _mm512_setzero_pd (),
3524 __M);
3525 }
3526
3527 extern __inline __m512i
3528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3529 _mm512_broadcastd_epi32 (__m128i __A)
3530 {
3531 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3532 (__v16si)
3533 _mm512_undefined_epi32 (),
3534 (__mmask16) -1);
3535 }
3536
3537 extern __inline __m512i
3538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3539 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3540 {
3541 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3542 (__v16si) __O, __M);
3543 }
3544
3545 extern __inline __m512i
3546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3547 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3548 {
3549 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3550 (__v16si)
3551 _mm512_setzero_si512 (),
3552 __M);
3553 }
3554
3555 extern __inline __m512i
3556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3557 _mm512_set1_epi32 (int __A)
3558 {
3559 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3560 (__v16si)
3561 _mm512_undefined_epi32 (),
3562 (__mmask16)(-1));
3563 }
3564
3565 extern __inline __m512i
3566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3568 {
3569 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3570 __M);
3571 }
3572
3573 extern __inline __m512i
3574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3575 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3576 {
3577 return (__m512i)
3578 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3579 (__v16si) _mm512_setzero_si512 (),
3580 __M);
3581 }
3582
3583 extern __inline __m512i
3584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3585 _mm512_broadcastq_epi64 (__m128i __A)
3586 {
3587 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3588 (__v8di)
3589 _mm512_undefined_epi32 (),
3590 (__mmask8) -1);
3591 }
3592
3593 extern __inline __m512i
3594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3595 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3596 {
3597 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3598 (__v8di) __O, __M);
3599 }
3600
3601 extern __inline __m512i
3602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3603 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3604 {
3605 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3606 (__v8di)
3607 _mm512_setzero_si512 (),
3608 __M);
3609 }
3610
3611 extern __inline __m512i
3612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3613 _mm512_set1_epi64 (long long __A)
3614 {
3615 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3616 (__v8di)
3617 _mm512_undefined_epi32 (),
3618 (__mmask8)(-1));
3619 }
3620
3621 extern __inline __m512i
3622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3623 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3624 {
3625 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3626 __M);
3627 }
3628
3629 extern __inline __m512i
3630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3631 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3632 {
3633 return (__m512i)
3634 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3635 (__v8di) _mm512_setzero_si512 (),
3636 __M);
3637 }
3638
3639 extern __inline __m512
3640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641 _mm512_broadcast_f32x4 (__m128 __A)
3642 {
3643 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3644 (__v16sf)
3645 _mm512_undefined_ps (),
3646 (__mmask16) -1);
3647 }
3648
3649 extern __inline __m512
3650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3652 {
3653 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3654 (__v16sf) __O,
3655 __M);
3656 }
3657
3658 extern __inline __m512
3659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3660 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3661 {
3662 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3663 (__v16sf)
3664 _mm512_setzero_ps (),
3665 __M);
3666 }
3667
3668 extern __inline __m512i
3669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3670 _mm512_broadcast_i32x4 (__m128i __A)
3671 {
3672 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3673 (__v16si)
3674 _mm512_undefined_epi32 (),
3675 (__mmask16) -1);
3676 }
3677
3678 extern __inline __m512i
3679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3681 {
3682 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3683 (__v16si) __O,
3684 __M);
3685 }
3686
3687 extern __inline __m512i
3688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3689 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3690 {
3691 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3692 (__v16si)
3693 _mm512_setzero_si512 (),
3694 __M);
3695 }
3696
3697 extern __inline __m512d
3698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3699 _mm512_broadcast_f64x4 (__m256d __A)
3700 {
3701 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3702 (__v8df)
3703 _mm512_undefined_pd (),
3704 (__mmask8) -1);
3705 }
3706
3707 extern __inline __m512d
3708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3710 {
3711 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3712 (__v8df) __O,
3713 __M);
3714 }
3715
3716 extern __inline __m512d
3717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3718 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3719 {
3720 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3721 (__v8df)
3722 _mm512_setzero_pd (),
3723 __M);
3724 }
3725
3726 extern __inline __m512i
3727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728 _mm512_broadcast_i64x4 (__m256i __A)
3729 {
3730 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3731 (__v8di)
3732 _mm512_undefined_epi32 (),
3733 (__mmask8) -1);
3734 }
3735
3736 extern __inline __m512i
3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3739 {
3740 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3741 (__v8di) __O,
3742 __M);
3743 }
3744
3745 extern __inline __m512i
3746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3747 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3748 {
3749 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3750 (__v8di)
3751 _mm512_setzero_si512 (),
3752 __M);
3753 }
3754
3755 typedef enum
3756 {
3757 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3758 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3759 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3760 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3761 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3762 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3763 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3764 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3765 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3766 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3767 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3768 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3769 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3770 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3771 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3772 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3773 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3774 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3775 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3776 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3777 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3778 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3779 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3780 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3781 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3782 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3783 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3784 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3785 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3786 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3787 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3788 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3789 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3790 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3791 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3792 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3793 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3794 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3795 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3796 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3797 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3798 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3799 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3800 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3801 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3802 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3803 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3804 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3805 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3806 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3807 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3808 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3809 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3810 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3811 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3812 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3813 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3814 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3815 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3816 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3817 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3818 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3819 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3820 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3821 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3822 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3823 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3824 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3825 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3826 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3827 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3828 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3829 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3830 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3831 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3832 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3833 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3834 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3835 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3836 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3837 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3838 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3839 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3840 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3841 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3842 _MM_PERM_DDDD = 0xFF
3843 } _MM_PERM_ENUM;
3844
3845 #ifdef __OPTIMIZE__
3846 extern __inline __m512i
3847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3849 {
3850 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3851 __mask,
3852 (__v16si)
3853 _mm512_undefined_epi32 (),
3854 (__mmask16) -1);
3855 }
3856
3857 extern __inline __m512i
3858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3860 _MM_PERM_ENUM __mask)
3861 {
3862 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3863 __mask,
3864 (__v16si) __W,
3865 (__mmask16) __U);
3866 }
3867
3868 extern __inline __m512i
3869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3871 {
3872 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3873 __mask,
3874 (__v16si)
3875 _mm512_setzero_si512 (),
3876 (__mmask16) __U);
3877 }
3878
3879 extern __inline __m512i
3880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3882 {
3883 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3884 (__v8di) __B, __imm,
3885 (__v8di)
3886 _mm512_undefined_epi32 (),
3887 (__mmask8) -1);
3888 }
3889
3890 extern __inline __m512i
3891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3893 __m512i __B, const int __imm)
3894 {
3895 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3896 (__v8di) __B, __imm,
3897 (__v8di) __W,
3898 (__mmask8) __U);
3899 }
3900
3901 extern __inline __m512i
3902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3904 const int __imm)
3905 {
3906 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3907 (__v8di) __B, __imm,
3908 (__v8di)
3909 _mm512_setzero_si512 (),
3910 (__mmask8) __U);
3911 }
3912
3913 extern __inline __m512i
3914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3915 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3916 {
3917 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3918 (__v16si) __B,
3919 __imm,
3920 (__v16si)
3921 _mm512_undefined_epi32 (),
3922 (__mmask16) -1);
3923 }
3924
3925 extern __inline __m512i
3926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3927 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3928 __m512i __B, const int __imm)
3929 {
3930 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3931 (__v16si) __B,
3932 __imm,
3933 (__v16si) __W,
3934 (__mmask16) __U);
3935 }
3936
3937 extern __inline __m512i
3938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3939 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3940 const int __imm)
3941 {
3942 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3943 (__v16si) __B,
3944 __imm,
3945 (__v16si)
3946 _mm512_setzero_si512 (),
3947 (__mmask16) __U);
3948 }
3949
3950 extern __inline __m512d
3951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3952 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3953 {
3954 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3955 (__v8df) __B, __imm,
3956 (__v8df)
3957 _mm512_undefined_pd (),
3958 (__mmask8) -1);
3959 }
3960
3961 extern __inline __m512d
3962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3963 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3964 __m512d __B, const int __imm)
3965 {
3966 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3967 (__v8df) __B, __imm,
3968 (__v8df) __W,
3969 (__mmask8) __U);
3970 }
3971
3972 extern __inline __m512d
3973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3974 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3975 const int __imm)
3976 {
3977 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3978 (__v8df) __B, __imm,
3979 (__v8df)
3980 _mm512_setzero_pd (),
3981 (__mmask8) __U);
3982 }
3983
3984 extern __inline __m512
3985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3986 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3987 {
3988 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3989 (__v16sf) __B, __imm,
3990 (__v16sf)
3991 _mm512_undefined_ps (),
3992 (__mmask16) -1);
3993 }
3994
3995 extern __inline __m512
3996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3997 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3998 __m512 __B, const int __imm)
3999 {
4000 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4001 (__v16sf) __B, __imm,
4002 (__v16sf) __W,
4003 (__mmask16) __U);
4004 }
4005
4006 extern __inline __m512
4007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4008 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4009 const int __imm)
4010 {
4011 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4012 (__v16sf) __B, __imm,
4013 (__v16sf)
4014 _mm512_setzero_ps (),
4015 (__mmask16) __U);
4016 }
4017
4018 #else
/* Macro forms of the _mm512_*shuffle_epi32 intrinsics, used when
   __OPTIMIZE__ is not defined.  X is the source vector, C the selector,
   W the pass-through vector and U the write mask.  */
#define _mm512_shuffle_epi32(X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))

#define _mm512_maskz_shuffle_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
4033
/* Macro forms of the _mm512_*shuffle_i64x2 intrinsics, used when
   __OPTIMIZE__ is not defined.  X and Y are the sources, C the immediate,
   W the pass-through vector and U the write mask.  */
#define _mm512_shuffle_i64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di)(__m512i)(X), \
     (__v8di)(__m512i)(Y), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di)(__m512i)(X), \
     (__v8di)(__m512i)(Y), \
     (int)(C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di)(__m512i)(X), \
     (__v8di)(__m512i)(Y), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
4051
/* Macro forms of the _mm512_*shuffle_i32x4 intrinsics, used when
   __OPTIMIZE__ is not defined.  X and Y are the sources, C the immediate,
   W the pass-through vector and U the write mask.  */
#define _mm512_shuffle_i32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si)(__m512i)(X), \
     (__v16si)(__m512i)(Y), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si)(__m512i)(X), \
     (__v16si)(__m512i)(Y), \
     (int)(C), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si)(__m512i)(X), \
     (__v16si)(__m512i)(Y), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
4069
/* Macro forms of the _mm512_*shuffle_f64x2 intrinsics, used when
   __OPTIMIZE__ is not defined.  X and Y are the sources, C the immediate,
   W the pass-through vector and U the write mask.  */
#define _mm512_shuffle_f64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
4087
/* Macro forms of the _mm512_*shuffle_f32x4 intrinsics, used when
   __OPTIMIZE__ is not defined.  X and Y are the sources, C the immediate,
   W the pass-through vector and U the write mask.  */
#define _mm512_shuffle_f32x4(X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)-1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
4105 #endif
4106
4107 extern __inline __m512i
4108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4109 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4110 {
4111 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4112 (__v16si) __B,
4113 (__v16si)
4114 _mm512_undefined_epi32 (),
4115 (__mmask16) -1);
4116 }
4117
4118 extern __inline __m512i
4119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4120 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4121 {
4122 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4123 (__v16si) __B,
4124 (__v16si) __W,
4125 (__mmask16) __U);
4126 }
4127
4128 extern __inline __m512i
4129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4130 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4131 {
4132 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4133 (__v16si) __B,
4134 (__v16si)
4135 _mm512_setzero_si512 (),
4136 (__mmask16) __U);
4137 }
4138
4139 extern __inline __m512i
4140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4141 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4142 {
4143 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4144 (__v16si) __B,
4145 (__v16si)
4146 _mm512_undefined_epi32 (),
4147 (__mmask16) -1);
4148 }
4149
4150 extern __inline __m512i
4151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4152 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4153 {
4154 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4155 (__v16si) __B,
4156 (__v16si) __W,
4157 (__mmask16) __U);
4158 }
4159
4160 extern __inline __m512i
4161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4162 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4163 {
4164 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4165 (__v16si) __B,
4166 (__v16si)
4167 _mm512_setzero_si512 (),
4168 (__mmask16) __U);
4169 }
4170
4171 extern __inline __m512i
4172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4173 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4174 {
4175 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4176 (__v8di) __B,
4177 (__v8di)
4178 _mm512_undefined_epi32 (),
4179 (__mmask8) -1);
4180 }
4181
4182 extern __inline __m512i
4183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4184 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4185 {
4186 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4187 (__v8di) __B,
4188 (__v8di) __W,
4189 (__mmask8) __U);
4190 }
4191
4192 extern __inline __m512i
4193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4194 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4195 {
4196 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4197 (__v8di) __B,
4198 (__v8di)
4199 _mm512_setzero_si512 (),
4200 (__mmask8) __U);
4201 }
4202
4203 extern __inline __m512i
4204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4205 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4206 {
4207 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4208 (__v8di) __B,
4209 (__v8di)
4210 _mm512_undefined_epi32 (),
4211 (__mmask8) -1);
4212 }
4213
4214 extern __inline __m512i
4215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4216 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4217 {
4218 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4219 (__v8di) __B,
4220 (__v8di) __W,
4221 (__mmask8) __U);
4222 }
4223
4224 extern __inline __m512i
4225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4226 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4227 {
4228 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4229 (__v8di) __B,
4230 (__v8di)
4231 _mm512_setzero_si512 (),
4232 (__mmask8) __U);
4233 }
4234
4235 #ifdef __OPTIMIZE__
4236 extern __inline __m256i
4237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4238 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4239 {
4240 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4241 (__v8si)
4242 _mm256_undefined_si256 (),
4243 (__mmask8) -1, __R);
4244 }
4245
4246 extern __inline __m256i
4247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4248 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4249 const int __R)
4250 {
4251 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4252 (__v8si) __W,
4253 (__mmask8) __U, __R);
4254 }
4255
4256 extern __inline __m256i
4257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4258 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4259 {
4260 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4261 (__v8si)
4262 _mm256_setzero_si256 (),
4263 (__mmask8) __U, __R);
4264 }
4265
4266 extern __inline __m256i
4267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4268 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4269 {
4270 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4271 (__v8si)
4272 _mm256_undefined_si256 (),
4273 (__mmask8) -1, __R);
4274 }
4275
4276 extern __inline __m256i
4277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4278 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4279 const int __R)
4280 {
4281 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4282 (__v8si) __W,
4283 (__mmask8) __U, __R);
4284 }
4285
4286 extern __inline __m256i
4287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4288 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4289 {
4290 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4291 (__v8si)
4292 _mm256_setzero_si256 (),
4293 (__mmask8) __U, __R);
4294 }
4295 #else
/* Macro forms of the _mm512_*cvtt_roundpd_ep{i,u}32 intrinsics, used when
   __OPTIMIZE__ is not defined.  A is the source, B the trailing immediate,
   W the pass-through vector and U the write mask.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
     A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
     A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
     A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvtt_roundpd_epu32(A, B) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
     A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
     A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
     A, (__v8si)_mm256_setzero_si256(), U, B))
4313 #endif
4314
4315 #ifdef __OPTIMIZE__
4316 extern __inline __m256i
4317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4318 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4319 {
4320 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4321 (__v8si)
4322 _mm256_undefined_si256 (),
4323 (__mmask8) -1, __R);
4324 }
4325
4326 extern __inline __m256i
4327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4328 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4329 const int __R)
4330 {
4331 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4332 (__v8si) __W,
4333 (__mmask8) __U, __R);
4334 }
4335
4336 extern __inline __m256i
4337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4339 {
4340 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4341 (__v8si)
4342 _mm256_setzero_si256 (),
4343 (__mmask8) __U, __R);
4344 }
4345
4346 extern __inline __m256i
4347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4348 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4349 {
4350 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4351 (__v8si)
4352 _mm256_undefined_si256 (),
4353 (__mmask8) -1, __R);
4354 }
4355
4356 extern __inline __m256i
4357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4358 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4359 const int __R)
4360 {
4361 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4362 (__v8si) __W,
4363 (__mmask8) __U, __R);
4364 }
4365
4366 extern __inline __m256i
4367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4369 {
4370 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4371 (__v8si)
4372 _mm256_setzero_si256 (),
4373 (__mmask8) __U, __R);
4374 }
4375 #else
/* Macro forms of the _mm512_*cvt_roundpd_ep{i,u}32 intrinsics, used when
   __OPTIMIZE__ is not defined.  A is the source, B the trailing immediate,
   W the pass-through vector and U the write mask.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
     A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
     A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
     A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvt_roundpd_epu32(A, B) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
     A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
     A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
     A, (__v8si)_mm256_setzero_si256(), U, B))
4393 #endif
4394
4395 #ifdef __OPTIMIZE__
4396 extern __inline __m512i
4397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4398 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4399 {
4400 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4401 (__v16si)
4402 _mm512_undefined_epi32 (),
4403 (__mmask16) -1, __R);
4404 }
4405
4406 extern __inline __m512i
4407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4408 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4409 const int __R)
4410 {
4411 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4412 (__v16si) __W,
4413 (__mmask16) __U, __R);
4414 }
4415
4416 extern __inline __m512i
4417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4418 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4419 {
4420 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4421 (__v16si)
4422 _mm512_setzero_si512 (),
4423 (__mmask16) __U, __R);
4424 }
4425
4426 extern __inline __m512i
4427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4428 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4429 {
4430 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4431 (__v16si)
4432 _mm512_undefined_epi32 (),
4433 (__mmask16) -1, __R);
4434 }
4435
4436 extern __inline __m512i
4437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4438 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4439 const int __R)
4440 {
4441 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4442 (__v16si) __W,
4443 (__mmask16) __U, __R);
4444 }
4445
4446 extern __inline __m512i
4447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4448 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4449 {
4450 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4451 (__v16si)
4452 _mm512_setzero_si512 (),
4453 (__mmask16) __U, __R);
4454 }
4455 #else
/* Macro forms of the _mm512_*cvtt_roundps_ep{i,u}32 intrinsics, used when
   __OPTIMIZE__ is not defined.  A is the source, B the trailing immediate,
   W the pass-through vector and U the write mask.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
     A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
     A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
     A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvtt_roundps_epu32(A, B) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
     A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
     A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
     A, (__v16si)_mm512_setzero_si512 (), U, B))
4473 #endif
4474
4475 #ifdef __OPTIMIZE__
4476 extern __inline __m512i
4477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4478 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4479 {
4480 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4481 (__v16si)
4482 _mm512_undefined_epi32 (),
4483 (__mmask16) -1, __R);
4484 }
4485
4486 extern __inline __m512i
4487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4489 const int __R)
4490 {
4491 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4492 (__v16si) __W,
4493 (__mmask16) __U, __R);
4494 }
4495
4496 extern __inline __m512i
4497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4498 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4499 {
4500 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4501 (__v16si)
4502 _mm512_setzero_si512 (),
4503 (__mmask16) __U, __R);
4504 }
4505
4506 extern __inline __m512i
4507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4508 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4509 {
4510 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4511 (__v16si)
4512 _mm512_undefined_epi32 (),
4513 (__mmask16) -1, __R);
4514 }
4515
4516 extern __inline __m512i
4517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4518 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4519 const int __R)
4520 {
4521 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4522 (__v16si) __W,
4523 (__mmask16) __U, __R);
4524 }
4525
4526 extern __inline __m512i
4527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4528 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4529 {
4530 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4531 (__v16si)
4532 _mm512_setzero_si512 (),
4533 (__mmask16) __U, __R);
4534 }
4535 #else
/* Macro forms of the _mm512_*cvt_roundps_ep{i,u}32 intrinsics, used when
   __OPTIMIZE__ is not defined.  A is the source, B the trailing immediate,
   W the pass-through vector and U the write mask.  */
#define _mm512_cvt_roundps_epi32(A, B) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
     A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
     A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
     A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvt_roundps_epu32(A, B) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
     A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
     A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
     A, (__v16si)_mm512_setzero_si512 (), U, B))
4553 #endif
4554
4555 extern __inline __m128d
4556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4557 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4558 {
4559 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4560 }
4561
4562 #ifdef __x86_64__
4563 #ifdef __OPTIMIZE__
4564 extern __inline __m128d
4565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4566 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4567 {
4568 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4569 }
4570
4571 extern __inline __m128d
4572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4573 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4574 {
4575 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4576 }
4577
4578 extern __inline __m128d
4579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4581 {
4582 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4583 }
4584 #else
/* Macro forms of the _mm_cvt_round{u64,i64,si64}_sd intrinsics, used when
   __OPTIMIZE__ is not defined.  A is the __m128d operand, B the integer
   and C the trailing immediate.  The whole expansion is parenthesized so
   that a postfix operator written after the macro call applies to the
   cast result rather than to the builtin call.  */
#define _mm_cvt_roundu64_sd(A, B, C) \
  ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C) \
  ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C) \
  ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
4593 #endif
4594
4595 #endif
4596
4597 #ifdef __OPTIMIZE__
4598 extern __inline __m128
4599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4600 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4601 {
4602 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4603 }
4604
4605 extern __inline __m128
4606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4607 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4608 {
4609 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4610 }
4611
4612 extern __inline __m128
4613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4614 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4615 {
4616 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4617 }
4618 #else
/* Macro forms of the _mm_cvt_round{u32,i32,si32}_ss intrinsics, used when
   __OPTIMIZE__ is not defined.  A is the __m128 operand, B the integer and
   C the trailing immediate.  The whole expansion is parenthesized so that
   a postfix operator written after the macro call applies to the cast
   result rather than to the builtin call.  */
#define _mm_cvt_roundu32_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
4627 #endif
4628
4629 #ifdef __x86_64__
4630 #ifdef __OPTIMIZE__
4631 extern __inline __m128
4632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4634 {
4635 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4636 }
4637
4638 extern __inline __m128
4639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4640 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4641 {
4642 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4643 }
4644
4645 extern __inline __m128
4646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4647 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4648 {
4649 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4650 }
4651 #else
/* Macro forms of the _mm_cvt_round{u64,i64,si64}_ss intrinsics, used when
   __OPTIMIZE__ is not defined.  A is the __m128 operand, B the integer and
   C the trailing immediate.  The whole expansion is parenthesized so that
   a postfix operator written after the macro call applies to the cast
   result rather than to the builtin call.  */
#define _mm_cvt_roundu64_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
4660 #endif
4661
4662 #endif
4663
4664 extern __inline __m128i
4665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4666 _mm512_cvtepi32_epi8 (__m512i __A)
4667 {
4668 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4669 (__v16qi)
4670 _mm_undefined_si128 (),
4671 (__mmask16) -1);
4672 }
4673
4674 extern __inline void
4675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4677 {
4678 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4679 }
4680
4681 extern __inline __m128i
4682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4683 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4684 {
4685 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4686 (__v16qi) __O, __M);
4687 }
4688
4689 extern __inline __m128i
4690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4691 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4692 {
4693 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4694 (__v16qi)
4695 _mm_setzero_si128 (),
4696 __M);
4697 }
4698
4699 extern __inline __m128i
4700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4701 _mm512_cvtsepi32_epi8 (__m512i __A)
4702 {
4703 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4704 (__v16qi)
4705 _mm_undefined_si128 (),
4706 (__mmask16) -1);
4707 }
4708
4709 extern __inline void
4710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4711 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4712 {
4713 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4714 }
4715
4716 extern __inline __m128i
4717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4718 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4719 {
4720 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4721 (__v16qi) __O, __M);
4722 }
4723
4724 extern __inline __m128i
4725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4726 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4727 {
4728 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4729 (__v16qi)
4730 _mm_setzero_si128 (),
4731 __M);
4732 }
4733
4734 extern __inline __m128i
4735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4736 _mm512_cvtusepi32_epi8 (__m512i __A)
4737 {
4738 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4739 (__v16qi)
4740 _mm_undefined_si128 (),
4741 (__mmask16) -1);
4742 }
4743
4744 extern __inline void
4745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4747 {
4748 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4749 }
4750
4751 extern __inline __m128i
4752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4753 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4754 {
4755 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4756 (__v16qi) __O,
4757 __M);
4758 }
4759
4760 extern __inline __m128i
4761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4763 {
4764 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4765 (__v16qi)
4766 _mm_setzero_si128 (),
4767 __M);
4768 }
4769
4770 extern __inline __m256i
4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772 _mm512_cvtepi32_epi16 (__m512i __A)
4773 {
4774 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4775 (__v16hi)
4776 _mm256_undefined_si256 (),
4777 (__mmask16) -1);
4778 }
4779
4780 extern __inline void
4781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4782 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4783 {
4784 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4785 }
4786
4787 extern __inline __m256i
4788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4789 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4790 {
4791 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4792 (__v16hi) __O, __M);
4793 }
4794
4795 extern __inline __m256i
4796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4797 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4798 {
4799 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4800 (__v16hi)
4801 _mm256_setzero_si256 (),
4802 __M);
4803 }
4804
4805 extern __inline __m256i
4806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4807 _mm512_cvtsepi32_epi16 (__m512i __A)
4808 {
4809 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4810 (__v16hi)
4811 _mm256_undefined_si256 (),
4812 (__mmask16) -1);
4813 }
4814
4815 extern __inline void
4816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4817 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4818 {
4819 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4820 }
4821
4822 extern __inline __m256i
4823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4824 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4825 {
4826 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4827 (__v16hi) __O, __M);
4828 }
4829
4830 extern __inline __m256i
4831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4832 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4833 {
4834 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4835 (__v16hi)
4836 _mm256_setzero_si256 (),
4837 __M);
4838 }
4839
4840 extern __inline __m256i
4841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4842 _mm512_cvtusepi32_epi16 (__m512i __A)
4843 {
4844 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4845 (__v16hi)
4846 _mm256_undefined_si256 (),
4847 (__mmask16) -1);
4848 }
4849
4850 extern __inline void
4851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4853 {
4854 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4855 }
4856
4857 extern __inline __m256i
4858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4860 {
4861 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4862 (__v16hi) __O,
4863 __M);
4864 }
4865
4866 extern __inline __m256i
4867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4868 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4869 {
4870 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4871 (__v16hi)
4872 _mm256_setzero_si256 (),
4873 __M);
4874 }
4875
4876 extern __inline __m256i
4877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4878 _mm512_cvtepi64_epi32 (__m512i __A)
4879 {
4880 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4881 (__v8si)
4882 _mm256_undefined_si256 (),
4883 (__mmask8) -1);
4884 }
4885
4886 extern __inline void
4887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4888 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4889 {
4890 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4891 }
4892
4893 extern __inline __m256i
4894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4895 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4896 {
4897 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4898 (__v8si) __O, __M);
4899 }
4900
4901 extern __inline __m256i
4902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4904 {
4905 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4906 (__v8si)
4907 _mm256_setzero_si256 (),
4908 __M);
4909 }
4910
4911 extern __inline __m256i
4912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4913 _mm512_cvtsepi64_epi32 (__m512i __A)
4914 {
4915 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4916 (__v8si)
4917 _mm256_undefined_si256 (),
4918 (__mmask8) -1);
4919 }
4920
4921 extern __inline void
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4924 {
4925 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4926 }
4927
4928 extern __inline __m256i
4929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4931 {
4932 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4933 (__v8si) __O, __M);
4934 }
4935
4936 extern __inline __m256i
4937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4938 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4939 {
4940 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4941 (__v8si)
4942 _mm256_setzero_si256 (),
4943 __M);
4944 }
4945
4946 extern __inline __m256i
4947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4948 _mm512_cvtusepi64_epi32 (__m512i __A)
4949 {
4950 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4951 (__v8si)
4952 _mm256_undefined_si256 (),
4953 (__mmask8) -1);
4954 }
4955
4956 extern __inline void
4957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4959 {
4960 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4961 }
4962
4963 extern __inline __m256i
4964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4965 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4966 {
4967 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4968 (__v8si) __O, __M);
4969 }
4970
4971 extern __inline __m256i
4972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4973 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4974 {
4975 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4976 (__v8si)
4977 _mm256_setzero_si256 (),
4978 __M);
4979 }
4980
4981 extern __inline __m128i
4982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4983 _mm512_cvtepi64_epi16 (__m512i __A)
4984 {
4985 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4986 (__v8hi)
4987 _mm_undefined_si128 (),
4988 (__mmask8) -1);
4989 }
4990
4991 extern __inline void
4992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4993 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4994 {
4995 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4996 }
4997
4998 extern __inline __m128i
4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5001 {
5002 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5003 (__v8hi) __O, __M);
5004 }
5005
5006 extern __inline __m128i
5007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5008 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5009 {
5010 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5011 (__v8hi)
5012 _mm_setzero_si128 (),
5013 __M);
5014 }
5015
5016 extern __inline __m128i
5017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5018 _mm512_cvtsepi64_epi16 (__m512i __A)
5019 {
5020 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5021 (__v8hi)
5022 _mm_undefined_si128 (),
5023 (__mmask8) -1);
5024 }
5025
5026 extern __inline void
5027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5028 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5029 {
5030 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5031 }
5032
5033 extern __inline __m128i
5034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5036 {
5037 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5038 (__v8hi) __O, __M);
5039 }
5040
5041 extern __inline __m128i
5042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5043 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5044 {
5045 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5046 (__v8hi)
5047 _mm_setzero_si128 (),
5048 __M);
5049 }
5050
5051 extern __inline __m128i
5052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5053 _mm512_cvtusepi64_epi16 (__m512i __A)
5054 {
5055 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5056 (__v8hi)
5057 _mm_undefined_si128 (),
5058 (__mmask8) -1);
5059 }
5060
5061 extern __inline void
5062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5063 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5064 {
5065 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5066 }
5067
5068 extern __inline __m128i
5069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5070 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5071 {
5072 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5073 (__v8hi) __O, __M);
5074 }
5075
5076 extern __inline __m128i
5077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5078 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5079 {
5080 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5081 (__v8hi)
5082 _mm_setzero_si128 (),
5083 __M);
5084 }
5085
5086 extern __inline __m128i
5087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5088 _mm512_cvtepi64_epi8 (__m512i __A)
5089 {
5090 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5091 (__v16qi)
5092 _mm_undefined_si128 (),
5093 (__mmask8) -1);
5094 }
5095
5096 extern __inline void
5097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5098 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5099 {
5100 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5101 }
5102
5103 extern __inline __m128i
5104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5106 {
5107 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5108 (__v16qi) __O, __M);
5109 }
5110
5111 extern __inline __m128i
5112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5113 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5114 {
5115 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5116 (__v16qi)
5117 _mm_setzero_si128 (),
5118 __M);
5119 }
5120
5121 extern __inline __m128i
5122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5123 _mm512_cvtsepi64_epi8 (__m512i __A)
5124 {
5125 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5126 (__v16qi)
5127 _mm_undefined_si128 (),
5128 (__mmask8) -1);
5129 }
5130
5131 extern __inline void
5132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5134 {
5135 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5136 }
5137
5138 extern __inline __m128i
5139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5141 {
5142 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5143 (__v16qi) __O, __M);
5144 }
5145
5146 extern __inline __m128i
5147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5148 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5149 {
5150 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5151 (__v16qi)
5152 _mm_setzero_si128 (),
5153 __M);
5154 }
5155
5156 extern __inline __m128i
5157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5158 _mm512_cvtusepi64_epi8 (__m512i __A)
5159 {
5160 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5161 (__v16qi)
5162 _mm_undefined_si128 (),
5163 (__mmask8) -1);
5164 }
5165
5166 extern __inline void
5167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5169 {
5170 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5171 }
5172
5173 extern __inline __m128i
5174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5175 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5176 {
5177 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5178 (__v16qi) __O,
5179 __M);
5180 }
5181
5182 extern __inline __m128i
5183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5185 {
5186 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5187 (__v16qi)
5188 _mm_setzero_si128 (),
5189 __M);
5190 }
5191
5192 extern __inline __m512d
5193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5194 _mm512_cvtepi32_pd (__m256i __A)
5195 {
5196 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5197 (__v8df)
5198 _mm512_undefined_pd (),
5199 (__mmask8) -1);
5200 }
5201
5202 extern __inline __m512d
5203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5204 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5205 {
5206 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5207 (__v8df) __W,
5208 (__mmask8) __U);
5209 }
5210
5211 extern __inline __m512d
5212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5213 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5214 {
5215 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5216 (__v8df)
5217 _mm512_setzero_pd (),
5218 (__mmask8) __U);
5219 }
5220
5221 extern __inline __m512d
5222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5223 _mm512_cvtepu32_pd (__m256i __A)
5224 {
5225 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5226 (__v8df)
5227 _mm512_undefined_pd (),
5228 (__mmask8) -1);
5229 }
5230
5231 extern __inline __m512d
5232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5233 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5234 {
5235 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5236 (__v8df) __W,
5237 (__mmask8) __U);
5238 }
5239
5240 extern __inline __m512d
5241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5242 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5243 {
5244 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5245 (__v8df)
5246 _mm512_setzero_pd (),
5247 (__mmask8) __U);
5248 }
5249
5250 #ifdef __OPTIMIZE__
5251 extern __inline __m512
5252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5253 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5254 {
5255 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5256 (__v16sf)
5257 _mm512_undefined_ps (),
5258 (__mmask16) -1, __R);
5259 }
5260
5261 extern __inline __m512
5262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5263 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5264 const int __R)
5265 {
5266 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5267 (__v16sf) __W,
5268 (__mmask16) __U, __R);
5269 }
5270
5271 extern __inline __m512
5272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5273 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5274 {
5275 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5276 (__v16sf)
5277 _mm512_setzero_ps (),
5278 (__mmask16) __U, __R);
5279 }
5280
5281 extern __inline __m512
5282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5283 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5284 {
5285 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5286 (__v16sf)
5287 _mm512_undefined_ps (),
5288 (__mmask16) -1, __R);
5289 }
5290
5291 extern __inline __m512
5292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5293 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5294 const int __R)
5295 {
5296 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5297 (__v16sf) __W,
5298 (__mmask16) __U, __R);
5299 }
5300
5301 extern __inline __m512
5302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5303 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5304 {
5305 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5306 (__v16sf)
5307 _mm512_setzero_ps (),
5308 (__mmask16) __U, __R);
5309 }
5310
5311 #else
/* Macro form of _mm512_cvt_roundepi32_ps, used when __OPTIMIZE__ is not
   defined.  A is the integer source and B the rounding mode; the
   pass-through is undefined and the mask all ones, as in the inline
   form.  The expansion and every argument are parenthesized so the
   macro can be used inside any larger expression.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A), \
    (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))
5314
/* Macro form of _mm512_mask_cvt_roundepi32_ps, used when __OPTIMIZE__
   is not defined.  W is the pass-through vector, U the write mask, A
   the integer source and B the rounding mode.  The operands get the
   same casts as in the inline form, and the expansion and every
   argument are parenthesized so the macro can be used inside any
   larger expression.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A), \
    (__v16sf)(__m512) (W), (__mmask16) (U), (B)))
5317
/* Macro form of _mm512_maskz_cvt_roundepi32_ps, used when __OPTIMIZE__
   is not defined.  U is the write mask, A the integer source and B the
   rounding mode; the pass-through vector is all zeros.  The expansion
   and every argument are parenthesized so the macro can be used inside
   any larger expression.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (B)))
5320
/* Macro form of _mm512_cvt_roundepu32_ps, used when __OPTIMIZE__ is not
   defined.  A is the integer source and B the rounding mode; the
   pass-through is undefined and the mask all ones, as in the inline
   form.  The expansion and every argument are parenthesized so the
   macro can be used inside any larger expression.  */
#define _mm512_cvt_roundepu32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A), \
    (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))
5323
/* Macro form of _mm512_mask_cvt_roundepu32_ps, used when __OPTIMIZE__
   is not defined.  W is the pass-through vector, U the write mask, A
   the integer source and B the rounding mode.  The operands get the
   same casts as in the inline form, and the expansion and every
   argument are parenthesized so the macro can be used inside any
   larger expression.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A), \
    (__v16sf)(__m512) (W), (__mmask16) (U), (B)))
5326
/* Macro form of _mm512_maskz_cvt_roundepu32_ps, used when __OPTIMIZE__
   is not defined.  U is the write mask, A the integer source and B the
   rounding mode; the pass-through vector is all zeros.  The expansion
   and every argument are parenthesized so the macro can be used inside
   any larger expression.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (B)))
5329 #endif
5330
5331 #ifdef __OPTIMIZE__
5332 extern __inline __m256d
5333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5334 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5335 {
5336 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5337 __imm,
5338 (__v4df)
5339 _mm256_undefined_pd (),
5340 (__mmask8) -1);
5341 }
5342
5343 extern __inline __m256d
5344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5345 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5346 const int __imm)
5347 {
5348 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5349 __imm,
5350 (__v4df) __W,
5351 (__mmask8) __U);
5352 }
5353
5354 extern __inline __m256d
5355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5356 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5357 {
5358 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5359 __imm,
5360 (__v4df)
5361 _mm256_setzero_pd (),
5362 (__mmask8) __U);
5363 }
5364
5365 extern __inline __m128
5366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5367 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5368 {
5369 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5370 __imm,
5371 (__v4sf)
5372 _mm_undefined_ps (),
5373 (__mmask8) -1);
5374 }
5375
5376 extern __inline __m128
5377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5378 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5379 const int __imm)
5380 {
5381 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5382 __imm,
5383 (__v4sf) __W,
5384 (__mmask8) __U);
5385 }
5386
5387 extern __inline __m128
5388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5389 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5390 {
5391 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5392 __imm,
5393 (__v4sf)
5394 _mm_setzero_ps (),
5395 (__mmask8) __U);
5396 }
5397
5398 extern __inline __m256i
5399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5400 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5401 {
5402 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5403 __imm,
5404 (__v4di)
5405 _mm256_undefined_si256 (),
5406 (__mmask8) -1);
5407 }
5408
5409 extern __inline __m256i
5410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5411 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5412 const int __imm)
5413 {
5414 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5415 __imm,
5416 (__v4di) __W,
5417 (__mmask8) __U);
5418 }
5419
5420 extern __inline __m256i
5421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5422 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5423 {
5424 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5425 __imm,
5426 (__v4di)
5427 _mm256_setzero_si256 (),
5428 (__mmask8) __U);
5429 }
5430
5431 extern __inline __m128i
5432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5433 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5434 {
5435 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5436 __imm,
5437 (__v4si)
5438 _mm_undefined_si128 (),
5439 (__mmask8) -1);
5440 }
5441
5442 extern __inline __m128i
5443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5444 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5445 const int __imm)
5446 {
5447 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5448 __imm,
5449 (__v4si) __W,
5450 (__mmask8) __U);
5451 }
5452
5453 extern __inline __m128i
5454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5455 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5456 {
5457 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5458 __imm,
5459 (__v4si)
5460 _mm_setzero_si128 (),
5461 (__mmask8) __U);
5462 }
5463 #else
5464
/* Macro form of _mm512_extractf64x4_pd, used when __OPTIMIZE__ is not
   defined: X is the source, C the selector; undefined pass-through and
   all-ones mask.  */
#define _mm512_extractf64x4_pd(X, C) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X), (int) (C), \
                                     (__v4df) (__m256d) \
                                     _mm256_undefined_pd (), \
                                     (__mmask8) -1))
5470
/* Macro form of _mm512_mask_extractf64x4_pd, used when __OPTIMIZE__ is
   not defined: W is the pass-through, U the mask, X the source and C
   the selector.  */
#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X), (int) (C), \
                                     (__v4df) (__m256d) (W), \
                                     (__mmask8) (U)))
5476
/* Macro form of _mm512_maskz_extractf64x4_pd, used when __OPTIMIZE__ is
   not defined: U is the mask, X the source and C the selector; the
   pass-through is all zeros.  */
#define _mm512_maskz_extractf64x4_pd(U, X, C) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X), (int) (C), \
                                     (__v4df) (__m256d) \
                                     _mm256_setzero_pd (), \
                                     (__mmask8) (U)))
5482
/* Macro form of _mm512_extractf32x4_ps, used when __OPTIMIZE__ is not
   defined: X is the source, C the selector; undefined pass-through and
   all-ones mask.  */
#define _mm512_extractf32x4_ps(X, C) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X), (int) (C), \
                                     (__v4sf) (__m128) \
                                     _mm_undefined_ps (), \
                                     (__mmask8) -1))
5488
/* Macro form of _mm512_mask_extractf32x4_ps, used when __OPTIMIZE__ is
   not defined: W is the pass-through, U the mask, X the source and C
   the selector.  */
#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X), (int) (C), \
                                     (__v4sf) (__m128) (W), \
                                     (__mmask8) (U)))
5494
/* Macro form of _mm512_maskz_extractf32x4_ps, used when __OPTIMIZE__ is
   not defined: U is the mask, X the source and C the selector; the
   pass-through is all zeros.  */
#define _mm512_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X), (int) (C), \
                                     (__v4sf) (__m128) \
                                     _mm_setzero_ps (), \
                                     (__mmask8) (U)))
5500
/* Macro form of _mm512_extracti64x4_epi64, used when __OPTIMIZE__ is
   not defined: X is the source, C the selector; undefined pass-through
   and all-ones mask.  */
#define _mm512_extracti64x4_epi64(X, C) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X), (int) (C), \
                                     (__v4di) (__m256i) \
                                     _mm256_undefined_si256 (), \
                                     (__mmask8) -1))
5506
/* Macro form of _mm512_mask_extracti64x4_epi64, used when __OPTIMIZE__
   is not defined: W is the pass-through, U the mask, X the source and
   C the selector.  */
#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X), (int) (C), \
                                     (__v4di) (__m256i) (W), \
                                     (__mmask8) (U)))
5512
/* Macro form of _mm512_maskz_extracti64x4_epi64, used when __OPTIMIZE__
   is not defined: U is the mask, X the source and C the selector; the
   pass-through is all zeros.  */
#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X), (int) (C), \
                                     (__v4di) (__m256i) \
                                     _mm256_setzero_si256 (), \
                                     (__mmask8) (U)))
5518
/* Macro form of _mm512_extracti32x4_epi32, used when __OPTIMIZE__ is
   not defined: X is the source, C the selector; undefined pass-through
   and all-ones mask.  */
#define _mm512_extracti32x4_epi32(X, C) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X), (int) (C), \
                                     (__v4si) (__m128i) \
                                     _mm_undefined_si128 (), \
                                     (__mmask8) -1))
5524
/* Macro form of _mm512_mask_extracti32x4_epi32, used when __OPTIMIZE__
   is not defined: W is the pass-through, U the mask, X the source and
   C the selector.  */
#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X), (int) (C), \
                                     (__v4si) (__m128i) (W), \
                                     (__mmask8) (U)))
5530
/* Macro form of _mm512_maskz_extracti32x4_epi32, used when __OPTIMIZE__
   is not defined: U is the mask, X the source and C the selector; the
   pass-through is all zeros.  */
#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X), (int) (C), \
                                     (__v4si) (__m128i) \
                                     _mm_setzero_si128 (), \
                                     (__mmask8) (U)))
5536 #endif
5537
5538 #ifdef __OPTIMIZE__
5539 extern __inline __m512i
5540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5541 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5542 {
5543 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5544 (__v4si) __B,
5545 __imm,
5546 (__v16si) __A, -1);
5547 }
5548
5549 extern __inline __m512
5550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5552 {
5553 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5554 (__v4sf) __B,
5555 __imm,
5556 (__v16sf) __A, -1);
5557 }
5558
5559 extern __inline __m512i
5560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5561 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5562 {
5563 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5564 (__v4di) __B,
5565 __imm,
5566 (__v8di)
5567 _mm512_undefined_epi32 (),
5568 (__mmask8) -1);
5569 }
5570
5571 extern __inline __m512i
5572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5573 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5574 __m256i __B, const int __imm)
5575 {
5576 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5577 (__v4di) __B,
5578 __imm,
5579 (__v8di) __W,
5580 (__mmask8) __U);
5581 }
5582
5583 extern __inline __m512i
5584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5586 const int __imm)
5587 {
5588 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5589 (__v4di) __B,
5590 __imm,
5591 (__v8di)
5592 _mm512_setzero_si512 (),
5593 (__mmask8) __U);
5594 }
5595
5596 extern __inline __m512d
5597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5598 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5599 {
5600 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5601 (__v4df) __B,
5602 __imm,
5603 (__v8df)
5604 _mm512_undefined_pd (),
5605 (__mmask8) -1);
5606 }
5607
5608 extern __inline __m512d
5609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5610 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5611 __m256d __B, const int __imm)
5612 {
5613 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5614 (__v4df) __B,
5615 __imm,
5616 (__v8df) __W,
5617 (__mmask8) __U);
5618 }
5619
5620 extern __inline __m512d
5621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5622 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5623 const int __imm)
5624 {
5625 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5626 (__v4df) __B,
5627 __imm,
5628 (__v8df)
5629 _mm512_setzero_pd (),
5630 (__mmask8) __U);
5631 }
5632 #else
/* Macro form of _mm512_insertf32x4, used when __OPTIMIZE__ is not
   defined: insert the four floats of Y into X at the position selected
   by C.  The mask is all ones, so the pass-through operand is never
   selected; an undefined vector is passed for it (as the 64x4 insert
   macros do) instead of a second copy of X, so that X is expanded, and
   therefore evaluated, exactly once.  */
#define _mm512_insertf32x4(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), \
    (__v16sf)(__m512) _mm512_undefined_ps (), \
    (__mmask16)(-1)))
5636
/* Macro form of _mm512_inserti32x4, used when __OPTIMIZE__ is not
   defined: insert the four 32-bit integers of Y into X at the position
   selected by C.  The mask is all ones, so the pass-through operand is
   never selected; an undefined vector is passed for it (as the 64x4
   insert macros do) instead of a second copy of X, so that X is
   expanded, and therefore evaluated, exactly once.  */
#define _mm512_inserti32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), \
    (__v16si)(__m512i) _mm512_undefined_epi32 (), \
    (__mmask16)(-1)))
5640
/* Macro form of _mm512_insertf64x4, used when __OPTIMIZE__ is not
   defined: X is the destination, Y the inserted part, C the selector;
   undefined pass-through and all-ones mask.  */
#define _mm512_insertf64x4(X, Y, C) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X), \
                                    (__v4df) (__m256d) (Y), \
                                    (int) (C), \
                                    (__v8df) (__m512d) \
                                    _mm512_undefined_pd (), \
                                    (__mmask8) -1))
5646
/* Macro form of _mm512_mask_insertf64x4, used when __OPTIMIZE__ is not
   defined: W is the pass-through, U the mask, X the destination, Y the
   inserted part and C the selector.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X), \
                                    (__v4df) (__m256d) (Y), \
                                    (int) (C), \
                                    (__v8df) (__m512d) (W), \
                                    (__mmask8) (U)))
5652
/* Macro form of _mm512_maskz_insertf64x4, used when __OPTIMIZE__ is not
   defined: U is the mask, X the destination, Y the inserted part and C
   the selector; the pass-through is all zeros.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X), \
                                    (__v4df) (__m256d) (Y), \
                                    (int) (C), \
                                    (__v8df) (__m512d) \
                                    _mm512_setzero_pd (), \
                                    (__mmask8) (U)))
5658
/* Macro form of _mm512_inserti64x4, used when __OPTIMIZE__ is not
   defined: X is the destination, Y the inserted part, C the selector;
   undefined pass-through and all-ones mask.  */
#define _mm512_inserti64x4(X, Y, C) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X), \
                                    (__v4di) (__m256i) (Y), \
                                    (int) (C), \
                                    (__v8di) (__m512i) \
                                    _mm512_undefined_epi32 (), \
                                    (__mmask8) -1))
5664
/* Macro form of _mm512_mask_inserti64x4, used when __OPTIMIZE__ is not
   defined: W is the pass-through, U the mask, X the destination, Y the
   inserted part and C the selector.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X), \
                                    (__v4di) (__m256i) (Y), \
                                    (int) (C), \
                                    (__v8di) (__m512i) (W), \
                                    (__mmask8) (U)))
5670
/* Macro form of _mm512_maskz_inserti64x4, used when __OPTIMIZE__ is not
   defined: U is the mask, X the destination, Y the inserted part and C
   the selector; the pass-through is all zeros.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X), \
                                    (__v4di) (__m256i) (Y), \
                                    (int) (C), \
                                    (__v8di) (__m512i) \
                                    _mm512_setzero_si512 (), \
                                    (__mmask8) (U)))
5676 #endif
5677
5678 extern __inline __m512d
5679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5680 _mm512_loadu_pd (void const *__P)
5681 {
5682 return *(__m512d_u *)__P;
5683 }
5684
5685 extern __inline __m512d
5686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5688 {
5689 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
5690 (__v8df) __W,
5691 (__mmask8) __U);
5692 }
5693
5694 extern __inline __m512d
5695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5697 {
5698 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
5699 (__v8df)
5700 _mm512_setzero_pd (),
5701 (__mmask8) __U);
5702 }
5703
5704 extern __inline void
5705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5706 _mm512_storeu_pd (void *__P, __m512d __A)
5707 {
5708 *(__m512d_u *)__P = __A;
5709 }
5710
5711 extern __inline void
5712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5713 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5714 {
5715 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
5716 (__mmask8) __U);
5717 }
5718
5719 extern __inline __m512
5720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5721 _mm512_loadu_ps (void const *__P)
5722 {
5723 return *(__m512_u *)__P;
5724 }
5725
5726 extern __inline __m512
5727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5728 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5729 {
5730 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
5731 (__v16sf) __W,
5732 (__mmask16) __U);
5733 }
5734
5735 extern __inline __m512
5736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5738 {
5739 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
5740 (__v16sf)
5741 _mm512_setzero_ps (),
5742 (__mmask16) __U);
5743 }
5744
5745 extern __inline void
5746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747 _mm512_storeu_ps (void *__P, __m512 __A)
5748 {
5749 *(__m512_u *)__P = __A;
5750 }
5751
5752 extern __inline void
5753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5754 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5755 {
5756 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
5757 (__mmask16) __U);
5758 }
5759
5760 extern __inline __m512i
5761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5762 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5763 {
5764 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
5765 (__v8di) __W,
5766 (__mmask8) __U);
5767 }
5768
5769 extern __inline __m512i
5770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5772 {
5773 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
5774 (__v8di)
5775 _mm512_setzero_si512 (),
5776 (__mmask8) __U);
5777 }
5778
5779 extern __inline void
5780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5782 {
5783 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
5784 (__mmask8) __U);
5785 }
5786
5787 extern __inline __m512i
5788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5789 _mm512_loadu_si512 (void const *__P)
5790 {
5791 return *(__m512i_u *)__P;
5792 }
5793
5794 extern __inline __m512i
5795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5796 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5797 {
5798 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
5799 (__v16si) __W,
5800 (__mmask16) __U);
5801 }
5802
5803 extern __inline __m512i
5804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5805 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5806 {
5807 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
5808 (__v16si)
5809 _mm512_setzero_si512 (),
5810 (__mmask16) __U);
5811 }
5812
5813 extern __inline void
5814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5815 _mm512_storeu_si512 (void *__P, __m512i __A)
5816 {
5817 *(__m512i_u *)__P = __A;
5818 }
5819
5820 extern __inline void
5821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5822 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5823 {
5824 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
5825 (__mmask16) __U);
5826 }
5827
5828 extern __inline __m512d
5829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5830 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5831 {
5832 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5833 (__v8di) __C,
5834 (__v8df)
5835 _mm512_undefined_pd (),
5836 (__mmask8) -1);
5837 }
5838
5839 extern __inline __m512d
5840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5841 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5842 {
5843 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5844 (__v8di) __C,
5845 (__v8df) __W,
5846 (__mmask8) __U);
5847 }
5848
5849 extern __inline __m512d
5850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5851 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5852 {
5853 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5854 (__v8di) __C,
5855 (__v8df)
5856 _mm512_setzero_pd (),
5857 (__mmask8) __U);
5858 }
5859
5860 extern __inline __m512
5861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5862 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5863 {
5864 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5865 (__v16si) __C,
5866 (__v16sf)
5867 _mm512_undefined_ps (),
5868 (__mmask16) -1);
5869 }
5870
5871 extern __inline __m512
5872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5873 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5874 {
5875 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5876 (__v16si) __C,
5877 (__v16sf) __W,
5878 (__mmask16) __U);
5879 }
5880
5881 extern __inline __m512
5882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5883 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5884 {
5885 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5886 (__v16si) __C,
5887 (__v16sf)
5888 _mm512_setzero_ps (),
5889 (__mmask16) __U);
5890 }
5891
5892 extern __inline __m512i
5893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5894 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5895 {
5896 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5897 /* idx */ ,
5898 (__v8di) __A,
5899 (__v8di) __B,
5900 (__mmask8) -1);
5901 }
5902
5903 extern __inline __m512i
5904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5906 __m512i __B)
5907 {
5908 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5909 /* idx */ ,
5910 (__v8di) __A,
5911 (__v8di) __B,
5912 (__mmask8) __U);
5913 }
5914
5915 extern __inline __m512i
5916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5917 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5918 __mmask8 __U, __m512i __B)
5919 {
5920 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5921 (__v8di) __I
5922 /* idx */ ,
5923 (__v8di) __B,
5924 (__mmask8) __U);
5925 }
5926
5927 extern __inline __m512i
5928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5929 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5930 __m512i __I, __m512i __B)
5931 {
5932 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5933 /* idx */ ,
5934 (__v8di) __A,
5935 (__v8di) __B,
5936 (__mmask8) __U);
5937 }
5938
5939 extern __inline __m512i
5940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5941 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5942 {
5943 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5944 /* idx */ ,
5945 (__v16si) __A,
5946 (__v16si) __B,
5947 (__mmask16) -1);
5948 }
5949
5950 extern __inline __m512i
5951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5952 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5953 __m512i __I, __m512i __B)
5954 {
5955 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5956 /* idx */ ,
5957 (__v16si) __A,
5958 (__v16si) __B,
5959 (__mmask16) __U);
5960 }
5961
5962 extern __inline __m512i
5963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5964 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5965 __mmask16 __U, __m512i __B)
5966 {
5967 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5968 (__v16si) __I
5969 /* idx */ ,
5970 (__v16si) __B,
5971 (__mmask16) __U);
5972 }
5973
5974 extern __inline __m512i
5975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5976 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5977 __m512i __I, __m512i __B)
5978 {
5979 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5980 /* idx */ ,
5981 (__v16si) __A,
5982 (__v16si) __B,
5983 (__mmask16) __U);
5984 }
5985
5986 extern __inline __m512d
5987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5988 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5989 {
5990 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5991 /* idx */ ,
5992 (__v8df) __A,
5993 (__v8df) __B,
5994 (__mmask8) -1);
5995 }
5996
5997 extern __inline __m512d
5998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5999 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6000 __m512d __B)
6001 {
6002 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6003 /* idx */ ,
6004 (__v8df) __A,
6005 (__v8df) __B,
6006 (__mmask8) __U);
6007 }
6008
6009 extern __inline __m512d
6010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6011 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6012 __m512d __B)
6013 {
6014 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6015 (__v8di) __I
6016 /* idx */ ,
6017 (__v8df) __B,
6018 (__mmask8) __U);
6019 }
6020
6021 extern __inline __m512d
6022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6023 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6024 __m512d __B)
6025 {
6026 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6027 /* idx */ ,
6028 (__v8df) __A,
6029 (__v8df) __B,
6030 (__mmask8) __U);
6031 }
6032
6033 extern __inline __m512
6034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6035 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6036 {
6037 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6038 /* idx */ ,
6039 (__v16sf) __A,
6040 (__v16sf) __B,
6041 (__mmask16) -1);
6042 }
6043
6044 extern __inline __m512
6045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6046 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6047 {
6048 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6049 /* idx */ ,
6050 (__v16sf) __A,
6051 (__v16sf) __B,
6052 (__mmask16) __U);
6053 }
6054
6055 extern __inline __m512
6056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6057 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6058 __m512 __B)
6059 {
6060 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6061 (__v16si) __I
6062 /* idx */ ,
6063 (__v16sf) __B,
6064 (__mmask16) __U);
6065 }
6066
6067 extern __inline __m512
6068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6069 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6070 __m512 __B)
6071 {
6072 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6073 /* idx */ ,
6074 (__v16sf) __A,
6075 (__v16sf) __B,
6076 (__mmask16) __U);
6077 }
6078
6079 #ifdef __OPTIMIZE__
6080 extern __inline __m512d
6081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6082 _mm512_permute_pd (__m512d __X, const int __C)
6083 {
6084 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6085 (__v8df)
6086 _mm512_undefined_pd (),
6087 (__mmask8) -1);
6088 }
6089
6090 extern __inline __m512d
6091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6093 {
6094 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6095 (__v8df) __W,
6096 (__mmask8) __U);
6097 }
6098
6099 extern __inline __m512d
6100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6101 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6102 {
6103 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6104 (__v8df)
6105 _mm512_setzero_pd (),
6106 (__mmask8) __U);
6107 }
6108
6109 extern __inline __m512
6110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6111 _mm512_permute_ps (__m512 __X, const int __C)
6112 {
6113 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6114 (__v16sf)
6115 _mm512_undefined_ps (),
6116 (__mmask16) -1);
6117 }
6118
6119 extern __inline __m512
6120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6121 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6122 {
6123 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6124 (__v16sf) __W,
6125 (__mmask16) __U);
6126 }
6127
6128 extern __inline __m512
6129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6130 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6131 {
6132 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6133 (__v16sf)
6134 _mm512_setzero_ps (),
6135 (__mmask16) __U);
6136 }
6137 #else
/* Macro form of _mm512_permute_pd, used when __OPTIMIZE__ is not defined:
   __builtin_ia32_vpermilpd512_mask with an undefined pass-through vector
   and an all-ones mask.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)(-1)))
6142
/* Macro form of _mm512_mask_permute_pd, used when __OPTIMIZE__ is not
   defined: W is the pass-through operand and U the mask.  */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6147
/* Macro form of _mm512_maskz_permute_pd, used when __OPTIMIZE__ is not
   defined: a _mm512_setzero_pd () pass-through operand and mask U.  */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6152
/* Macro form of _mm512_permute_ps, used when __OPTIMIZE__ is not defined:
   __builtin_ia32_vpermilps512_mask with an undefined pass-through vector
   and an all-ones mask.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)(-1)))
6157
/* Macro form of _mm512_mask_permute_ps, used when __OPTIMIZE__ is not
   defined: W is the pass-through operand and U the mask.  */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
6162
/* Macro form of _mm512_maskz_permute_ps, used when __OPTIMIZE__ is not
   defined: a _mm512_setzero_ps () pass-through operand and mask U.  */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
6167 #endif
6168
6169 #ifdef __OPTIMIZE__
6170 extern __inline __m512i
6171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172 _mm512_permutex_epi64 (__m512i __X, const int __I)
6173 {
6174 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6175 (__v8di)
6176 _mm512_undefined_epi32 (),
6177 (__mmask8) (-1));
6178 }
6179
6180 extern __inline __m512i
6181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6183 __m512i __X, const int __I)
6184 {
6185 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6186 (__v8di) __W,
6187 (__mmask8) __M);
6188 }
6189
6190 extern __inline __m512i
6191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6192 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6193 {
6194 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6195 (__v8di)
6196 _mm512_setzero_si512 (),
6197 (__mmask8) __M);
6198 }
6199
6200 extern __inline __m512d
6201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6202 _mm512_permutex_pd (__m512d __X, const int __M)
6203 {
6204 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6205 (__v8df)
6206 _mm512_undefined_pd (),
6207 (__mmask8) -1);
6208 }
6209
6210 extern __inline __m512d
6211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6212 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6213 {
6214 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6215 (__v8df) __W,
6216 (__mmask8) __U);
6217 }
6218
6219 extern __inline __m512d
6220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6221 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6222 {
6223 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6224 (__v8df)
6225 _mm512_setzero_pd (),
6226 (__mmask8) __U);
6227 }
6228 #else
/* Macro form of _mm512_permutex_pd, used when __OPTIMIZE__ is not
   defined: __builtin_ia32_permdf512_mask with an undefined pass-through
   vector and an all-ones mask.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))
6233
/* Macro form of _mm512_mask_permutex_pd, used when __OPTIMIZE__ is not
   defined: W is the pass-through operand and U the mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6237
/* Macro form of _mm512_maskz_permutex_pd, used when __OPTIMIZE__ is not
   defined: a _mm512_setzero_pd () pass-through operand and mask U.  */
#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6242
/* Macro form of _mm512_permutex_epi64, used when __OPTIMIZE__ is not
   defined: __builtin_ia32_permdi512_mask with an undefined pass-through
   vector and an all-ones mask.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i) (_mm512_undefined_epi32 ()), \
     (__mmask8)(-1)))
6249
/* Macro form of _mm512_maskz_permutex_epi64, used when __OPTIMIZE__ is
   not defined: a _mm512_setzero_si512 () pass-through operand and
   mask M.  */
#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i) (_mm512_setzero_si512 ()), \
     (__mmask8)(M)))
6256
/* Macro form of _mm512_mask_permutex_epi64, used when __OPTIMIZE__ is not
   defined: W is the pass-through operand and M the mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(M)))
6262 #endif
6263
6264 extern __inline __m512i
6265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6266 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6267 {
6268 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6269 (__v8di) __X,
6270 (__v8di)
6271 _mm512_setzero_si512 (),
6272 __M);
6273 }
6274
6275 extern __inline __m512i
6276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6277 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6278 {
6279 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6280 (__v8di) __X,
6281 (__v8di)
6282 _mm512_undefined_epi32 (),
6283 (__mmask8) -1);
6284 }
6285
6286 extern __inline __m512i
6287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6288 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6289 __m512i __Y)
6290 {
6291 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6292 (__v8di) __X,
6293 (__v8di) __W,
6294 __M);
6295 }
6296
6297 extern __inline __m512i
6298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6299 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6300 {
6301 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6302 (__v16si) __X,
6303 (__v16si)
6304 _mm512_setzero_si512 (),
6305 __M);
6306 }
6307
6308 extern __inline __m512i
6309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6310 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6311 {
6312 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6313 (__v16si) __X,
6314 (__v16si)
6315 _mm512_undefined_epi32 (),
6316 (__mmask16) -1);
6317 }
6318
6319 extern __inline __m512i
6320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6321 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6322 __m512i __Y)
6323 {
6324 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6325 (__v16si) __X,
6326 (__v16si) __W,
6327 __M);
6328 }
6329
6330 extern __inline __m512d
6331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6332 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6333 {
6334 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6335 (__v8di) __X,
6336 (__v8df)
6337 _mm512_undefined_pd (),
6338 (__mmask8) -1);
6339 }
6340
6341 extern __inline __m512d
6342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6343 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6344 {
6345 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6346 (__v8di) __X,
6347 (__v8df) __W,
6348 (__mmask8) __U);
6349 }
6350
6351 extern __inline __m512d
6352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6353 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6354 {
6355 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6356 (__v8di) __X,
6357 (__v8df)
6358 _mm512_setzero_pd (),
6359 (__mmask8) __U);
6360 }
6361
6362 extern __inline __m512
6363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6365 {
6366 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6367 (__v16si) __X,
6368 (__v16sf)
6369 _mm512_undefined_ps (),
6370 (__mmask16) -1);
6371 }
6372
6373 extern __inline __m512
6374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6375 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6376 {
6377 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6378 (__v16si) __X,
6379 (__v16sf) __W,
6380 (__mmask16) __U);
6381 }
6382
6383 extern __inline __m512
6384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6385 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6386 {
6387 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6388 (__v16si) __X,
6389 (__v16sf)
6390 _mm512_setzero_ps (),
6391 (__mmask16) __U);
6392 }
6393
6394 #ifdef __OPTIMIZE__
6395 extern __inline __m512
6396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6397 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6398 {
6399 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6400 (__v16sf) __V, __imm,
6401 (__v16sf)
6402 _mm512_undefined_ps (),
6403 (__mmask16) -1);
6404 }
6405
6406 extern __inline __m512
6407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6408 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6409 __m512 __V, const int __imm)
6410 {
6411 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6412 (__v16sf) __V, __imm,
6413 (__v16sf) __W,
6414 (__mmask16) __U);
6415 }
6416
6417 extern __inline __m512
6418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6419 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6420 {
6421 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6422 (__v16sf) __V, __imm,
6423 (__v16sf)
6424 _mm512_setzero_ps (),
6425 (__mmask16) __U);
6426 }
6427
6428 extern __inline __m512d
6429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6430 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6431 {
6432 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6433 (__v8df) __V, __imm,
6434 (__v8df)
6435 _mm512_undefined_pd (),
6436 (__mmask8) -1);
6437 }
6438
6439 extern __inline __m512d
6440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6441 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6442 __m512d __V, const int __imm)
6443 {
6444 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6445 (__v8df) __V, __imm,
6446 (__v8df) __W,
6447 (__mmask8) __U);
6448 }
6449
6450 extern __inline __m512d
6451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6452 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6453 const int __imm)
6454 {
6455 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6456 (__v8df) __V, __imm,
6457 (__v8df)
6458 _mm512_setzero_pd (),
6459 (__mmask8) __U);
6460 }
6461
6462 extern __inline __m512d
6463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6464 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6465 const int __imm, const int __R)
6466 {
6467 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6468 (__v8df) __B,
6469 (__v8di) __C,
6470 __imm,
6471 (__mmask8) -1, __R);
6472 }
6473
6474 extern __inline __m512d
6475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6476 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6477 __m512i __C, const int __imm, const int __R)
6478 {
6479 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6480 (__v8df) __B,
6481 (__v8di) __C,
6482 __imm,
6483 (__mmask8) __U, __R);
6484 }
6485
6486 extern __inline __m512d
6487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6488 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6489 __m512i __C, const int __imm, const int __R)
6490 {
6491 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6492 (__v8df) __B,
6493 (__v8di) __C,
6494 __imm,
6495 (__mmask8) __U, __R);
6496 }
6497
6498 extern __inline __m512
6499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6500 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6501 const int __imm, const int __R)
6502 {
6503 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6504 (__v16sf) __B,
6505 (__v16si) __C,
6506 __imm,
6507 (__mmask16) -1, __R);
6508 }
6509
6510 extern __inline __m512
6511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6512 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6513 __m512i __C, const int __imm, const int __R)
6514 {
6515 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6516 (__v16sf) __B,
6517 (__v16si) __C,
6518 __imm,
6519 (__mmask16) __U, __R);
6520 }
6521
6522 extern __inline __m512
6523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6524 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6525 __m512i __C, const int __imm, const int __R)
6526 {
6527 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6528 (__v16sf) __B,
6529 (__v16si) __C,
6530 __imm,
6531 (__mmask16) __U, __R);
6532 }
6533
6534 extern __inline __m128d
6535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6536 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6537 const int __imm, const int __R)
6538 {
6539 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6540 (__v2df) __B,
6541 (__v2di) __C, __imm,
6542 (__mmask8) -1, __R);
6543 }
6544
6545 extern __inline __m128d
6546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6547 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6548 __m128i __C, const int __imm, const int __R)
6549 {
6550 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6551 (__v2df) __B,
6552 (__v2di) __C, __imm,
6553 (__mmask8) __U, __R);
6554 }
6555
6556 extern __inline __m128d
6557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6558 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6559 __m128i __C, const int __imm, const int __R)
6560 {
6561 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6562 (__v2df) __B,
6563 (__v2di) __C,
6564 __imm,
6565 (__mmask8) __U, __R);
6566 }
6567
6568 extern __inline __m128
6569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6570 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6571 const int __imm, const int __R)
6572 {
6573 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6574 (__v4sf) __B,
6575 (__v4si) __C, __imm,
6576 (__mmask8) -1, __R);
6577 }
6578
6579 extern __inline __m128
6580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6582 __m128i __C, const int __imm, const int __R)
6583 {
6584 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6585 (__v4sf) __B,
6586 (__v4si) __C, __imm,
6587 (__mmask8) __U, __R);
6588 }
6589
6590 extern __inline __m128
6591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6592 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6593 __m128i __C, const int __imm, const int __R)
6594 {
6595 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6596 (__v4sf) __B,
6597 (__v4si) __C, __imm,
6598 (__mmask8) __U, __R);
6599 }
6600
6601 #else
/* Macro form of _mm512_shuffle_pd, used when __OPTIMIZE__ is not defined:
   __builtin_ia32_shufpd512_mask with an undefined pass-through vector and
   an all-ones mask.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))
6607
/* Macro form of _mm512_mask_shuffle_pd, used when __OPTIMIZE__ is not
   defined: W is the pass-through operand and U the mask.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6613
/* Macro form of _mm512_maskz_shuffle_pd, used when __OPTIMIZE__ is not
   defined: a _mm512_setzero_pd () pass-through operand and mask U.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6619
/* Macro form of _mm512_shuffle_ps, used when __OPTIMIZE__ is not defined:
   __builtin_ia32_shufps512_mask with an undefined pass-through vector and
   an all-ones mask.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)-1))
6625
/* Macro form of _mm512_mask_shuffle_ps, used when __OPTIMIZE__ is not
   defined: W is the pass-through operand and U the mask.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
6631
/* Macro form of _mm512_maskz_shuffle_ps, used when __OPTIMIZE__ is not
   defined: a _mm512_setzero_ps () pass-through operand and mask U.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
6637
/* Macro form of _mm512_fixupimm_round_pd, used when __OPTIMIZE__ is not
   defined: __builtin_ia32_fixupimmpd512_mask with an all-ones mask.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     (R)))
6642
/* Macro form of _mm512_mask_fixupimm_round_pd, used when __OPTIMIZE__ is
   not defined: __builtin_ia32_fixupimmpd512_mask with mask U.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
6647
/* Macro form of _mm512_maskz_fixupimm_round_pd, used when __OPTIMIZE__ is
   not defined: __builtin_ia32_fixupimmpd512_maskz with mask U.  */
#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
6652
/* Macro form of _mm512_fixupimm_round_ps, used when __OPTIMIZE__ is not
   defined: __builtin_ia32_fixupimmps512_mask with an all-ones mask.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(-1), \
     (R)))
6657
6658 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6659 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6660 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6661 (__mmask16)(U), (R)))
6662
6663 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6664 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6665 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6666 (__mmask16)(U), (R)))
6667
/* Macro wrappers over the 128-bit fixupimm builtins (sd and ss
   variants).  X and Y are floating-point vectors, Z is an integer
   vector, C is forwarded as an int and R is forwarded unchanged.  The
   plain form passes an all-ones 8-bit mask to the *_mask builtin; the
   maskz form uses the *_maskz builtin.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d) \
   __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
                                   (__v2df)(__m128d)(Y), \
                                   (__v2di)(__m128i)(Z), \
                                   (int)(C), \
                                   (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d) \
   __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
                                   (__v2df)(__m128d)(Y), \
                                   (__v2di)(__m128i)(Z), \
                                   (int)(C), \
                                   (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d) \
   __builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
                                    (__v2df)(__m128d)(Y), \
                                    (__v2di)(__m128i)(Z), \
                                    (int)(C), \
                                    (__mmask8)(U), (R)))

#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128) \
   __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
                                   (__v4sf)(__m128)(Y), \
                                   (__v4si)(__m128i)(Z), \
                                   (int)(C), \
                                   (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128) \
   __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
                                   (__v4sf)(__m128)(Y), \
                                   (__v4si)(__m128i)(Z), \
                                   (int)(C), \
                                   (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128) \
   __builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
                                    (__v4sf)(__m128)(Y), \
                                    (__v4si)(__m128i)(Z), \
                                    (int)(C), \
                                    (__mmask8)(U), (R)))
6697 #endif
6698
6699 extern __inline __m512
6700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6701 _mm512_movehdup_ps (__m512 __A)
6702 {
6703 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6704 (__v16sf)
6705 _mm512_undefined_ps (),
6706 (__mmask16) -1);
6707 }
6708
6709 extern __inline __m512
6710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6711 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6712 {
6713 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6714 (__v16sf) __W,
6715 (__mmask16) __U);
6716 }
6717
6718 extern __inline __m512
6719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6720 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6721 {
6722 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6723 (__v16sf)
6724 _mm512_setzero_ps (),
6725 (__mmask16) __U);
6726 }
6727
6728 extern __inline __m512
6729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6730 _mm512_moveldup_ps (__m512 __A)
6731 {
6732 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6733 (__v16sf)
6734 _mm512_undefined_ps (),
6735 (__mmask16) -1);
6736 }
6737
6738 extern __inline __m512
6739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6740 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6741 {
6742 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6743 (__v16sf) __W,
6744 (__mmask16) __U);
6745 }
6746
6747 extern __inline __m512
6748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6749 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6750 {
6751 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6752 (__v16sf)
6753 _mm512_setzero_ps (),
6754 (__mmask16) __U);
6755 }
6756
6757 extern __inline __m512i
6758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6759 _mm512_or_si512 (__m512i __A, __m512i __B)
6760 {
6761 return (__m512i) ((__v16su) __A | (__v16su) __B);
6762 }
6763
6764 extern __inline __m512i
6765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6766 _mm512_or_epi32 (__m512i __A, __m512i __B)
6767 {
6768 return (__m512i) ((__v16su) __A | (__v16su) __B);
6769 }
6770
6771 extern __inline __m512i
6772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6773 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6774 {
6775 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6776 (__v16si) __B,
6777 (__v16si) __W,
6778 (__mmask16) __U);
6779 }
6780
6781 extern __inline __m512i
6782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6783 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6784 {
6785 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6786 (__v16si) __B,
6787 (__v16si)
6788 _mm512_setzero_si512 (),
6789 (__mmask16) __U);
6790 }
6791
6792 extern __inline __m512i
6793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6794 _mm512_or_epi64 (__m512i __A, __m512i __B)
6795 {
6796 return (__m512i) ((__v8du) __A | (__v8du) __B);
6797 }
6798
6799 extern __inline __m512i
6800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6802 {
6803 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6804 (__v8di) __B,
6805 (__v8di) __W,
6806 (__mmask8) __U);
6807 }
6808
6809 extern __inline __m512i
6810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6811 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6812 {
6813 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6814 (__v8di) __B,
6815 (__v8di)
6816 _mm512_setzero_si512 (),
6817 (__mmask8) __U);
6818 }
6819
6820 extern __inline __m512i
6821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6822 _mm512_xor_si512 (__m512i __A, __m512i __B)
6823 {
6824 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6825 }
6826
6827 extern __inline __m512i
6828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6829 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6830 {
6831 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6832 }
6833
6834 extern __inline __m512i
6835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6836 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6837 {
6838 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6839 (__v16si) __B,
6840 (__v16si) __W,
6841 (__mmask16) __U);
6842 }
6843
6844 extern __inline __m512i
6845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6846 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6847 {
6848 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6849 (__v16si) __B,
6850 (__v16si)
6851 _mm512_setzero_si512 (),
6852 (__mmask16) __U);
6853 }
6854
6855 extern __inline __m512i
6856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6857 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6858 {
6859 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6860 }
6861
6862 extern __inline __m512i
6863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6864 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6865 {
6866 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6867 (__v8di) __B,
6868 (__v8di) __W,
6869 (__mmask8) __U);
6870 }
6871
6872 extern __inline __m512i
6873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6874 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6875 {
6876 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6877 (__v8di) __B,
6878 (__v8di)
6879 _mm512_setzero_si512 (),
6880 (__mmask8) __U);
6881 }
6882
6883 #ifdef __OPTIMIZE__
6884 extern __inline __m512i
6885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6886 _mm512_rol_epi32 (__m512i __A, const int __B)
6887 {
6888 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6889 (__v16si)
6890 _mm512_undefined_epi32 (),
6891 (__mmask16) -1);
6892 }
6893
6894 extern __inline __m512i
6895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6896 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6897 {
6898 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6899 (__v16si) __W,
6900 (__mmask16) __U);
6901 }
6902
6903 extern __inline __m512i
6904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6905 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6906 {
6907 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6908 (__v16si)
6909 _mm512_setzero_si512 (),
6910 (__mmask16) __U);
6911 }
6912
6913 extern __inline __m512i
6914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6915 _mm512_ror_epi32 (__m512i __A, int __B)
6916 {
6917 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6918 (__v16si)
6919 _mm512_undefined_epi32 (),
6920 (__mmask16) -1);
6921 }
6922
6923 extern __inline __m512i
6924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6925 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6926 {
6927 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6928 (__v16si) __W,
6929 (__mmask16) __U);
6930 }
6931
6932 extern __inline __m512i
6933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6934 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6935 {
6936 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6937 (__v16si)
6938 _mm512_setzero_si512 (),
6939 (__mmask16) __U);
6940 }
6941
6942 extern __inline __m512i
6943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944 _mm512_rol_epi64 (__m512i __A, const int __B)
6945 {
6946 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6947 (__v8di)
6948 _mm512_undefined_epi32 (),
6949 (__mmask8) -1);
6950 }
6951
6952 extern __inline __m512i
6953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6954 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6955 {
6956 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6957 (__v8di) __W,
6958 (__mmask8) __U);
6959 }
6960
6961 extern __inline __m512i
6962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6963 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6964 {
6965 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6966 (__v8di)
6967 _mm512_setzero_si512 (),
6968 (__mmask8) __U);
6969 }
6970
6971 extern __inline __m512i
6972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6973 _mm512_ror_epi64 (__m512i __A, int __B)
6974 {
6975 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6976 (__v8di)
6977 _mm512_undefined_epi32 (),
6978 (__mmask8) -1);
6979 }
6980
6981 extern __inline __m512i
6982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6983 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6984 {
6985 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6986 (__v8di) __W,
6987 (__mmask8) __U);
6988 }
6989
6990 extern __inline __m512i
6991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6992 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6993 {
6994 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6995 (__v8di)
6996 _mm512_setzero_si512 (),
6997 (__mmask8) __U);
6998 }
6999
7000 #else
/* Macro forms of the 32-bit rol/ror wrappers, used when __OPTIMIZE__
   is not defined.  A is the source vector and B is forwarded as an
   int.  Plain forms pair an undefined vector with an all-ones 16-bit
   mask; mask forms forward W and U; maskz forms pair an all-zero vector
   with U.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i) \
   __builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
                                 (int)(B), \
                                 (__v16si)_mm512_undefined_epi32 (), \
                                 (__mmask16)(-1)))

#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
                                 (int)(B), \
                                 (__v16si)(__m512i)(W), \
                                 (__mmask16)(U)))

#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i) \
   __builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
                                 (int)(B), \
                                 (__v16si)_mm512_setzero_si512 (), \
                                 (__mmask16)(U)))

#define _mm512_ror_epi32(A, B) \
  ((__m512i) \
   __builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
                                 (int)(B), \
                                 (__v16si)_mm512_undefined_epi32 (), \
                                 (__mmask16)(-1)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
                                 (int)(B), \
                                 (__v16si)(__m512i)(W), \
                                 (__mmask16)(U)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i) \
   __builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
                                 (int)(B), \
                                 (__v16si)_mm512_setzero_si512 (), \
                                 (__mmask16)(U)))
/* Macro forms of the 64-bit rol/ror wrappers, used when __OPTIMIZE__
   is not defined.  A is the source vector and B is forwarded as an
   int.  Plain forms pair an undefined vector with an all-ones 8-bit
   mask; mask forms forward W and U; maskz forms pair an all-zero vector
   with U.  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i) \
   __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
                                 (int)(B), \
                                 (__v8di)_mm512_undefined_epi32 (), \
                                 (__mmask8)(-1)))

#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
                                 (int)(B), \
                                 (__v8di)(__m512i)(W), \
                                 (__mmask8)(U)))

#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i) \
   __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
                                 (int)(B), \
                                 (__v8di)_mm512_setzero_si512 (), \
                                 (__mmask8)(U)))

#define _mm512_ror_epi64(A, B) \
  ((__m512i) \
   __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
                                 (int)(B), \
                                 (__v8di)_mm512_undefined_epi32 (), \
                                 (__mmask8)(-1)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
                                 (int)(B), \
                                 (__v8di)(__m512i)(W), \
                                 (__mmask8)(U)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i) \
   __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
                                 (int)(B), \
                                 (__v8di)_mm512_setzero_si512 (), \
                                 (__mmask8)(U)))
7062 #endif
7063
7064 extern __inline __m512i
7065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7066 _mm512_and_si512 (__m512i __A, __m512i __B)
7067 {
7068 return (__m512i) ((__v16su) __A & (__v16su) __B);
7069 }
7070
7071 extern __inline __m512i
7072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7073 _mm512_and_epi32 (__m512i __A, __m512i __B)
7074 {
7075 return (__m512i) ((__v16su) __A & (__v16su) __B);
7076 }
7077
7078 extern __inline __m512i
7079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7080 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7081 {
7082 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7083 (__v16si) __B,
7084 (__v16si) __W,
7085 (__mmask16) __U);
7086 }
7087
7088 extern __inline __m512i
7089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7090 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7091 {
7092 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7093 (__v16si) __B,
7094 (__v16si)
7095 _mm512_setzero_si512 (),
7096 (__mmask16) __U);
7097 }
7098
7099 extern __inline __m512i
7100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7101 _mm512_and_epi64 (__m512i __A, __m512i __B)
7102 {
7103 return (__m512i) ((__v8du) __A & (__v8du) __B);
7104 }
7105
7106 extern __inline __m512i
7107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7108 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7109 {
7110 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7111 (__v8di) __B,
7112 (__v8di) __W, __U);
7113 }
7114
7115 extern __inline __m512i
7116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7117 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7118 {
7119 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7120 (__v8di) __B,
7121 (__v8di)
7122 _mm512_setzero_pd (),
7123 __U);
7124 }
7125
7126 extern __inline __m512i
7127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7128 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7129 {
7130 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7131 (__v16si) __B,
7132 (__v16si)
7133 _mm512_undefined_epi32 (),
7134 (__mmask16) -1);
7135 }
7136
7137 extern __inline __m512i
7138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7139 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7140 {
7141 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7142 (__v16si) __B,
7143 (__v16si)
7144 _mm512_undefined_epi32 (),
7145 (__mmask16) -1);
7146 }
7147
7148 extern __inline __m512i
7149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7150 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7151 {
7152 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7153 (__v16si) __B,
7154 (__v16si) __W,
7155 (__mmask16) __U);
7156 }
7157
7158 extern __inline __m512i
7159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7160 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7161 {
7162 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7163 (__v16si) __B,
7164 (__v16si)
7165 _mm512_setzero_si512 (),
7166 (__mmask16) __U);
7167 }
7168
7169 extern __inline __m512i
7170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7171 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7172 {
7173 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7174 (__v8di) __B,
7175 (__v8di)
7176 _mm512_undefined_epi32 (),
7177 (__mmask8) -1);
7178 }
7179
7180 extern __inline __m512i
7181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7182 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7183 {
7184 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7185 (__v8di) __B,
7186 (__v8di) __W, __U);
7187 }
7188
7189 extern __inline __m512i
7190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7191 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7192 {
7193 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7194 (__v8di) __B,
7195 (__v8di)
7196 _mm512_setzero_pd (),
7197 __U);
7198 }
7199
7200 extern __inline __mmask16
7201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7202 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7203 {
7204 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7205 (__v16si) __B,
7206 (__mmask16) -1);
7207 }
7208
7209 extern __inline __mmask16
7210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7211 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7212 {
7213 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7214 (__v16si) __B, __U);
7215 }
7216
7217 extern __inline __mmask8
7218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7220 {
7221 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7222 (__v8di) __B,
7223 (__mmask8) -1);
7224 }
7225
7226 extern __inline __mmask8
7227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7228 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7229 {
7230 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7231 }
7232
7233 extern __inline __mmask16
7234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7236 {
7237 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7238 (__v16si) __B,
7239 (__mmask16) -1);
7240 }
7241
7242 extern __inline __mmask16
7243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7244 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7245 {
7246 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7247 (__v16si) __B, __U);
7248 }
7249
7250 extern __inline __mmask8
7251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7252 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7253 {
7254 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7255 (__v8di) __B,
7256 (__mmask8) -1);
7257 }
7258
7259 extern __inline __mmask8
7260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7261 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7262 {
7263 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7264 (__v8di) __B, __U);
7265 }
7266
7267 extern __inline __m512i
7268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7269 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7270 {
7271 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7272 (__v16si) __B,
7273 (__v16si)
7274 _mm512_undefined_epi32 (),
7275 (__mmask16) -1);
7276 }
7277
7278 extern __inline __m512i
7279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7280 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7281 __m512i __B)
7282 {
7283 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7284 (__v16si) __B,
7285 (__v16si) __W,
7286 (__mmask16) __U);
7287 }
7288
7289 extern __inline __m512i
7290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7291 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7292 {
7293 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7294 (__v16si) __B,
7295 (__v16si)
7296 _mm512_setzero_si512 (),
7297 (__mmask16) __U);
7298 }
7299
7300 extern __inline __m512i
7301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7302 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7303 {
7304 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7305 (__v8di) __B,
7306 (__v8di)
7307 _mm512_undefined_epi32 (),
7308 (__mmask8) -1);
7309 }
7310
7311 extern __inline __m512i
7312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7313 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7314 {
7315 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7316 (__v8di) __B,
7317 (__v8di) __W,
7318 (__mmask8) __U);
7319 }
7320
7321 extern __inline __m512i
7322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7323 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7324 {
7325 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7326 (__v8di) __B,
7327 (__v8di)
7328 _mm512_setzero_si512 (),
7329 (__mmask8) __U);
7330 }
7331
7332 extern __inline __m512i
7333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7334 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7335 {
7336 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7337 (__v16si) __B,
7338 (__v16si)
7339 _mm512_undefined_epi32 (),
7340 (__mmask16) -1);
7341 }
7342
7343 extern __inline __m512i
7344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7345 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7346 __m512i __B)
7347 {
7348 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7349 (__v16si) __B,
7350 (__v16si) __W,
7351 (__mmask16) __U);
7352 }
7353
7354 extern __inline __m512i
7355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7356 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7357 {
7358 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7359 (__v16si) __B,
7360 (__v16si)
7361 _mm512_setzero_si512 (),
7362 (__mmask16) __U);
7363 }
7364
7365 extern __inline __m512i
7366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7367 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7368 {
7369 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7370 (__v8di) __B,
7371 (__v8di)
7372 _mm512_undefined_epi32 (),
7373 (__mmask8) -1);
7374 }
7375
7376 extern __inline __m512i
7377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7378 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7379 {
7380 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7381 (__v8di) __B,
7382 (__v8di) __W,
7383 (__mmask8) __U);
7384 }
7385
7386 extern __inline __m512i
7387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7388 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7389 {
7390 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7391 (__v8di) __B,
7392 (__v8di)
7393 _mm512_setzero_si512 (),
7394 (__mmask8) __U);
7395 }
7396
7397 #ifdef __x86_64__
7398 #ifdef __OPTIMIZE__
7399 extern __inline unsigned long long
7400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7401 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7402 {
7403 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7404 }
7405
7406 extern __inline long long
7407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7408 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7409 {
7410 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7411 }
7412
7413 extern __inline long long
7414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7415 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7416 {
7417 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7418 }
7419
7420 extern __inline unsigned long long
7421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7422 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7423 {
7424 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7425 }
7426
7427 extern __inline long long
7428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7429 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7430 {
7431 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7432 }
7433
7434 extern __inline long long
7435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7436 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7437 {
7438 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7439 }
7440 #else
/* Macro forms of the scalar single-precision to 64-bit integer
   conversions, used when __OPTIMIZE__ is not defined.  Arguments are
   parenthesized and cast the same way the inline versions cast theirs
   (A to __v4sf, B to int), so an expression argument expands safely.
   The _si64 and _i64 names expand to the same builtin call.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvtss2usi64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) \
   __builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) \
   __builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvttss2usi64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) \
   __builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) \
   __builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))
7458 #endif
7459 #endif
7460
7461 #ifdef __OPTIMIZE__
7462 extern __inline unsigned
7463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7464 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7465 {
7466 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7467 }
7468
7469 extern __inline int
7470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7471 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7472 {
7473 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7474 }
7475
7476 extern __inline int
7477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7478 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7479 {
7480 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7481 }
7482
7483 extern __inline unsigned
7484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7485 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7486 {
7487 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7488 }
7489
7490 extern __inline int
7491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7492 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7493 {
7494 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7495 }
7496
7497 extern __inline int
7498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7499 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7500 {
7501 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7502 }
7503 #else
/* Macro forms of the scalar single-precision to 32-bit integer
   conversions, used when __OPTIMIZE__ is not defined.  Arguments are
   parenthesized and cast the same way the inline versions cast theirs
   (A to __v4sf, B to int), so an expression argument expands safely.
   The _si32 and _i32 names expand to the same builtin call.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) \
   __builtin_ia32_vcvtss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) \
   __builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) \
   __builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) \
   __builtin_ia32_vcvttss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) \
   __builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) \
   __builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))
7521 #endif
7522
7523 #ifdef __x86_64__
7524 #ifdef __OPTIMIZE__
7525 extern __inline unsigned long long
7526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7527 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7528 {
7529 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7530 }
7531
7532 extern __inline long long
7533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7534 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7535 {
7536 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7537 }
7538
7539 extern __inline long long
7540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7541 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7542 {
7543 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7544 }
7545
7546 extern __inline unsigned long long
7547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7548 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7549 {
7550 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7551 }
7552
7553 extern __inline long long
7554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7555 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7556 {
7557 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7558 }
7559
7560 extern __inline long long
7561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7562 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7563 {
7564 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7565 }
7566 #else
/* Macro forms of the scalar double-precision to 64-bit integer
   conversions, used when __OPTIMIZE__ is not defined.  Arguments are
   parenthesized and cast the same way the inline versions cast theirs
   (A to __v2df, B to int), so an expression argument expands safely.
   The _si64 and _i64 names expand to the same builtin call.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvtsd2usi64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) \
   __builtin_ia32_vcvtsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) \
   __builtin_ia32_vcvtsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvttsd2usi64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) \
   __builtin_ia32_vcvttsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) \
   __builtin_ia32_vcvttsd2si64 ((__v2df)(__m128d)(A), (int)(B)))
7584 #endif
7585 #endif
7586
7587 #ifdef __OPTIMIZE__
7588 extern __inline unsigned
7589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7590 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7591 {
7592 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7593 }
7594
7595 extern __inline int
7596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7598 {
7599 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7600 }
7601
7602 extern __inline int
7603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7604 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7605 {
7606 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7607 }
7608
/* Call __builtin_ia32_vcvttsd2usi32 on __A viewed as __v2df, forwarding
   __R, and return the result as unsigned.  */
7609 extern __inline unsigned
7610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7611 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7612 {
7613 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7614 }
7615
/* Call __builtin_ia32_vcvttsd2si32 on __A viewed as __v2df, forwarding
   __R, and return the result as int.  */
7616 extern __inline int
7617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7618 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7619 {
7620 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7621 }
7622
/* Alternate spelling of _mm_cvtt_roundsd_i32; the body is identical.  */
7623 extern __inline int
7624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7625 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7626 {
7627 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7628 }
7629 #else
/* Macro forms of the 32-bit scalar-double conversions, used when
   __OPTIMIZE__ is not defined.  Each forwards A and B to the same builtin
   as the corresponding inline function and casts the result; A is passed
   as written, without the (__v2df) cast the inline versions apply.  */
7630 #define _mm_cvt_roundsd_u32(A, B) \
7631 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
7632
7633 #define _mm_cvt_roundsd_si32(A, B) \
7634 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7635
7636 #define _mm_cvt_roundsd_i32(A, B) \
7637 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7638
7639 #define _mm_cvtt_roundsd_u32(A, B) \
7640 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
7641
7642 #define _mm_cvtt_roundsd_si32(A, B) \
7643 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7644
7645 #define _mm_cvtt_roundsd_i32(A, B) \
7646 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7647 #endif
7648
/* Unmasked form: call __builtin_ia32_movddup512_mask on __A with an
   all-ones mask; the second operand is _mm512_undefined_pd ().
   NOTE(review): presumably that operand is never read when every mask bit
   is set -- confirm against the builtin's definition.  */
7649 extern __inline __m512d
7650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7651 _mm512_movedup_pd (__m512d __A)
7652 {
7653 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7654 (__v8df)
7655 _mm512_undefined_pd (),
7656 (__mmask8) -1);
7657 }
7658
/* Masked form: same builtin, with __W as the second operand and __U as
   the mask.  */
7659 extern __inline __m512d
7660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7661 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7662 {
7663 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7664 (__v8df) __W,
7665 (__mmask8) __U);
7666 }
7667
/* Zero-source form: same builtin, with _mm512_setzero_pd () as the second
   operand and __U as the mask.  */
7668 extern __inline __m512d
7669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7670 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7671 {
7672 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7673 (__v8df)
7674 _mm512_setzero_pd (),
7675 (__mmask8) __U);
7676 }
7677
/* Unmasked form: call __builtin_ia32_unpcklpd512_mask on __A and __B with
   an all-ones mask and _mm512_undefined_pd () as the third operand.  */
7678 extern __inline __m512d
7679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7680 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7681 {
7682 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7683 (__v8df) __B,
7684 (__v8df)
7685 _mm512_undefined_pd (),
7686 (__mmask8) -1);
7687 }
7688
/* Masked form: same builtin, with __W as the third operand and __U as the
   mask.  */
7689 extern __inline __m512d
7690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7691 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7692 {
7693 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7694 (__v8df) __B,
7695 (__v8df) __W,
7696 (__mmask8) __U);
7697 }
7698
/* Zero-source form: same builtin, with _mm512_setzero_pd () as the third
   operand and __U as the mask.  */
7699 extern __inline __m512d
7700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7701 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7702 {
7703 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7704 (__v8df) __B,
7705 (__v8df)
7706 _mm512_setzero_pd (),
7707 (__mmask8) __U);
7708 }
7709
/* Unmasked form: call __builtin_ia32_unpckhpd512_mask on __A and __B with
   an all-ones mask and _mm512_undefined_pd () as the third operand.  */
7710 extern __inline __m512d
7711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7712 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7713 {
7714 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7715 (__v8df) __B,
7716 (__v8df)
7717 _mm512_undefined_pd (),
7718 (__mmask8) -1);
7719 }
7720
/* Masked form: same builtin, with __W as the third operand and __U as the
   mask.  */
7721 extern __inline __m512d
7722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7723 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7724 {
7725 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7726 (__v8df) __B,
7727 (__v8df) __W,
7728 (__mmask8) __U);
7729 }
7730
/* Zero-source form: same builtin, with _mm512_setzero_pd () as the third
   operand and __U as the mask.  */
7731 extern __inline __m512d
7732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7733 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7734 {
7735 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7736 (__v8df) __B,
7737 (__v8df)
7738 _mm512_setzero_pd (),
7739 (__mmask8) __U);
7740 }
7741
/* Unmasked form: call __builtin_ia32_unpckhps512_mask on __A and __B with
   an all-ones 16-bit mask and _mm512_undefined_ps () as the third
   operand.  */
7742 extern __inline __m512
7743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7745 {
7746 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7747 (__v16sf) __B,
7748 (__v16sf)
7749 _mm512_undefined_ps (),
7750 (__mmask16) -1);
7751 }
7752
/* Masked form: same builtin, with __W as the third operand and __U as the
   mask.  */
7753 extern __inline __m512
7754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7755 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7756 {
7757 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7758 (__v16sf) __B,
7759 (__v16sf) __W,
7760 (__mmask16) __U);
7761 }
7762
/* Zero-source form: same builtin, with _mm512_setzero_ps () as the third
   operand and __U as the mask.  */
7763 extern __inline __m512
7764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7765 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7766 {
7767 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7768 (__v16sf) __B,
7769 (__v16sf)
7770 _mm512_setzero_ps (),
7771 (__mmask16) __U);
7772 }
7773
7774 #ifdef __OPTIMIZE__
/* Unmasked form: call __builtin_ia32_cvtps2pd512_mask on the 256-bit __A
   (as __v8sf) with an all-ones mask, _mm512_undefined_pd () as the second
   operand, and __R forwarded as the last operand.  */
7775 extern __inline __m512d
7776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7777 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7778 {
7779 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7780 (__v8df)
7781 _mm512_undefined_pd (),
7782 (__mmask8) -1, __R);
7783 }
7784
/* Masked form: same builtin, with __W as the second operand and __U as
   the mask.  */
7785 extern __inline __m512d
7786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7788 const int __R)
7789 {
7790 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7791 (__v8df) __W,
7792 (__mmask8) __U, __R);
7793 }
7794
/* Zero-source form: same builtin, with _mm512_setzero_pd () as the second
   operand and __U as the mask.  */
7795 extern __inline __m512d
7796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7797 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7798 {
7799 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7800 (__v8df)
7801 _mm512_setzero_pd (),
7802 (__mmask8) __U, __R);
7803 }
7804
/* Unmasked form: call __builtin_ia32_vcvtph2ps512_mask on the 256-bit __A
   (as __v16hi) with an all-ones mask, _mm512_undefined_ps () as the
   second operand, and __R forwarded as the last operand.  */
7805 extern __inline __m512
7806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7807 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7808 {
7809 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7810 (__v16sf)
7811 _mm512_undefined_ps (),
7812 (__mmask16) -1, __R);
7813 }
7814
/* Masked form: same builtin, with __W as the second operand and __U as
   the mask.  */
7815 extern __inline __m512
7816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7817 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7818 const int __R)
7819 {
7820 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7821 (__v16sf) __W,
7822 (__mmask16) __U, __R);
7823 }
7824
/* Zero-source form: same builtin, with _mm512_setzero_ps () as the second
   operand and __U as the mask.  */
7825 extern __inline __m512
7826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7827 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7828 {
7829 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7830 (__v16sf)
7831 _mm512_setzero_ps (),
7832 (__mmask16) __U, __R);
7833 }
7834
/* Unmasked form: call __builtin_ia32_vcvtps2ph512_mask on __A with __I as
   the second operand, _mm256_undefined_si256 () as the third, and -1 as
   the mask.  The 256-bit result is returned as __m256i.  */
7835 extern __inline __m256i
7836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7838 {
7839 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7840 __I,
7841 (__v16hi)
7842 _mm256_undefined_si256 (),
7843 -1);
7844 }
7845
/* Same body as _mm512_cvt_roundps_ph under a second name.  */
7846 extern __inline __m256i
7847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848 _mm512_cvtps_ph (__m512 __A, const int __I)
7849 {
7850 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7851 __I,
7852 (__v16hi)
7853 _mm256_undefined_si256 (),
7854 -1);
7855 }
7856
/* Masked form of the __builtin_ia32_vcvtps2ph512_mask wrapper.  Note the
   parameter naming: here __U is the 256-bit vector passed as the third
   operand and __W is the mask, the reverse of the __W/__U roles used by
   the neighbouring intrinsics.  */
7857 extern __inline __m256i
7858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7859 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7860 const int __I)
7861 {
7862 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7863 __I,
7864 (__v16hi) __U,
7865 (__mmask16) __W);
7866 }
7867
/* Same body as _mm512_mask_cvt_roundps_ph under a second name; __U is
   again the vector operand and __W the mask.  */
7868 extern __inline __m256i
7869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7870 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7871 {
7872 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7873 __I,
7874 (__v16hi) __U,
7875 (__mmask16) __W);
7876 }
7877
/* Zero-source form of the __builtin_ia32_vcvtps2ph512_mask wrapper:
   _mm256_setzero_si256 () is the third operand and __W (not __U) is the
   mask.  */
7878 extern __inline __m256i
7879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7880 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7881 {
7882 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7883 __I,
7884 (__v16hi)
7885 _mm256_setzero_si256 (),
7886 (__mmask16) __W);
7887 }
7888
/* Same body as _mm512_maskz_cvt_roundps_ph under a second name.  */
7889 extern __inline __m256i
7890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7891 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7892 {
7893 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7894 __I,
7895 (__v16hi)
7896 _mm256_setzero_si256 (),
7897 (__mmask16) __W);
7898 }
7899 #else
/* Macro forms of the ps->pd and ph->ps conversions, used when
   __OPTIMIZE__ is not defined.  Arguments are forwarded to the same
   builtins as the inline versions.  Each expansion is wrapped in an outer
   pair of parentheses so that the leading cast cannot be split from the
   call by a postfix operator or sizeof applied to the macro.  */
7900 #define _mm512_cvt_roundps_pd(A, B) \
7901 ((__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B))
7902
7903 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
7904 ((__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B))
7905
7906 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
7907 ((__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B))
7908
7909 #define _mm512_cvt_roundph_ps(A, B) \
7910 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B))
7911
7912 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
7913 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B))
7914
7915 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
7916 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B))
7917
/* Macro forms of the ps->ph conversions, used when __OPTIMIZE__ is not
   defined.  As in the inline versions, U is the vector operand and W the
   mask for the _mask_ forms.  The A argument is parenthesised before the
   (__v16sf)(__m512) casts so that the casts apply to the whole argument
   expression rather than only its first operand.  */
7918 #define _mm512_cvt_roundps_ph(A, I) \
7919 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
7920 (__v16hi)_mm256_undefined_si256 (), -1))
7921 #define _mm512_cvtps_ph(A, I) \
7922 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
7923 (__v16hi)_mm256_undefined_si256 (), -1))
7924 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
7925 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
7926 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7927 #define _mm512_mask_cvtps_ph(U, W, A, I) \
7928 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
7929 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7930 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
7931 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
7932 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7933 #define _mm512_maskz_cvtps_ph(W, A, I) \
7934 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
7935 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7936 #endif
7937
7938 #ifdef __OPTIMIZE__
/* Unmasked form: call __builtin_ia32_cvtpd2ps512_mask on __A (as __v8df)
   with an all-ones mask, _mm256_undefined_ps () as the second operand,
   and __R forwarded as the last operand.  Returns a 256-bit __m256.  */
7939 extern __inline __m256
7940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7941 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7942 {
7943 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7944 (__v8sf)
7945 _mm256_undefined_ps (),
7946 (__mmask8) -1, __R);
7947 }
7948
/* Masked form: same builtin, with __W as the second operand and __U as
   the mask.  */
7949 extern __inline __m256
7950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7951 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7952 const int __R)
7953 {
7954 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7955 (__v8sf) __W,
7956 (__mmask8) __U, __R);
7957 }
7958
/* Zero-source form: same builtin, with _mm256_setzero_ps () as the second
   operand and __U as the mask.  */
7959 extern __inline __m256
7960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7961 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7962 {
7963 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7964 (__v8sf)
7965 _mm256_setzero_ps (),
7966 (__mmask8) __U, __R);
7967 }
7968
/* Call __builtin_ia32_cvtsd2ss_round with __A (as __v4sf), __B (as
   __v2df) and __R, returning the result as __m128.  */
7969 extern __inline __m128
7970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7971 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7972 {
7973 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7974 (__v2df) __B,
7975 __R);
7976 }
7977
/* Call __builtin_ia32_cvtss2sd_round with __A (as __v2df), __B (as
   __v4sf) and __R, returning the result as __m128d.  */
7978 extern __inline __m128d
7979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7980 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7981 {
7982 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7983 (__v4sf) __B,
7984 __R);
7985 }
7986 #else
/* Macro forms of the pd->ps, sd->ss and ss->sd conversions, used when
   __OPTIMIZE__ is not defined.  Arguments are forwarded to the same
   builtins as the inline versions.  Each expansion is wrapped in an outer
   pair of parentheses so that the leading cast cannot be split from the
   call by a postfix operator or sizeof applied to the macro.  */
7987 #define _mm512_cvt_roundpd_ps(A, B) \
7988 ((__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B))
7989
7990 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
7991 ((__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B))
7992
7993 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
7994 ((__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B))
7995
7996 #define _mm_cvt_roundsd_ss(A, B, C) \
7997 ((__m128)__builtin_ia32_cvtsd2ss_round(A, B, C))
7998
7999 #define _mm_cvt_roundss_sd(A, B, C) \
8000 ((__m128d)__builtin_ia32_cvtss2sd_round(A, B, C))
8001 #endif
8002
/* Pass __P (as __v8di *) and __A (as __v8di) to __builtin_ia32_movntdq512.
   NOTE(review): the builtin name suggests a non-temporal store to __P;
   any alignment requirement on __P is not visible here.  */
8003 extern __inline void
8004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8005 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8006 {
8007 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8008 }
8009
/* Pass __P and __A (as __v16sf) to __builtin_ia32_movntps512.  */
8010 extern __inline void
8011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8012 _mm512_stream_ps (float *__P, __m512 __A)
8013 {
8014 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8015 }
8016
/* Pass __P and __A (as __v8df) to __builtin_ia32_movntpd512.  */
8017 extern __inline void
8018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8019 _mm512_stream_pd (double *__P, __m512d __A)
8020 {
8021 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8022 }
8023
/* Return the result of __builtin_ia32_movntdqa512 applied to __P viewed
   as __v8di *.  The builtin's result is returned without an explicit
   (__m512i) cast.  */
8024 extern __inline __m512i
8025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8026 _mm512_stream_load_si512 (void *__P)
8027 {
8028 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8029 }
8030
8031 /* Constants for mantissa extraction */
/* Normalization-interval selector.  The enumerators take the implicit
   values 0..3 and are OR-ed into the low bits of the immediate built by
   the getmant intrinsics as ((sign << 2) | norm).  */
8032 typedef enum
8033 {
8034 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8035 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8036 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8037 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8038 } _MM_MANTISSA_NORM_ENUM;
8039
/* Sign-control selector.  The enumerators take the implicit values 0..2
   and are shifted left by two before being OR-ed with the normalization
   selector in the getmant intrinsics.  */
8040 typedef enum
8041 {
8042 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8043 _MM_MANT_SIGN_zero, /* sign = 0 */
8044 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8045 } _MM_MANTISSA_SIGN_ENUM;
8046
8047 #ifdef __OPTIMIZE__
/* Call __builtin_ia32_getexpss128_round with __A and __B (both as __v4sf)
   and __R, returning the result as __m128.  */
8048 extern __inline __m128
8049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8050 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8051 {
8052 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8053 (__v4sf) __B,
8054 __R);
8055 }
8056
/* Call __builtin_ia32_getexpsd128_round with __A and __B (both as __v2df)
   and __R, returning the result as __m128d.  */
8057 extern __inline __m128d
8058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8059 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8060 {
8061 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8062 (__v2df) __B,
8063 __R);
8064 }
8065
/* Unmasked form: call __builtin_ia32_getexpps512_mask on __A with an
   all-ones mask, _mm512_undefined_ps () as the second operand, and __R
   forwarded as the last operand.  */
8066 extern __inline __m512
8067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8068 _mm512_getexp_round_ps (__m512 __A, const int __R)
8069 {
8070 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8071 (__v16sf)
8072 _mm512_undefined_ps (),
8073 (__mmask16) -1, __R);
8074 }
8075
/* Masked form: same builtin, with __W as the second operand and __U as
   the mask.  */
8076 extern __inline __m512
8077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8078 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8079 const int __R)
8080 {
8081 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8082 (__v16sf) __W,
8083 (__mmask16) __U, __R);
8084 }
8085
/* Zero-source form: same builtin, with _mm512_setzero_ps () as the second
   operand and __U as the mask.  */
8086 extern __inline __m512
8087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8088 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8089 {
8090 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8091 (__v16sf)
8092 _mm512_setzero_ps (),
8093 (__mmask16) __U, __R);
8094 }
8095
/* Unmasked form: call __builtin_ia32_getexppd512_mask on __A with an
   all-ones mask, _mm512_undefined_pd () as the second operand, and __R
   forwarded as the last operand.  */
8096 extern __inline __m512d
8097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8098 _mm512_getexp_round_pd (__m512d __A, const int __R)
8099 {
8100 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8101 (__v8df)
8102 _mm512_undefined_pd (),
8103 (__mmask8) -1, __R);
8104 }
8105
/* Masked form: same builtin, with __W as the second operand and __U as
   the mask.  */
8106 extern __inline __m512d
8107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8108 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8109 const int __R)
8110 {
8111 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8112 (__v8df) __W,
8113 (__mmask8) __U, __R);
8114 }
8115
/* Zero-source form: same builtin, with _mm512_setzero_pd () as the second
   operand and __U as the mask.  */
8116 extern __inline __m512d
8117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8118 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8119 {
8120 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8121 (__v8df)
8122 _mm512_setzero_pd (),
8123 (__mmask8) __U, __R);
8124 }
8125
/* Unmasked form: call __builtin_ia32_getmantpd512_mask on __A with the
   immediate (__C << 2) | __B, an all-ones mask, and __R.  The third
   operand, _mm512_undefined_pd (), is passed without the (__v8df) cast
   that the masked siblings apply to their third operand.  */
8126 extern __inline __m512d
8127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8128 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8129 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8130 {
8131 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8132 (__C << 2) | __B,
8133 _mm512_undefined_pd (),
8134 (__mmask8) -1, __R);
8135 }
8136
/* Masked form: same builtin and immediate, with __W as the third operand
   and __U as the mask.  */
8137 extern __inline __m512d
8138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8139 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8140 _MM_MANTISSA_NORM_ENUM __B,
8141 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8142 {
8143 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8144 (__C << 2) | __B,
8145 (__v8df) __W, __U,
8146 __R);
8147 }
8148
/* Zero-source form: same builtin and immediate, with _mm512_setzero_pd ()
   as the third operand and __U as the mask.  */
8149 extern __inline __m512d
8150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8151 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8152 _MM_MANTISSA_NORM_ENUM __B,
8153 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8154 {
8155 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8156 (__C << 2) | __B,
8157 (__v8df)
8158 _mm512_setzero_pd (),
8159 __U, __R);
8160 }
8161
/* Unmasked form: call __builtin_ia32_getmantps512_mask on __A with the
   immediate (__C << 2) | __B, an all-ones mask, and __R.  The third
   operand, _mm512_undefined_ps (), is passed without the (__v16sf) cast
   that the masked siblings apply to their third operand.  */
8162 extern __inline __m512
8163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8164 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8165 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8166 {
8167 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8168 (__C << 2) | __B,
8169 _mm512_undefined_ps (),
8170 (__mmask16) -1, __R);
8171 }
8172
/* Masked form: same builtin and immediate, with __W as the third operand
   and __U as the mask.  */
8173 extern __inline __m512
8174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8175 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8176 _MM_MANTISSA_NORM_ENUM __B,
8177 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8178 {
8179 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8180 (__C << 2) | __B,
8181 (__v16sf) __W, __U,
8182 __R);
8183 }
8184
/* Zero-source form: same builtin and immediate, with _mm512_setzero_ps ()
   as the third operand and __U as the mask.  */
8185 extern __inline __m512
8186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8187 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8188 _MM_MANTISSA_NORM_ENUM __B,
8189 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8190 {
8191 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8192 (__C << 2) | __B,
8193 (__v16sf)
8194 _mm512_setzero_ps (),
8195 __U, __R);
8196 }
8197
/* Call __builtin_ia32_getmantsd_round with __A and __B (both as __v2df),
   the immediate (__D << 2) | __C, and __R.  */
8198 extern __inline __m128d
8199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8200 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8201 _MM_MANTISSA_NORM_ENUM __C,
8202 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8203 {
8204 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8205 (__v2df) __B,
8206 (__D << 2) | __C,
8207 __R);
8208 }
8209
/* Call __builtin_ia32_getmantss_round with __A and __B (both as __v4sf),
   the immediate (__D << 2) | __C, and __R.  */
8210 extern __inline __m128
8211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8212 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8213 _MM_MANTISSA_NORM_ENUM __C,
8214 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8215 {
8216 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8217 (__v4sf) __B,
8218 (__D << 2) | __C,
8219 __R);
8220 }
8221
8221
8222 #else
/* Macro forms of the getmant intrinsics, used when __OPTIMIZE__ is not
   defined.  Each builds the immediate as (int)(((sign)<<2) | (norm)),
   casts its vector arguments to the builtin's operand type, and forwards
   R unchanged.  The unmasked forms use an all-ones mask with an undefined
   vector; the maskz forms use a zero vector.  */
8223 #define _mm512_getmant_round_pd(X, B, C, R) \
8224 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8225 (int)(((C)<<2) | (B)), \
8226 (__v8df)(__m512d)_mm512_undefined_pd(), \
8227 (__mmask8)-1,\
8228 (R)))
8229
8230 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8231 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8232 (int)(((C)<<2) | (B)), \
8233 (__v8df)(__m512d)(W), \
8234 (__mmask8)(U),\
8235 (R)))
8236
8237 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8238 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8239 (int)(((C)<<2) | (B)), \
8240 (__v8df)(__m512d)_mm512_setzero_pd(), \
8241 (__mmask8)(U),\
8242 (R)))
8243 #define _mm512_getmant_round_ps(X, B, C, R) \
8244 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8245 (int)(((C)<<2) | (B)), \
8246 (__v16sf)(__m512)_mm512_undefined_ps(), \
8247 (__mmask16)-1,\
8248 (R)))
8249
8250 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8251 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8252 (int)(((C)<<2) | (B)), \
8253 (__v16sf)(__m512)(W), \
8254 (__mmask16)(U),\
8255 (R)))
8256
8257 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8258 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8259 (int)(((C)<<2) | (B)), \
8260 (__v16sf)(__m512)_mm512_setzero_ps(), \
8261 (__mmask16)(U),\
8262 (R)))
8263 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8264 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8265 (__v2df)(__m128d)(Y), \
8266 (int)(((D)<<2) | (C)), \
8267 (R)))
8268
8269 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8270 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8271 (__v4sf)(__m128)(Y), \
8272 (int)(((D)<<2) | (C)), \
8273 (R)))
8274
/* Macro forms of the getexp intrinsics, used when __OPTIMIZE__ is not
   defined.  Vector arguments are cast to the builtin's operand type and
   R is forwarded as written (it is not parenthesised in the
   expansion).  */
8275 #define _mm_getexp_round_ss(A, B, R) \
8276 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8277
8278 #define _mm_getexp_round_sd(A, B, R) \
8279 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8280
8281 #define _mm512_getexp_round_ps(A, R) \
8282 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8283 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
8284
8285 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8286 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8287 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8288
8289 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8290 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8291 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8292
8293 #define _mm512_getexp_round_pd(A, R) \
8294 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8295 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
8296
8297 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8298 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8299 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8300
8301 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8302 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8303 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8304 #endif
8305
8306 #ifdef __OPTIMIZE__
/* Unmasked form: call __builtin_ia32_rndscaleps_mask on __A with
   immediate __imm, _mm512_undefined_ps () as the third operand, -1 as the
   mask, and __R as the last operand.  */
8307 extern __inline __m512
8308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8309 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8310 {
8311 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8312 (__v16sf)
8313 _mm512_undefined_ps (),
8314 -1, __R);
8315 }
8316
/* Masked form.  Parameter roles: __C is the first (source) operand, __A
   is the third operand, and __B is the mask.  */
8317 extern __inline __m512
8318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8319 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8320 const int __imm, const int __R)
8321 {
8322 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8323 (__v16sf) __A,
8324 (__mmask16) __B, __R);
8325 }
8326
/* Zero-source form.  Parameter roles: __B is the first (source) operand,
   _mm512_setzero_ps () is the third operand, and __A is the mask.  */
8327 extern __inline __m512
8328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8329 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8330 const int __imm, const int __R)
8331 {
8332 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8333 __imm,
8334 (__v16sf)
8335 _mm512_setzero_ps (),
8336 (__mmask16) __A, __R);
8337 }
8338
/* Unmasked form: call __builtin_ia32_rndscalepd_mask on __A with
   immediate __imm, _mm512_undefined_pd () as the third operand, -1 as the
   mask, and __R as the last operand.  */
8339 extern __inline __m512d
8340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8341 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8342 {
8343 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8344 (__v8df)
8345 _mm512_undefined_pd (),
8346 -1, __R);
8347 }
8348
/* Masked form.  Parameter roles: __C is the first (source) operand, __A
   is the third operand, and __B is the mask.  */
8349 extern __inline __m512d
8350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8351 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8352 __m512d __C, const int __imm, const int __R)
8353 {
8354 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8355 (__v8df) __A,
8356 (__mmask8) __B, __R);
8357 }
8358
/* Zero-source form.  Parameter roles: __B is the first (source) operand,
   _mm512_setzero_pd () is the third operand, and __A is the mask.  */
8359 extern __inline __m512d
8360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8361 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8362 const int __imm, const int __R)
8363 {
8364 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8365 __imm,
8366 (__v8df)
8367 _mm512_setzero_pd (),
8368 (__mmask8) __A, __R);
8369 }
8370
/* Call __builtin_ia32_rndscaless_round with __A and __B (both as __v4sf),
   the immediate __imm, and __R.  */
8371 extern __inline __m128
8372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8373 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8374 {
8375 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8376 (__v4sf) __B, __imm, __R);
8377 }
8378
/* Call __builtin_ia32_rndscalesd_round with __A and __B (both as __v2df),
   the immediate __imm, and __R.  */
8379 extern __inline __m128d
8380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8381 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8382 const int __R)
8383 {
8384 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8385 (__v2df) __B, __imm, __R);
8386 }
8387
8388 #else
/* Macro forms of the roundscale_round intrinsics, used when __OPTIMIZE__
   is not defined.  Parameter order follows the inline versions: for the
   _mask_ forms A is the third builtin operand, B the mask, C the source
   and D the immediate; for the _maskz_ forms A is the mask, B the source
   and C the immediate.  R is forwarded as written.  */
8389 #define _mm512_roundscale_round_ps(A, B, R) \
8390 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
8391 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
8392 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
8393 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8394 (int)(D), \
8395 (__v16sf)(__m512)(A), \
8396 (__mmask16)(B), R))
8397 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
8398 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8399 (int)(C), \
8400 (__v16sf)_mm512_setzero_ps(),\
8401 (__mmask16)(A), R))
8402 #define _mm512_roundscale_round_pd(A, B, R) \
8403 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
8404 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
8405 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
8406 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8407 (int)(D), \
8408 (__v8df)(__m512d)(A), \
8409 (__mmask8)(B), R))
8410 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
8411 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8412 (int)(C), \
8413 (__v8df)_mm512_setzero_pd(),\
8414 (__mmask8)(A), R))
8415 #define _mm_roundscale_round_ss(A, B, C, R) \
8416 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
8417 (__v4sf)(__m128)(B), (int)(C), R))
8418 #define _mm_roundscale_round_sd(A, B, C, R) \
8419 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
8420 (__v2df)(__m128d)(B), (int)(C), R))
8421 #endif
8422
/* Call __builtin_ia32_rndscaleps_mask on __A with immediate
   _MM_FROUND_FLOOR, mask -1 and _MM_FROUND_CUR_DIRECTION as the last
   operand.  __A itself is reused as the third operand.  */
8423 extern __inline __m512
8424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8425 _mm512_floor_ps (__m512 __A)
8426 {
8427 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8428 _MM_FROUND_FLOOR,
8429 (__v16sf) __A, -1,
8430 _MM_FROUND_CUR_DIRECTION);
8431 }
8432
/* Double-precision counterpart of _mm512_floor_ps, using
   __builtin_ia32_rndscalepd_mask with _MM_FROUND_FLOOR.  */
8433 extern __inline __m512d
8434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8435 _mm512_floor_pd (__m512d __A)
8436 {
8437 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8438 _MM_FROUND_FLOOR,
8439 (__v8df) __A, -1,
8440 _MM_FROUND_CUR_DIRECTION);
8441 }
8442
/* Same call shape as _mm512_floor_ps with immediate _MM_FROUND_CEIL.  */
8443 extern __inline __m512
8444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8445 _mm512_ceil_ps (__m512 __A)
8446 {
8447 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8448 _MM_FROUND_CEIL,
8449 (__v16sf) __A, -1,
8450 _MM_FROUND_CUR_DIRECTION);
8451 }
8452
/* Same call shape as _mm512_floor_pd with immediate _MM_FROUND_CEIL.  */
8453 extern __inline __m512d
8454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8455 _mm512_ceil_pd (__m512d __A)
8456 {
8457 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8458 _MM_FROUND_CEIL,
8459 (__v8df) __A, -1,
8460 _MM_FROUND_CUR_DIRECTION);
8461 }
8462
/* Masked floor: call __builtin_ia32_rndscaleps_mask on __A with immediate
   _MM_FROUND_FLOOR, __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION as the last operand.  */
8463 extern __inline __m512
8464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8465 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8466 {
8467 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8468 _MM_FROUND_FLOOR,
8469 (__v16sf) __W, __U,
8470 _MM_FROUND_CUR_DIRECTION);
8471 }
8472
/* Double-precision counterpart of _mm512_mask_floor_ps, using
   __builtin_ia32_rndscalepd_mask.  */
8473 extern __inline __m512d
8474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8475 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8476 {
8477 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8478 _MM_FROUND_FLOOR,
8479 (__v8df) __W, __U,
8480 _MM_FROUND_CUR_DIRECTION);
8481 }
8482
/* Same call shape as _mm512_mask_floor_ps with immediate
   _MM_FROUND_CEIL.  */
8483 extern __inline __m512
8484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8485 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8486 {
8487 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8488 _MM_FROUND_CEIL,
8489 (__v16sf) __W, __U,
8490 _MM_FROUND_CUR_DIRECTION);
8491 }
8492
/* Same call shape as _mm512_mask_floor_pd with immediate
   _MM_FROUND_CEIL.  */
8493 extern __inline __m512d
8494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8495 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8496 {
8497 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8498 _MM_FROUND_CEIL,
8499 (__v8df) __W, __U,
8500 _MM_FROUND_CUR_DIRECTION);
8501 }
8502
8503 #ifdef __OPTIMIZE__
/* Unmasked form: call __builtin_ia32_alignd512_mask on __A and __B (both
   as __v16si) with immediate __imm, _mm512_undefined_epi32 () as the
   fourth operand and an all-ones mask.  */
8504 extern __inline __m512i
8505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8506 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8507 {
8508 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8509 (__v16si) __B, __imm,
8510 (__v16si)
8511 _mm512_undefined_epi32 (),
8512 (__mmask16) -1);
8513 }
8514
/* Masked form: same builtin, with __W as the fourth operand and __U as
   the mask.  */
8515 extern __inline __m512i
8516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8517 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8518 __m512i __B, const int __imm)
8519 {
8520 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8521 (__v16si) __B, __imm,
8522 (__v16si) __W,
8523 (__mmask16) __U);
8524 }
8525
/* Zero-source form: same builtin, with _mm512_setzero_si512 () as the
   fourth operand and __U as the mask.  */
8526 extern __inline __m512i
8527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8528 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8529 const int __imm)
8530 {
8531 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8532 (__v16si) __B, __imm,
8533 (__v16si)
8534 _mm512_setzero_si512 (),
8535 (__mmask16) __U);
8536 }
8537
/* Unmasked form: call __builtin_ia32_alignq512_mask on __A and __B (both
   as __v8di) with immediate __imm and an all-ones 8-bit mask.  The fourth
   operand is _mm512_undefined_epi32 () reinterpreted as __v8di.  */
8538 extern __inline __m512i
8539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8540 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8541 {
8542 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8543 (__v8di) __B, __imm,
8544 (__v8di)
8545 _mm512_undefined_epi32 (),
8546 (__mmask8) -1);
8547 }
8548
/* Masked form: same builtin, with __W as the fourth operand and __U as
   the mask.  */
8549 extern __inline __m512i
8550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8551 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8552 __m512i __B, const int __imm)
8553 {
8554 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8555 (__v8di) __B, __imm,
8556 (__v8di) __W,
8557 (__mmask8) __U);
8558 }
8559
/* Zero-source form: same builtin, with _mm512_setzero_si512 () as the
   fourth operand and __U as the mask.  */
8560 extern __inline __m512i
8561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8562 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8563 const int __imm)
8564 {
8565 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8566 (__v8di) __B, __imm,
8567 (__v8di)
8568 _mm512_setzero_si512 (),
8569 (__mmask8) __U);
8570 }
8571 #else
/* Macro forms of the 32-bit align intrinsics, selected when __OPTIMIZE__
   is not defined.  Each expands to a single
   __builtin_ia32_alignd512_mask call.  */
#define _mm512_alignr_epi32(X, Y, C)                                    \
  ((__m512i) __builtin_ia32_alignd512_mask (                            \
     (__v16si) (__m512i) (X),                                           \
     (__v16si) (__m512i) (Y),                                           \
     (int) (C),                                                         \
     (__v16si) _mm512_undefined_epi32 (),                               \
     (__mmask16) -1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C)                         \
  ((__m512i) __builtin_ia32_alignd512_mask (                            \
     (__v16si) (__m512i) (X),                                           \
     (__v16si) (__m512i) (Y),                                           \
     (int) (C),                                                         \
     (__v16si) (__m512i) (W),                                           \
     (__mmask16) (U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C)                           \
  ((__m512i) __builtin_ia32_alignd512_mask (                            \
     (__v16si) (__m512i) (X),                                           \
     (__v16si) (__m512i) (Y),                                           \
     (int) (C),                                                         \
     (__v16si) _mm512_setzero_si512 (),                                 \
     (__mmask16) (U)))
8586
/* Macro forms of the 64-bit align intrinsics, selected when __OPTIMIZE__
   is not defined.  Each expands to a single
   __builtin_ia32_alignq512_mask call.  */
#define _mm512_alignr_epi64(X, Y, C)                                    \
  ((__m512i) __builtin_ia32_alignq512_mask (                            \
     (__v8di) (__m512i) (X),                                            \
     (__v8di) (__m512i) (Y),                                            \
     (int) (C),                                                         \
     (__v8di) _mm512_undefined_epi32 (),                                \
     (__mmask8) -1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C)                         \
  ((__m512i) __builtin_ia32_alignq512_mask (                            \
     (__v8di) (__m512i) (X),                                            \
     (__v8di) (__m512i) (Y),                                            \
     (int) (C),                                                         \
     (__v8di) (__m512i) (W),                                            \
     (__mmask8) (U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C)                           \
  ((__m512i) __builtin_ia32_alignq512_mask (                            \
     (__v8di) (__m512i) (X),                                            \
     (__v8di) (__m512i) (Y),                                            \
     (int) (C),                                                         \
     (__v8di) _mm512_setzero_si512 (),                                  \
     (__mmask8) (U)))
8600 #endif
8601
8602 extern __inline __mmask16
8603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8604 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8605 {
8606 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8607 (__v16si) __B,
8608 (__mmask16) -1);
8609 }
8610
8611 extern __inline __mmask16
8612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8613 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8614 {
8615 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8616 (__v16si) __B, __U);
8617 }
8618
8619 extern __inline __mmask8
8620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8621 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8622 {
8623 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8624 (__v8di) __B, __U);
8625 }
8626
8627 extern __inline __mmask8
8628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8629 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8630 {
8631 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8632 (__v8di) __B,
8633 (__mmask8) -1);
8634 }
8635
8636 extern __inline __mmask16
8637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8639 {
8640 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8641 (__v16si) __B,
8642 (__mmask16) -1);
8643 }
8644
8645 extern __inline __mmask16
8646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8647 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8648 {
8649 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8650 (__v16si) __B, __U);
8651 }
8652
8653 extern __inline __mmask8
8654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8655 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8656 {
8657 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8658 (__v8di) __B, __U);
8659 }
8660
8661 extern __inline __mmask8
8662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8663 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8664 {
8665 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8666 (__v8di) __B,
8667 (__mmask8) -1);
8668 }
8669
8670 extern __inline __mmask16
8671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8673 {
8674 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8675 (__v16si) __Y, 5,
8676 (__mmask16) -1);
8677 }
8678
8679 extern __inline __mmask16
8680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8682 {
8683 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8684 (__v16si) __Y, 5,
8685 (__mmask16) __M);
8686 }
8687
8688 extern __inline __mmask16
8689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8690 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8691 {
8692 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8693 (__v16si) __Y, 5,
8694 (__mmask16) __M);
8695 }
8696
8697 extern __inline __mmask16
8698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8699 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8700 {
8701 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8702 (__v16si) __Y, 5,
8703 (__mmask16) -1);
8704 }
8705
8706 extern __inline __mmask8
8707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8708 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8709 {
8710 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8711 (__v8di) __Y, 5,
8712 (__mmask8) __M);
8713 }
8714
8715 extern __inline __mmask8
8716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8717 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8718 {
8719 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8720 (__v8di) __Y, 5,
8721 (__mmask8) -1);
8722 }
8723
8724 extern __inline __mmask8
8725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8726 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8727 {
8728 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8729 (__v8di) __Y, 5,
8730 (__mmask8) __M);
8731 }
8732
8733 extern __inline __mmask8
8734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8735 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8736 {
8737 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8738 (__v8di) __Y, 5,
8739 (__mmask8) -1);
8740 }
8741
8742 extern __inline __mmask16
8743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8744 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8745 {
8746 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8747 (__v16si) __Y, 2,
8748 (__mmask16) __M);
8749 }
8750
8751 extern __inline __mmask16
8752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8753 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8754 {
8755 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8756 (__v16si) __Y, 2,
8757 (__mmask16) -1);
8758 }
8759
8760 extern __inline __mmask16
8761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8762 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8763 {
8764 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8765 (__v16si) __Y, 2,
8766 (__mmask16) __M);
8767 }
8768
8769 extern __inline __mmask16
8770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8771 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8772 {
8773 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8774 (__v16si) __Y, 2,
8775 (__mmask16) -1);
8776 }
8777
8778 extern __inline __mmask8
8779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8781 {
8782 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8783 (__v8di) __Y, 2,
8784 (__mmask8) __M);
8785 }
8786
8787 extern __inline __mmask8
8788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8789 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8790 {
8791 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8792 (__v8di) __Y, 2,
8793 (__mmask8) -1);
8794 }
8795
8796 extern __inline __mmask8
8797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8798 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8799 {
8800 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8801 (__v8di) __Y, 2,
8802 (__mmask8) __M);
8803 }
8804
8805 extern __inline __mmask8
8806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8807 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8808 {
8809 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8810 (__v8di) __Y, 2,
8811 (__mmask8) -1);
8812 }
8813
8814 extern __inline __mmask16
8815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8816 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8817 {
8818 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8819 (__v16si) __Y, 1,
8820 (__mmask16) __M);
8821 }
8822
8823 extern __inline __mmask16
8824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8825 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8826 {
8827 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8828 (__v16si) __Y, 1,
8829 (__mmask16) -1);
8830 }
8831
8832 extern __inline __mmask16
8833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8834 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8835 {
8836 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8837 (__v16si) __Y, 1,
8838 (__mmask16) __M);
8839 }
8840
8841 extern __inline __mmask16
8842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8843 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8844 {
8845 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8846 (__v16si) __Y, 1,
8847 (__mmask16) -1);
8848 }
8849
8850 extern __inline __mmask8
8851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8852 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8853 {
8854 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8855 (__v8di) __Y, 1,
8856 (__mmask8) __M);
8857 }
8858
8859 extern __inline __mmask8
8860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8861 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8862 {
8863 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8864 (__v8di) __Y, 1,
8865 (__mmask8) -1);
8866 }
8867
8868 extern __inline __mmask8
8869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8870 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8871 {
8872 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8873 (__v8di) __Y, 1,
8874 (__mmask8) __M);
8875 }
8876
8877 extern __inline __mmask8
8878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8879 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8880 {
8881 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8882 (__v8di) __Y, 1,
8883 (__mmask8) -1);
8884 }
8885
8886 extern __inline __mmask16
8887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8888 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8889 {
8890 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8891 (__v16si) __Y, 4,
8892 (__mmask16) -1);
8893 }
8894
8895 extern __inline __mmask16
8896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8897 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8898 {
8899 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8900 (__v16si) __Y, 4,
8901 (__mmask16) __M);
8902 }
8903
8904 extern __inline __mmask16
8905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8906 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8907 {
8908 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8909 (__v16si) __Y, 4,
8910 (__mmask16) __M);
8911 }
8912
8913 extern __inline __mmask16
8914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8915 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8916 {
8917 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8918 (__v16si) __Y, 4,
8919 (__mmask16) -1);
8920 }
8921
8922 extern __inline __mmask8
8923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8924 _mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8925 {
8926 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8927 (__v8di) __Y, 4,
8928 (__mmask8) __M);
8929 }
8930
8931 extern __inline __mmask8
8932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8933 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8934 {
8935 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8936 (__v8di) __Y, 4,
8937 (__mmask8) -1);
8938 }
8939
8940 extern __inline __mmask8
8941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8942 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8943 {
8944 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8945 (__v8di) __Y, 4,
8946 (__mmask8) __M);
8947 }
8948
8949 extern __inline __mmask8
8950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8951 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8952 {
8953 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8954 (__v8di) __Y, 4,
8955 (__mmask8) -1);
8956 }
8957
/* Predicate immediates for the integer compare intrinsics.  NLT/GE share
   the value 0x5 and NLE/GT share 0x6.  */
#define _MM_CMPINT_EQ      0x0
#define _MM_CMPINT_LT      0x1
#define _MM_CMPINT_LE      0x2
#define _MM_CMPINT_UNUSED  0x3
#define _MM_CMPINT_NE      0x4
#define _MM_CMPINT_NLT     0x5
#define _MM_CMPINT_GE      0x5
#define _MM_CMPINT_NLE     0x6
#define _MM_CMPINT_GT      0x6
8967
8968 #ifdef __OPTIMIZE__
8969 extern __inline __mmask8
8970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8971 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8972 {
8973 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8974 (__v8di) __Y, __P,
8975 (__mmask8) -1);
8976 }
8977
8978 extern __inline __mmask16
8979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8980 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8981 {
8982 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8983 (__v16si) __Y, __P,
8984 (__mmask16) -1);
8985 }
8986
8987 extern __inline __mmask8
8988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8989 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8990 {
8991 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8992 (__v8di) __Y, __P,
8993 (__mmask8) -1);
8994 }
8995
8996 extern __inline __mmask16
8997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8998 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
8999 {
9000 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9001 (__v16si) __Y, __P,
9002 (__mmask16) -1);
9003 }
9004
9005 extern __inline __mmask8
9006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9007 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9008 const int __R)
9009 {
9010 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9011 (__v8df) __Y, __P,
9012 (__mmask8) -1, __R);
9013 }
9014
9015 extern __inline __mmask16
9016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9017 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9018 {
9019 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9020 (__v16sf) __Y, __P,
9021 (__mmask16) -1, __R);
9022 }
9023
9024 extern __inline __mmask8
9025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9026 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9027 const int __P)
9028 {
9029 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9030 (__v8di) __Y, __P,
9031 (__mmask8) __U);
9032 }
9033
9034 extern __inline __mmask16
9035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9036 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9037 const int __P)
9038 {
9039 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9040 (__v16si) __Y, __P,
9041 (__mmask16) __U);
9042 }
9043
9044 extern __inline __mmask8
9045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9046 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9047 const int __P)
9048 {
9049 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9050 (__v8di) __Y, __P,
9051 (__mmask8) __U);
9052 }
9053
9054 extern __inline __mmask16
9055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9056 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9057 const int __P)
9058 {
9059 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9060 (__v16si) __Y, __P,
9061 (__mmask16) __U);
9062 }
9063
9064 extern __inline __mmask8
9065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9066 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9067 const int __P, const int __R)
9068 {
9069 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9070 (__v8df) __Y, __P,
9071 (__mmask8) __U, __R);
9072 }
9073
9074 extern __inline __mmask16
9075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9076 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9077 const int __P, const int __R)
9078 {
9079 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9080 (__v16sf) __Y, __P,
9081 (__mmask16) __U, __R);
9082 }
9083
9084 extern __inline __mmask8
9085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9086 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9087 {
9088 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9089 (__v2df) __Y, __P,
9090 (__mmask8) -1, __R);
9091 }
9092
9093 extern __inline __mmask8
9094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9095 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9096 const int __P, const int __R)
9097 {
9098 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9099 (__v2df) __Y, __P,
9100 (__mmask8) __M, __R);
9101 }
9102
9103 extern __inline __mmask8
9104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9105 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9106 {
9107 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9108 (__v4sf) __Y, __P,
9109 (__mmask8) -1, __R);
9110 }
9111
9112 extern __inline __mmask8
9113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9114 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9115 const int __P, const int __R)
9116 {
9117 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9118 (__v4sf) __Y, __P,
9119 (__mmask8) __M, __R);
9120 }
9121
9122 #else
/* Macro forms of the unmasked generic integer compares, selected when
   __OPTIMIZE__ is not defined.  Each passes an all-ones mask.  */
#define _mm512_cmp_epi64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X),      \
                                           (__v8di) (__m512i) (Y),      \
                                           (int) (P),                   \
                                           (__mmask8) -1))

#define _mm512_cmp_epi32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X),    \
                                            (__v16si) (__m512i) (Y),    \
                                            (int) (P),                  \
                                            (__mmask16) -1))

#define _mm512_cmp_epu64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X),     \
                                            (__v8di) (__m512i) (Y),     \
                                            (int) (P),                  \
                                            (__mmask8) -1))

#define _mm512_cmp_epu32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X),   \
                                             (__v16si) (__m512i) (Y),   \
                                             (int) (P),                 \
                                             (__mmask16) -1))
9142
/* Macro forms of the unmasked packed floating-point compares, selected
   when __OPTIMIZE__ is not defined.  R is passed as the final builtin
   operand.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R)                            \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X),     \
                                            (__v8df) (__m512d) (Y),     \
                                            (int) (P),                  \
                                            (__mmask8) -1, (R)))

#define _mm512_cmp_round_ps_mask(X, Y, P, R)                            \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X),    \
                                             (__v16sf) (__m512) (Y),    \
                                             (int) (P),                 \
                                             (__mmask16) -1, (R)))
9152
/* Macro forms of the masked generic integer compares, selected when
   __OPTIMIZE__ is not defined.

   M is parenthesised before the mask cast.  A cast binds tighter than
   any binary operator, so with the former "(__mmask8)M" an argument
   such as "m >> 8" had only "m" narrowed before the shift, yielding a
   different mask from the inline-function form.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X),      \
                                           (__v8di) (__m512i) (Y),      \
                                           (int) (P),                   \
                                           (__mmask8) (M)))

#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X),    \
                                            (__v16si) (__m512i) (Y),    \
                                            (int) (P),                  \
                                            (__mmask16) (M)))

#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X),     \
                                            (__v8di) (__m512i) (Y),     \
                                            (int) (P),                  \
                                            (__mmask8) (M)))

#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X),   \
                                             (__v16si) (__m512i) (Y),   \
                                             (int) (P),                 \
                                             (__mmask16) (M)))
9172
/* Macro forms of the masked packed floating-point compares, selected
   when __OPTIMIZE__ is not defined.  R is passed as the final builtin
   operand.

   M is parenthesised before the mask cast.  A cast binds tighter than
   any binary operator, so with the former "(__mmask8)M" an argument
   such as "m >> 8" had only "m" narrowed before the shift, yielding a
   different mask from the inline-function form.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                    \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X),     \
                                            (__v8df) (__m512d) (Y),     \
                                            (int) (P),                  \
                                            (__mmask8) (M), (R)))

#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                    \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X),    \
                                             (__v16sf) (__m512) (Y),    \
                                             (int) (P),                 \
                                             (__mmask16) (M), (R)))
9182
/* Macro forms of the scalar double/float compares, selected when
   __OPTIMIZE__ is not defined.  The masked variants hand M to the
   builtin without an explicit mask cast.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R)                               \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),        \
                                         (__v2df) (__m128d) (Y),        \
                                         (int) (P),                     \
                                         (__mmask8) -1, (R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),        \
                                         (__v2df) (__m128d) (Y),        \
                                         (int) (P),                     \
                                         (M), (R)))

#define _mm_cmp_round_ss_mask(X, Y, P, R)                               \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),         \
                                         (__v4sf) (__m128) (Y),         \
                                         (int) (P),                     \
                                         (__mmask8) -1, (R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),         \
                                         (__v4sf) (__m128) (Y),         \
                                         (int) (P),                     \
                                         (M), (R)))
9202 #endif
9203
9204 #ifdef __OPTIMIZE__
9205 extern __inline __m512
9206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9207 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
9208 {
9209 __m512 __v1_old = _mm512_undefined_ps ();
9210 __mmask16 __mask = 0xFFFF;
9211
9212 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9213 __addr,
9214 (__v16si) __index,
9215 __mask, __scale);
9216 }
9217
9218 extern __inline __m512
9219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9220 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
9221 __m512i __index, void const *__addr, int __scale)
9222 {
9223 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9224 __addr,
9225 (__v16si) __index,
9226 __mask, __scale);
9227 }
9228
9229 extern __inline __m512d
9230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9231 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
9232 {
9233 __m512d __v1_old = _mm512_undefined_pd ();
9234 __mmask8 __mask = 0xFF;
9235
9236 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9237 __addr,
9238 (__v8si) __index, __mask,
9239 __scale);
9240 }
9241
9242 extern __inline __m512d
9243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9244 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9245 __m256i __index, void const *__addr, int __scale)
9246 {
9247 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9248 __addr,
9249 (__v8si) __index,
9250 __mask, __scale);
9251 }
9252
9253 extern __inline __m256
9254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9255 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
9256 {
9257 __m256 __v1_old = _mm256_undefined_ps ();
9258 __mmask8 __mask = 0xFF;
9259
9260 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9261 __addr,
9262 (__v8di) __index, __mask,
9263 __scale);
9264 }
9265
9266 extern __inline __m256
9267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9268 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9269 __m512i __index, void const *__addr, int __scale)
9270 {
9271 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9272 __addr,
9273 (__v8di) __index,
9274 __mask, __scale);
9275 }
9276
9277 extern __inline __m512d
9278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9279 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
9280 {
9281 __m512d __v1_old = _mm512_undefined_pd ();
9282 __mmask8 __mask = 0xFF;
9283
9284 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9285 __addr,
9286 (__v8di) __index, __mask,
9287 __scale);
9288 }
9289
9290 extern __inline __m512d
9291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9292 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9293 __m512i __index, void const *__addr, int __scale)
9294 {
9295 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9296 __addr,
9297 (__v8di) __index,
9298 __mask, __scale);
9299 }
9300
9301 extern __inline __m512i
9302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9303 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
9304 {
9305 __m512i __v1_old = _mm512_undefined_epi32 ();
9306 __mmask16 __mask = 0xFFFF;
9307
9308 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9309 __addr,
9310 (__v16si) __index,
9311 __mask, __scale);
9312 }
9313
9314 extern __inline __m512i
9315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9316 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9317 __m512i __index, void const *__addr, int __scale)
9318 {
9319 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9320 __addr,
9321 (__v16si) __index,
9322 __mask, __scale);
9323 }
9324
9325 extern __inline __m512i
9326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9327 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
9328 {
9329 __m512i __v1_old = _mm512_undefined_epi32 ();
9330 __mmask8 __mask = 0xFF;
9331
9332 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9333 __addr,
9334 (__v8si) __index, __mask,
9335 __scale);
9336 }
9337
9338 extern __inline __m512i
9339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9340 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9341 __m256i __index, void const *__addr,
9342 int __scale)
9343 {
9344 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9345 __addr,
9346 (__v8si) __index,
9347 __mask, __scale);
9348 }
9349
9350 extern __inline __m256i
9351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9352 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
9353 {
9354 __m256i __v1_old = _mm256_undefined_si256 ();
9355 __mmask8 __mask = 0xFF;
9356
9357 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9358 __addr,
9359 (__v8di) __index,
9360 __mask, __scale);
9361 }
9362
9363 extern __inline __m256i
9364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9365 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9366 __m512i __index, void const *__addr, int __scale)
9367 {
9368 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9369 __addr,
9370 (__v8di) __index,
9371 __mask, __scale);
9372 }
9373
9374 extern __inline __m512i
9375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
9377 {
9378 __m512i __v1_old = _mm512_undefined_epi32 ();
9379 __mmask8 __mask = 0xFF;
9380
9381 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9382 __addr,
9383 (__v8di) __index, __mask,
9384 __scale);
9385 }
9386
9387 extern __inline __m512i
9388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9389 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9390 __m512i __index, void const *__addr,
9391 int __scale)
9392 {
9393 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9394 __addr,
9395 (__v8di) __index,
9396 __mask, __scale);
9397 }
9398
9399 extern __inline void
9400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9401 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
9402 {
9403 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9404 (__v16si) __index, (__v16sf) __v1, __scale);
9405 }
9406
9407 extern __inline void
9408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9409 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
9410 __m512i __index, __m512 __v1, int __scale)
9411 {
9412 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9413 (__v16sf) __v1, __scale);
9414 }
9415
9416 extern __inline void
9417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9418 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
9419 int __scale)
9420 {
9421 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9422 (__v8si) __index, (__v8df) __v1, __scale);
9423 }
9424
9425 extern __inline void
9426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9427 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
9428 __m256i __index, __m512d __v1, int __scale)
9429 {
9430 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9431 (__v8df) __v1, __scale);
9432 }
9433
9434 extern __inline void
9435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9436 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
9437 {
9438 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9439 (__v8di) __index, (__v8sf) __v1, __scale);
9440 }
9441
9442 extern __inline void
9443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9444 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
9445 __m512i __index, __m256 __v1, int __scale)
9446 {
9447 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9448 (__v8sf) __v1, __scale);
9449 }
9450
9451 extern __inline void
9452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9453 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
9454 int __scale)
9455 {
9456 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9457 (__v8di) __index, (__v8df) __v1, __scale);
9458 }
9459
9460 extern __inline void
9461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9462 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
9463 __m512i __index, __m512d __v1, int __scale)
9464 {
9465 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9466 (__v8df) __v1, __scale);
9467 }
9468
9469 extern __inline void
9470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9471 _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
9472 __m512i __v1, int __scale)
9473 {
9474 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9475 (__v16si) __index, (__v16si) __v1, __scale);
9476 }
9477
9478 extern __inline void
9479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9480 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
9481 __m512i __index, __m512i __v1, int __scale)
9482 {
9483 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9484 (__v16si) __v1, __scale);
9485 }
9486
9487 extern __inline void
9488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9489 _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
9490 __m512i __v1, int __scale)
9491 {
9492 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9493 (__v8si) __index, (__v8di) __v1, __scale);
9494 }
9495
9496 extern __inline void
9497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9498 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
9499 __m256i __index, __m512i __v1, int __scale)
9500 {
9501 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9502 (__v8di) __v1, __scale);
9503 }
9504
9505 extern __inline void
9506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9507 _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
9508 __m256i __v1, int __scale)
9509 {
9510 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9511 (__v8di) __index, (__v8si) __v1, __scale);
9512 }
9513
9514 extern __inline void
9515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9516 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
9517 __m512i __index, __m256i __v1, int __scale)
9518 {
9519 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9520 (__v8si) __v1, __scale);
9521 }
9522
9523 extern __inline void
9524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9525 _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
9526 __m512i __v1, int __scale)
9527 {
9528 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9529 (__v8di) __index, (__v8di) __v1, __scale);
9530 }
9531
9532 extern __inline void
9533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9534 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
9535 __m512i __index, __m512i __v1, int __scale)
9536 {
9537 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9538 (__v8di) __v1, __scale);
9539 }
9540 #else
/* Macro form of _mm512_i32gather_ps.  Each argument is parenthesized so the
   casts bind to the whole argument expression (e.g. ADDR written as
   "base + off"), and the expansion is parenthesized as one expression.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf) _mm512_undefined_ps (), \
                                          (void const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16) 0xFFFF, (int) (SCALE)))
9546
/* Macro form of _mm512_mask_i32gather_ps.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16) (MASK), (int) (SCALE)))
9552
/* Macro form of _mm512_i32gather_pd.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9558
/* Macro form of _mm512_mask_i32gather_pd.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9564
/* Macro form of _mm512_i64gather_ps.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9570
/* Macro form of _mm512_mask_i64gather_ps.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9576
/* Macro form of _mm512_i64gather_pd.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9582
/* Macro form of _mm512_mask_i64gather_pd.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9588
/* Macro form of _mm512_i32gather_epi32.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si) _mm512_undefined_epi32 (), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) 0xFFFF, (int) (SCALE)))
9594
/* Macro form of _mm512_mask_i32gather_epi32.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) (MASK), (int) (SCALE)))
9600
/* Macro form of _mm512_i32gather_epi64.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di) _mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9606
/* Macro form of _mm512_mask_i32gather_epi64.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9612
/* Macro form of _mm512_i64gather_epi32.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si) _mm256_undefined_si256 (), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) 0xFF, (int) (SCALE)))
9618
/* Macro form of _mm512_mask_i64gather_epi32.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) (MASK), (int) (SCALE)))
9624
/* Macro form of _mm512_i64gather_epi64.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di) _mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9630
/* Macro form of _mm512_mask_i64gather_epi64.  Arguments and the expansion are
   parenthesized so every cast applies to the complete argument expression.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9636
/* Macro form of _mm512_i32scatter_ps.  Each argument is parenthesized so the
   casts bind to the whole argument expression rather than its first operand.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) 0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9641
/* Macro form of _mm512_mask_i32scatter_ps.  Each argument is parenthesized so
   the casts bind to the whole argument expression.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9646
/* Macro form of _mm512_i32scatter_pd.  Each argument is parenthesized so the
   casts bind to the whole argument expression.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9651
/* Macro form of _mm512_mask_i32scatter_pd.  Each argument is parenthesized so
   the casts bind to the whole argument expression.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9656
/* Macro form of _mm512_i64scatter_ps.  Each argument is parenthesized so the
   casts bind to the whole argument expression.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9661
/* Macro form of _mm512_mask_i64scatter_ps.  MASK is cast to __mmask8, the
   mask type used by the inline-function form of this intrinsic and by the
   unmasked macro (0xFF), instead of the previous __mmask16.  Each argument
   is parenthesized so the casts bind to the whole argument expression.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9666
/* Macro form of _mm512_i64scatter_pd.  Each argument is parenthesized so the
   casts bind to the whole argument expression.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9671
/* Macro form of _mm512_mask_i64scatter_pd.  Each argument is parenthesized so
   the casts bind to the whole argument expression.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9676
/* Macro form of _mm512_i32scatter_epi32.  Each argument is parenthesized so
   the casts bind to the whole argument expression.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) 0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9681
/* Macro form of _mm512_mask_i32scatter_epi32.  Each argument is parenthesized
   so the casts bind to the whole argument expression.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9686
/* Macro form of _mm512_i32scatter_epi64.  Each argument is parenthesized so
   the casts bind to the whole argument expression.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9691
/* Macro form of _mm512_mask_i32scatter_epi64.  Each argument is parenthesized
   so the casts bind to the whole argument expression.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9696
/* Macro form of _mm512_i64scatter_epi32.  Each argument is parenthesized so
   the casts bind to the whole argument expression.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9701
/* Macro form of _mm512_mask_i64scatter_epi32.  Each argument is parenthesized
   so the casts bind to the whole argument expression.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9706
/* Macro form of _mm512_i64scatter_epi64.  Each argument is parenthesized so
   the casts bind to the whole argument expression.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9711
/* Macro form of _mm512_mask_i64scatter_epi64.  Each argument is parenthesized
   so the casts bind to the whole argument expression.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9716 #endif
9717
9718 extern __inline __m512d
9719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9720 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9721 {
9722 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9723 (__v8df) __W,
9724 (__mmask8) __U);
9725 }
9726
9727 extern __inline __m512d
9728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9729 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9730 {
9731 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9732 (__v8df)
9733 _mm512_setzero_pd (),
9734 (__mmask8) __U);
9735 }
9736
9737 extern __inline void
9738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9739 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9740 {
9741 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9742 (__mmask8) __U);
9743 }
9744
9745 extern __inline __m512
9746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9747 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9748 {
9749 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9750 (__v16sf) __W,
9751 (__mmask16) __U);
9752 }
9753
9754 extern __inline __m512
9755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9756 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9757 {
9758 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9759 (__v16sf)
9760 _mm512_setzero_ps (),
9761 (__mmask16) __U);
9762 }
9763
9764 extern __inline void
9765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9766 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9767 {
9768 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9769 (__mmask16) __U);
9770 }
9771
9772 extern __inline __m512i
9773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9774 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9775 {
9776 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9777 (__v8di) __W,
9778 (__mmask8) __U);
9779 }
9780
9781 extern __inline __m512i
9782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9783 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9784 {
9785 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9786 (__v8di)
9787 _mm512_setzero_si512 (),
9788 (__mmask8) __U);
9789 }
9790
9791 extern __inline void
9792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9793 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9794 {
9795 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9796 (__mmask8) __U);
9797 }
9798
9799 extern __inline __m512i
9800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9801 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9802 {
9803 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9804 (__v16si) __W,
9805 (__mmask16) __U);
9806 }
9807
9808 extern __inline __m512i
9809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9810 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9811 {
9812 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9813 (__v16si)
9814 _mm512_setzero_si512 (),
9815 (__mmask16) __U);
9816 }
9817
9818 extern __inline void
9819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9820 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9821 {
9822 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9823 (__mmask16) __U);
9824 }
9825
9826 extern __inline __m512d
9827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9828 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9829 {
9830 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9831 (__v8df) __W,
9832 (__mmask8) __U);
9833 }
9834
9835 extern __inline __m512d
9836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9837 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9838 {
9839 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9840 (__v8df)
9841 _mm512_setzero_pd (),
9842 (__mmask8) __U);
9843 }
9844
9845 extern __inline __m512d
9846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9847 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9848 {
9849 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9850 (__v8df) __W,
9851 (__mmask8) __U);
9852 }
9853
9854 extern __inline __m512d
9855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9856 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9857 {
9858 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9859 (__v8df)
9860 _mm512_setzero_pd (),
9861 (__mmask8) __U);
9862 }
9863
9864 extern __inline __m512
9865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9866 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9867 {
9868 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9869 (__v16sf) __W,
9870 (__mmask16) __U);
9871 }
9872
9873 extern __inline __m512
9874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9875 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9876 {
9877 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9878 (__v16sf)
9879 _mm512_setzero_ps (),
9880 (__mmask16) __U);
9881 }
9882
9883 extern __inline __m512
9884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9885 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9886 {
9887 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9888 (__v16sf) __W,
9889 (__mmask16) __U);
9890 }
9891
9892 extern __inline __m512
9893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9894 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9895 {
9896 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9897 (__v16sf)
9898 _mm512_setzero_ps (),
9899 (__mmask16) __U);
9900 }
9901
9902 extern __inline __m512i
9903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9904 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9905 {
9906 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9907 (__v8di) __W,
9908 (__mmask8) __U);
9909 }
9910
9911 extern __inline __m512i
9912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9913 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9914 {
9915 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9916 (__v8di)
9917 _mm512_setzero_si512 (),
9918 (__mmask8) __U);
9919 }
9920
9921 extern __inline __m512i
9922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9923 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9924 {
9925 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9926 (__v8di) __W,
9927 (__mmask8) __U);
9928 }
9929
9930 extern __inline __m512i
9931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9932 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9933 {
9934 return (__m512i)
9935 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9936 (__v8di)
9937 _mm512_setzero_si512 (),
9938 (__mmask8) __U);
9939 }
9940
9941 extern __inline __m512i
9942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9943 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9944 {
9945 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9946 (__v16si) __W,
9947 (__mmask16) __U);
9948 }
9949
9950 extern __inline __m512i
9951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9952 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9953 {
9954 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9955 (__v16si)
9956 _mm512_setzero_si512 (),
9957 (__mmask16) __U);
9958 }
9959
9960 extern __inline __m512i
9961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9962 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9963 {
9964 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9965 (__v16si) __W,
9966 (__mmask16) __U);
9967 }
9968
9969 extern __inline __m512i
9970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9971 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9972 {
9973 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9974 (__v16si)
9975 _mm512_setzero_si512
9976 (), (__mmask16) __U);
9977 }
9978
/* Mask arithmetic operations.  The _k*_mask16 spellings below are plain
   aliases that expand to the corresponding _mm512_k* names.  */
#define _kand_mask16   _mm512_kand
#define _kandn_mask16  _mm512_kandn
#define _knot_mask16   _mm512_knot
#define _kor_mask16    _mm512_kor
#define _kxnor_mask16  _mm512_kxnor
#define _kxor_mask16   _mm512_kxor
9986
9987 extern __inline __mmask16
9988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9989 _kadd_mask16 (__mmask16 __A, __mmask16 __B)
9990 {
9991 return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
9992 }
9993
9994 extern __inline unsigned int
9995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9996 _cvtmask16_u32 (__mmask16 __A)
9997 {
9998 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
9999 }
10000
10001 extern __inline __mmask16
10002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10003 _cvtu32_mask16 (unsigned int __A)
10004 {
10005 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10006 }
10007
10008 extern __inline __mmask16
10009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10010 _load_mask16 (__mmask16 *__A)
10011 {
10012 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10013 }
10014
10015 extern __inline void
10016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10017 _store_mask16 (__mmask16 *__A, __mmask16 __B)
10018 {
10019 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10020 }
10021
10022 extern __inline __mmask16
10023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10024 _mm512_kand (__mmask16 __A, __mmask16 __B)
10025 {
10026 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10027 }
10028
10029 extern __inline __mmask16
10030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10031 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10032 {
10033 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10034 (__mmask16) __B);
10035 }
10036
10037 extern __inline __mmask16
10038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10039 _mm512_kor (__mmask16 __A, __mmask16 __B)
10040 {
10041 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10042 }
10043
10044 extern __inline int
10045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10046 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10047 {
10048 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10049 (__mmask16) __B);
10050 }
10051
10052 extern __inline int
10053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10054 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10055 {
10056 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10057 (__mmask16) __B);
10058 }
10059
10060 extern __inline __mmask16
10061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10062 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10063 {
10064 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10065 }
10066
10067 extern __inline __mmask16
10068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10069 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10070 {
10071 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10072 }
10073
10074 extern __inline __mmask16
10075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10076 _mm512_knot (__mmask16 __A)
10077 {
10078 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10079 }
10080
10081 extern __inline __mmask16
10082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10083 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10084 {
10085 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10086 }
10087
10088 extern __inline __mmask16
10089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10090 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10091 {
10092 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10093 }
10094
10095 #ifdef __OPTIMIZE__
10096 extern __inline __m512i
10097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10098 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10099 const int __imm)
10100 {
10101 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10102 (__v4si) __D,
10103 __imm,
10104 (__v16si)
10105 _mm512_setzero_si512 (),
10106 __B);
10107 }
10108
10109 extern __inline __m512
10110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10111 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10112 const int __imm)
10113 {
10114 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10115 (__v4sf) __D,
10116 __imm,
10117 (__v16sf)
10118 _mm512_setzero_ps (), __B);
10119 }
10120
10121 extern __inline __m512i
10122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10123 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10124 __m128i __D, const int __imm)
10125 {
10126 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10127 (__v4si) __D,
10128 __imm,
10129 (__v16si) __A,
10130 __B);
10131 }
10132
10133 extern __inline __m512
10134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10135 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10136 __m128 __D, const int __imm)
10137 {
10138 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10139 (__v4sf) __D,
10140 __imm,
10141 (__v16sf) __A, __B);
10142 }
10143 #else
/* Macro form of _mm512_maskz_insertf32x4.  The mask A is cast to __mmask16,
   the mask type the inline-function form takes for this 16-element vector;
   the previous __mmask8 cast discarded the upper eight mask bits.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(A)))
10148
/* Macro form of _mm512_maskz_inserti32x4.  The mask A is cast to __mmask16,
   the mask type the inline-function form takes for this 16-element vector;
   the previous __mmask8 cast discarded the upper eight mask bits.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))
10153
/* Macro form of _mm512_mask_insertf32x4.  The mask B is cast to __mmask16,
   the mask type the inline-function form takes for this 16-element vector;
   the previous __mmask8 cast discarded the upper eight mask bits.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
    (__mmask16)(B)))
10158
/* Macro form of _mm512_mask_inserti32x4.  The mask B is cast to __mmask16,
   the mask type the inline-function form takes for this 16-element vector;
   the previous __mmask8 cast discarded the upper eight mask bits.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
    (__mmask16)(B)))
10163 #endif
10164
10165 extern __inline __m512i
10166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10167 _mm512_max_epi64 (__m512i __A, __m512i __B)
10168 {
10169 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10170 (__v8di) __B,
10171 (__v8di)
10172 _mm512_undefined_epi32 (),
10173 (__mmask8) -1);
10174 }
10175
10176 extern __inline __m512i
10177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10178 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10179 {
10180 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10181 (__v8di) __B,
10182 (__v8di)
10183 _mm512_setzero_si512 (),
10184 __M);
10185 }
10186
10187 extern __inline __m512i
10188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10189 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10190 {
10191 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10192 (__v8di) __B,
10193 (__v8di) __W, __M);
10194 }
10195
10196 extern __inline __m512i
10197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10198 _mm512_min_epi64 (__m512i __A, __m512i __B)
10199 {
10200 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10201 (__v8di) __B,
10202 (__v8di)
10203 _mm512_undefined_epi32 (),
10204 (__mmask8) -1);
10205 }
10206
10207 extern __inline __m512i
10208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10209 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10210 {
10211 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10212 (__v8di) __B,
10213 (__v8di) __W, __M);
10214 }
10215
10216 extern __inline __m512i
10217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10218 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10219 {
10220 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10221 (__v8di) __B,
10222 (__v8di)
10223 _mm512_setzero_si512 (),
10224 __M);
10225 }
10226
10227 extern __inline __m512i
10228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10229 _mm512_max_epu64 (__m512i __A, __m512i __B)
10230 {
10231 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10232 (__v8di) __B,
10233 (__v8di)
10234 _mm512_undefined_epi32 (),
10235 (__mmask8) -1);
10236 }
10237
10238 extern __inline __m512i
10239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10240 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10241 {
10242 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10243 (__v8di) __B,
10244 (__v8di)
10245 _mm512_setzero_si512 (),
10246 __M);
10247 }
10248
10249 extern __inline __m512i
10250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10251 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10252 {
10253 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10254 (__v8di) __B,
10255 (__v8di) __W, __M);
10256 }
10257
10258 extern __inline __m512i
10259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10260 _mm512_min_epu64 (__m512i __A, __m512i __B)
10261 {
10262 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10263 (__v8di) __B,
10264 (__v8di)
10265 _mm512_undefined_epi32 (),
10266 (__mmask8) -1);
10267 }
10268
10269 extern __inline __m512i
10270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10271 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10272 {
10273 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10274 (__v8di) __B,
10275 (__v8di) __W, __M);
10276 }
10277
10278 extern __inline __m512i
10279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10280 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10281 {
10282 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10283 (__v8di) __B,
10284 (__v8di)
10285 _mm512_setzero_si512 (),
10286 __M);
10287 }
10288
10289 extern __inline __m512i
10290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10291 _mm512_max_epi32 (__m512i __A, __m512i __B)
10292 {
10293 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10294 (__v16si) __B,
10295 (__v16si)
10296 _mm512_undefined_epi32 (),
10297 (__mmask16) -1);
10298 }
10299
10300 extern __inline __m512i
10301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10302 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10303 {
10304 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10305 (__v16si) __B,
10306 (__v16si)
10307 _mm512_setzero_si512 (),
10308 __M);
10309 }
10310
10311 extern __inline __m512i
10312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10313 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10314 {
10315 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10316 (__v16si) __B,
10317 (__v16si) __W, __M);
10318 }
10319
10320 extern __inline __m512i
10321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10322 _mm512_min_epi32 (__m512i __A, __m512i __B)
10323 {
10324 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10325 (__v16si) __B,
10326 (__v16si)
10327 _mm512_undefined_epi32 (),
10328 (__mmask16) -1);
10329 }
10330
10331 extern __inline __m512i
10332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10333 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10334 {
10335 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10336 (__v16si) __B,
10337 (__v16si)
10338 _mm512_setzero_si512 (),
10339 __M);
10340 }
10341
10342 extern __inline __m512i
10343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10344 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10345 {
10346 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10347 (__v16si) __B,
10348 (__v16si) __W, __M);
10349 }
10350
10351 extern __inline __m512i
10352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10353 _mm512_max_epu32 (__m512i __A, __m512i __B)
10354 {
10355 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10356 (__v16si) __B,
10357 (__v16si)
10358 _mm512_undefined_epi32 (),
10359 (__mmask16) -1);
10360 }
10361
10362 extern __inline __m512i
10363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10364 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10365 {
10366 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10367 (__v16si) __B,
10368 (__v16si)
10369 _mm512_setzero_si512 (),
10370 __M);
10371 }
10372
10373 extern __inline __m512i
10374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10375 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10376 {
10377 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10378 (__v16si) __B,
10379 (__v16si) __W, __M);
10380 }
10381
10382 extern __inline __m512i
10383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10384 _mm512_min_epu32 (__m512i __A, __m512i __B)
10385 {
10386 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10387 (__v16si) __B,
10388 (__v16si)
10389 _mm512_undefined_epi32 (),
10390 (__mmask16) -1);
10391 }
10392
10393 extern __inline __m512i
10394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10395 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10396 {
10397 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10398 (__v16si) __B,
10399 (__v16si)
10400 _mm512_setzero_si512 (),
10401 __M);
10402 }
10403
10404 extern __inline __m512i
10405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10406 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10407 {
10408 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10409 (__v16si) __B,
10410 (__v16si) __W, __M);
10411 }
10412
10413 extern __inline __m512
10414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10415 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10416 {
10417 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10418 (__v16sf) __B,
10419 (__v16sf)
10420 _mm512_undefined_ps (),
10421 (__mmask16) -1);
10422 }
10423
10424 extern __inline __m512
10425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10426 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10427 {
10428 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10429 (__v16sf) __B,
10430 (__v16sf) __W,
10431 (__mmask16) __U);
10432 }
10433
10434 extern __inline __m512
10435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10436 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10437 {
10438 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10439 (__v16sf) __B,
10440 (__v16sf)
10441 _mm512_setzero_ps (),
10442 (__mmask16) __U);
10443 }
10444
10445 #ifdef __OPTIMIZE__
10446 extern __inline __m128d
10447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10448 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10449 {
10450 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10451 (__v2df) __B,
10452 __R);
10453 }
10454
10455 extern __inline __m128
10456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10457 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10458 {
10459 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10460 (__v4sf) __B,
10461 __R);
10462 }
10463
10464 extern __inline __m128d
10465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10466 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10467 {
10468 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10469 (__v2df) __B,
10470 __R);
10471 }
10472
10473 extern __inline __m128
10474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10475 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10476 {
10477 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10478 (__v4sf) __B,
10479 __R);
10480 }
10481
10482 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to the
   same builtin that the inline function of the same name calls in the
   __OPTIMIZE__ branch (max -> max*_round, min -> min*_round); the
   expansion is parenthesized as a whole so that a postfix operator written
   after the macro call applies to the cast result.  */
#define _mm_max_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_round (A, B, C))

#define _mm_max_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_maxss_round (A, B, C))

#define _mm_min_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_minsd_round (A, B, C))

#define _mm_min_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_minss_round (A, B, C))
10494 #endif
10495
10496 extern __inline __m512d
10497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10498 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10499 {
10500 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10501 (__v8df) __W,
10502 (__mmask8) __U);
10503 }
10504
10505 extern __inline __m512
10506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10507 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10508 {
10509 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10510 (__v16sf) __W,
10511 (__mmask16) __U);
10512 }
10513
10514 extern __inline __m512i
10515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10516 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10517 {
10518 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10519 (__v8di) __W,
10520 (__mmask8) __U);
10521 }
10522
10523 extern __inline __m512i
10524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10525 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10526 {
10527 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10528 (__v16si) __W,
10529 (__mmask16) __U);
10530 }
10531
10532 #ifdef __OPTIMIZE__
10533 extern __inline __m128d
10534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10535 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10536 {
10537 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10538 (__v2df) __A,
10539 (__v2df) __B,
10540 __R);
10541 }
10542
10543 extern __inline __m128
10544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10545 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10546 {
10547 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10548 (__v4sf) __A,
10549 (__v4sf) __B,
10550 __R);
10551 }
10552
10553 extern __inline __m128d
10554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10555 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10556 {
10557 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10558 (__v2df) __A,
10559 -(__v2df) __B,
10560 __R);
10561 }
10562
10563 extern __inline __m128
10564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10565 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10566 {
10567 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10568 (__v4sf) __A,
10569 -(__v4sf) __B,
10570 __R);
10571 }
10572
10573 extern __inline __m128d
10574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10575 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10576 {
10577 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10578 -(__v2df) __A,
10579 (__v2df) __B,
10580 __R);
10581 }
10582
10583 extern __inline __m128
10584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10585 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10586 {
10587 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10588 -(__v4sf) __A,
10589 (__v4sf) __B,
10590 __R);
10591 }
10592
10593 extern __inline __m128d
10594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10595 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10596 {
10597 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10598 -(__v2df) __A,
10599 -(__v2df) __B,
10600 __R);
10601 }
10602
10603 extern __inline __m128
10604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10605 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10606 {
10607 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10608 -(__v4sf) __A,
10609 -(__v4sf) __B,
10610 __R);
10611 }
10612 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  All eight are built
   on the vfmaddsd3_round / vfmaddss3_round builtins; the fmsub, fnmadd and
   fnmsub variants negate the third, the second, or both of those operands.
   Every expansion is wrapped in parentheses so that a postfix operator
   written after the macro call applies to the cast result rather than to
   the builtin call inside it.  */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))

#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))

#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))

#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))

#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))

#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))

#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))

#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10636 #endif
10637
10638 #ifdef __OPTIMIZE__
10639 extern __inline int
10640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10641 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10642 {
10643 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10644 }
10645
10646 extern __inline int
10647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10648 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10649 {
10650 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10651 }
10652 #else
/* Macro forms used when __OPTIMIZE__ is not defined: each forwards its
   four arguments, unchanged and in order, to the vcomiss / vcomisd
   builtin.  */
#define _mm_comi_round_ss(A, B, C, D) \
  (__builtin_ia32_vcomiss (A, B, C, D))
#define _mm_comi_round_sd(A, B, C, D) \
  (__builtin_ia32_vcomisd (A, B, C, D))
10657 #endif
10658
10659 extern __inline __m512d
10660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10661 _mm512_sqrt_pd (__m512d __A)
10662 {
10663 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10664 (__v8df)
10665 _mm512_undefined_pd (),
10666 (__mmask8) -1,
10667 _MM_FROUND_CUR_DIRECTION);
10668 }
10669
10670 extern __inline __m512d
10671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10672 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10673 {
10674 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10675 (__v8df) __W,
10676 (__mmask8) __U,
10677 _MM_FROUND_CUR_DIRECTION);
10678 }
10679
10680 extern __inline __m512d
10681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10682 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10683 {
10684 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10685 (__v8df)
10686 _mm512_setzero_pd (),
10687 (__mmask8) __U,
10688 _MM_FROUND_CUR_DIRECTION);
10689 }
10690
10691 extern __inline __m512
10692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10693 _mm512_sqrt_ps (__m512 __A)
10694 {
10695 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10696 (__v16sf)
10697 _mm512_undefined_ps (),
10698 (__mmask16) -1,
10699 _MM_FROUND_CUR_DIRECTION);
10700 }
10701
10702 extern __inline __m512
10703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10704 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10705 {
10706 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10707 (__v16sf) __W,
10708 (__mmask16) __U,
10709 _MM_FROUND_CUR_DIRECTION);
10710 }
10711
10712 extern __inline __m512
10713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10714 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10715 {
10716 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10717 (__v16sf)
10718 _mm512_setzero_ps (),
10719 (__mmask16) __U,
10720 _MM_FROUND_CUR_DIRECTION);
10721 }
10722
10723 extern __inline __m512d
10724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10725 _mm512_add_pd (__m512d __A, __m512d __B)
10726 {
10727 return (__m512d) ((__v8df)__A + (__v8df)__B);
10728 }
10729
10730 extern __inline __m512d
10731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10732 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10733 {
10734 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10735 (__v8df) __B,
10736 (__v8df) __W,
10737 (__mmask8) __U,
10738 _MM_FROUND_CUR_DIRECTION);
10739 }
10740
10741 extern __inline __m512d
10742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10743 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10744 {
10745 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10746 (__v8df) __B,
10747 (__v8df)
10748 _mm512_setzero_pd (),
10749 (__mmask8) __U,
10750 _MM_FROUND_CUR_DIRECTION);
10751 }
10752
10753 extern __inline __m512
10754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10755 _mm512_add_ps (__m512 __A, __m512 __B)
10756 {
10757 return (__m512) ((__v16sf)__A + (__v16sf)__B);
10758 }
10759
10760 extern __inline __m512
10761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10762 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10763 {
10764 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10765 (__v16sf) __B,
10766 (__v16sf) __W,
10767 (__mmask16) __U,
10768 _MM_FROUND_CUR_DIRECTION);
10769 }
10770
10771 extern __inline __m512
10772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10773 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10774 {
10775 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10776 (__v16sf) __B,
10777 (__v16sf)
10778 _mm512_setzero_ps (),
10779 (__mmask16) __U,
10780 _MM_FROUND_CUR_DIRECTION);
10781 }
10782
10783 extern __inline __m512d
10784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10785 _mm512_sub_pd (__m512d __A, __m512d __B)
10786 {
10787 return (__m512d) ((__v8df)__A - (__v8df)__B);
10788 }
10789
10790 extern __inline __m512d
10791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10792 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10793 {
10794 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10795 (__v8df) __B,
10796 (__v8df) __W,
10797 (__mmask8) __U,
10798 _MM_FROUND_CUR_DIRECTION);
10799 }
10800
10801 extern __inline __m512d
10802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10803 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10804 {
10805 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10806 (__v8df) __B,
10807 (__v8df)
10808 _mm512_setzero_pd (),
10809 (__mmask8) __U,
10810 _MM_FROUND_CUR_DIRECTION);
10811 }
10812
10813 extern __inline __m512
10814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10815 _mm512_sub_ps (__m512 __A, __m512 __B)
10816 {
10817 return (__m512) ((__v16sf)__A - (__v16sf)__B);
10818 }
10819
10820 extern __inline __m512
10821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10822 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10823 {
10824 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10825 (__v16sf) __B,
10826 (__v16sf) __W,
10827 (__mmask16) __U,
10828 _MM_FROUND_CUR_DIRECTION);
10829 }
10830
10831 extern __inline __m512
10832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10833 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10834 {
10835 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10836 (__v16sf) __B,
10837 (__v16sf)
10838 _mm512_setzero_ps (),
10839 (__mmask16) __U,
10840 _MM_FROUND_CUR_DIRECTION);
10841 }
10842
10843 extern __inline __m512d
10844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10845 _mm512_mul_pd (__m512d __A, __m512d __B)
10846 {
10847 return (__m512d) ((__v8df)__A * (__v8df)__B);
10848 }
10849
10850 extern __inline __m512d
10851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10852 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10853 {
10854 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10855 (__v8df) __B,
10856 (__v8df) __W,
10857 (__mmask8) __U,
10858 _MM_FROUND_CUR_DIRECTION);
10859 }
10860
10861 extern __inline __m512d
10862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10863 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10864 {
10865 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10866 (__v8df) __B,
10867 (__v8df)
10868 _mm512_setzero_pd (),
10869 (__mmask8) __U,
10870 _MM_FROUND_CUR_DIRECTION);
10871 }
10872
10873 extern __inline __m512
10874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10875 _mm512_mul_ps (__m512 __A, __m512 __B)
10876 {
10877 return (__m512) ((__v16sf)__A * (__v16sf)__B);
10878 }
10879
10880 extern __inline __m512
10881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10882 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10883 {
10884 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10885 (__v16sf) __B,
10886 (__v16sf) __W,
10887 (__mmask16) __U,
10888 _MM_FROUND_CUR_DIRECTION);
10889 }
10890
10891 extern __inline __m512
10892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10893 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10894 {
10895 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10896 (__v16sf) __B,
10897 (__v16sf)
10898 _mm512_setzero_ps (),
10899 (__mmask16) __U,
10900 _MM_FROUND_CUR_DIRECTION);
10901 }
10902
10903 extern __inline __m512d
10904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10905 _mm512_div_pd (__m512d __M, __m512d __V)
10906 {
10907 return (__m512d) ((__v8df)__M / (__v8df)__V);
10908 }
10909
10910 extern __inline __m512d
10911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10912 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10913 {
10914 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10915 (__v8df) __V,
10916 (__v8df) __W,
10917 (__mmask8) __U,
10918 _MM_FROUND_CUR_DIRECTION);
10919 }
10920
10921 extern __inline __m512d
10922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10923 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10924 {
10925 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10926 (__v8df) __V,
10927 (__v8df)
10928 _mm512_setzero_pd (),
10929 (__mmask8) __U,
10930 _MM_FROUND_CUR_DIRECTION);
10931 }
10932
10933 extern __inline __m512
10934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10935 _mm512_div_ps (__m512 __A, __m512 __B)
10936 {
10937 return (__m512) ((__v16sf)__A / (__v16sf)__B);
10938 }
10939
10940 extern __inline __m512
10941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10943 {
10944 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10945 (__v16sf) __B,
10946 (__v16sf) __W,
10947 (__mmask16) __U,
10948 _MM_FROUND_CUR_DIRECTION);
10949 }
10950
10951 extern __inline __m512
10952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10954 {
10955 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10956 (__v16sf) __B,
10957 (__v16sf)
10958 _mm512_setzero_ps (),
10959 (__mmask16) __U,
10960 _MM_FROUND_CUR_DIRECTION);
10961 }
10962
10963 extern __inline __m512d
10964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10965 _mm512_max_pd (__m512d __A, __m512d __B)
10966 {
10967 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10968 (__v8df) __B,
10969 (__v8df)
10970 _mm512_undefined_pd (),
10971 (__mmask8) -1,
10972 _MM_FROUND_CUR_DIRECTION);
10973 }
10974
10975 extern __inline __m512d
10976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10977 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10978 {
10979 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10980 (__v8df) __B,
10981 (__v8df) __W,
10982 (__mmask8) __U,
10983 _MM_FROUND_CUR_DIRECTION);
10984 }
10985
10986 extern __inline __m512d
10987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10988 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10989 {
10990 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10991 (__v8df) __B,
10992 (__v8df)
10993 _mm512_setzero_pd (),
10994 (__mmask8) __U,
10995 _MM_FROUND_CUR_DIRECTION);
10996 }
10997
10998 extern __inline __m512
10999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11000 _mm512_max_ps (__m512 __A, __m512 __B)
11001 {
11002 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11003 (__v16sf) __B,
11004 (__v16sf)
11005 _mm512_undefined_ps (),
11006 (__mmask16) -1,
11007 _MM_FROUND_CUR_DIRECTION);
11008 }
11009
11010 extern __inline __m512
11011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11012 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11013 {
11014 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11015 (__v16sf) __B,
11016 (__v16sf) __W,
11017 (__mmask16) __U,
11018 _MM_FROUND_CUR_DIRECTION);
11019 }
11020
11021 extern __inline __m512
11022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11024 {
11025 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11026 (__v16sf) __B,
11027 (__v16sf)
11028 _mm512_setzero_ps (),
11029 (__mmask16) __U,
11030 _MM_FROUND_CUR_DIRECTION);
11031 }
11032
11033 extern __inline __m512d
11034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11035 _mm512_min_pd (__m512d __A, __m512d __B)
11036 {
11037 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11038 (__v8df) __B,
11039 (__v8df)
11040 _mm512_undefined_pd (),
11041 (__mmask8) -1,
11042 _MM_FROUND_CUR_DIRECTION);
11043 }
11044
11045 extern __inline __m512d
11046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11047 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11048 {
11049 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11050 (__v8df) __B,
11051 (__v8df) __W,
11052 (__mmask8) __U,
11053 _MM_FROUND_CUR_DIRECTION);
11054 }
11055
11056 extern __inline __m512d
11057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11058 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11059 {
11060 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11061 (__v8df) __B,
11062 (__v8df)
11063 _mm512_setzero_pd (),
11064 (__mmask8) __U,
11065 _MM_FROUND_CUR_DIRECTION);
11066 }
11067
11068 extern __inline __m512
11069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11070 _mm512_min_ps (__m512 __A, __m512 __B)
11071 {
11072 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11073 (__v16sf) __B,
11074 (__v16sf)
11075 _mm512_undefined_ps (),
11076 (__mmask16) -1,
11077 _MM_FROUND_CUR_DIRECTION);
11078 }
11079
11080 extern __inline __m512
11081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11083 {
11084 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11085 (__v16sf) __B,
11086 (__v16sf) __W,
11087 (__mmask16) __U,
11088 _MM_FROUND_CUR_DIRECTION);
11089 }
11090
11091 extern __inline __m512
11092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11093 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11094 {
11095 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11096 (__v16sf) __B,
11097 (__v16sf)
11098 _mm512_setzero_ps (),
11099 (__mmask16) __U,
11100 _MM_FROUND_CUR_DIRECTION);
11101 }
11102
11103 extern __inline __m512d
11104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11105 _mm512_scalef_pd (__m512d __A, __m512d __B)
11106 {
11107 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11108 (__v8df) __B,
11109 (__v8df)
11110 _mm512_undefined_pd (),
11111 (__mmask8) -1,
11112 _MM_FROUND_CUR_DIRECTION);
11113 }
11114
11115 extern __inline __m512d
11116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11117 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11118 {
11119 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11120 (__v8df) __B,
11121 (__v8df) __W,
11122 (__mmask8) __U,
11123 _MM_FROUND_CUR_DIRECTION);
11124 }
11125
11126 extern __inline __m512d
11127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11128 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11129 {
11130 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11131 (__v8df) __B,
11132 (__v8df)
11133 _mm512_setzero_pd (),
11134 (__mmask8) __U,
11135 _MM_FROUND_CUR_DIRECTION);
11136 }
11137
11138 extern __inline __m512
11139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11140 _mm512_scalef_ps (__m512 __A, __m512 __B)
11141 {
11142 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11143 (__v16sf) __B,
11144 (__v16sf)
11145 _mm512_undefined_ps (),
11146 (__mmask16) -1,
11147 _MM_FROUND_CUR_DIRECTION);
11148 }
11149
11150 extern __inline __m512
11151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11152 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11153 {
11154 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11155 (__v16sf) __B,
11156 (__v16sf) __W,
11157 (__mmask16) __U,
11158 _MM_FROUND_CUR_DIRECTION);
11159 }
11160
11161 extern __inline __m512
11162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11163 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11164 {
11165 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11166 (__v16sf) __B,
11167 (__v16sf)
11168 _mm512_setzero_ps (),
11169 (__mmask16) __U,
11170 _MM_FROUND_CUR_DIRECTION);
11171 }
11172
11173 extern __inline __m128d
11174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11175 _mm_scalef_sd (__m128d __A, __m128d __B)
11176 {
11177 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11178 (__v2df) __B,
11179 _MM_FROUND_CUR_DIRECTION);
11180 }
11181
11182 extern __inline __m128
11183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11184 _mm_scalef_ss (__m128 __A, __m128 __B)
11185 {
11186 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11187 (__v4sf) __B,
11188 _MM_FROUND_CUR_DIRECTION);
11189 }
11190
11191 extern __inline __m512d
11192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11193 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11194 {
11195 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11196 (__v8df) __B,
11197 (__v8df) __C,
11198 (__mmask8) -1,
11199 _MM_FROUND_CUR_DIRECTION);
11200 }
11201
11202 extern __inline __m512d
11203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11204 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11205 {
11206 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11207 (__v8df) __B,
11208 (__v8df) __C,
11209 (__mmask8) __U,
11210 _MM_FROUND_CUR_DIRECTION);
11211 }
11212
11213 extern __inline __m512d
11214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11215 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11216 {
11217 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11218 (__v8df) __B,
11219 (__v8df) __C,
11220 (__mmask8) __U,
11221 _MM_FROUND_CUR_DIRECTION);
11222 }
11223
11224 extern __inline __m512d
11225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11226 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11227 {
11228 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11229 (__v8df) __B,
11230 (__v8df) __C,
11231 (__mmask8) __U,
11232 _MM_FROUND_CUR_DIRECTION);
11233 }
11234
11235 extern __inline __m512
11236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11237 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11238 {
11239 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11240 (__v16sf) __B,
11241 (__v16sf) __C,
11242 (__mmask16) -1,
11243 _MM_FROUND_CUR_DIRECTION);
11244 }
11245
11246 extern __inline __m512
11247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11248 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11249 {
11250 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11251 (__v16sf) __B,
11252 (__v16sf) __C,
11253 (__mmask16) __U,
11254 _MM_FROUND_CUR_DIRECTION);
11255 }
11256
11257 extern __inline __m512
11258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11259 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11260 {
11261 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11262 (__v16sf) __B,
11263 (__v16sf) __C,
11264 (__mmask16) __U,
11265 _MM_FROUND_CUR_DIRECTION);
11266 }
11267
11268 extern __inline __m512
11269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11270 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11271 {
11272 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11273 (__v16sf) __B,
11274 (__v16sf) __C,
11275 (__mmask16) __U,
11276 _MM_FROUND_CUR_DIRECTION);
11277 }
11278
11279 extern __inline __m512d
11280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11281 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11282 {
11283 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11284 (__v8df) __B,
11285 -(__v8df) __C,
11286 (__mmask8) -1,
11287 _MM_FROUND_CUR_DIRECTION);
11288 }
11289
11290 extern __inline __m512d
11291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11292 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11293 {
11294 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11295 (__v8df) __B,
11296 -(__v8df) __C,
11297 (__mmask8) __U,
11298 _MM_FROUND_CUR_DIRECTION);
11299 }
11300
11301 extern __inline __m512d
11302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11303 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11304 {
11305 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11306 (__v8df) __B,
11307 (__v8df) __C,
11308 (__mmask8) __U,
11309 _MM_FROUND_CUR_DIRECTION);
11310 }
11311
11312 extern __inline __m512d
11313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11314 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11315 {
11316 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11317 (__v8df) __B,
11318 -(__v8df) __C,
11319 (__mmask8) __U,
11320 _MM_FROUND_CUR_DIRECTION);
11321 }
11322
11323 extern __inline __m512
11324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11325 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11326 {
11327 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11328 (__v16sf) __B,
11329 -(__v16sf) __C,
11330 (__mmask16) -1,
11331 _MM_FROUND_CUR_DIRECTION);
11332 }
11333
11334 extern __inline __m512
11335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11336 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11337 {
11338 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11339 (__v16sf) __B,
11340 -(__v16sf) __C,
11341 (__mmask16) __U,
11342 _MM_FROUND_CUR_DIRECTION);
11343 }
11344
11345 extern __inline __m512
11346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11347 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11348 {
11349 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11350 (__v16sf) __B,
11351 (__v16sf) __C,
11352 (__mmask16) __U,
11353 _MM_FROUND_CUR_DIRECTION);
11354 }
11355
11356 extern __inline __m512
11357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11358 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11359 {
11360 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11361 (__v16sf) __B,
11362 -(__v16sf) __C,
11363 (__mmask16) __U,
11364 _MM_FROUND_CUR_DIRECTION);
11365 }
11366
11367 extern __inline __m512d
11368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11369 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11370 {
11371 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11372 (__v8df) __B,
11373 (__v8df) __C,
11374 (__mmask8) -1,
11375 _MM_FROUND_CUR_DIRECTION);
11376 }
11377
11378 extern __inline __m512d
11379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11380 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11381 {
11382 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11383 (__v8df) __B,
11384 (__v8df) __C,
11385 (__mmask8) __U,
11386 _MM_FROUND_CUR_DIRECTION);
11387 }
11388
11389 extern __inline __m512d
11390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11391 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11392 {
11393 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11394 (__v8df) __B,
11395 (__v8df) __C,
11396 (__mmask8) __U,
11397 _MM_FROUND_CUR_DIRECTION);
11398 }
11399
11400 extern __inline __m512d
11401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11402 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11403 {
11404 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11405 (__v8df) __B,
11406 (__v8df) __C,
11407 (__mmask8) __U,
11408 _MM_FROUND_CUR_DIRECTION);
11409 }
11410
11411 extern __inline __m512
11412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11413 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11414 {
11415 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11416 (__v16sf) __B,
11417 (__v16sf) __C,
11418 (__mmask16) -1,
11419 _MM_FROUND_CUR_DIRECTION);
11420 }
11421
11422 extern __inline __m512
11423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11424 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11425 {
11426 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11427 (__v16sf) __B,
11428 (__v16sf) __C,
11429 (__mmask16) __U,
11430 _MM_FROUND_CUR_DIRECTION);
11431 }
11432
11433 extern __inline __m512
11434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11435 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11436 {
11437 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11438 (__v16sf) __B,
11439 (__v16sf) __C,
11440 (__mmask16) __U,
11441 _MM_FROUND_CUR_DIRECTION);
11442 }
11443
11444 extern __inline __m512
11445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11446 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11447 {
11448 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11449 (__v16sf) __B,
11450 (__v16sf) __C,
11451 (__mmask16) __U,
11452 _MM_FROUND_CUR_DIRECTION);
11453 }
11454
11455 extern __inline __m512d
11456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11457 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11458 {
11459 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11460 (__v8df) __B,
11461 -(__v8df) __C,
11462 (__mmask8) -1,
11463 _MM_FROUND_CUR_DIRECTION);
11464 }
11465
11466 extern __inline __m512d
11467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11468 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11469 {
11470 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11471 (__v8df) __B,
11472 -(__v8df) __C,
11473 (__mmask8) __U,
11474 _MM_FROUND_CUR_DIRECTION);
11475 }
11476
11477 extern __inline __m512d
11478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11479 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11480 {
11481 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11482 (__v8df) __B,
11483 (__v8df) __C,
11484 (__mmask8) __U,
11485 _MM_FROUND_CUR_DIRECTION);
11486 }
11487
11488 extern __inline __m512d
11489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11490 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11491 {
11492 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11493 (__v8df) __B,
11494 -(__v8df) __C,
11495 (__mmask8) __U,
11496 _MM_FROUND_CUR_DIRECTION);
11497 }
11498
11499 extern __inline __m512
11500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11501 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11502 {
11503 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11504 (__v16sf) __B,
11505 -(__v16sf) __C,
11506 (__mmask16) -1,
11507 _MM_FROUND_CUR_DIRECTION);
11508 }
11509
11510 extern __inline __m512
11511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11512 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11513 {
11514 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11515 (__v16sf) __B,
11516 -(__v16sf) __C,
11517 (__mmask16) __U,
11518 _MM_FROUND_CUR_DIRECTION);
11519 }
11520
11521 extern __inline __m512
11522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11523 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11524 {
11525 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11526 (__v16sf) __B,
11527 (__v16sf) __C,
11528 (__mmask16) __U,
11529 _MM_FROUND_CUR_DIRECTION);
11530 }
11531
11532 extern __inline __m512
11533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11534 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11535 {
11536 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11537 (__v16sf) __B,
11538 -(__v16sf) __C,
11539 (__mmask16) __U,
11540 _MM_FROUND_CUR_DIRECTION);
11541 }
11542
11543 extern __inline __m512d
11544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11545 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11546 {
11547 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11548 (__v8df) __B,
11549 (__v8df) __C,
11550 (__mmask8) -1,
11551 _MM_FROUND_CUR_DIRECTION);
11552 }
11553
11554 extern __inline __m512d
11555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11556 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11557 {
11558 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11559 (__v8df) __B,
11560 (__v8df) __C,
11561 (__mmask8) __U,
11562 _MM_FROUND_CUR_DIRECTION);
11563 }
11564
11565 extern __inline __m512d
11566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11567 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11568 {
11569 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11570 (__v8df) __B,
11571 (__v8df) __C,
11572 (__mmask8) __U,
11573 _MM_FROUND_CUR_DIRECTION);
11574 }
11575
11576 extern __inline __m512d
11577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11578 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11579 {
11580 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11581 (__v8df) __B,
11582 (__v8df) __C,
11583 (__mmask8) __U,
11584 _MM_FROUND_CUR_DIRECTION);
11585 }
11586
11587 extern __inline __m512
11588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11589 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11590 {
11591 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11592 (__v16sf) __B,
11593 (__v16sf) __C,
11594 (__mmask16) -1,
11595 _MM_FROUND_CUR_DIRECTION);
11596 }
11597
11598 extern __inline __m512
11599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11600 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11601 {
11602 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11603 (__v16sf) __B,
11604 (__v16sf) __C,
11605 (__mmask16) __U,
11606 _MM_FROUND_CUR_DIRECTION);
11607 }
11608
11609 extern __inline __m512
11610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11611 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11612 {
11613 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11614 (__v16sf) __B,
11615 (__v16sf) __C,
11616 (__mmask16) __U,
11617 _MM_FROUND_CUR_DIRECTION);
11618 }
11619
11620 extern __inline __m512
11621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11622 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11623 {
11624 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11625 (__v16sf) __B,
11626 (__v16sf) __C,
11627 (__mmask16) __U,
11628 _MM_FROUND_CUR_DIRECTION);
11629 }
11630
11631 extern __inline __m512d
11632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11634 {
11635 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11636 (__v8df) __B,
11637 -(__v8df) __C,
11638 (__mmask8) -1,
11639 _MM_FROUND_CUR_DIRECTION);
11640 }
11641
11642 extern __inline __m512d
11643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11644 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11645 {
11646 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11647 (__v8df) __B,
11648 (__v8df) __C,
11649 (__mmask8) __U,
11650 _MM_FROUND_CUR_DIRECTION);
11651 }
11652
11653 extern __inline __m512d
11654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11655 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11656 {
11657 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11658 (__v8df) __B,
11659 (__v8df) __C,
11660 (__mmask8) __U,
11661 _MM_FROUND_CUR_DIRECTION);
11662 }
11663
11664 extern __inline __m512d
11665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11666 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11667 {
11668 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11669 (__v8df) __B,
11670 -(__v8df) __C,
11671 (__mmask8) __U,
11672 _MM_FROUND_CUR_DIRECTION);
11673 }
11674
11675 extern __inline __m512
11676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11677 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11678 {
11679 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11680 (__v16sf) __B,
11681 -(__v16sf) __C,
11682 (__mmask16) -1,
11683 _MM_FROUND_CUR_DIRECTION);
11684 }
11685
11686 extern __inline __m512
11687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11688 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11689 {
11690 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11691 (__v16sf) __B,
11692 (__v16sf) __C,
11693 (__mmask16) __U,
11694 _MM_FROUND_CUR_DIRECTION);
11695 }
11696
11697 extern __inline __m512
11698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11700 {
11701 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11702 (__v16sf) __B,
11703 (__v16sf) __C,
11704 (__mmask16) __U,
11705 _MM_FROUND_CUR_DIRECTION);
11706 }
11707
11708 extern __inline __m512
11709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11710 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11711 {
11712 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11713 (__v16sf) __B,
11714 -(__v16sf) __C,
11715 (__mmask16) __U,
11716 _MM_FROUND_CUR_DIRECTION);
11717 }
11718
11719 extern __inline __m256i
11720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11721 _mm512_cvttpd_epi32 (__m512d __A)
11722 {
11723 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11724 (__v8si)
11725 _mm256_undefined_si256 (),
11726 (__mmask8) -1,
11727 _MM_FROUND_CUR_DIRECTION);
11728 }
11729
11730 extern __inline __m256i
11731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11732 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11733 {
11734 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11735 (__v8si) __W,
11736 (__mmask8) __U,
11737 _MM_FROUND_CUR_DIRECTION);
11738 }
11739
11740 extern __inline __m256i
11741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11742 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11743 {
11744 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11745 (__v8si)
11746 _mm256_setzero_si256 (),
11747 (__mmask8) __U,
11748 _MM_FROUND_CUR_DIRECTION);
11749 }
11750
11751 extern __inline __m256i
11752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11753 _mm512_cvttpd_epu32 (__m512d __A)
11754 {
11755 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11756 (__v8si)
11757 _mm256_undefined_si256 (),
11758 (__mmask8) -1,
11759 _MM_FROUND_CUR_DIRECTION);
11760 }
11761
11762 extern __inline __m256i
11763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11764 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11765 {
11766 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11767 (__v8si) __W,
11768 (__mmask8) __U,
11769 _MM_FROUND_CUR_DIRECTION);
11770 }
11771
11772 extern __inline __m256i
11773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11774 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11775 {
11776 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11777 (__v8si)
11778 _mm256_setzero_si256 (),
11779 (__mmask8) __U,
11780 _MM_FROUND_CUR_DIRECTION);
11781 }
11782
11783 extern __inline __m256i
11784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11785 _mm512_cvtpd_epi32 (__m512d __A)
11786 {
11787 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11788 (__v8si)
11789 _mm256_undefined_si256 (),
11790 (__mmask8) -1,
11791 _MM_FROUND_CUR_DIRECTION);
11792 }
11793
11794 extern __inline __m256i
11795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11796 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11797 {
11798 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11799 (__v8si) __W,
11800 (__mmask8) __U,
11801 _MM_FROUND_CUR_DIRECTION);
11802 }
11803
11804 extern __inline __m256i
11805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11806 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11807 {
11808 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11809 (__v8si)
11810 _mm256_setzero_si256 (),
11811 (__mmask8) __U,
11812 _MM_FROUND_CUR_DIRECTION);
11813 }
11814
11815 extern __inline __m256i
11816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11817 _mm512_cvtpd_epu32 (__m512d __A)
11818 {
11819 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11820 (__v8si)
11821 _mm256_undefined_si256 (),
11822 (__mmask8) -1,
11823 _MM_FROUND_CUR_DIRECTION);
11824 }
11825
11826 extern __inline __m256i
11827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11828 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11829 {
11830 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11831 (__v8si) __W,
11832 (__mmask8) __U,
11833 _MM_FROUND_CUR_DIRECTION);
11834 }
11835
11836 extern __inline __m256i
11837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11838 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11839 {
11840 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11841 (__v8si)
11842 _mm256_setzero_si256 (),
11843 (__mmask8) __U,
11844 _MM_FROUND_CUR_DIRECTION);
11845 }
11846
11847 extern __inline __m512i
11848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11849 _mm512_cvttps_epi32 (__m512 __A)
11850 {
11851 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11852 (__v16si)
11853 _mm512_undefined_epi32 (),
11854 (__mmask16) -1,
11855 _MM_FROUND_CUR_DIRECTION);
11856 }
11857
11858 extern __inline __m512i
11859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11860 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11861 {
11862 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11863 (__v16si) __W,
11864 (__mmask16) __U,
11865 _MM_FROUND_CUR_DIRECTION);
11866 }
11867
11868 extern __inline __m512i
11869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11870 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11871 {
11872 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11873 (__v16si)
11874 _mm512_setzero_si512 (),
11875 (__mmask16) __U,
11876 _MM_FROUND_CUR_DIRECTION);
11877 }
11878
11879 extern __inline __m512i
11880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11881 _mm512_cvttps_epu32 (__m512 __A)
11882 {
11883 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11884 (__v16si)
11885 _mm512_undefined_epi32 (),
11886 (__mmask16) -1,
11887 _MM_FROUND_CUR_DIRECTION);
11888 }
11889
11890 extern __inline __m512i
11891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11892 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11893 {
11894 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11895 (__v16si) __W,
11896 (__mmask16) __U,
11897 _MM_FROUND_CUR_DIRECTION);
11898 }
11899
11900 extern __inline __m512i
11901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11902 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11903 {
11904 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11905 (__v16si)
11906 _mm512_setzero_si512 (),
11907 (__mmask16) __U,
11908 _MM_FROUND_CUR_DIRECTION);
11909 }
11910
11911 extern __inline __m512i
11912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11913 _mm512_cvtps_epi32 (__m512 __A)
11914 {
11915 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11916 (__v16si)
11917 _mm512_undefined_epi32 (),
11918 (__mmask16) -1,
11919 _MM_FROUND_CUR_DIRECTION);
11920 }
11921
11922 extern __inline __m512i
11923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11924 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11925 {
11926 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11927 (__v16si) __W,
11928 (__mmask16) __U,
11929 _MM_FROUND_CUR_DIRECTION);
11930 }
11931
11932 extern __inline __m512i
11933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11934 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11935 {
11936 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11937 (__v16si)
11938 _mm512_setzero_si512 (),
11939 (__mmask16) __U,
11940 _MM_FROUND_CUR_DIRECTION);
11941 }
11942
11943 extern __inline __m512i
11944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11945 _mm512_cvtps_epu32 (__m512 __A)
11946 {
11947 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11948 (__v16si)
11949 _mm512_undefined_epi32 (),
11950 (__mmask16) -1,
11951 _MM_FROUND_CUR_DIRECTION);
11952 }
11953
11954 extern __inline __m512i
11955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11956 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11957 {
11958 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11959 (__v16si) __W,
11960 (__mmask16) __U,
11961 _MM_FROUND_CUR_DIRECTION);
11962 }
11963
11964 extern __inline __m512i
11965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11966 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11967 {
11968 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11969 (__v16si)
11970 _mm512_setzero_si512 (),
11971 (__mmask16) __U,
11972 _MM_FROUND_CUR_DIRECTION);
11973 }
11974
11975 #ifdef __x86_64__
11976 extern __inline __m128
11977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11978 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11979 {
11980 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11981 _MM_FROUND_CUR_DIRECTION);
11982 }
11983
11984 extern __inline __m128d
11985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11986 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11987 {
11988 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11989 _MM_FROUND_CUR_DIRECTION);
11990 }
11991 #endif
11992
11993 extern __inline __m128
11994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11995 _mm_cvtu32_ss (__m128 __A, unsigned __B)
11996 {
11997 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11998 _MM_FROUND_CUR_DIRECTION);
11999 }
12000
12001 extern __inline __m512
12002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003 _mm512_cvtepi32_ps (__m512i __A)
12004 {
12005 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12006 (__v16sf)
12007 _mm512_undefined_ps (),
12008 (__mmask16) -1,
12009 _MM_FROUND_CUR_DIRECTION);
12010 }
12011
12012 extern __inline __m512
12013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12014 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12015 {
12016 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12017 (__v16sf) __W,
12018 (__mmask16) __U,
12019 _MM_FROUND_CUR_DIRECTION);
12020 }
12021
12022 extern __inline __m512
12023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12024 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
12025 {
12026 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12027 (__v16sf)
12028 _mm512_setzero_ps (),
12029 (__mmask16) __U,
12030 _MM_FROUND_CUR_DIRECTION);
12031 }
12032
12033 extern __inline __m512
12034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12035 _mm512_cvtepu32_ps (__m512i __A)
12036 {
12037 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12038 (__v16sf)
12039 _mm512_undefined_ps (),
12040 (__mmask16) -1,
12041 _MM_FROUND_CUR_DIRECTION);
12042 }
12043
12044 extern __inline __m512
12045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12047 {
12048 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12049 (__v16sf) __W,
12050 (__mmask16) __U,
12051 _MM_FROUND_CUR_DIRECTION);
12052 }
12053
12054 extern __inline __m512
12055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12056 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12057 {
12058 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12059 (__v16sf)
12060 _mm512_setzero_ps (),
12061 (__mmask16) __U,
12062 _MM_FROUND_CUR_DIRECTION);
12063 }
12064
12065 #ifdef __OPTIMIZE__
12066 extern __inline __m512d
12067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12068 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12069 {
12070 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12071 (__v8df) __B,
12072 (__v8di) __C,
12073 __imm,
12074 (__mmask8) -1,
12075 _MM_FROUND_CUR_DIRECTION);
12076 }
12077
12078 extern __inline __m512d
12079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12080 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12081 __m512i __C, const int __imm)
12082 {
12083 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12084 (__v8df) __B,
12085 (__v8di) __C,
12086 __imm,
12087 (__mmask8) __U,
12088 _MM_FROUND_CUR_DIRECTION);
12089 }
12090
12091 extern __inline __m512d
12092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12093 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12094 __m512i __C, const int __imm)
12095 {
12096 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12097 (__v8df) __B,
12098 (__v8di) __C,
12099 __imm,
12100 (__mmask8) __U,
12101 _MM_FROUND_CUR_DIRECTION);
12102 }
12103
12104 extern __inline __m512
12105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12106 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12107 {
12108 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12109 (__v16sf) __B,
12110 (__v16si) __C,
12111 __imm,
12112 (__mmask16) -1,
12113 _MM_FROUND_CUR_DIRECTION);
12114 }
12115
12116 extern __inline __m512
12117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12118 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12119 __m512i __C, const int __imm)
12120 {
12121 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12122 (__v16sf) __B,
12123 (__v16si) __C,
12124 __imm,
12125 (__mmask16) __U,
12126 _MM_FROUND_CUR_DIRECTION);
12127 }
12128
12129 extern __inline __m512
12130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12131 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12132 __m512i __C, const int __imm)
12133 {
12134 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12135 (__v16sf) __B,
12136 (__v16si) __C,
12137 __imm,
12138 (__mmask16) __U,
12139 _MM_FROUND_CUR_DIRECTION);
12140 }
12141
12142 extern __inline __m128d
12143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12144 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12145 {
12146 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12147 (__v2df) __B,
12148 (__v2di) __C, __imm,
12149 (__mmask8) -1,
12150 _MM_FROUND_CUR_DIRECTION);
12151 }
12152
12153 extern __inline __m128d
12154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12155 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12156 __m128i __C, const int __imm)
12157 {
12158 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12159 (__v2df) __B,
12160 (__v2di) __C, __imm,
12161 (__mmask8) __U,
12162 _MM_FROUND_CUR_DIRECTION);
12163 }
12164
12165 extern __inline __m128d
12166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12167 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12168 __m128i __C, const int __imm)
12169 {
12170 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12171 (__v2df) __B,
12172 (__v2di) __C,
12173 __imm,
12174 (__mmask8) __U,
12175 _MM_FROUND_CUR_DIRECTION);
12176 }
12177
12178 extern __inline __m128
12179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12180 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12181 {
12182 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12183 (__v4sf) __B,
12184 (__v4si) __C, __imm,
12185 (__mmask8) -1,
12186 _MM_FROUND_CUR_DIRECTION);
12187 }
12188
12189 extern __inline __m128
12190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12191 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12192 __m128i __C, const int __imm)
12193 {
12194 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12195 (__v4sf) __B,
12196 (__v4si) __C, __imm,
12197 (__mmask8) __U,
12198 _MM_FROUND_CUR_DIRECTION);
12199 }
12200
12201 extern __inline __m128
12202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12203 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12204 __m128i __C, const int __imm)
12205 {
12206 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12207 (__v4sf) __B,
12208 (__v4si) __C, __imm,
12209 (__mmask8) __U,
12210 _MM_FROUND_CUR_DIRECTION);
12211 }
12212 #else
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmpd512_mask with mask (__mmask8)(-1).  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (__v8di)(__m512i)(Z), \
    (int)(C), \
    (__mmask8)(-1), \
    _MM_FROUND_CUR_DIRECTION))
12217
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmpd512_mask with mask (__mmask8)(U).  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (__v8di)(__m512i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12222
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmpd512_maskz with mask (__mmask8)(U).  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (__v8di)(__m512i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12227
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmps512_mask with mask (__mmask16)(-1).  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (__v16si)(__m512i)(Z), \
    (int)(C), \
    (__mmask16)(-1), \
    _MM_FROUND_CUR_DIRECTION))
12232
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmps512_mask with mask (__mmask16)(U).  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (__v16si)(__m512i)(Z), \
    (int)(C), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))
12237
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmps512_maskz with mask (__mmask16)(U).  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (__v16si)(__m512i)(Z), \
    (int)(C), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))
12242
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmsd_mask with mask (__mmask8)(-1).  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(-1), \
    _MM_FROUND_CUR_DIRECTION))
12247
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmsd_mask with mask (__mmask8)(U).  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12252
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmsd_maskz with mask (__mmask8)(U).  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12257
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmss_mask with mask (__mmask8)(-1).  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(-1), \
    _MM_FROUND_CUR_DIRECTION))
12262
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmss_mask with mask (__mmask8)(U).  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12267
/* Macro form used when __OPTIMIZE__ is not defined: casts X, Y, Z, C and
   calls __builtin_ia32_fixupimmss_maskz with mask (__mmask8)(U).  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmss_maskz ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12272 #endif
12273
12274 #ifdef __x86_64__
12275 extern __inline unsigned long long
12276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12277 _mm_cvtss_u64 (__m128 __A)
12278 {
12279 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12280 __A,
12281 _MM_FROUND_CUR_DIRECTION);
12282 }
12283
12284 extern __inline unsigned long long
12285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12286 _mm_cvttss_u64 (__m128 __A)
12287 {
12288 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12289 __A,
12290 _MM_FROUND_CUR_DIRECTION);
12291 }
12292
12293 extern __inline long long
12294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12295 _mm_cvttss_i64 (__m128 __A)
12296 {
12297 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12298 _MM_FROUND_CUR_DIRECTION);
12299 }
12300 #endif /* __x86_64__ */
12301
12302 extern __inline unsigned
12303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12304 _mm_cvtss_u32 (__m128 __A)
12305 {
12306 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12307 _MM_FROUND_CUR_DIRECTION);
12308 }
12309
12310 extern __inline unsigned
12311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12312 _mm_cvttss_u32 (__m128 __A)
12313 {
12314 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12315 _MM_FROUND_CUR_DIRECTION);
12316 }
12317
12318 extern __inline int
12319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12320 _mm_cvttss_i32 (__m128 __A)
12321 {
12322 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12323 _MM_FROUND_CUR_DIRECTION);
12324 }
12325
12326 #ifdef __x86_64__
12327 extern __inline unsigned long long
12328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12329 _mm_cvtsd_u64 (__m128d __A)
12330 {
12331 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12332 __A,
12333 _MM_FROUND_CUR_DIRECTION);
12334 }
12335
12336 extern __inline unsigned long long
12337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12338 _mm_cvttsd_u64 (__m128d __A)
12339 {
12340 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12341 __A,
12342 _MM_FROUND_CUR_DIRECTION);
12343 }
12344
12345 extern __inline long long
12346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12347 _mm_cvttsd_i64 (__m128d __A)
12348 {
12349 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12350 _MM_FROUND_CUR_DIRECTION);
12351 }
12352 #endif /* __x86_64__ */
12353
12354 extern __inline unsigned
12355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12356 _mm_cvtsd_u32 (__m128d __A)
12357 {
12358 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12359 _MM_FROUND_CUR_DIRECTION);
12360 }
12361
12362 extern __inline unsigned
12363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12364 _mm_cvttsd_u32 (__m128d __A)
12365 {
12366 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12367 _MM_FROUND_CUR_DIRECTION);
12368 }
12369
12370 extern __inline int
12371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12372 _mm_cvttsd_i32 (__m128d __A)
12373 {
12374 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12375 _MM_FROUND_CUR_DIRECTION);
12376 }
12377
12378 extern __inline __m512d
12379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12380 _mm512_cvtps_pd (__m256 __A)
12381 {
12382 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12383 (__v8df)
12384 _mm512_undefined_pd (),
12385 (__mmask8) -1,
12386 _MM_FROUND_CUR_DIRECTION);
12387 }
12388
12389 extern __inline __m512d
12390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12391 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12392 {
12393 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12394 (__v8df) __W,
12395 (__mmask8) __U,
12396 _MM_FROUND_CUR_DIRECTION);
12397 }
12398
12399 extern __inline __m512d
12400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12401 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12402 {
12403 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12404 (__v8df)
12405 _mm512_setzero_pd (),
12406 (__mmask8) __U,
12407 _MM_FROUND_CUR_DIRECTION);
12408 }
12409
12410 extern __inline __m512
12411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12412 _mm512_cvtph_ps (__m256i __A)
12413 {
12414 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12415 (__v16sf)
12416 _mm512_undefined_ps (),
12417 (__mmask16) -1,
12418 _MM_FROUND_CUR_DIRECTION);
12419 }
12420
12421 extern __inline __m512
12422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12423 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12424 {
12425 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12426 (__v16sf) __W,
12427 (__mmask16) __U,
12428 _MM_FROUND_CUR_DIRECTION);
12429 }
12430
12431 extern __inline __m512
12432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12433 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12434 {
12435 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12436 (__v16sf)
12437 _mm512_setzero_ps (),
12438 (__mmask16) __U,
12439 _MM_FROUND_CUR_DIRECTION);
12440 }
12441
12442 extern __inline __m256
12443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12444 _mm512_cvtpd_ps (__m512d __A)
12445 {
12446 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12447 (__v8sf)
12448 _mm256_undefined_ps (),
12449 (__mmask8) -1,
12450 _MM_FROUND_CUR_DIRECTION);
12451 }
12452
12453 extern __inline __m256
12454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12455 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12456 {
12457 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12458 (__v8sf) __W,
12459 (__mmask8) __U,
12460 _MM_FROUND_CUR_DIRECTION);
12461 }
12462
12463 extern __inline __m256
12464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12465 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12466 {
12467 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12468 (__v8sf)
12469 _mm256_setzero_ps (),
12470 (__mmask8) __U,
12471 _MM_FROUND_CUR_DIRECTION);
12472 }
12473
12474 #ifdef __OPTIMIZE__
12475 extern __inline __m512
12476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12477 _mm512_getexp_ps (__m512 __A)
12478 {
12479 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12480 (__v16sf)
12481 _mm512_undefined_ps (),
12482 (__mmask16) -1,
12483 _MM_FROUND_CUR_DIRECTION);
12484 }
12485
12486 extern __inline __m512
12487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12488 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12489 {
12490 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12491 (__v16sf) __W,
12492 (__mmask16) __U,
12493 _MM_FROUND_CUR_DIRECTION);
12494 }
12495
12496 extern __inline __m512
12497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12498 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12499 {
12500 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12501 (__v16sf)
12502 _mm512_setzero_ps (),
12503 (__mmask16) __U,
12504 _MM_FROUND_CUR_DIRECTION);
12505 }
12506
12507 extern __inline __m512d
12508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12509 _mm512_getexp_pd (__m512d __A)
12510 {
12511 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12512 (__v8df)
12513 _mm512_undefined_pd (),
12514 (__mmask8) -1,
12515 _MM_FROUND_CUR_DIRECTION);
12516 }
12517
12518 extern __inline __m512d
12519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12520 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12521 {
12522 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12523 (__v8df) __W,
12524 (__mmask8) __U,
12525 _MM_FROUND_CUR_DIRECTION);
12526 }
12527
12528 extern __inline __m512d
12529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12530 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12531 {
12532 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12533 (__v8df)
12534 _mm512_setzero_pd (),
12535 (__mmask8) __U,
12536 _MM_FROUND_CUR_DIRECTION);
12537 }
12538
12539 extern __inline __m128
12540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12541 _mm_getexp_ss (__m128 __A, __m128 __B)
12542 {
12543 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12544 (__v4sf) __B,
12545 _MM_FROUND_CUR_DIRECTION);
12546 }
12547
12548 extern __inline __m128d
12549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12550 _mm_getexp_sd (__m128d __A, __m128d __B)
12551 {
12552 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12553 (__v2df) __B,
12554 _MM_FROUND_CUR_DIRECTION);
12555 }
12556
12557 extern __inline __m512d
12558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12559 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12560 _MM_MANTISSA_SIGN_ENUM __C)
12561 {
12562 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12563 (__C << 2) | __B,
12564 _mm512_undefined_pd (),
12565 (__mmask8) -1,
12566 _MM_FROUND_CUR_DIRECTION);
12567 }
12568
12569 extern __inline __m512d
12570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12571 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12572 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12573 {
12574 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12575 (__C << 2) | __B,
12576 (__v8df) __W, __U,
12577 _MM_FROUND_CUR_DIRECTION);
12578 }
12579
12580 extern __inline __m512d
12581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12582 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12583 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12584 {
12585 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12586 (__C << 2) | __B,
12587 (__v8df)
12588 _mm512_setzero_pd (),
12589 __U,
12590 _MM_FROUND_CUR_DIRECTION);
12591 }
12592
12593 extern __inline __m512
12594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12595 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12596 _MM_MANTISSA_SIGN_ENUM __C)
12597 {
12598 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12599 (__C << 2) | __B,
12600 _mm512_undefined_ps (),
12601 (__mmask16) -1,
12602 _MM_FROUND_CUR_DIRECTION);
12603 }
12604
12605 extern __inline __m512
12606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12607 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12608 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12609 {
12610 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12611 (__C << 2) | __B,
12612 (__v16sf) __W, __U,
12613 _MM_FROUND_CUR_DIRECTION);
12614 }
12615
12616 extern __inline __m512
12617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12618 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12619 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12620 {
12621 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12622 (__C << 2) | __B,
12623 (__v16sf)
12624 _mm512_setzero_ps (),
12625 __U,
12626 _MM_FROUND_CUR_DIRECTION);
12627 }
12628
12629 extern __inline __m128d
12630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12631 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12632 _MM_MANTISSA_SIGN_ENUM __D)
12633 {
12634 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12635 (__v2df) __B,
12636 (__D << 2) | __C,
12637 _MM_FROUND_CUR_DIRECTION);
12638 }
12639
12640 extern __inline __m128
12641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12642 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12643 _MM_MANTISSA_SIGN_ENUM __D)
12644 {
12645 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12646 (__v4sf) __B,
12647 (__D << 2) | __C,
12648 _MM_FROUND_CUR_DIRECTION);
12649 }
12650
12651 #else
/* Macro forms of the packed double getmant intrinsics for builds without
   __OPTIMIZE__.  Each passes (C << 2) | B as the immediate operand.  */

/* Unmasked: undefined pass-through, all-ones mask.  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
                                               (int)(((C) << 2) | (B)), \
                                               (__v8df) _mm512_undefined_pd (), \
                                               (__mmask8) -1, \
                                               _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: W is the pass-through, U the write mask.  */
#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
                                               (int)(((C) << 2) | (B)), \
                                               (__v8df)(__m512d)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

/* Zero-masked: all-zero pass-through, U the write mask.  */
#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
                                               (int)(((C) << 2) | (B)), \
                                               (__v8df) _mm512_setzero_pd (), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the packed float getmant intrinsics for builds without
   __OPTIMIZE__.  Each passes (C << 2) | B as the immediate operand.  */

/* Unmasked: undefined pass-through, all-ones mask.  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
                                              (int)(((C) << 2) | (B)), \
                                              (__v16sf) _mm512_undefined_ps (), \
                                              (__mmask16) -1, \
                                              _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: W is the pass-through, U the write mask.  */
#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
                                              (int)(((C) << 2) | (B)), \
                                              (__v16sf)(__m512)(W), \
                                              (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

/* Zero-masked: all-zero pass-through, U the write mask.  */
#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
                                              (int)(((C) << 2) | (B)), \
                                              (__v16sf) _mm512_setzero_ps (), \
                                              (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the scalar getmant intrinsics for builds without
   __OPTIMIZE__.  The immediate is (D << 2) | C.  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
                                             (__v2df)(__m128d)(Y), \
                                             (int)(((D) << 2) | (C)), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
                                            (__v4sf)(__m128)(Y), \
                                            (int)(((D) << 2) | (C)), \
                                            _MM_FROUND_CUR_DIRECTION))
12703
/* Macro form of _mm_getexp_ss for builds without __OPTIMIZE__.  It calls
   the three-operand getexpss128_round builtin, the same one the inline
   definition of _mm_getexp_ss uses with these operands; the former
   getexpss128_mask spelling did not match that definition.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              _MM_FROUND_CUR_DIRECTION))
12707
/* Macro form of _mm_getexp_sd for builds without __OPTIMIZE__.  It calls
   the three-operand getexpsd128_round builtin, the same one the inline
   definition of _mm_getexp_sd uses with these operands; the former
   getexpsd128_mask spelling did not match that definition.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               _MM_FROUND_CUR_DIRECTION))
12711
/* Macro forms of the packed float getexp intrinsics for builds without
   __OPTIMIZE__: unmasked, merge-masked (W, U) and zero-masked (U).  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(W), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
12723
/* Macro forms of the packed double getexp intrinsics for builds without
   __OPTIMIZE__: unmasked, merge-masked (W, U) and zero-masked (U).  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(W), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
12735 #endif
12736
12737 #ifdef __OPTIMIZE__
12738 extern __inline __m512
12739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12740 _mm512_roundscale_ps (__m512 __A, const int __imm)
12741 {
12742 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12743 (__v16sf)
12744 _mm512_undefined_ps (),
12745 -1,
12746 _MM_FROUND_CUR_DIRECTION);
12747 }
12748
12749 extern __inline __m512
12750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12751 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12752 const int __imm)
12753 {
12754 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12755 (__v16sf) __A,
12756 (__mmask16) __B,
12757 _MM_FROUND_CUR_DIRECTION);
12758 }
12759
12760 extern __inline __m512
12761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12762 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12763 {
12764 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12765 __imm,
12766 (__v16sf)
12767 _mm512_setzero_ps (),
12768 (__mmask16) __A,
12769 _MM_FROUND_CUR_DIRECTION);
12770 }
12771
12772 extern __inline __m512d
12773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12774 _mm512_roundscale_pd (__m512d __A, const int __imm)
12775 {
12776 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12777 (__v8df)
12778 _mm512_undefined_pd (),
12779 -1,
12780 _MM_FROUND_CUR_DIRECTION);
12781 }
12782
12783 extern __inline __m512d
12784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12785 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12786 const int __imm)
12787 {
12788 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12789 (__v8df) __A,
12790 (__mmask8) __B,
12791 _MM_FROUND_CUR_DIRECTION);
12792 }
12793
12794 extern __inline __m512d
12795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12796 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12797 {
12798 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12799 __imm,
12800 (__v8df)
12801 _mm512_setzero_pd (),
12802 (__mmask8) __A,
12803 _MM_FROUND_CUR_DIRECTION);
12804 }
12805
12806 extern __inline __m128
12807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12808 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12809 {
12810 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12811 (__v4sf) __B, __imm,
12812 _MM_FROUND_CUR_DIRECTION);
12813 }
12814
12815 extern __inline __m128d
12816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12817 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12818 {
12819 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12820 (__v2df) __B, __imm,
12821 _MM_FROUND_CUR_DIRECTION);
12822 }
12823
12824 #else
/* Macro forms of the packed float roundscale intrinsics for builds
   without __OPTIMIZE__.  In the masked forms A/B follow the inline
   versions: pass-through then mask (mask_), or mask then source (maskz_).  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), \
                                            (int)(B), \
                                            (__v16sf) _mm512_undefined_ps (), \
                                            (__mmask16)(-1), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
                                            (int)(D), \
                                            (__v16sf)(__m512)(A), \
                                            (__mmask16)(B), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
                                            (int)(C), \
                                            (__v16sf) _mm512_setzero_ps (), \
                                            (__mmask16)(A), \
                                            _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the packed double roundscale intrinsics for builds
   without __OPTIMIZE__.  In the masked forms A/B follow the inline
   versions: pass-through then mask (mask_), or mask then source (maskz_).  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), \
                                             (int)(B), \
                                             (__v8df) _mm512_undefined_pd (), \
                                             (__mmask8)(-1), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
                                             (int)(D), \
                                             (__v8df)(__m512d)(A), \
                                             (__mmask8)(B), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
                                             (int)(C), \
                                             (__v8df) _mm512_setzero_pd (), \
                                             (__mmask8)(A), \
                                             _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the scalar roundscale intrinsics for builds without
   __OPTIMIZE__; C is the immediate operand.  */
#define _mm_roundscale_ss(A, B, C) \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (int)(C), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_roundscale_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (int)(C), \
                                              _MM_FROUND_CUR_DIRECTION))
12857 #endif
12858
12859 #ifdef __OPTIMIZE__
12860 extern __inline __mmask8
12861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12862 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12863 {
12864 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12865 (__v8df) __Y, __P,
12866 (__mmask8) -1,
12867 _MM_FROUND_CUR_DIRECTION);
12868 }
12869
12870 extern __inline __mmask16
12871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12872 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12873 {
12874 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12875 (__v16sf) __Y, __P,
12876 (__mmask16) -1,
12877 _MM_FROUND_CUR_DIRECTION);
12878 }
12879
12880 extern __inline __mmask16
12881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12882 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12883 {
12884 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12885 (__v16sf) __Y, __P,
12886 (__mmask16) __U,
12887 _MM_FROUND_CUR_DIRECTION);
12888 }
12889
12890 extern __inline __mmask8
12891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12892 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12893 {
12894 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12895 (__v8df) __Y, __P,
12896 (__mmask8) __U,
12897 _MM_FROUND_CUR_DIRECTION);
12898 }
12899
12900 extern __inline __mmask8
12901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12902 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12903 {
12904 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12905 (__v2df) __Y, __P,
12906 (__mmask8) -1,
12907 _MM_FROUND_CUR_DIRECTION);
12908 }
12909
12910 extern __inline __mmask8
12911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12912 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12913 {
12914 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12915 (__v2df) __Y, __P,
12916 (__mmask8) __M,
12917 _MM_FROUND_CUR_DIRECTION);
12918 }
12919
12920 extern __inline __mmask8
12921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12922 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12923 {
12924 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12925 (__v4sf) __Y, __P,
12926 (__mmask8) -1,
12927 _MM_FROUND_CUR_DIRECTION);
12928 }
12929
12930 extern __inline __mmask8
12931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12932 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12933 {
12934 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12935 (__v4sf) __Y, __P,
12936 (__mmask8) __M,
12937 _MM_FROUND_CUR_DIRECTION);
12938 }
12939
12940 #else
/* Macro forms of the unmasked packed compares for builds without
   __OPTIMIZE__; P is the predicate immediate and the input mask is all
   ones.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), \
                                            (int)(P), \
                                            (__mmask8) -1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), \
                                             (int)(P), \
                                             (__mmask16) -1, \
                                             _MM_FROUND_CUR_DIRECTION))
12950
/* Macro form of _mm512_mask_cmp_pd_mask for builds without __OPTIMIZE__.
   M is parenthesized before the __mmask8 cast so that an expression
   argument such as `k >> 8' is evaluated as a whole and only then
   narrowed; casting first would discard the high bits before the shift.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), \
                                            (int)(P), \
                                            (__mmask8)(M), \
                                            _MM_FROUND_CUR_DIRECTION))
12955
/* Macro form of _mm512_mask_cmp_ps_mask for builds without __OPTIMIZE__.
   M is parenthesized before the __mmask16 cast so that an expression
   argument such as `k >> 16' is evaluated as a whole and only then
   narrowed; casting first would discard the high bits before the shift.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), \
                                             (int)(P), \
                                             (__mmask16)(M), \
                                             _MM_FROUND_CUR_DIRECTION))
12960
/* Macro form of _mm_cmp_sd_mask for builds without __OPTIMIZE__; P is the
   predicate immediate and the input mask is all ones.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
                                         (__v2df)(__m128d)(Y), \
                                         (int)(P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
12965
/* Macro form of _mm_mask_cmp_sd_mask for builds without __OPTIMIZE__.
   The mask argument is parenthesized and cast to __mmask8, as the inline
   definition of this intrinsic does, instead of being forwarded bare.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
                                         (__v2df)(__m128d)(Y), \
                                         (int)(P), \
                                         (__mmask8)(M), \
                                         _MM_FROUND_CUR_DIRECTION))
12970
/* Macro form of _mm_cmp_ss_mask for builds without __OPTIMIZE__; P is the
   predicate immediate and the input mask is all ones.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), \
                                         (int)(P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
12975
/* Macro form of _mm_mask_cmp_ss_mask for builds without __OPTIMIZE__.
   The mask argument is parenthesized and cast to __mmask8, as the inline
   definition of this intrinsic does, instead of being forwarded bare.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), \
                                         (int)(P), \
                                         (__mmask8)(M), \
                                         _MM_FROUND_CUR_DIRECTION))
12980 #endif
12981
12982 extern __inline __mmask16
12983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12984 _mm512_kmov (__mmask16 __A)
12985 {
12986 return __builtin_ia32_kmovw (__A);
12987 }
12988
12989 extern __inline __m512
12990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12991 _mm512_castpd_ps (__m512d __A)
12992 {
12993 return (__m512) (__A);
12994 }
12995
12996 extern __inline __m512i
12997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12998 _mm512_castpd_si512 (__m512d __A)
12999 {
13000 return (__m512i) (__A);
13001 }
13002
13003 extern __inline __m512d
13004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13005 _mm512_castps_pd (__m512 __A)
13006 {
13007 return (__m512d) (__A);
13008 }
13009
13010 extern __inline __m512i
13011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13012 _mm512_castps_si512 (__m512 __A)
13013 {
13014 return (__m512i) (__A);
13015 }
13016
13017 extern __inline __m512
13018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13019 _mm512_castsi512_ps (__m512i __A)
13020 {
13021 return (__m512) (__A);
13022 }
13023
13024 extern __inline __m512d
13025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13026 _mm512_castsi512_pd (__m512i __A)
13027 {
13028 return (__m512d) (__A);
13029 }
13030
13031 extern __inline __m128d
13032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13033 _mm512_castpd512_pd128 (__m512d __A)
13034 {
13035 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
13036 }
13037
13038 extern __inline __m128
13039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13040 _mm512_castps512_ps128 (__m512 __A)
13041 {
13042 return _mm512_extractf32x4_ps(__A, 0);
13043 }
13044
13045 extern __inline __m128i
13046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13047 _mm512_castsi512_si128 (__m512i __A)
13048 {
13049 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13050 }
13051
13052 extern __inline __m256d
13053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13054 _mm512_castpd512_pd256 (__m512d __A)
13055 {
13056 return _mm512_extractf64x4_pd(__A, 0);
13057 }
13058
13059 extern __inline __m256
13060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13061 _mm512_castps512_ps256 (__m512 __A)
13062 {
13063 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13064 }
13065
13066 extern __inline __m256i
13067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13068 _mm512_castsi512_si256 (__m512i __A)
13069 {
13070 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13071 }
13072
13073 extern __inline __m512d
13074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13075 _mm512_castpd128_pd512 (__m128d __A)
13076 {
13077 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13078 }
13079
13080 extern __inline __m512
13081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13082 _mm512_castps128_ps512 (__m128 __A)
13083 {
13084 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13085 }
13086
13087 extern __inline __m512i
13088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13089 _mm512_castsi128_si512 (__m128i __A)
13090 {
13091 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13092 }
13093
13094 extern __inline __m512d
13095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13096 _mm512_castpd256_pd512 (__m256d __A)
13097 {
13098 return __builtin_ia32_pd512_256pd (__A);
13099 }
13100
13101 extern __inline __m512
13102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13103 _mm512_castps256_ps512 (__m256 __A)
13104 {
13105 return __builtin_ia32_ps512_256ps (__A);
13106 }
13107
13108 extern __inline __m512i
13109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13110 _mm512_castsi256_si512 (__m256i __A)
13111 {
13112 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13113 }
13114
13115 extern __inline __mmask16
13116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13117 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13118 {
13119 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13120 (__v16si) __B, 0,
13121 (__mmask16) -1);
13122 }
13123
13124 extern __inline __mmask16
13125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13126 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13127 {
13128 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13129 (__v16si) __B, 0, __U);
13130 }
13131
13132 extern __inline __mmask8
13133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13134 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13135 {
13136 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13137 (__v8di) __B, 0, __U);
13138 }
13139
13140 extern __inline __mmask8
13141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13142 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13143 {
13144 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13145 (__v8di) __B, 0,
13146 (__mmask8) -1);
13147 }
13148
13149 extern __inline __mmask16
13150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13151 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13152 {
13153 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13154 (__v16si) __B, 6,
13155 (__mmask16) -1);
13156 }
13157
13158 extern __inline __mmask16
13159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13160 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13161 {
13162 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13163 (__v16si) __B, 6, __U);
13164 }
13165
13166 extern __inline __mmask8
13167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13168 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13169 {
13170 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13171 (__v8di) __B, 6, __U);
13172 }
13173
13174 extern __inline __mmask8
13175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13176 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13177 {
13178 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13179 (__v8di) __B, 6,
13180 (__mmask8) -1);
13181 }
13182
13183 #ifdef __DISABLE_AVX512F__
13184 #undef __DISABLE_AVX512F__
13185 #pragma GCC pop_options
13186 #endif /* __DISABLE_AVX512F__ */
13187
13188 #endif /* _AVX512FINTRIN_H_INCLUDED */