1 /* Copyright (C) 2013-2016 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
36
37 /* Internal data types for implementing the intrinsics. */
38 typedef double __v8df __attribute__ ((__vector_size__ (64)));
39 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
40 typedef long long __v8di __attribute__ ((__vector_size__ (64)));
41 typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
42 typedef int __v16si __attribute__ ((__vector_size__ (64)));
43 typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
44 typedef short __v32hi __attribute__ ((__vector_size__ (64)));
45 typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
46 typedef char __v64qi __attribute__ ((__vector_size__ (64)));
47 typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
48
49 /* The Intel API is flexible enough that we must allow aliasing with other
50 vector types, and their scalar components. */
51 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
52 typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
53 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
54
55 typedef unsigned char __mmask8;
56 typedef unsigned short __mmask16;
57
58 extern __inline __m512i
59 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 _mm512_set_epi64 (long long __A, long long __B, long long __C,
61 long long __D, long long __E, long long __F,
62 long long __G, long long __H)
63 {
64 return __extension__ (__m512i) (__v8di)
65 { __H, __G, __F, __E, __D, __C, __B, __A };
66 }
67
68 /* Create the vector [A B C D E F G H I J K L M N O P]. */
69 extern __inline __m512i
70 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
72 int __E, int __F, int __G, int __H,
73 int __I, int __J, int __K, int __L,
74 int __M, int __N, int __O, int __P)
75 {
76 return __extension__ (__m512i)(__v16si)
77 { __P, __O, __N, __M, __L, __K, __J, __I,
78 __H, __G, __F, __E, __D, __C, __B, __A };
79 }
80
81 extern __inline __m512d
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83 _mm512_set_pd (double __A, double __B, double __C, double __D,
84 double __E, double __F, double __G, double __H)
85 {
86 return __extension__ (__m512d)
87 { __H, __G, __F, __E, __D, __C, __B, __A };
88 }
89
90 extern __inline __m512
91 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92 _mm512_set_ps (float __A, float __B, float __C, float __D,
93 float __E, float __F, float __G, float __H,
94 float __I, float __J, float __K, float __L,
95 float __M, float __N, float __O, float __P)
96 {
97 return __extension__ (__m512)
98 { __P, __O, __N, __M, __L, __K, __J, __I,
99 __H, __G, __F, __E, __D, __C, __B, __A };
100 }
101
102 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
103 _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
104
105 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
106 e8,e9,e10,e11,e12,e13,e14,e15) \
107 _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
108
109 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
110 _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
111
112 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
113 _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
114
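/* Illustrative usage sketch (editorial addition, not part of the original
   header): _mm512_set_epi32 takes its arguments highest element first, so
   _mm512_setr_epi32 is usually the more natural spelling when writing
   elements in memory order.  The helper name below is hypothetical.  */
static __inline int
__avx512f_example_set_order (void)
{
  __m512i __a = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
				  7, 6, 5, 4, 3, 2, 1, 0);
  __m512i __b = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
				   8, 9, 10, 11, 12, 13, 14, 15);
  /* Both vectors hold 0 in element 0 and 15 in element 15; GCC's vector
     subscripting can be used to inspect individual lanes.  */
  return ((__v16si) __a)[0] == ((__v16si) __b)[0];
}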
115 extern __inline __m512
116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117 _mm512_undefined_ps (void)
118 {
119 __m512 __Y = __Y;
120 return __Y;
121 }
122
123 extern __inline __m512d
124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
125 _mm512_undefined_pd (void)
126 {
127 __m512d __Y = __Y;
128 return __Y;
129 }
130
131 extern __inline __m512i
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 _mm512_undefined_si512 (void)
134 {
135 __m512i __Y = __Y;
136 return __Y;
137 }
138
139 extern __inline __m512i
140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
141 _mm512_set1_epi8 (char __A)
142 {
143 return __extension__ (__m512i)(__v64qi)
144 { __A, __A, __A, __A, __A, __A, __A, __A,
145 __A, __A, __A, __A, __A, __A, __A, __A,
146 __A, __A, __A, __A, __A, __A, __A, __A,
147 __A, __A, __A, __A, __A, __A, __A, __A,
148 __A, __A, __A, __A, __A, __A, __A, __A,
149 __A, __A, __A, __A, __A, __A, __A, __A,
150 __A, __A, __A, __A, __A, __A, __A, __A,
151 __A, __A, __A, __A, __A, __A, __A, __A };
152 }
153
154 extern __inline __m512i
155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
156 _mm512_set1_epi16 (short __A)
157 {
158 return __extension__ (__m512i)(__v32hi)
159 { __A, __A, __A, __A, __A, __A, __A, __A,
160 __A, __A, __A, __A, __A, __A, __A, __A,
161 __A, __A, __A, __A, __A, __A, __A, __A,
162 __A, __A, __A, __A, __A, __A, __A, __A };
163 }
164
165 extern __inline __m512d
166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
167 _mm512_set1_pd (double __A)
168 {
169 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
170 (__v2df) { __A, },
171 (__v8df)
172 _mm512_undefined_pd (),
173 (__mmask8) -1);
174 }
175
176 extern __inline __m512
177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
178 _mm512_set1_ps (float __A)
179 {
180 return (__m512) __builtin_ia32_broadcastss512 (__extension__
181 (__v4sf) { __A, },
182 (__v16sf)
183 _mm512_undefined_ps (),
184 (__mmask16) -1);
185 }
186
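/* Illustrative usage sketch (editorial addition): _mm512_set1_ps broadcasts
   one scalar to all 16 float lanes; the element check below relies on GCC's
   vector subscripting.  The helper name is hypothetical.  */
static __inline int
__avx512f_example_set1 (float __x)
{
  __m512 __v = _mm512_set1_ps (__x);
  return ((__v16sf) __v)[0] == __x && ((__v16sf) __v)[15] == __x;
}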
187 /* Create the vector [A B C D A B C D A B C D A B C D]. */
188 extern __inline __m512i
189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
191 {
192 return __extension__ (__m512i)(__v16si)
193 { __D, __C, __B, __A, __D, __C, __B, __A,
194 __D, __C, __B, __A, __D, __C, __B, __A };
195 }
196
197 extern __inline __m512i
198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
200 long long __D)
201 {
202 return __extension__ (__m512i) (__v8di)
203 { __D, __C, __B, __A, __D, __C, __B, __A };
204 }
205
206 extern __inline __m512d
207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
208 _mm512_set4_pd (double __A, double __B, double __C, double __D)
209 {
210 return __extension__ (__m512d)
211 { __D, __C, __B, __A, __D, __C, __B, __A };
212 }
213
214 extern __inline __m512
215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
216 _mm512_set4_ps (float __A, float __B, float __C, float __D)
217 {
218 return __extension__ (__m512)
219 { __D, __C, __B, __A, __D, __C, __B, __A,
220 __D, __C, __B, __A, __D, __C, __B, __A };
221 }
222
223 #define _mm512_setr4_epi64(e0,e1,e2,e3) \
224 _mm512_set4_epi64(e3,e2,e1,e0)
225
226 #define _mm512_setr4_epi32(e0,e1,e2,e3) \
227 _mm512_set4_epi32(e3,e2,e1,e0)
228
229 #define _mm512_setr4_pd(e0,e1,e2,e3) \
230 _mm512_set4_pd(e3,e2,e1,e0)
231
232 #define _mm512_setr4_ps(e0,e1,e2,e3) \
233 _mm512_set4_ps(e3,e2,e1,e0)
234
235 extern __inline __m512
236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
237 _mm512_setzero_ps (void)
238 {
239 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
240 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
241 }
242
243 extern __inline __m512d
244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
245 _mm512_setzero_pd (void)
246 {
247 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
248 }
249
250 extern __inline __m512i
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 _mm512_setzero_epi32 (void)
253 {
254 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
255 }
256
257 extern __inline __m512i
258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259 _mm512_setzero_si512 (void)
260 {
261 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
262 }
263
264 extern __inline __m512d
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
267 {
268 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
269 (__v8df) __W,
270 (__mmask8) __U);
271 }
272
273 extern __inline __m512d
274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
276 {
277 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
278 (__v8df)
279 _mm512_setzero_pd (),
280 (__mmask8) __U);
281 }
282
283 extern __inline __m512
284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
285 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
286 {
287 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
288 (__v16sf) __W,
289 (__mmask16) __U);
290 }
291
292 extern __inline __m512
293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
294 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
295 {
296 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
297 (__v16sf)
298 _mm512_setzero_ps (),
299 (__mmask16) __U);
300 }
301
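/* Illustrative usage sketch (editorial addition): with _mm512_mask_mov_ps,
   lanes whose mask bit is 1 are taken from the source operand and the
   remaining lanes keep the old value __W; the maskz form zeroes them
   instead.  The helper name is hypothetical.  */
static __inline __m512
__avx512f_example_blend (__m512 __old, __m512 __new)
{
  /* 0x00FF selects the low eight lanes from __new and keeps the high
     eight lanes of __old.  */
  return _mm512_mask_mov_ps (__old, (__mmask16) 0x00FF, __new);
}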
302 extern __inline __m512d
303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
304 _mm512_load_pd (void const *__P)
305 {
306 return *(__m512d *) __P;
307 }
308
309 extern __inline __m512d
310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
312 {
313 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
314 (__v8df) __W,
315 (__mmask8) __U);
316 }
317
318 extern __inline __m512d
319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
320 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
321 {
322 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
323 (__v8df)
324 _mm512_setzero_pd (),
325 (__mmask8) __U);
326 }
327
328 extern __inline void
329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330 _mm512_store_pd (void *__P, __m512d __A)
331 {
332 *(__m512d *) __P = __A;
333 }
334
335 extern __inline void
336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
338 {
339 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
340 (__mmask8) __U);
341 }
342
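/* Illustrative usage sketch (editorial addition): _mm512_load_pd and
   _mm512_store_pd require 64-byte aligned addresses; the masked forms only
   touch the lanes whose mask bit is set, which is a common way to handle a
   loop remainder.  The helper name is hypothetical.  */
static __inline void
__avx512f_example_masked_copy (double *__dst, const double *__src,
			       __mmask8 __m)
{
  /* Both pointers are assumed to be 64-byte aligned.  */
  __m512d __v = _mm512_maskz_load_pd (__m, __src);
  _mm512_mask_store_pd (__dst, __m, __v);
}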
343 extern __inline __m512
344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345 _mm512_load_ps (void const *__P)
346 {
347 return *(__m512 *) __P;
348 }
349
350 extern __inline __m512
351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
353 {
354 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
355 (__v16sf) __W,
356 (__mmask16) __U);
357 }
358
359 extern __inline __m512
360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
361 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
362 {
363 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
364 (__v16sf)
365 _mm512_setzero_ps (),
366 (__mmask16) __U);
367 }
368
369 extern __inline void
370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
371 _mm512_store_ps (void *__P, __m512 __A)
372 {
373 *(__m512 *) __P = __A;
374 }
375
376 extern __inline void
377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
379 {
380 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
381 (__mmask16) __U);
382 }
383
384 extern __inline __m512i
385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
386 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
387 {
388 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
389 (__v8di) __W,
390 (__mmask8) __U);
391 }
392
393 extern __inline __m512i
394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
396 {
397 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
398 (__v8di)
399 _mm512_setzero_si512 (),
400 (__mmask8) __U);
401 }
402
403 extern __inline __m512i
404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405 _mm512_load_epi64 (void const *__P)
406 {
407 return *(__m512i *) __P;
408 }
409
410 extern __inline __m512i
411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
413 {
414 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
415 (__v8di) __W,
416 (__mmask8) __U);
417 }
418
419 extern __inline __m512i
420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
421 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
422 {
423 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
424 (__v8di)
425 _mm512_setzero_si512 (),
426 (__mmask8) __U);
427 }
428
429 extern __inline void
430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
431 _mm512_store_epi64 (void *__P, __m512i __A)
432 {
433 *(__m512i *) __P = __A;
434 }
435
436 extern __inline void
437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
439 {
440 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
441 (__mmask8) __U);
442 }
443
444 extern __inline __m512i
445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
447 {
448 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
449 (__v16si) __W,
450 (__mmask16) __U);
451 }
452
453 extern __inline __m512i
454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
455 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
456 {
457 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
458 (__v16si)
459 _mm512_setzero_si512 (),
460 (__mmask16) __U);
461 }
462
463 extern __inline __m512i
464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
465 _mm512_load_si512 (void const *__P)
466 {
467 return *(__m512i *) __P;
468 }
469
470 extern __inline __m512i
471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
472 _mm512_load_epi32 (void const *__P)
473 {
474 return *(__m512i *) __P;
475 }
476
477 extern __inline __m512i
478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
480 {
481 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
482 (__v16si) __W,
483 (__mmask16) __U);
484 }
485
486 extern __inline __m512i
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
489 {
490 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
491 (__v16si)
492 _mm512_setzero_si512 (),
493 (__mmask16) __U);
494 }
495
496 extern __inline void
497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
498 _mm512_store_si512 (void *__P, __m512i __A)
499 {
500 *(__m512i *) __P = __A;
501 }
502
503 extern __inline void
504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505 _mm512_store_epi32 (void *__P, __m512i __A)
506 {
507 *(__m512i *) __P = __A;
508 }
509
510 extern __inline void
511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
513 {
514 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
515 (__mmask16) __U);
516 }
517
518 extern __inline __m512i
519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
521 {
522 return (__m512i) ((__v16su) __A * (__v16su) __B);
523 }
524
525 extern __inline __m512i
526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
528 {
529 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
530 (__v16si) __B,
531 (__v16si)
532 _mm512_setzero_si512 (),
533 __M);
534 }
535
536 extern __inline __m512i
537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
539 {
540 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
541 (__v16si) __B,
542 (__v16si) __W, __M);
543 }
544
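/* Illustrative usage sketch (editorial addition): the maskz form of the
   32-bit multiply zeroes every lane whose mask bit is clear, so adding a
   third operand afterwards leaves the unselected lanes equal to that
   operand.  The helper name is hypothetical; the add is written with GCC's
   generic vector operators because _mm512_add_epi32 appears later in this
   header.  */
static __inline __m512i
__avx512f_example_masked_muladd (__mmask16 __m, __m512i __a, __m512i __b,
				 __m512i __c)
{
  __m512i __p = _mm512_maskz_mullo_epi32 (__m, __a, __b);
  return (__m512i) ((__v16su) __p + (__v16su) __c);
}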
545 extern __inline __m512i
546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
547 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
548 {
549 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
550 (__v16si) __Y,
551 (__v16si)
552 _mm512_undefined_si512 (),
553 (__mmask16) -1);
554 }
555
556 extern __inline __m512i
557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
558 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
559 {
560 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
561 (__v16si) __Y,
562 (__v16si) __W,
563 (__mmask16) __U);
564 }
565
566 extern __inline __m512i
567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
569 {
570 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
571 (__v16si) __Y,
572 (__v16si)
573 _mm512_setzero_si512 (),
574 (__mmask16) __U);
575 }
576
577 extern __inline __m512i
578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
579 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
580 {
581 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
582 (__v16si) __Y,
583 (__v16si)
584 _mm512_undefined_si512 (),
585 (__mmask16) -1);
586 }
587
588 extern __inline __m512i
589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
590 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
591 {
592 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
593 (__v16si) __Y,
594 (__v16si) __W,
595 (__mmask16) __U);
596 }
597
598 extern __inline __m512i
599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
601 {
602 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
603 (__v16si) __Y,
604 (__v16si)
605 _mm512_setzero_si512 (),
606 (__mmask16) __U);
607 }
608
609 extern __inline __m512i
610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
611 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
612 {
613 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
614 (__v16si) __Y,
615 (__v16si)
616 _mm512_undefined_si512 (),
617 (__mmask16) -1);
618 }
619
620 extern __inline __m512i
621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
622 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
623 {
624 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
625 (__v16si) __Y,
626 (__v16si) __W,
627 (__mmask16) __U);
628 }
629
630 extern __inline __m512i
631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
632 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
633 {
634 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
635 (__v16si) __Y,
636 (__v16si)
637 _mm512_setzero_si512 (),
638 (__mmask16) __U);
639 }
640
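/* Illustrative usage sketch (editorial addition): the sllv/srlv/srav
   intrinsics shift each 32-bit lane of __X by the per-lane count held in
   __Y, e.g. shifting every lane by a different amount in one instruction.
   The helper name is hypothetical.  */
static __inline __m512i
__avx512f_example_per_lane_shift (__m512i __x)
{
  /* Shift lane i left by i bits.  */
  __m512i __counts = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
					8, 9, 10, 11, 12, 13, 14, 15);
  return _mm512_sllv_epi32 (__x, __counts);
}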
641 extern __inline __m512i
642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
643 _mm512_add_epi64 (__m512i __A, __m512i __B)
644 {
645 return (__m512i) ((__v8du) __A + (__v8du) __B);
646 }
647
648 extern __inline __m512i
649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
651 {
652 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
653 (__v8di) __B,
654 (__v8di) __W,
655 (__mmask8) __U);
656 }
657
658 extern __inline __m512i
659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
660 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
661 {
662 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
663 (__v8di) __B,
664 (__v8di)
665 _mm512_setzero_si512 (),
666 (__mmask8) __U);
667 }
668
669 extern __inline __m512i
670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
671 _mm512_sub_epi64 (__m512i __A, __m512i __B)
672 {
673 return (__m512i) ((__v8du) __A - (__v8du) __B);
674 }
675
676 extern __inline __m512i
677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
679 {
680 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
681 (__v8di) __B,
682 (__v8di) __W,
683 (__mmask8) __U);
684 }
685
686 extern __inline __m512i
687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
688 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
689 {
690 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
691 (__v8di) __B,
692 (__v8di)
693 _mm512_setzero_si512 (),
694 (__mmask8) __U);
695 }
696
697 extern __inline __m512i
698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
700 {
701 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
702 (__v8di) __Y,
703 (__v8di)
704 _mm512_undefined_si512 (),
705 (__mmask8) -1);
706 }
707
708 extern __inline __m512i
709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
710 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
711 {
712 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
713 (__v8di) __Y,
714 (__v8di) __W,
715 (__mmask8) __U);
716 }
717
718 extern __inline __m512i
719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
720 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
721 {
722 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
723 (__v8di) __Y,
724 (__v8di)
725 _mm512_setzero_si512 (),
726 (__mmask8) __U);
727 }
728
729 extern __inline __m512i
730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
732 {
733 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
734 (__v8di) __Y,
735 (__v8di)
736 _mm512_undefined_si512 (),
737 (__mmask8) -1);
738 }
739
740 extern __inline __m512i
741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
742 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
743 {
744 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
745 (__v8di) __Y,
746 (__v8di) __W,
747 (__mmask8) __U);
748 }
749
750 extern __inline __m512i
751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
752 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
753 {
754 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
755 (__v8di) __Y,
756 (__v8di)
757 _mm512_setzero_si512 (),
758 (__mmask8) __U);
759 }
760
761 extern __inline __m512i
762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
764 {
765 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
766 (__v8di) __Y,
767 (__v8di)
768 _mm512_undefined_si512 (),
769 (__mmask8) -1);
770 }
771
772 extern __inline __m512i
773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
774 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
775 {
776 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
777 (__v8di) __Y,
778 (__v8di) __W,
779 (__mmask8) __U);
780 }
781
782 extern __inline __m512i
783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
784 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
785 {
786 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
787 (__v8di) __Y,
788 (__v8di)
789 _mm512_setzero_si512 (),
790 (__mmask8) __U);
791 }
792
793 extern __inline __m512i
794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 _mm512_add_epi32 (__m512i __A, __m512i __B)
796 {
797 return (__m512i) ((__v16su) __A + (__v16su) __B);
798 }
799
800 extern __inline __m512i
801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
803 {
804 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
805 (__v16si) __B,
806 (__v16si) __W,
807 (__mmask16) __U);
808 }
809
810 extern __inline __m512i
811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
812 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
813 {
814 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
815 (__v16si) __B,
816 (__v16si)
817 _mm512_setzero_si512 (),
818 (__mmask16) __U);
819 }
820
821 extern __inline __m512i
822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
824 {
825 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
826 (__v16si) __Y,
827 (__v8di)
828 _mm512_undefined_si512 (),
829 (__mmask8) -1);
830 }
831
832 extern __inline __m512i
833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
835 {
836 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
837 (__v16si) __Y,
838 (__v8di) __W, __M);
839 }
840
841 extern __inline __m512i
842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
844 {
845 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
846 (__v16si) __Y,
847 (__v8di)
848 _mm512_setzero_si512 (),
849 __M);
850 }
851
852 extern __inline __m512i
853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
854 _mm512_sub_epi32 (__m512i __A, __m512i __B)
855 {
856 return (__m512i) ((__v16su) __A - (__v16su) __B);
857 }
858
859 extern __inline __m512i
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
862 {
863 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
864 (__v16si) __B,
865 (__v16si) __W,
866 (__mmask16) __U);
867 }
868
869 extern __inline __m512i
870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
872 {
873 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
874 (__v16si) __B,
875 (__v16si)
876 _mm512_setzero_si512 (),
877 (__mmask16) __U);
878 }
879
880 extern __inline __m512i
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
883 {
884 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
885 (__v16si) __Y,
886 (__v8di)
887 _mm512_undefined_si512 (),
888 (__mmask8) -1);
889 }
890
891 extern __inline __m512i
892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
894 {
895 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
896 (__v16si) __Y,
897 (__v8di) __W, __M);
898 }
899
900 extern __inline __m512i
901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
903 {
904 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
905 (__v16si) __Y,
906 (__v8di)
907 _mm512_setzero_si512 (),
908 __M);
909 }
910
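/* Illustrative usage sketch (editorial addition): _mm512_mul_epu32 reads
   only the low (even-numbered) 32-bit element of each 64-bit lane of its
   operands and produces eight full 64-bit products, which makes it a
   building block for wide multiplies; _mm512_mul_epi32 is the
   sign-extending counterpart.  The helper name is hypothetical.  */
static __inline __m512i
__avx512f_example_widening_mul (__m512i __a, __m512i __b)
{
  /* Eight products of the even 32-bit elements, zero-extended to 64 bits.  */
  return _mm512_mul_epu32 (__a, __b);
}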
911 #ifdef __OPTIMIZE__
912 extern __inline __m512i
913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
915 {
916 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
917 (__v8di)
918 _mm512_undefined_si512 (),
919 (__mmask8) -1);
920 }
921
922 extern __inline __m512i
923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
924 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
925 unsigned int __B)
926 {
927 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
928 (__v8di) __W,
929 (__mmask8) __U);
930 }
931
932 extern __inline __m512i
933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
934 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
935 {
936 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
937 (__v8di)
938 _mm512_setzero_si512 (),
939 (__mmask8) __U);
940 }
941 #else
942 #define _mm512_slli_epi64(X, C) \
943 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
944 (__v8di)(__m512i)_mm512_undefined_si512 (),\
945 (__mmask8)-1))
946
947 #define _mm512_mask_slli_epi64(W, U, X, C) \
948 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
949 (__v8di)(__m512i)(W),\
950 (__mmask8)(U)))
951
952 #define _mm512_maskz_slli_epi64(U, X, C) \
953 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
954 (__v8di)(__m512i)_mm512_setzero_si512 (),\
955 (__mmask8)(U)))
956 #endif
957
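/* Illustrative usage sketch (editorial addition): when __OPTIMIZE__ is not
   defined the *_slli_* names are macros, so the shift count must be a
   constant expression either way; a run-time count belongs with
   _mm512_sllv_epi64 instead.  The helper name is hypothetical.  */
static __inline __m512i
__avx512f_example_times_eight (__m512i __a)
{
  /* Multiply each 64-bit lane by 8 via a constant left shift.  */
  return _mm512_slli_epi64 (__a, 3);
}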
958 extern __inline __m512i
959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960 _mm512_sll_epi64 (__m512i __A, __m128i __B)
961 {
962 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
963 (__v2di) __B,
964 (__v8di)
965 _mm512_undefined_si512 (),
966 (__mmask8) -1);
967 }
968
969 extern __inline __m512i
970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
971 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
972 {
973 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
974 (__v2di) __B,
975 (__v8di) __W,
976 (__mmask8) __U);
977 }
978
979 extern __inline __m512i
980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
981 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
982 {
983 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
984 (__v2di) __B,
985 (__v8di)
986 _mm512_setzero_si512 (),
987 (__mmask8) __U);
988 }
989
990 #ifdef __OPTIMIZE__
991 extern __inline __m512i
992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
993 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
994 {
995 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
996 (__v8di)
997 _mm512_undefined_si512 (),
998 (__mmask8) -1);
999 }
1000
1001 extern __inline __m512i
1002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1003 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1004 __m512i __A, unsigned int __B)
1005 {
1006 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1007 (__v8di) __W,
1008 (__mmask8) __U);
1009 }
1010
1011 extern __inline __m512i
1012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1013 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1014 {
1015 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1016 (__v8di)
1017 _mm512_setzero_si512 (),
1018 (__mmask8) __U);
1019 }
1020 #else
1021 #define _mm512_srli_epi64(X, C) \
1022 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1023 (__v8di)(__m512i)_mm512_undefined_si512 (),\
1024 (__mmask8)-1))
1025
1026 #define _mm512_mask_srli_epi64(W, U, X, C) \
1027 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1028 (__v8di)(__m512i)(W),\
1029 (__mmask8)(U)))
1030
1031 #define _mm512_maskz_srli_epi64(U, X, C) \
1032 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1033 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1034 (__mmask8)(U)))
1035 #endif
1036
1037 extern __inline __m512i
1038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1039 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1040 {
1041 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1042 (__v2di) __B,
1043 (__v8di)
1044 _mm512_undefined_si512 (),
1045 (__mmask8) -1);
1046 }
1047
1048 extern __inline __m512i
1049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1050 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1051 {
1052 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1053 (__v2di) __B,
1054 (__v8di) __W,
1055 (__mmask8) __U);
1056 }
1057
1058 extern __inline __m512i
1059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1060 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1061 {
1062 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1063 (__v2di) __B,
1064 (__v8di)
1065 _mm512_setzero_si512 (),
1066 (__mmask8) __U);
1067 }
1068
1069 #ifdef __OPTIMIZE__
1070 extern __inline __m512i
1071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1073 {
1074 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1075 (__v8di)
1076 _mm512_undefined_si512 (),
1077 (__mmask8) -1);
1078 }
1079
1080 extern __inline __m512i
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1083 unsigned int __B)
1084 {
1085 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1086 (__v8di) __W,
1087 (__mmask8) __U);
1088 }
1089
1090 extern __inline __m512i
1091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1092 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1093 {
1094 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1095 (__v8di)
1096 _mm512_setzero_si512 (),
1097 (__mmask8) __U);
1098 }
1099 #else
1100 #define _mm512_srai_epi64(X, C) \
1101 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1102 (__v8di)(__m512i)_mm512_undefined_si512 (),\
1103 (__mmask8)-1))
1104
1105 #define _mm512_mask_srai_epi64(W, U, X, C) \
1106 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1107 (__v8di)(__m512i)(W),\
1108 (__mmask8)(U)))
1109
1110 #define _mm512_maskz_srai_epi64(U, X, C) \
1111 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1112 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1113 (__mmask8)(U)))
1114 #endif
1115
1116 extern __inline __m512i
1117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1118 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1119 {
1120 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1121 (__v2di) __B,
1122 (__v8di)
1123 _mm512_undefined_si512 (),
1124 (__mmask8) -1);
1125 }
1126
1127 extern __inline __m512i
1128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1130 {
1131 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1132 (__v2di) __B,
1133 (__v8di) __W,
1134 (__mmask8) __U);
1135 }
1136
1137 extern __inline __m512i
1138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1140 {
1141 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1142 (__v2di) __B,
1143 (__v8di)
1144 _mm512_setzero_si512 (),
1145 (__mmask8) __U);
1146 }
1147
1148 #ifdef __OPTIMIZE__
1149 extern __inline __m512i
1150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1152 {
1153 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1154 (__v16si)
1155 _mm512_undefined_si512 (),
1156 (__mmask16) -1);
1157 }
1158
1159 extern __inline __m512i
1160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1162 unsigned int __B)
1163 {
1164 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1165 (__v16si) __W,
1166 (__mmask16) __U);
1167 }
1168
1169 extern __inline __m512i
1170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1172 {
1173 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1174 (__v16si)
1175 _mm512_setzero_si512 (),
1176 (__mmask16) __U);
1177 }
1178 #else
1179 #define _mm512_slli_epi32(X, C) \
1180 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1181 (__v16si)(__m512i)_mm512_undefined_si512 (),\
1182 (__mmask16)-1))
1183
1184 #define _mm512_mask_slli_epi32(W, U, X, C) \
1185 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1186 (__v16si)(__m512i)(W),\
1187 (__mmask16)(U)))
1188
1189 #define _mm512_maskz_slli_epi32(U, X, C) \
1190 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1191 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1192 (__mmask16)(U)))
1193 #endif
1194
1195 extern __inline __m512i
1196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1198 {
1199 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1200 (__v4si) __B,
1201 (__v16si)
1202 _mm512_undefined_si512 (),
1203 (__mmask16) -1);
1204 }
1205
1206 extern __inline __m512i
1207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1209 {
1210 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1211 (__v4si) __B,
1212 (__v16si) __W,
1213 (__mmask16) __U);
1214 }
1215
1216 extern __inline __m512i
1217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1219 {
1220 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1221 (__v4si) __B,
1222 (__v16si)
1223 _mm512_setzero_si512 (),
1224 (__mmask16) __U);
1225 }
1226
1227 #ifdef __OPTIMIZE__
1228 extern __inline __m512i
1229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1230 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1231 {
1232 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1233 (__v16si)
1234 _mm512_undefined_si512 (),
1235 (__mmask16) -1);
1236 }
1237
1238 extern __inline __m512i
1239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1240 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1241 __m512i __A, unsigned int __B)
1242 {
1243 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1244 (__v16si) __W,
1245 (__mmask16) __U);
1246 }
1247
1248 extern __inline __m512i
1249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1250 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1251 {
1252 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1253 (__v16si)
1254 _mm512_setzero_si512 (),
1255 (__mmask16) __U);
1256 }
1257 #else
1258 #define _mm512_srli_epi32(X, C) \
1259 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1260 (__v16si)(__m512i)_mm512_undefined_si512 (),\
1261 (__mmask16)-1))
1262
1263 #define _mm512_mask_srli_epi32(W, U, X, C) \
1264 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1265 (__v16si)(__m512i)(W),\
1266 (__mmask16)(U)))
1267
1268 #define _mm512_maskz_srli_epi32(U, X, C) \
1269 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1270 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1271 (__mmask16)(U)))
1272 #endif
1273
1274 extern __inline __m512i
1275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1276 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1277 {
1278 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1279 (__v4si) __B,
1280 (__v16si)
1281 _mm512_undefined_si512 (),
1282 (__mmask16) -1);
1283 }
1284
1285 extern __inline __m512i
1286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1287 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1288 {
1289 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1290 (__v4si) __B,
1291 (__v16si) __W,
1292 (__mmask16) __U);
1293 }
1294
1295 extern __inline __m512i
1296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1297 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1298 {
1299 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1300 (__v4si) __B,
1301 (__v16si)
1302 _mm512_setzero_si512 (),
1303 (__mmask16) __U);
1304 }
1305
1306 #ifdef __OPTIMIZE__
1307 extern __inline __m512i
1308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1309 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1310 {
1311 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1312 (__v16si)
1313 _mm512_undefined_si512 (),
1314 (__mmask16) -1);
1315 }
1316
1317 extern __inline __m512i
1318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1319 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1320 unsigned int __B)
1321 {
1322 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1323 (__v16si) __W,
1324 (__mmask16) __U);
1325 }
1326
1327 extern __inline __m512i
1328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1329 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1330 {
1331 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1332 (__v16si)
1333 _mm512_setzero_si512 (),
1334 (__mmask16) __U);
1335 }
1336 #else
1337 #define _mm512_srai_epi32(X, C) \
1338 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1339 (__v16si)(__m512i)_mm512_undefined_si512 (),\
1340 (__mmask16)-1))
1341
1342 #define _mm512_mask_srai_epi32(W, U, X, C) \
1343 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1344 (__v16si)(__m512i)(W),\
1345 (__mmask16)(U)))
1346
1347 #define _mm512_maskz_srai_epi32(U, X, C) \
1348 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1349 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1350 (__mmask16)(U)))
1351 #endif
1352
1353 extern __inline __m512i
1354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1356 {
1357 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1358 (__v4si) __B,
1359 (__v16si)
1360 _mm512_undefined_si512 (),
1361 (__mmask16) -1);
1362 }
1363
1364 extern __inline __m512i
1365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1367 {
1368 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1369 (__v4si) __B,
1370 (__v16si) __W,
1371 (__mmask16) __U);
1372 }
1373
1374 extern __inline __m512i
1375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1376 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1377 {
1378 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1379 (__v4si) __B,
1380 (__v16si)
1381 _mm512_setzero_si512 (),
1382 (__mmask16) __U);
1383 }
1384
1385 #ifdef __OPTIMIZE__
1386 extern __inline __m128d
1387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1389 {
1390 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1391 (__v2df) __B,
1392 __R);
1393 }
1394
1395 extern __inline __m128
1396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1397 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1398 {
1399 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1400 (__v4sf) __B,
1401 __R);
1402 }
1403
1404 extern __inline __m128d
1405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1406 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1407 {
1408 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1409 (__v2df) __B,
1410 __R);
1411 }
1412
1413 extern __inline __m128
1414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1415 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1416 {
1417 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1418 (__v4sf) __B,
1419 __R);
1420 }
1421
1422 #else
1423 #define _mm_add_round_sd(A, B, C) \
1424 (__m128d)__builtin_ia32_addsd_round(A, B, C)
1425
1426 #define _mm_add_round_ss(A, B, C) \
1427 (__m128)__builtin_ia32_addss_round(A, B, C)
1428
1429 #define _mm_sub_round_sd(A, B, C) \
1430 (__m128d)__builtin_ia32_subsd_round(A, B, C)
1431
1432 #define _mm_sub_round_ss(A, B, C) \
1433 (__m128)__builtin_ia32_subss_round(A, B, C)
1434 #endif
1435
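/* Illustrative usage sketch (editorial addition): the *_round_* scalar
   intrinsics take an embedded rounding control; the _MM_FROUND_* macros
   come from <smmintrin.h>, which <immintrin.h> includes before this
   header.  The helper name is hypothetical.  */
static __inline __m128d
__avx512f_example_add_toward_zero (__m128d __a, __m128d __b)
{
  /* Add the low doubles, truncating toward zero and suppressing
     exceptions; the upper double is copied from __a.  */
  return _mm_add_round_sd (__a, __b,
			   _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}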
1436 #ifdef __OPTIMIZE__
1437 extern __inline __m512i
1438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1439 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1440 {
1441 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1442 (__v8di) __B,
1443 (__v8di) __C, imm,
1444 (__mmask8) -1);
1445 }
1446
1447 extern __inline __m512i
1448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1449 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1450 __m512i __C, const int imm)
1451 {
1452 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1453 (__v8di) __B,
1454 (__v8di) __C, imm,
1455 (__mmask8) __U);
1456 }
1457
1458 extern __inline __m512i
1459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1461 __m512i __C, const int imm)
1462 {
1463 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1464 (__v8di) __B,
1465 (__v8di) __C,
1466 imm, (__mmask8) __U);
1467 }
1468
1469 extern __inline __m512i
1470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1471 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1472 {
1473 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1474 (__v16si) __B,
1475 (__v16si) __C,
1476 imm, (__mmask16) -1);
1477 }
1478
1479 extern __inline __m512i
1480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1482 __m512i __C, const int imm)
1483 {
1484 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1485 (__v16si) __B,
1486 (__v16si) __C,
1487 imm, (__mmask16) __U);
1488 }
1489
1490 extern __inline __m512i
1491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1492 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1493 __m512i __C, const int imm)
1494 {
1495 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1496 (__v16si) __B,
1497 (__v16si) __C,
1498 imm, (__mmask16) __U);
1499 }
1500 #else
1501 #define _mm512_ternarylogic_epi64(A, B, C, I) \
1502 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1503 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
1504 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
1505 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1506 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1507 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
1508 ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
1509 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1510 #define _mm512_ternarylogic_epi32(A, B, C, I) \
1511 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1512 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1513 (__mmask16)-1))
1514 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
1515 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1516 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1517 (__mmask16)(U)))
1518 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
1519 ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
1520 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1521 (__mmask16)(U)))
1522 #endif
1523
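/* Illustrative usage sketch (editorial addition): the immediate of the
   ternarylogic intrinsics is an 8-entry truth table indexed by the bit
   triple (A,B,C); 0x96 is three-way XOR and 0xE8 is the bitwise majority
   function.  The helper name is hypothetical.  */
static __inline __m512i
__avx512f_example_xor3 (__m512i __a, __m512i __b, __m512i __c)
{
  /* One instruction instead of two vector XORs.  */
  return _mm512_ternarylogic_epi64 (__a, __b, __c, 0x96);
}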
1524 extern __inline __m512d
1525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1526 _mm512_rcp14_pd (__m512d __A)
1527 {
1528 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1529 (__v8df)
1530 _mm512_undefined_pd (),
1531 (__mmask8) -1);
1532 }
1533
1534 extern __inline __m512d
1535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1536 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1537 {
1538 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1539 (__v8df) __W,
1540 (__mmask8) __U);
1541 }
1542
1543 extern __inline __m512d
1544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1546 {
1547 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1548 (__v8df)
1549 _mm512_setzero_pd (),
1550 (__mmask8) __U);
1551 }
1552
1553 extern __inline __m512
1554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1555 _mm512_rcp14_ps (__m512 __A)
1556 {
1557 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1558 (__v16sf)
1559 _mm512_undefined_ps (),
1560 (__mmask16) -1);
1561 }
1562
1563 extern __inline __m512
1564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1565 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1566 {
1567 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1568 (__v16sf) __W,
1569 (__mmask16) __U);
1570 }
1571
1572 extern __inline __m512
1573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1574 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1575 {
1576 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1577 (__v16sf)
1578 _mm512_setzero_ps (),
1579 (__mmask16) __U);
1580 }
1581
1582 extern __inline __m128d
1583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1584 _mm_rcp14_sd (__m128d __A, __m128d __B)
1585 {
1586 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1587 (__v2df) __A);
1588 }
1589
1590 extern __inline __m128
1591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1592 _mm_rcp14_ss (__m128 __A, __m128 __B)
1593 {
1594 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1595 (__v4sf) __A);
1596 }
1597
1598 extern __inline __m512d
1599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600 _mm512_rsqrt14_pd (__m512d __A)
1601 {
1602 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1603 (__v8df)
1604 _mm512_undefined_pd (),
1605 (__mmask8) -1);
1606 }
1607
1608 extern __inline __m512d
1609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1611 {
1612 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1613 (__v8df) __W,
1614 (__mmask8) __U);
1615 }
1616
1617 extern __inline __m512d
1618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1620 {
1621 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1622 (__v8df)
1623 _mm512_setzero_pd (),
1624 (__mmask8) __U);
1625 }
1626
1627 extern __inline __m512
1628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1629 _mm512_rsqrt14_ps (__m512 __A)
1630 {
1631 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1632 (__v16sf)
1633 _mm512_undefined_ps (),
1634 (__mmask16) -1);
1635 }
1636
1637 extern __inline __m512
1638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1639 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1640 {
1641 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1642 (__v16sf) __W,
1643 (__mmask16) __U);
1644 }
1645
1646 extern __inline __m512
1647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1649 {
1650 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1651 (__v16sf)
1652 _mm512_setzero_ps (),
1653 (__mmask16) __U);
1654 }
1655
1656 extern __inline __m128d
1657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1658 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1659 {
1660 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1661 (__v2df) __A);
1662 }
1663
1664 extern __inline __m128
1665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1666 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1667 {
1668 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1669 (__v4sf) __A);
1670 }
1671
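/* Illustrative usage sketch (editorial addition): _mm512_rcp14_ps returns a
   reciprocal approximation with relative error below 2^-14; one
   Newton-Raphson step, written here with GCC's generic vector operators
   because _mm512_mul_ps and _mm512_sub_ps appear later in this header,
   brings it close to full single precision.  The helper name is
   hypothetical.  */
static __inline __m512
__avx512f_example_refined_rcp (__m512 __a)
{
  __v16sf __x0 = (__v16sf) _mm512_rcp14_ps (__a);
  __v16sf __two = (__v16sf) _mm512_set1_ps (2.0f);
  /* x1 = x0 * (2 - a * x0).  */
  return (__m512) (__x0 * (__two - (__v16sf) __a * __x0));
}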
1672 #ifdef __OPTIMIZE__
1673 extern __inline __m512d
1674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1676 {
1677 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1678 (__v8df)
1679 _mm512_undefined_pd (),
1680 (__mmask8) -1, __R);
1681 }
1682
1683 extern __inline __m512d
1684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1686 const int __R)
1687 {
1688 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1689 (__v8df) __W,
1690 (__mmask8) __U, __R);
1691 }
1692
1693 extern __inline __m512d
1694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1696 {
1697 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1698 (__v8df)
1699 _mm512_setzero_pd (),
1700 (__mmask8) __U, __R);
1701 }
1702
1703 extern __inline __m512
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1706 {
1707 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1708 (__v16sf)
1709 _mm512_undefined_ps (),
1710 (__mmask16) -1, __R);
1711 }
1712
1713 extern __inline __m512
1714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1715 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1716 {
1717 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1718 (__v16sf) __W,
1719 (__mmask16) __U, __R);
1720 }
1721
1722 extern __inline __m512
1723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1724 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1725 {
1726 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1727 (__v16sf)
1728 _mm512_setzero_ps (),
1729 (__mmask16) __U, __R);
1730 }
1731
1732 extern __inline __m128d
1733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1735 {
1736 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1737 (__v2df) __A,
1738 __R);
1739 }
1740
1741 extern __inline __m128
1742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1744 {
1745 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1746 (__v4sf) __A,
1747 __R);
1748 }
1749 #else
1750 #define _mm512_sqrt_round_pd(A, C) \
1751 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
1752
1753 #define _mm512_mask_sqrt_round_pd(W, U, A, C) \
1754 (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
1755
1756 #define _mm512_maskz_sqrt_round_pd(U, A, C) \
1757 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
1758
1759 #define _mm512_sqrt_round_ps(A, C) \
1760 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
1761
1762 #define _mm512_mask_sqrt_round_ps(W, U, A, C) \
1763 (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
1764
1765 #define _mm512_maskz_sqrt_round_ps(U, A, C) \
1766 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
1767
1768 #define _mm_sqrt_round_sd(A, B, C) \
1769 (__m128d)__builtin_ia32_sqrtsd_round(B, A, C)
1770
1771 #define _mm_sqrt_round_ss(A, B, C) \
1772 (__m128)__builtin_ia32_sqrtss_round(B, A, C)
1773 #endif
1774
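/* Sign-extend (cvtepi*) and zero-extend (cvtepu*) packed narrow integer
   elements from a 128-bit or 256-bit source into the wider elements of a
   512-bit result; the _mask_ variants merge into __W under __U and the
   _maskz_ variants zero the masked-off elements.  For instance
   (illustrative only), _mm512_cvtepi8_epi32 (_mm_set1_epi8 (-1)) yields
   sixteen 32-bit elements equal to -1.  */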
1775 extern __inline __m512i
1776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1777 _mm512_cvtepi8_epi32 (__m128i __A)
1778 {
1779 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1780 (__v16si)
1781 _mm512_undefined_si512 (),
1782 (__mmask16) -1);
1783 }
1784
1785 extern __inline __m512i
1786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1787 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1788 {
1789 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1790 (__v16si) __W,
1791 (__mmask16) __U);
1792 }
1793
1794 extern __inline __m512i
1795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1797 {
1798 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1799 (__v16si)
1800 _mm512_setzero_si512 (),
1801 (__mmask16) __U);
1802 }
1803
1804 extern __inline __m512i
1805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1806 _mm512_cvtepi8_epi64 (__m128i __A)
1807 {
1808 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1809 (__v8di)
1810 _mm512_undefined_si512 (),
1811 (__mmask8) -1);
1812 }
1813
1814 extern __inline __m512i
1815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1816 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1817 {
1818 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1819 (__v8di) __W,
1820 (__mmask8) __U);
1821 }
1822
1823 extern __inline __m512i
1824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1826 {
1827 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1828 (__v8di)
1829 _mm512_setzero_si512 (),
1830 (__mmask8) __U);
1831 }
1832
1833 extern __inline __m512i
1834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1835 _mm512_cvtepi16_epi32 (__m256i __A)
1836 {
1837 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1838 (__v16si)
1839 _mm512_undefined_si512 (),
1840 (__mmask16) -1);
1841 }
1842
1843 extern __inline __m512i
1844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1845 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1846 {
1847 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1848 (__v16si) __W,
1849 (__mmask16) __U);
1850 }
1851
1852 extern __inline __m512i
1853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1855 {
1856 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1857 (__v16si)
1858 _mm512_setzero_si512 (),
1859 (__mmask16) __U);
1860 }
1861
1862 extern __inline __m512i
1863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1864 _mm512_cvtepi16_epi64 (__m128i __A)
1865 {
1866 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1867 (__v8di)
1868 _mm512_undefined_si512 (),
1869 (__mmask8) -1);
1870 }
1871
1872 extern __inline __m512i
1873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1874 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1875 {
1876 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1877 (__v8di) __W,
1878 (__mmask8) __U);
1879 }
1880
1881 extern __inline __m512i
1882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1883 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1884 {
1885 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1886 (__v8di)
1887 _mm512_setzero_si512 (),
1888 (__mmask8) __U);
1889 }
1890
1891 extern __inline __m512i
1892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1893 _mm512_cvtepi32_epi64 (__m256i __X)
1894 {
1895 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1896 (__v8di)
1897 _mm512_undefined_si512 (),
1898 (__mmask8) -1);
1899 }
1900
1901 extern __inline __m512i
1902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1903 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1904 {
1905 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1906 (__v8di) __W,
1907 (__mmask8) __U);
1908 }
1909
1910 extern __inline __m512i
1911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1912 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1913 {
1914 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1915 (__v8di)
1916 _mm512_setzero_si512 (),
1917 (__mmask8) __U);
1918 }
1919
1920 extern __inline __m512i
1921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1922 _mm512_cvtepu8_epi32 (__m128i __A)
1923 {
1924 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1925 (__v16si)
1926 _mm512_undefined_si512 (),
1927 (__mmask16) -1);
1928 }
1929
1930 extern __inline __m512i
1931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1932 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1933 {
1934 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1935 (__v16si) __W,
1936 (__mmask16) __U);
1937 }
1938
1939 extern __inline __m512i
1940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1942 {
1943 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1944 (__v16si)
1945 _mm512_setzero_si512 (),
1946 (__mmask16) __U);
1947 }
1948
1949 extern __inline __m512i
1950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951 _mm512_cvtepu8_epi64 (__m128i __A)
1952 {
1953 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1954 (__v8di)
1955 _mm512_undefined_si512 (),
1956 (__mmask8) -1);
1957 }
1958
1959 extern __inline __m512i
1960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1962 {
1963 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1964 (__v8di) __W,
1965 (__mmask8) __U);
1966 }
1967
1968 extern __inline __m512i
1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1971 {
1972 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1973 (__v8di)
1974 _mm512_setzero_si512 (),
1975 (__mmask8) __U);
1976 }
1977
1978 extern __inline __m512i
1979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1980 _mm512_cvtepu16_epi32 (__m256i __A)
1981 {
1982 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1983 (__v16si)
1984 _mm512_undefined_si512 (),
1985 (__mmask16) -1);
1986 }
1987
1988 extern __inline __m512i
1989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1990 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1991 {
1992 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1993 (__v16si) __W,
1994 (__mmask16) __U);
1995 }
1996
1997 extern __inline __m512i
1998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2000 {
2001 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2002 (__v16si)
2003 _mm512_setzero_si512 (),
2004 (__mmask16) __U);
2005 }
2006
2007 extern __inline __m512i
2008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009 _mm512_cvtepu16_epi64 (__m128i __A)
2010 {
2011 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2012 (__v8di)
2013 _mm512_undefined_si512 (),
2014 (__mmask8) -1);
2015 }
2016
2017 extern __inline __m512i
2018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2019 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2020 {
2021 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2022 (__v8di) __W,
2023 (__mmask8) __U);
2024 }
2025
2026 extern __inline __m512i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2029 {
2030 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2031 (__v8di)
2032 _mm512_setzero_si512 (),
2033 (__mmask8) __U);
2034 }
2035
2036 extern __inline __m512i
2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038 _mm512_cvtepu32_epi64 (__m256i __X)
2039 {
2040 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2041 (__v8di)
2042 _mm512_undefined_si512 (),
2043 (__mmask8) -1);
2044 }
2045
2046 extern __inline __m512i
2047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2048 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2049 {
2050 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2051 (__v8di) __W,
2052 (__mmask8) __U);
2053 }
2054
2055 extern __inline __m512i
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2058 {
2059 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2060 (__v8di)
2061 _mm512_setzero_si512 (),
2062 (__mmask8) __U);
2063 }
2064
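/* Element-wise addition and subtraction of the 512-bit operands with an
   explicit rounding mode, e.g.
     _mm512_add_round_pd (__a, __b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
   where __a and __b are __m512d values (illustrative only).  */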
2065 #ifdef __OPTIMIZE__
2066 extern __inline __m512d
2067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2068 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2069 {
2070 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2071 (__v8df) __B,
2072 (__v8df)
2073 _mm512_undefined_pd (),
2074 (__mmask8) -1, __R);
2075 }
2076
2077 extern __inline __m512d
2078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2079 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2080 __m512d __B, const int __R)
2081 {
2082 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2083 (__v8df) __B,
2084 (__v8df) __W,
2085 (__mmask8) __U, __R);
2086 }
2087
2088 extern __inline __m512d
2089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2090 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2091 const int __R)
2092 {
2093 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2094 (__v8df) __B,
2095 (__v8df)
2096 _mm512_setzero_pd (),
2097 (__mmask8) __U, __R);
2098 }
2099
2100 extern __inline __m512
2101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2103 {
2104 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2105 (__v16sf) __B,
2106 (__v16sf)
2107 _mm512_undefined_ps (),
2108 (__mmask16) -1, __R);
2109 }
2110
2111 extern __inline __m512
2112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2113 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2114 __m512 __B, const int __R)
2115 {
2116 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2117 (__v16sf) __B,
2118 (__v16sf) __W,
2119 (__mmask16) __U, __R);
2120 }
2121
2122 extern __inline __m512
2123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2124 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2125 {
2126 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2127 (__v16sf) __B,
2128 (__v16sf)
2129 _mm512_setzero_ps (),
2130 (__mmask16) __U, __R);
2131 }
2132
2133 extern __inline __m512d
2134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2136 {
2137 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2138 (__v8df) __B,
2139 (__v8df)
2140 _mm512_undefined_pd (),
2141 (__mmask8) -1, __R);
2142 }
2143
2144 extern __inline __m512d
2145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2147 __m512d __B, const int __R)
2148 {
2149 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2150 (__v8df) __B,
2151 (__v8df) __W,
2152 (__mmask8) __U, __R);
2153 }
2154
2155 extern __inline __m512d
2156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2158 const int __R)
2159 {
2160 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2161 (__v8df) __B,
2162 (__v8df)
2163 _mm512_setzero_pd (),
2164 (__mmask8) __U, __R);
2165 }
2166
2167 extern __inline __m512
2168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2170 {
2171 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2172 (__v16sf) __B,
2173 (__v16sf)
2174 _mm512_undefined_ps (),
2175 (__mmask16) -1, __R);
2176 }
2177
2178 extern __inline __m512
2179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2180 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2181 __m512 __B, const int __R)
2182 {
2183 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2184 (__v16sf) __B,
2185 (__v16sf) __W,
2186 (__mmask16) __U, __R);
2187 }
2188
2189 extern __inline __m512
2190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2191 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2192 {
2193 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2194 (__v16sf) __B,
2195 (__v16sf)
2196 _mm512_setzero_ps (),
2197 (__mmask16) __U, __R);
2198 }
2199 #else
2200 #define _mm512_add_round_pd(A, B, C) \
2201 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2202
2203 #define _mm512_mask_add_round_pd(W, U, A, B, C) \
2204 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
2205
2206 #define _mm512_maskz_add_round_pd(U, A, B, C) \
2207 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2208
2209 #define _mm512_add_round_ps(A, B, C) \
2210 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2211
2212 #define _mm512_mask_add_round_ps(W, U, A, B, C) \
2213 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
2214
2215 #define _mm512_maskz_add_round_ps(U, A, B, C) \
2216 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2217
2218 #define _mm512_sub_round_pd(A, B, C) \
2219 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2220
2221 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
2222 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
2223
2224 #define _mm512_maskz_sub_round_pd(U, A, B, C) \
2225 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2226
2227 #define _mm512_sub_round_ps(A, B, C) \
2228 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2229
2230 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
2231 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
2232
2233 #define _mm512_maskz_sub_round_ps(U, A, B, C) \
2234 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2235 #endif
2236
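/* Element-wise multiplication and division with rounding control,
   including the scalar _mm_mul_round_sd/ss and _mm_div_round_sd/ss forms,
   which operate on the low element only and copy the remaining elements
   from the first operand.  */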
2237 #ifdef __OPTIMIZE__
2238 extern __inline __m512d
2239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2241 {
2242 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2243 (__v8df) __B,
2244 (__v8df)
2245 _mm512_undefined_pd (),
2246 (__mmask8) -1, __R);
2247 }
2248
2249 extern __inline __m512d
2250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2251 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2252 __m512d __B, const int __R)
2253 {
2254 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2255 (__v8df) __B,
2256 (__v8df) __W,
2257 (__mmask8) __U, __R);
2258 }
2259
2260 extern __inline __m512d
2261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2263 const int __R)
2264 {
2265 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2266 (__v8df) __B,
2267 (__v8df)
2268 _mm512_setzero_pd (),
2269 (__mmask8) __U, __R);
2270 }
2271
2272 extern __inline __m512
2273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2274 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2275 {
2276 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2277 (__v16sf) __B,
2278 (__v16sf)
2279 _mm512_undefined_ps (),
2280 (__mmask16) -1, __R);
2281 }
2282
2283 extern __inline __m512
2284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2286 __m512 __B, const int __R)
2287 {
2288 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2289 (__v16sf) __B,
2290 (__v16sf) __W,
2291 (__mmask16) __U, __R);
2292 }
2293
2294 extern __inline __m512
2295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2297 {
2298 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2299 (__v16sf) __B,
2300 (__v16sf)
2301 _mm512_setzero_ps (),
2302 (__mmask16) __U, __R);
2303 }
2304
2305 extern __inline __m512d
2306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2307 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2308 {
2309 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2310 (__v8df) __V,
2311 (__v8df)
2312 _mm512_undefined_pd (),
2313 (__mmask8) -1, __R);
2314 }
2315
2316 extern __inline __m512d
2317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2319 __m512d __V, const int __R)
2320 {
2321 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2322 (__v8df) __V,
2323 (__v8df) __W,
2324 (__mmask8) __U, __R);
2325 }
2326
2327 extern __inline __m512d
2328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2330 const int __R)
2331 {
2332 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2333 (__v8df) __V,
2334 (__v8df)
2335 _mm512_setzero_pd (),
2336 (__mmask8) __U, __R);
2337 }
2338
2339 extern __inline __m512
2340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2341 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2342 {
2343 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2344 (__v16sf) __B,
2345 (__v16sf)
2346 _mm512_undefined_ps (),
2347 (__mmask16) -1, __R);
2348 }
2349
2350 extern __inline __m512
2351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2352 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2353 __m512 __B, const int __R)
2354 {
2355 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2356 (__v16sf) __B,
2357 (__v16sf) __W,
2358 (__mmask16) __U, __R);
2359 }
2360
2361 extern __inline __m512
2362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2364 {
2365 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2366 (__v16sf) __B,
2367 (__v16sf)
2368 _mm512_setzero_ps (),
2369 (__mmask16) __U, __R);
2370 }
2371
2372 extern __inline __m128d
2373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2375 {
2376 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2377 (__v2df) __B,
2378 __R);
2379 }
2380
2381 extern __inline __m128
2382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2383 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2384 {
2385 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2386 (__v4sf) __B,
2387 __R);
2388 }
2389
2390 extern __inline __m128d
2391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2393 {
2394 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2395 (__v2df) __B,
2396 __R);
2397 }
2398
2399 extern __inline __m128
2400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2402 {
2403 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2404 (__v4sf) __B,
2405 __R);
2406 }
2407
2408 #else
2409 #define _mm512_mul_round_pd(A, B, C) \
2410 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2411
2412 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
2413 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
2414
2415 #define _mm512_maskz_mul_round_pd(U, A, B, C) \
2416 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2417
2418 #define _mm512_mul_round_ps(A, B, C) \
2419 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2420
2421 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
2422 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
2423
2424 #define _mm512_maskz_mul_round_ps(U, A, B, C) \
2425 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2426
2427 #define _mm512_div_round_pd(A, B, C) \
2428 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2429
2430 #define _mm512_mask_div_round_pd(W, U, A, B, C) \
2431 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
2432
2433 #define _mm512_maskz_div_round_pd(U, A, B, C) \
2434 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2435
2436 #define _mm512_div_round_ps(A, B, C) \
2437 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2438
2439 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
2440 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
2441
2442 #define _mm512_maskz_div_round_ps(U, A, B, C) \
2443 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2444
2445 #define _mm_mul_round_sd(A, B, C) \
2446 (__m128d)__builtin_ia32_mulsd_round(A, B, C)
2447
2448 #define _mm_mul_round_ss(A, B, C) \
2449 (__m128)__builtin_ia32_mulss_round(A, B, C)
2450
2451 #define _mm_div_round_sd(A, B, C) \
2452 (__m128d)__builtin_ia32_divsd_round(A, B, C)
2453
2454 #define _mm_div_round_ss(A, B, C) \
2455 (__m128)__builtin_ia32_divss_round(A, B, C)
2456 #endif
2457
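/* Element-wise maximum and minimum.  These operations do not round, so in
   practice the constant operand is used only for exception suppression
   (_MM_FROUND_NO_EXC) or left as _MM_FROUND_CUR_DIRECTION.  */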
2458 #ifdef __OPTIMIZE__
2459 extern __inline __m512d
2460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2462 {
2463 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2464 (__v8df) __B,
2465 (__v8df)
2466 _mm512_undefined_pd (),
2467 (__mmask8) -1, __R);
2468 }
2469
2470 extern __inline __m512d
2471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2472 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2473 __m512d __B, const int __R)
2474 {
2475 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2476 (__v8df) __B,
2477 (__v8df) __W,
2478 (__mmask8) __U, __R);
2479 }
2480
2481 extern __inline __m512d
2482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2483 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2484 const int __R)
2485 {
2486 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2487 (__v8df) __B,
2488 (__v8df)
2489 _mm512_setzero_pd (),
2490 (__mmask8) __U, __R);
2491 }
2492
2493 extern __inline __m512
2494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2496 {
2497 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2498 (__v16sf) __B,
2499 (__v16sf)
2500 _mm512_undefined_ps (),
2501 (__mmask16) -1, __R);
2502 }
2503
2504 extern __inline __m512
2505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2507 __m512 __B, const int __R)
2508 {
2509 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2510 (__v16sf) __B,
2511 (__v16sf) __W,
2512 (__mmask16) __U, __R);
2513 }
2514
2515 extern __inline __m512
2516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2517 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2518 {
2519 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2520 (__v16sf) __B,
2521 (__v16sf)
2522 _mm512_setzero_ps (),
2523 (__mmask16) __U, __R);
2524 }
2525
2526 extern __inline __m512d
2527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2528 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2529 {
2530 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2531 (__v8df) __B,
2532 (__v8df)
2533 _mm512_undefined_pd (),
2534 (__mmask8) -1, __R);
2535 }
2536
2537 extern __inline __m512d
2538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2540 __m512d __B, const int __R)
2541 {
2542 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2543 (__v8df) __B,
2544 (__v8df) __W,
2545 (__mmask8) __U, __R);
2546 }
2547
2548 extern __inline __m512d
2549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2550 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2551 const int __R)
2552 {
2553 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2554 (__v8df) __B,
2555 (__v8df)
2556 _mm512_setzero_pd (),
2557 (__mmask8) __U, __R);
2558 }
2559
2560 extern __inline __m512
2561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2563 {
2564 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2565 (__v16sf) __B,
2566 (__v16sf)
2567 _mm512_undefined_ps (),
2568 (__mmask16) -1, __R);
2569 }
2570
2571 extern __inline __m512
2572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2574 __m512 __B, const int __R)
2575 {
2576 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2577 (__v16sf) __B,
2578 (__v16sf) __W,
2579 (__mmask16) __U, __R);
2580 }
2581
2582 extern __inline __m512
2583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2584 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2585 {
2586 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2587 (__v16sf) __B,
2588 (__v16sf)
2589 _mm512_setzero_ps (),
2590 (__mmask16) __U, __R);
2591 }
2592 #else
2593 #define _mm512_max_round_pd(A, B, R) \
2594 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
2595
2596 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
2597 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
2598
2599 #define _mm512_maskz_max_round_pd(U, A, B, R) \
2600 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2601
2602 #define _mm512_max_round_ps(A, B, R) \
2603 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
2604
2605 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
2606 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
2607
2608 #define _mm512_maskz_max_round_ps(U, A, B, R) \
2609 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2610
2611 #define _mm512_min_round_pd(A, B, R) \
2612 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
2613
2614 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
2615 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
2616
2617 #define _mm512_maskz_min_round_pd(U, A, B, R) \
2618 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2619
2620 #define _mm512_min_round_ps(A, B, R) \
2621 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
2622
2623 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
2624 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
2625
2626 #define _mm512_maskz_min_round_ps(U, A, B, R) \
2627 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2628 #endif
2629
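/* VSCALEF: each result element is __A * 2^floor(__B) for the
   corresponding elements of __A and __B, with the usual merge- and
   zero-masking variants and rounding control.  */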
2630 #ifdef __OPTIMIZE__
2631 extern __inline __m512d
2632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2633 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2634 {
2635 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2636 (__v8df) __B,
2637 (__v8df)
2638 _mm512_undefined_pd (),
2639 (__mmask8) -1, __R);
2640 }
2641
2642 extern __inline __m512d
2643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2644 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2645 __m512d __B, const int __R)
2646 {
2647 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2648 (__v8df) __B,
2649 (__v8df) __W,
2650 (__mmask8) __U, __R);
2651 }
2652
2653 extern __inline __m512d
2654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2656 const int __R)
2657 {
2658 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2659 (__v8df) __B,
2660 (__v8df)
2661 _mm512_setzero_pd (),
2662 (__mmask8) __U, __R);
2663 }
2664
2665 extern __inline __m512
2666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2667 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2668 {
2669 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2670 (__v16sf) __B,
2671 (__v16sf)
2672 _mm512_undefined_ps (),
2673 (__mmask16) -1, __R);
2674 }
2675
2676 extern __inline __m512
2677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2678 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2679 __m512 __B, const int __R)
2680 {
2681 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2682 (__v16sf) __B,
2683 (__v16sf) __W,
2684 (__mmask16) __U, __R);
2685 }
2686
2687 extern __inline __m512
2688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2689 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2690 const int __R)
2691 {
2692 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2693 (__v16sf) __B,
2694 (__v16sf)
2695 _mm512_setzero_ps (),
2696 (__mmask16) __U, __R);
2697 }
2698
2699 extern __inline __m128d
2700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2702 {
2703 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2704 (__v2df) __B,
2705 __R);
2706 }
2707
2708 extern __inline __m128
2709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2711 {
2712 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2713 (__v4sf) __B,
2714 __R);
2715 }
2716 #else
2717 #define _mm512_scalef_round_pd(A, B, C) \
2718 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2719
2720 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
2721 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
2722
2723 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
2724 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2725
2726 #define _mm512_scalef_round_ps(A, B, C) \
2727 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2728
2729 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
2730 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
2731
2732 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
2733 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2734
2735 #define _mm_scalef_round_sd(A, B, C) \
2736 (__m128d)__builtin_ia32_scalefsd_round(A, B, C)
2737
2738 #define _mm_scalef_round_ss(A, B, C) \
2739 (__m128)__builtin_ia32_scalefss_round(A, B, C)
2740 #endif
2741
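/* Fused multiply-add family with rounding control:
     fmadd  =   __A * __B + __C
     fmsub  =   __A * __B - __C
     fnmadd = -(__A * __B) + __C
     fnmsub = -(__A * __B) - __C
   fmaddsub adds __C in the odd-numbered lanes and subtracts it in the
   even-numbered lanes; fmsubadd does the opposite.  A minimal
   accumulation sketch (illustrative only):

     __m512 __acc = _mm512_setzero_ps ();
     __acc = _mm512_fmadd_round_ps (__a, __b, __acc,
                                    _MM_FROUND_CUR_DIRECTION);  */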
2742 #ifdef __OPTIMIZE__
2743 extern __inline __m512d
2744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2745 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2746 {
2747 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2748 (__v8df) __B,
2749 (__v8df) __C,
2750 (__mmask8) -1, __R);
2751 }
2752
2753 extern __inline __m512d
2754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2755 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2756 __m512d __C, const int __R)
2757 {
2758 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2759 (__v8df) __B,
2760 (__v8df) __C,
2761 (__mmask8) __U, __R);
2762 }
2763
2764 extern __inline __m512d
2765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2766 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2767 __mmask8 __U, const int __R)
2768 {
2769 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2770 (__v8df) __B,
2771 (__v8df) __C,
2772 (__mmask8) __U, __R);
2773 }
2774
2775 extern __inline __m512d
2776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2778 __m512d __C, const int __R)
2779 {
2780 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2781 (__v8df) __B,
2782 (__v8df) __C,
2783 (__mmask8) __U, __R);
2784 }
2785
2786 extern __inline __m512
2787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2789 {
2790 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2791 (__v16sf) __B,
2792 (__v16sf) __C,
2793 (__mmask16) -1, __R);
2794 }
2795
2796 extern __inline __m512
2797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2798 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2799 __m512 __C, const int __R)
2800 {
2801 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2802 (__v16sf) __B,
2803 (__v16sf) __C,
2804 (__mmask16) __U, __R);
2805 }
2806
2807 extern __inline __m512
2808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2809 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2810 __mmask16 __U, const int __R)
2811 {
2812 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2813 (__v16sf) __B,
2814 (__v16sf) __C,
2815 (__mmask16) __U, __R);
2816 }
2817
2818 extern __inline __m512
2819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2820 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2821 __m512 __C, const int __R)
2822 {
2823 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2824 (__v16sf) __B,
2825 (__v16sf) __C,
2826 (__mmask16) __U, __R);
2827 }
2828
2829 extern __inline __m512d
2830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2831 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2832 {
2833 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2834 (__v8df) __B,
2835 -(__v8df) __C,
2836 (__mmask8) -1, __R);
2837 }
2838
2839 extern __inline __m512d
2840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2841 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2842 __m512d __C, const int __R)
2843 {
2844 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2845 (__v8df) __B,
2846 -(__v8df) __C,
2847 (__mmask8) __U, __R);
2848 }
2849
2850 extern __inline __m512d
2851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2852 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2853 __mmask8 __U, const int __R)
2854 {
2855 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2856 (__v8df) __B,
2857 (__v8df) __C,
2858 (__mmask8) __U, __R);
2859 }
2860
2861 extern __inline __m512d
2862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2863 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2864 __m512d __C, const int __R)
2865 {
2866 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2867 (__v8df) __B,
2868 -(__v8df) __C,
2869 (__mmask8) __U, __R);
2870 }
2871
2872 extern __inline __m512
2873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2874 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2875 {
2876 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2877 (__v16sf) __B,
2878 -(__v16sf) __C,
2879 (__mmask16) -1, __R);
2880 }
2881
2882 extern __inline __m512
2883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2884 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2885 __m512 __C, const int __R)
2886 {
2887 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2888 (__v16sf) __B,
2889 -(__v16sf) __C,
2890 (__mmask16) __U, __R);
2891 }
2892
2893 extern __inline __m512
2894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2895 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2896 __mmask16 __U, const int __R)
2897 {
2898 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2899 (__v16sf) __B,
2900 (__v16sf) __C,
2901 (__mmask16) __U, __R);
2902 }
2903
2904 extern __inline __m512
2905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2906 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2907 __m512 __C, const int __R)
2908 {
2909 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2910 (__v16sf) __B,
2911 -(__v16sf) __C,
2912 (__mmask16) __U, __R);
2913 }
2914
2915 extern __inline __m512d
2916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2917 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2918 {
2919 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2920 (__v8df) __B,
2921 (__v8df) __C,
2922 (__mmask8) -1, __R);
2923 }
2924
2925 extern __inline __m512d
2926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2927 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2928 __m512d __C, const int __R)
2929 {
2930 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2931 (__v8df) __B,
2932 (__v8df) __C,
2933 (__mmask8) __U, __R);
2934 }
2935
2936 extern __inline __m512d
2937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2939 __mmask8 __U, const int __R)
2940 {
2941 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2942 (__v8df) __B,
2943 (__v8df) __C,
2944 (__mmask8) __U, __R);
2945 }
2946
2947 extern __inline __m512d
2948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2950 __m512d __C, const int __R)
2951 {
2952 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2953 (__v8df) __B,
2954 (__v8df) __C,
2955 (__mmask8) __U, __R);
2956 }
2957
2958 extern __inline __m512
2959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2960 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2961 {
2962 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2963 (__v16sf) __B,
2964 (__v16sf) __C,
2965 (__mmask16) -1, __R);
2966 }
2967
2968 extern __inline __m512
2969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2970 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2971 __m512 __C, const int __R)
2972 {
2973 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2974 (__v16sf) __B,
2975 (__v16sf) __C,
2976 (__mmask16) __U, __R);
2977 }
2978
2979 extern __inline __m512
2980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2981 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2982 __mmask16 __U, const int __R)
2983 {
2984 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2985 (__v16sf) __B,
2986 (__v16sf) __C,
2987 (__mmask16) __U, __R);
2988 }
2989
2990 extern __inline __m512
2991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2993 __m512 __C, const int __R)
2994 {
2995 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2996 (__v16sf) __B,
2997 (__v16sf) __C,
2998 (__mmask16) __U, __R);
2999 }
3000
3001 extern __inline __m512d
3002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3004 {
3005 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3006 (__v8df) __B,
3007 -(__v8df) __C,
3008 (__mmask8) -1, __R);
3009 }
3010
3011 extern __inline __m512d
3012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3013 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3014 __m512d __C, const int __R)
3015 {
3016 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3017 (__v8df) __B,
3018 -(__v8df) __C,
3019 (__mmask8) __U, __R);
3020 }
3021
3022 extern __inline __m512d
3023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3025 __mmask8 __U, const int __R)
3026 {
3027 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3028 (__v8df) __B,
3029 (__v8df) __C,
3030 (__mmask8) __U, __R);
3031 }
3032
3033 extern __inline __m512d
3034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3036 __m512d __C, const int __R)
3037 {
3038 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3039 (__v8df) __B,
3040 -(__v8df) __C,
3041 (__mmask8) __U, __R);
3042 }
3043
3044 extern __inline __m512
3045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3046 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3047 {
3048 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3049 (__v16sf) __B,
3050 -(__v16sf) __C,
3051 (__mmask16) -1, __R);
3052 }
3053
3054 extern __inline __m512
3055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3056 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3057 __m512 __C, const int __R)
3058 {
3059 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3060 (__v16sf) __B,
3061 -(__v16sf) __C,
3062 (__mmask16) __U, __R);
3063 }
3064
3065 extern __inline __m512
3066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3067 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3068 __mmask16 __U, const int __R)
3069 {
3070 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3071 (__v16sf) __B,
3072 (__v16sf) __C,
3073 (__mmask16) __U, __R);
3074 }
3075
3076 extern __inline __m512
3077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3078 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3079 __m512 __C, const int __R)
3080 {
3081 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3082 (__v16sf) __B,
3083 -(__v16sf) __C,
3084 (__mmask16) __U, __R);
3085 }
3086
3087 extern __inline __m512d
3088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3089 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3090 {
3091 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3092 (__v8df) __B,
3093 (__v8df) __C,
3094 (__mmask8) -1, __R);
3095 }
3096
3097 extern __inline __m512d
3098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3099 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3100 __m512d __C, const int __R)
3101 {
3102 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3103 (__v8df) __B,
3104 (__v8df) __C,
3105 (__mmask8) __U, __R);
3106 }
3107
3108 extern __inline __m512d
3109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3110 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3111 __mmask8 __U, const int __R)
3112 {
3113 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3114 (__v8df) __B,
3115 (__v8df) __C,
3116 (__mmask8) __U, __R);
3117 }
3118
3119 extern __inline __m512d
3120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3121 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3122 __m512d __C, const int __R)
3123 {
3124 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3125 (__v8df) __B,
3126 (__v8df) __C,
3127 (__mmask8) __U, __R);
3128 }
3129
3130 extern __inline __m512
3131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3132 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3133 {
3134 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3135 (__v16sf) __B,
3136 (__v16sf) __C,
3137 (__mmask16) -1, __R);
3138 }
3139
3140 extern __inline __m512
3141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3142 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3143 __m512 __C, const int __R)
3144 {
3145 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3146 (__v16sf) __B,
3147 (__v16sf) __C,
3148 (__mmask16) __U, __R);
3149 }
3150
3151 extern __inline __m512
3152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3153 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3154 __mmask16 __U, const int __R)
3155 {
3156 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3157 (__v16sf) __B,
3158 (__v16sf) __C,
3159 (__mmask16) __U, __R);
3160 }
3161
3162 extern __inline __m512
3163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3165 __m512 __C, const int __R)
3166 {
3167 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3168 (__v16sf) __B,
3169 (__v16sf) __C,
3170 (__mmask16) __U, __R);
3171 }
3172
3173 extern __inline __m512d
3174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3176 {
3177 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3178 (__v8df) __B,
3179 -(__v8df) __C,
3180 (__mmask8) -1, __R);
3181 }
3182
3183 extern __inline __m512d
3184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3185 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3186 __m512d __C, const int __R)
3187 {
3188 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3189 (__v8df) __B,
3190 (__v8df) __C,
3191 (__mmask8) __U, __R);
3192 }
3193
3194 extern __inline __m512d
3195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3197 __mmask8 __U, const int __R)
3198 {
3199 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3200 (__v8df) __B,
3201 (__v8df) __C,
3202 (__mmask8) __U, __R);
3203 }
3204
3205 extern __inline __m512d
3206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3208 __m512d __C, const int __R)
3209 {
3210 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3211 (__v8df) __B,
3212 -(__v8df) __C,
3213 (__mmask8) __U, __R);
3214 }
3215
3216 extern __inline __m512
3217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3218 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3219 {
3220 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3221 (__v16sf) __B,
3222 -(__v16sf) __C,
3223 (__mmask16) -1, __R);
3224 }
3225
3226 extern __inline __m512
3227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3228 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3229 __m512 __C, const int __R)
3230 {
3231 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3232 (__v16sf) __B,
3233 (__v16sf) __C,
3234 (__mmask16) __U, __R);
3235 }
3236
3237 extern __inline __m512
3238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3239 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3240 __mmask16 __U, const int __R)
3241 {
3242 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3243 (__v16sf) __B,
3244 (__v16sf) __C,
3245 (__mmask16) __U, __R);
3246 }
3247
3248 extern __inline __m512
3249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3250 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3251 __m512 __C, const int __R)
3252 {
3253 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3254 (__v16sf) __B,
3255 -(__v16sf) __C,
3256 (__mmask16) __U, __R);
3257 }
3258 #else
3259 #define _mm512_fmadd_round_pd(A, B, C, R) \
3260 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3261
3262 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3263 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3264
3265 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3266 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3267
3268 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3269 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3270
3271 #define _mm512_fmadd_round_ps(A, B, C, R) \
3272 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3273
3274 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3275 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3276
3277 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3278 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3279
3280 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3281 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3282
3283 #define _mm512_fmsub_round_pd(A, B, C, R) \
3284 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
3285
3286 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3287 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
3288
3289 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3290 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3291
3292 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3293 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
3294
3295 #define _mm512_fmsub_round_ps(A, B, C, R) \
3296 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
3297
3298 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3299 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
3300
3301 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3302 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3303
3304 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3305 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
3306
3307 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3308 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3309
3310 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3311 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3312
3313 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3314 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3315
3316 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3317 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3318
3319 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3320 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3321
3322 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3323 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3324
3325 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3326 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3327
3328 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3329 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3330
3331 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3332 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3333
3334 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3335 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3336
3337 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3338 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3339
3340 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3341 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3342
3343 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3344 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3345
3346 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3347 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3348
3349 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3350 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3351
3352 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3353 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3354
3355 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3356 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
3357
3358 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3359 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3360
3361 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3362 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
3363
3364 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3365 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
3366
3367 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3368 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
3369
3370 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3371 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3372
3373 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3374 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
3375
3376 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3377 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
3378
3379 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3380 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
3381
3382 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3383 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3384
3385 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3386 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3387
3388 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3389 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
3390
3391 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3392 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
3393
3394 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3395 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3396
3397 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3398 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3399
3400 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3401 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
3402 #endif
3403
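/* Per-element absolute value of packed 64-bit and 32-bit integers.  */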
3404 extern __inline __m512i
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 _mm512_abs_epi64 (__m512i __A)
3407 {
3408 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3409 (__v8di)
3410 _mm512_undefined_si512 (),
3411 (__mmask8) -1);
3412 }
3413
3414 extern __inline __m512i
3415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3416 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3417 {
3418 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3419 (__v8di) __W,
3420 (__mmask8) __U);
3421 }
3422
3423 extern __inline __m512i
3424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3426 {
3427 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3428 (__v8di)
3429 _mm512_setzero_si512 (),
3430 (__mmask8) __U);
3431 }
3432
3433 extern __inline __m512i
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 _mm512_abs_epi32 (__m512i __A)
3436 {
3437 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3438 (__v16si)
3439 _mm512_undefined_si512 (),
3440 (__mmask16) -1);
3441 }
3442
3443 extern __inline __m512i
3444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3445 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3446 {
3447 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3448 (__v16si) __W,
3449 (__mmask16) __U);
3450 }
3451
3452 extern __inline __m512i
3453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3455 {
3456 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3457 (__v16si)
3458 _mm512_setzero_si512 (),
3459 (__mmask16) __U);
3460 }
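/* The pattern above recurs throughout this file: the plain intrinsic
   computes into an undefined destination, the _mask form writes the result
   only where the corresponding bit of __U is set and keeps __W elsewhere,
   and the _maskz form zeroes those lanes instead.  Illustrative sketch with
   hypothetical values:

     __m512i v = _mm512_set1_epi32 (-7);
     __m512i r = _mm512_maskz_abs_epi32 (0x000F, v);
     // lanes 0..3 of r hold 7, lanes 4..15 are zero
*/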
3461
3462 extern __inline __m512
3463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3464 _mm512_broadcastss_ps (__m128 __A)
3465 {
3466 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3467 (__v16sf)
3468 _mm512_undefined_ps (),
3469 (__mmask16) -1);
3470 }
3471
3472 extern __inline __m512
3473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3474 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3475 {
3476 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3477 (__v16sf) __O, __M);
3478 }
3479
3480 extern __inline __m512
3481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3482 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3483 {
3484 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3485 (__v16sf)
3486 _mm512_setzero_ps (),
3487 __M);
3488 }
3489
3490 extern __inline __m512d
3491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492 _mm512_broadcastsd_pd (__m128d __A)
3493 {
3494 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3495 (__v8df)
3496 _mm512_undefined_pd (),
3497 (__mmask8) -1);
3498 }
3499
3500 extern __inline __m512d
3501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3502 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3503 {
3504 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3505 (__v8df) __O, __M);
3506 }
3507
3508 extern __inline __m512d
3509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3511 {
3512 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3513 (__v8df)
3514 _mm512_setzero_pd (),
3515 __M);
3516 }
3517
3518 extern __inline __m512i
3519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3520 _mm512_broadcastd_epi32 (__m128i __A)
3521 {
3522 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3523 (__v16si)
3524 _mm512_undefined_si512 (),
3525 (__mmask16) -1);
3526 }
3527
3528 extern __inline __m512i
3529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3530 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3531 {
3532 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3533 (__v16si) __O, __M);
3534 }
3535
3536 extern __inline __m512i
3537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3539 {
3540 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3541 (__v16si)
3542 _mm512_setzero_si512 (),
3543 __M);
3544 }
3545
3546 extern __inline __m512i
3547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3548 _mm512_set1_epi32 (int __A)
3549 {
3550 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3551 (__v16si)
3552 _mm512_undefined_si512 (),
3553 (__mmask16)(-1));
3554 }
3555
3556 extern __inline __m512i
3557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3558 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3559 {
3560 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3561 __M);
3562 }
3563
3564 extern __inline __m512i
3565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3567 {
3568 return (__m512i)
3569 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3570 (__v16si) _mm512_setzero_si512 (),
3571 __M);
3572 }
3573
3574 extern __inline __m512i
3575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576 _mm512_broadcastq_epi64 (__m128i __A)
3577 {
3578 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3579 (__v8di)
3580 _mm512_undefined_si512 (),
3581 (__mmask8) -1);
3582 }
3583
3584 extern __inline __m512i
3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3587 {
3588 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3589 (__v8di) __O, __M);
3590 }
3591
3592 extern __inline __m512i
3593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3594 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3595 {
3596 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3597 (__v8di)
3598 _mm512_setzero_si512 (),
3599 __M);
3600 }
3601
3602 extern __inline __m512i
3603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3604 _mm512_set1_epi64 (long long __A)
3605 {
3606 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3607 (__v8di)
3608 _mm512_undefined_si512 (),
3609 (__mmask8)(-1));
3610 }
3611
3612 extern __inline __m512i
3613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3615 {
3616 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3617 __M);
3618 }
3619
3620 extern __inline __m512i
3621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3623 {
3624 return (__m512i)
3625 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3626 (__v8di) _mm512_setzero_si512 (),
3627 __M);
3628 }
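/* _mm512_broadcast{d,q}_epi{32,64} replicate the low element of an XMM
   operand, while _mm512_set1_epi{32,64} broadcast directly from a
   general-purpose register through the *_gpr_mask builtins.  Illustrative
   sketch with hypothetical values:

     __m512i ones  = _mm512_set1_epi32 (1);
     __m512i mixed = _mm512_mask_set1_epi32 (ones, 0x00FF, 7);
     // lanes 0..7 of mixed hold 7, lanes 8..15 keep 1
*/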
3629
3630 extern __inline __m512
3631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632 _mm512_broadcast_f32x4 (__m128 __A)
3633 {
3634 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3635 (__v16sf)
3636 _mm512_undefined_ps (),
3637 (__mmask16) -1);
3638 }
3639
3640 extern __inline __m512
3641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3642 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3643 {
3644 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3645 (__v16sf) __O,
3646 __M);
3647 }
3648
3649 extern __inline __m512
3650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3652 {
3653 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3654 (__v16sf)
3655 _mm512_setzero_ps (),
3656 __M);
3657 }
3658
3659 extern __inline __m512i
3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 _mm512_broadcast_i32x4 (__m128i __A)
3662 {
3663 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3664 (__v16si)
3665 _mm512_undefined_si512 (),
3666 (__mmask16) -1);
3667 }
3668
3669 extern __inline __m512i
3670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3672 {
3673 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3674 (__v16si) __O,
3675 __M);
3676 }
3677
3678 extern __inline __m512i
3679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3681 {
3682 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3683 (__v16si)
3684 _mm512_setzero_si512 (),
3685 __M);
3686 }
3687
3688 extern __inline __m512d
3689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690 _mm512_broadcast_f64x4 (__m256d __A)
3691 {
3692 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3693 (__v8df)
3694 _mm512_undefined_pd (),
3695 (__mmask8) -1);
3696 }
3697
3698 extern __inline __m512d
3699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3701 {
3702 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3703 (__v8df) __O,
3704 __M);
3705 }
3706
3707 extern __inline __m512d
3708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3710 {
3711 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3712 (__v8df)
3713 _mm512_setzero_pd (),
3714 __M);
3715 }
3716
3717 extern __inline __m512i
3718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719 _mm512_broadcast_i64x4 (__m256i __A)
3720 {
3721 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3722 (__v8di)
3723 _mm512_undefined_si512 (),
3724 (__mmask8) -1);
3725 }
3726
3727 extern __inline __m512i
3728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3730 {
3731 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3732 (__v8di) __O,
3733 __M);
3734 }
3735
3736 extern __inline __m512i
3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3739 {
3740 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3741 (__v8di)
3742 _mm512_setzero_si512 (),
3743 __M);
3744 }
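/* The f32x4/i32x4 broadcasts above replicate an entire 128-bit lane four
   times, and the f64x4/i64x4 broadcasts replicate a 256-bit half twice,
   rather than a single scalar.  Illustrative sketch with hypothetical
   values:

     __m128i quad = _mm_set_epi32 (4, 3, 2, 1);
     __m512i rep  = _mm512_broadcast_i32x4 (quad);
     // rep holds the element pattern 1,2,3,4 repeated four times
*/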
3745
3746 typedef enum
3747 {
3748 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3749 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3750 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3751 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3752 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3753 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3754 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3755 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3756 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3757 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3758 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3759 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3760 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3761 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3762 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3763 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3764 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3765 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3766 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3767 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3768 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3769 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3770 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3771 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3772 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3773 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3774 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3775 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3776 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3777 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3778 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3779 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3780 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3781 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3782 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3783 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3784 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3785 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3786 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3787 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3788 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3789 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3790 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3791 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3792 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3793 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3794 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3795 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3796 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3797 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3798 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3799 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3800 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3801 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3802 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3803 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3804 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3805 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3806 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3807 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3808 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3809 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3810 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3811 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3812 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3813 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3814 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3815 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3816 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3817 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3818 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3819 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3820 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3821 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3822 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3823 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3824 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3825 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3826 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3827 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3828 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3829 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3830 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3831 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3832 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3833 _MM_PERM_DDDD = 0xFF
3834 } _MM_PERM_ENUM;
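/* Each _MM_PERM_XXXX value packs four 2-bit element selectors with A=0,
   B=1, C=2 and D=3; the last letter names the selector for element 0 and
   occupies the lowest bit pair.  For example _MM_PERM_DCBA is
   (3 << 6) | (2 << 4) | (1 << 2) | 0 == 0xE4, the identity permutation for
   _mm512_shuffle_epi32 below.  */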
3835
3836 #ifdef __OPTIMIZE__
3837 extern __inline __m512i
3838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3839 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3840 {
3841 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3842 __mask,
3843 (__v16si)
3844 _mm512_undefined_si512 (),
3845 (__mmask16) -1);
3846 }
3847
3848 extern __inline __m512i
3849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3850 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3851 _MM_PERM_ENUM __mask)
3852 {
3853 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3854 __mask,
3855 (__v16si) __W,
3856 (__mmask16) __U);
3857 }
3858
3859 extern __inline __m512i
3860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3861 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3862 {
3863 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3864 __mask,
3865 (__v16si)
3866 _mm512_setzero_si512 (),
3867 (__mmask16) __U);
3868 }
3869
3870 extern __inline __m512i
3871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3872 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3873 {
3874 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3875 (__v8di) __B, __imm,
3876 (__v8di)
3877 _mm512_undefined_si512 (),
3878 (__mmask8) -1);
3879 }
3880
3881 extern __inline __m512i
3882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3883 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3884 __m512i __B, const int __imm)
3885 {
3886 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3887 (__v8di) __B, __imm,
3888 (__v8di) __W,
3889 (__mmask8) __U);
3890 }
3891
3892 extern __inline __m512i
3893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3894 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3895 const int __imm)
3896 {
3897 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3898 (__v8di) __B, __imm,
3899 (__v8di)
3900 _mm512_setzero_si512 (),
3901 (__mmask8) __U);
3902 }
3903
3904 extern __inline __m512i
3905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3906 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3907 {
3908 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3909 (__v16si) __B,
3910 __imm,
3911 (__v16si)
3912 _mm512_undefined_si512 (),
3913 (__mmask16) -1);
3914 }
3915
3916 extern __inline __m512i
3917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3919 __m512i __B, const int __imm)
3920 {
3921 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3922 (__v16si) __B,
3923 __imm,
3924 (__v16si) __W,
3925 (__mmask16) __U);
3926 }
3927
3928 extern __inline __m512i
3929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3930 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3931 const int __imm)
3932 {
3933 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3934 (__v16si) __B,
3935 __imm,
3936 (__v16si)
3937 _mm512_setzero_si512 (),
3938 (__mmask16) __U);
3939 }
3940
3941 extern __inline __m512d
3942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3944 {
3945 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3946 (__v8df) __B, __imm,
3947 (__v8df)
3948 _mm512_undefined_pd (),
3949 (__mmask8) -1);
3950 }
3951
3952 extern __inline __m512d
3953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3954 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3955 __m512d __B, const int __imm)
3956 {
3957 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3958 (__v8df) __B, __imm,
3959 (__v8df) __W,
3960 (__mmask8) __U);
3961 }
3962
3963 extern __inline __m512d
3964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3965 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3966 const int __imm)
3967 {
3968 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3969 (__v8df) __B, __imm,
3970 (__v8df)
3971 _mm512_setzero_pd (),
3972 (__mmask8) __U);
3973 }
3974
3975 extern __inline __m512
3976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3977 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3978 {
3979 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3980 (__v16sf) __B, __imm,
3981 (__v16sf)
3982 _mm512_undefined_ps (),
3983 (__mmask16) -1);
3984 }
3985
3986 extern __inline __m512
3987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3988 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3989 __m512 __B, const int __imm)
3990 {
3991 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3992 (__v16sf) __B, __imm,
3993 (__v16sf) __W,
3994 (__mmask16) __U);
3995 }
3996
3997 extern __inline __m512
3998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4000 const int __imm)
4001 {
4002 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4003 (__v16sf) __B, __imm,
4004 (__v16sf)
4005 _mm512_setzero_ps (),
4006 (__mmask16) __U);
4007 }
4008
4009 #else
4010 #define _mm512_shuffle_epi32(X, C) \
4011 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4012 (__v16si)(__m512i)_mm512_undefined_si512 (),\
4013 (__mmask16)-1))
4014
4015 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
4016 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4017 (__v16si)(__m512i)(W),\
4018 (__mmask16)(U)))
4019
4020 #define _mm512_maskz_shuffle_epi32(U, X, C) \
4021 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4022 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4023 (__mmask16)(U)))
4024
4025 #define _mm512_shuffle_i64x2(X, Y, C) \
4026 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4027 (__v8di)(__m512i)(Y), (int)(C),\
4028 (__v8di)(__m512i)_mm512_undefined_si512 (),\
4029 (__mmask8)-1))
4030
4031 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
4032 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4033 (__v8di)(__m512i)(Y), (int)(C),\
4034 (__v8di)(__m512i)(W),\
4035 (__mmask8)(U)))
4036
4037 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
4038 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4039 (__v8di)(__m512i)(Y), (int)(C),\
4040 (__v8di)(__m512i)_mm512_setzero_si512 (),\
4041 (__mmask8)(U)))
4042
4043 #define _mm512_shuffle_i32x4(X, Y, C) \
4044 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4045 (__v16si)(__m512i)(Y), (int)(C),\
4046 (__v16si)(__m512i)_mm512_undefined_si512 (),\
4047 (__mmask16)-1))
4048
4049 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
4050 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4051 (__v16si)(__m512i)(Y), (int)(C),\
4052 (__v16si)(__m512i)(W),\
4053 (__mmask16)(U)))
4054
4055 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
4056 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4057 (__v16si)(__m512i)(Y), (int)(C),\
4058 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4059 (__mmask16)(U)))
4060
4061 #define _mm512_shuffle_f64x2(X, Y, C) \
4062 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4063 (__v8df)(__m512d)(Y), (int)(C),\
4064 (__v8df)(__m512d)_mm512_undefined_pd(),\
4065 (__mmask8)-1))
4066
4067 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
4068 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4069 (__v8df)(__m512d)(Y), (int)(C),\
4070 (__v8df)(__m512d)(W),\
4071 (__mmask8)(U)))
4072
4073 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
4074 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4075 (__v8df)(__m512d)(Y), (int)(C),\
4076 (__v8df)(__m512d)_mm512_setzero_pd(),\
4077 (__mmask8)(U)))
4078
4079 #define _mm512_shuffle_f32x4(X, Y, C) \
4080 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4081 (__v16sf)(__m512)(Y), (int)(C),\
4082 (__v16sf)(__m512)_mm512_undefined_ps(),\
4083 (__mmask16)-1))
4084
4085 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
4086 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4087 (__v16sf)(__m512)(Y), (int)(C),\
4088 (__v16sf)(__m512)(W),\
4089 (__mmask16)(U)))
4090
4091 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
4092 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4093 (__v16sf)(__m512)(Y), (int)(C),\
4094 (__v16sf)(__m512)_mm512_setzero_ps(),\
4095 (__mmask16)(U)))
4096 #endif
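/* The shuffle_{i,f}32x4 and shuffle_{i,f}64x2 intrinsics permute whole
   128-bit lanes: the low half of the result is selected from the first
   operand and the high half from the second, each lane chosen by a 2-bit
   field of the immediate.  Illustrative sketch, where x is some
   hypothetical __m512i value:

     __m512i r = _mm512_shuffle_i32x4 (x, x, _MM_SHUFFLE (2, 3, 0, 1));
     // result lanes, low to high: x lane 1, x lane 0, x lane 3, x lane 2
*/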
4097
4098 extern __inline __m512i
4099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4100 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4101 {
4102 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4103 (__v16si) __B,
4104 (__v16si)
4105 _mm512_undefined_si512 (),
4106 (__mmask16) -1);
4107 }
4108
4109 extern __inline __m512i
4110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4111 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4112 {
4113 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4114 (__v16si) __B,
4115 (__v16si) __W,
4116 (__mmask16) __U);
4117 }
4118
4119 extern __inline __m512i
4120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4121 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4122 {
4123 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4124 (__v16si) __B,
4125 (__v16si)
4126 _mm512_setzero_si512 (),
4127 (__mmask16) __U);
4128 }
4129
4130 extern __inline __m512i
4131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4132 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4133 {
4134 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4135 (__v16si) __B,
4136 (__v16si)
4137 _mm512_undefined_si512 (),
4138 (__mmask16) -1);
4139 }
4140
4141 extern __inline __m512i
4142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4143 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4144 {
4145 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4146 (__v16si) __B,
4147 (__v16si) __W,
4148 (__mmask16) __U);
4149 }
4150
4151 extern __inline __m512i
4152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4153 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4154 {
4155 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4156 (__v16si) __B,
4157 (__v16si)
4158 _mm512_setzero_si512 (),
4159 (__mmask16) __U);
4160 }
4161
4162 extern __inline __m512i
4163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4165 {
4166 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4167 (__v8di) __B,
4168 (__v8di)
4169 _mm512_undefined_si512 (),
4170 (__mmask8) -1);
4171 }
4172
4173 extern __inline __m512i
4174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4175 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4176 {
4177 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4178 (__v8di) __B,
4179 (__v8di) __W,
4180 (__mmask8) __U);
4181 }
4182
4183 extern __inline __m512i
4184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4185 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4186 {
4187 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4188 (__v8di) __B,
4189 (__v8di)
4190 _mm512_setzero_si512 (),
4191 (__mmask8) __U);
4192 }
4193
4194 extern __inline __m512i
4195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4197 {
4198 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4199 (__v8di) __B,
4200 (__v8di)
4201 _mm512_undefined_si512 (),
4202 (__mmask8) -1);
4203 }
4204
4205 extern __inline __m512i
4206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4207 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4208 {
4209 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4210 (__v8di) __B,
4211 (__v8di) __W,
4212 (__mmask8) __U);
4213 }
4214
4215 extern __inline __m512i
4216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4217 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4218 {
4219 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4220 (__v8di) __B,
4221 (__v8di)
4222 _mm512_setzero_si512 (),
4223 (__mmask8) __U);
4224 }
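/* rolv/rorv rotate each element left or right by the per-element count in
   the second operand, taken modulo the element width.  Worked example with
   hypothetical values:

     __m512i v = _mm512_set1_epi32 (0x80000001);
     __m512i r = _mm512_rolv_epi32 (v, _mm512_set1_epi32 (1));
     // every lane of r is 0x00000003: bit 31 wraps around into bit 0
*/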
4225
4226 #ifdef __OPTIMIZE__
4227 extern __inline __m256i
4228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4229 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4230 {
4231 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4232 (__v8si)
4233 _mm256_undefined_si256 (),
4234 (__mmask8) -1, __R);
4235 }
4236
4237 extern __inline __m256i
4238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4239 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4240 const int __R)
4241 {
4242 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4243 (__v8si) __W,
4244 (__mmask8) __U, __R);
4245 }
4246
4247 extern __inline __m256i
4248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4249 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4250 {
4251 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4252 (__v8si)
4253 _mm256_setzero_si256 (),
4254 (__mmask8) __U, __R);
4255 }
4256
4257 extern __inline __m256i
4258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4259 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4260 {
4261 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4262 (__v8si)
4263 _mm256_undefined_si256 (),
4264 (__mmask8) -1, __R);
4265 }
4266
4267 extern __inline __m256i
4268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4270 const int __R)
4271 {
4272 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4273 (__v8si) __W,
4274 (__mmask8) __U, __R);
4275 }
4276
4277 extern __inline __m256i
4278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4280 {
4281 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4282 (__v8si)
4283 _mm256_setzero_si256 (),
4284 (__mmask8) __U, __R);
4285 }
4286 #else
4287 #define _mm512_cvtt_roundpd_epi32(A, B) \
4288 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4289
4290 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
4291 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
4292
4293 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
4294 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4295
4296 #define _mm512_cvtt_roundpd_epu32(A, B) \
4297 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4298
4299 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
4300 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
4301
4302 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
4303 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4304 #endif
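/* The *_round_* conversions take an explicit rounding/exception-control
   argument.  The cvtt forms always truncate toward zero, so for them the
   argument is normally just _MM_FROUND_NO_EXC (or _MM_FROUND_CUR_DIRECTION).
   Illustrative sketch with hypothetical values:

     __m512d d = _mm512_set1_pd (2.7);
     __m256i t = _mm512_cvtt_roundpd_epi32 (d, _MM_FROUND_NO_EXC);
     // every lane of t is 2
*/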
4305
4306 #ifdef __OPTIMIZE__
4307 extern __inline __m256i
4308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4309 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4310 {
4311 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4312 (__v8si)
4313 _mm256_undefined_si256 (),
4314 (__mmask8) -1, __R);
4315 }
4316
4317 extern __inline __m256i
4318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4319 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4320 const int __R)
4321 {
4322 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4323 (__v8si) __W,
4324 (__mmask8) __U, __R);
4325 }
4326
4327 extern __inline __m256i
4328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4329 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4330 {
4331 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4332 (__v8si)
4333 _mm256_setzero_si256 (),
4334 (__mmask8) __U, __R);
4335 }
4336
4337 extern __inline __m256i
4338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4339 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4340 {
4341 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4342 (__v8si)
4343 _mm256_undefined_si256 (),
4344 (__mmask8) -1, __R);
4345 }
4346
4347 extern __inline __m256i
4348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4349 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4350 const int __R)
4351 {
4352 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4353 (__v8si) __W,
4354 (__mmask8) __U, __R);
4355 }
4356
4357 extern __inline __m256i
4358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4359 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4360 {
4361 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4362 (__v8si)
4363 _mm256_setzero_si256 (),
4364 (__mmask8) __U, __R);
4365 }
4366 #else
4367 #define _mm512_cvt_roundpd_epi32(A, B) \
4368 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4369
4370 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
4371 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
4372
4373 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
4374 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4375
4376 #define _mm512_cvt_roundpd_epu32(A, B) \
4377 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4378
4379 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
4380 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
4381
4382 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
4383 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4384 #endif
4385
4386 #ifdef __OPTIMIZE__
4387 extern __inline __m512i
4388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4389 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4390 {
4391 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4392 (__v16si)
4393 _mm512_undefined_si512 (),
4394 (__mmask16) -1, __R);
4395 }
4396
4397 extern __inline __m512i
4398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4399 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4400 const int __R)
4401 {
4402 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4403 (__v16si) __W,
4404 (__mmask16) __U, __R);
4405 }
4406
4407 extern __inline __m512i
4408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4409 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4410 {
4411 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4412 (__v16si)
4413 _mm512_setzero_si512 (),
4414 (__mmask16) __U, __R);
4415 }
4416
4417 extern __inline __m512i
4418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4419 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4420 {
4421 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4422 (__v16si)
4423 _mm512_undefined_si512 (),
4424 (__mmask16) -1, __R);
4425 }
4426
4427 extern __inline __m512i
4428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4429 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4430 const int __R)
4431 {
4432 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4433 (__v16si) __W,
4434 (__mmask16) __U, __R);
4435 }
4436
4437 extern __inline __m512i
4438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4440 {
4441 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4442 (__v16si)
4443 _mm512_setzero_si512 (),
4444 (__mmask16) __U, __R);
4445 }
4446 #else
4447 #define _mm512_cvtt_roundps_epi32(A, B) \
4448 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
4449
4450 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
4451 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
4452
4453 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4454 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4455
4456 #define _mm512_cvtt_roundps_epu32(A, B) \
4457 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
4458
4459 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4460 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4461
4462 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4463 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4464 #endif
4465
4466 #ifdef __OPTIMIZE__
4467 extern __inline __m512i
4468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4469 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4470 {
4471 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4472 (__v16si)
4473 _mm512_undefined_si512 (),
4474 (__mmask16) -1, __R);
4475 }
4476
4477 extern __inline __m512i
4478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4479 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4480 const int __R)
4481 {
4482 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4483 (__v16si) __W,
4484 (__mmask16) __U, __R);
4485 }
4486
4487 extern __inline __m512i
4488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4489 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4490 {
4491 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4492 (__v16si)
4493 _mm512_setzero_si512 (),
4494 (__mmask16) __U, __R);
4495 }
4496
4497 extern __inline __m512i
4498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4499 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4500 {
4501 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4502 (__v16si)
4503 _mm512_undefined_si512 (),
4504 (__mmask16) -1, __R);
4505 }
4506
4507 extern __inline __m512i
4508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4509 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4510 const int __R)
4511 {
4512 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4513 (__v16si) __W,
4514 (__mmask16) __U, __R);
4515 }
4516
4517 extern __inline __m512i
4518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4519 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4520 {
4521 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4522 (__v16si)
4523 _mm512_setzero_si512 (),
4524 (__mmask16) __U, __R);
4525 }
4526 #else
4527 #define _mm512_cvt_roundps_epi32(A, B) \
4528 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
4529
4530 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
4531 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
4532
4533 #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
4534 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4535
4536 #define _mm512_cvt_roundps_epu32(A, B) \
4537 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
4538
4539 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
4540 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
4541
4542 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
4543 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4544 #endif
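/* Unlike the truncating cvtt forms, the cvt_round conversions honour the
   requested rounding mode.  Illustrative sketch with hypothetical values:

     __m512 f = _mm512_set1_ps (2.5f);
     __m512i n = _mm512_cvt_roundps_epi32
       (f, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
     // every lane of n is 2 (round-to-nearest-even)
*/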
4545
4546 extern __inline __m128d
4547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4548 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4549 {
4550 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4551 }
4552
4553 #ifdef __x86_64__
4554 #ifdef __OPTIMIZE__
4555 extern __inline __m128d
4556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4557 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4558 {
4559 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4560 }
4561
4562 extern __inline __m128d
4563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4564 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4565 {
4566 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4567 }
4568
4569 extern __inline __m128d
4570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4571 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4572 {
4573 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4574 }
4575 #else
4576 #define _mm_cvt_roundu64_sd(A, B, C) \
4577 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
4578
4579 #define _mm_cvt_roundi64_sd(A, B, C) \
4580 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4581
4582 #define _mm_cvt_roundsi64_sd(A, B, C) \
4583 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4584 #endif
4585
4586 #endif
4587
4588 #ifdef __OPTIMIZE__
4589 extern __inline __m128
4590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4592 {
4593 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4594 }
4595
4596 extern __inline __m128
4597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4599 {
4600 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4601 }
4602
4603 extern __inline __m128
4604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4605 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4606 {
4607 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4608 }
4609 #else
4610 #define _mm_cvt_roundu32_ss(A, B, C) \
4611 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
4612
4613 #define _mm_cvt_roundi32_ss(A, B, C) \
4614 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4615
4616 #define _mm_cvt_roundsi32_ss(A, B, C) \
4617 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4618 #endif
4619
4620 #ifdef __x86_64__
4621 #ifdef __OPTIMIZE__
4622 extern __inline __m128
4623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4624 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4625 {
4626 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4627 }
4628
4629 extern __inline __m128
4630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4631 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4632 {
4633 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4634 }
4635
4636 extern __inline __m128
4637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4638 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4639 {
4640 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4641 }
4642 #else
4643 #define _mm_cvt_roundu64_ss(A, B, C) \
4644 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
4645
4646 #define _mm_cvt_roundi64_ss(A, B, C) \
4647 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4648
4649 #define _mm_cvt_roundsi64_ss(A, B, C) \
4650 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4651 #endif
4652
4653 #endif
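/* The scalar cvt_round{i,si,u}{32,64}_{ss,sd} helpers convert an integer,
   place it in the low element and copy the remaining elements from the
   first operand; the 64-bit integer forms are only declared when compiling
   for x86-64.  Illustrative sketch with hypothetical values:

     __m128 base = _mm_setzero_ps ();
     __m128 r = _mm_cvt_roundu32_ss
       (base, 3U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
     // r is { 3.0f, 0.0f, 0.0f, 0.0f }
*/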
4654
4655 extern __inline __m128i
4656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657 _mm512_cvtepi32_epi8 (__m512i __A)
4658 {
4659 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4660 (__v16qi)
4661 _mm_undefined_si128 (),
4662 (__mmask16) -1);
4663 }
4664
4665 extern __inline void
4666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4667 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4668 {
4669 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4670 }
4671
4672 extern __inline __m128i
4673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4674 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4675 {
4676 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4677 (__v16qi) __O, __M);
4678 }
4679
4680 extern __inline __m128i
4681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4683 {
4684 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4685 (__v16qi)
4686 _mm_setzero_si128 (),
4687 __M);
4688 }
4689
4690 extern __inline __m128i
4691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692 _mm512_cvtsepi32_epi8 (__m512i __A)
4693 {
4694 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4695 (__v16qi)
4696 _mm_undefined_si128 (),
4697 (__mmask16) -1);
4698 }
4699
4700 extern __inline void
4701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4702 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4703 {
4704 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4705 }
4706
4707 extern __inline __m128i
4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4710 {
4711 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4712 (__v16qi) __O, __M);
4713 }
4714
4715 extern __inline __m128i
4716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4717 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4718 {
4719 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4720 (__v16qi)
4721 _mm_setzero_si128 (),
4722 __M);
4723 }
4724
4725 extern __inline __m128i
4726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4727 _mm512_cvtusepi32_epi8 (__m512i __A)
4728 {
4729 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4730 (__v16qi)
4731 _mm_undefined_si128 (),
4732 (__mmask16) -1);
4733 }
4734
4735 extern __inline void
4736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4738 {
4739 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4740 }
4741
4742 extern __inline __m128i
4743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4744 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4745 {
4746 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4747 (__v16qi) __O,
4748 __M);
4749 }
4750
4751 extern __inline __m128i
4752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4753 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4754 {
4755 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4756 (__v16qi)
4757 _mm_setzero_si128 (),
4758 __M);
4759 }
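/* The pmov down-conversions come in three flavours: cvtepi*_epi* truncates,
   cvtsepi*_epi* saturates as signed, and cvtusepi*_epi* saturates as
   unsigned.  Worked example for a 32-bit lane holding 300 narrowed to 8
   bits (hypothetical values):

     _mm512_cvtepi32_epi8   -> 44   (300 & 0xFF)
     _mm512_cvtsepi32_epi8  -> 127  (signed saturation)
     _mm512_cvtusepi32_epi8 -> 255  (unsigned saturation)
*/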
4760
4761 extern __inline __m256i
4762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4763 _mm512_cvtepi32_epi16 (__m512i __A)
4764 {
4765 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4766 (__v16hi)
4767 _mm256_undefined_si256 (),
4768 (__mmask16) -1);
4769 }
4770
4771 extern __inline void
4772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4773 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4774 {
4775 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4776 }
4777
4778 extern __inline __m256i
4779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4780 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4781 {
4782 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4783 (__v16hi) __O, __M);
4784 }
4785
4786 extern __inline __m256i
4787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4789 {
4790 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4791 (__v16hi)
4792 _mm256_setzero_si256 (),
4793 __M);
4794 }
4795
4796 extern __inline __m256i
4797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798 _mm512_cvtsepi32_epi16 (__m512i __A)
4799 {
4800 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4801 (__v16hi)
4802 _mm256_undefined_si256 (),
4803 (__mmask16) -1);
4804 }
4805
4806 extern __inline void
4807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4809 {
4810 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4811 }
4812
4813 extern __inline __m256i
4814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4815 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4816 {
4817 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4818 (__v16hi) __O, __M);
4819 }
4820
4821 extern __inline __m256i
4822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4823 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4824 {
4825 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4826 (__v16hi)
4827 _mm256_setzero_si256 (),
4828 __M);
4829 }
4830
4831 extern __inline __m256i
4832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833 _mm512_cvtusepi32_epi16 (__m512i __A)
4834 {
4835 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4836 (__v16hi)
4837 _mm256_undefined_si256 (),
4838 (__mmask16) -1);
4839 }
4840
4841 extern __inline void
4842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4843 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4844 {
4845 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4846 }
4847
4848 extern __inline __m256i
4849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4850 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4851 {
4852 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4853 (__v16hi) __O,
4854 __M);
4855 }
4856
4857 extern __inline __m256i
4858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4860 {
4861 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4862 (__v16hi)
4863 _mm256_setzero_si256 (),
4864 __M);
4865 }
4866
4867 extern __inline __m256i
4868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4869 _mm512_cvtepi64_epi32 (__m512i __A)
4870 {
4871 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4872 (__v8si)
4873 _mm256_undefined_si256 (),
4874 (__mmask8) -1);
4875 }
4876
4877 extern __inline void
4878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4879 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4880 {
4881 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4882 }
4883
4884 extern __inline __m256i
4885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4886 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4887 {
4888 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4889 (__v8si) __O, __M);
4890 }
4891
4892 extern __inline __m256i
4893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4895 {
4896 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4897 (__v8si)
4898 _mm256_setzero_si256 (),
4899 __M);
4900 }
4901
4902 extern __inline __m256i
4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904 _mm512_cvtsepi64_epi32 (__m512i __A)
4905 {
4906 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4907 (__v8si)
4908 _mm256_undefined_si256 (),
4909 (__mmask8) -1);
4910 }
4911
4912 extern __inline void
4913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4914 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4915 {
4916 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4917 }
4918
4919 extern __inline __m256i
4920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4922 {
4923 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4924 (__v8si) __O, __M);
4925 }
4926
4927 extern __inline __m256i
4928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4929 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4930 {
4931 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4932 (__v8si)
4933 _mm256_setzero_si256 (),
4934 __M);
4935 }
4936
4937 extern __inline __m256i
4938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4939 _mm512_cvtusepi64_epi32 (__m512i __A)
4940 {
4941 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4942 (__v8si)
4943 _mm256_undefined_si256 (),
4944 (__mmask8) -1);
4945 }
4946
4947 extern __inline void
4948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4949 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4950 {
4951 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4952 }
4953
4954 extern __inline __m256i
4955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4956 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4957 {
4958 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4959 (__v8si) __O, __M);
4960 }
4961
4962 extern __inline __m256i
4963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4964 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4965 {
4966 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4967 (__v8si)
4968 _mm256_setzero_si256 (),
4969 __M);
4970 }
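/* Every down-conversion also has a _storeu_ companion that writes the
   narrowed, optionally masked result straight to unaligned memory instead
   of returning a vector.  Illustrative sketch, where v is some hypothetical
   __m512i value:

     int out[8];
     _mm512_mask_cvtepi64_storeu_epi32 (out, (__mmask8) 0xFF, v);
     // out[0..7] receive the low 32 bits of each 64-bit lane of v
*/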
4971
4972 extern __inline __m128i
4973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4974 _mm512_cvtepi64_epi16 (__m512i __A)
4975 {
4976 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4977 (__v8hi)
4978 _mm_undefined_si128 (),
4979 (__mmask8) -1);
4980 }
4981
4982 extern __inline void
4983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4984 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4985 {
4986 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4987 }
4988
4989 extern __inline __m128i
4990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4991 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4992 {
4993 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4994 (__v8hi) __O, __M);
4995 }
4996
4997 extern __inline __m128i
4998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4999 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5000 {
5001 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5002 (__v8hi)
5003 _mm_setzero_si128 (),
5004 __M);
5005 }
5006
5007 extern __inline __m128i
5008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5009 _mm512_cvtsepi64_epi16 (__m512i __A)
5010 {
5011 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5012 (__v8hi)
5013 _mm_undefined_si128 (),
5014 (__mmask8) -1);
5015 }
5016
5017 extern __inline void
5018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5019 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5020 {
5021 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5022 }
5023
5024 extern __inline __m128i
5025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5026 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5027 {
5028 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5029 (__v8hi) __O, __M);
5030 }
5031
5032 extern __inline __m128i
5033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5034 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5035 {
5036 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5037 (__v8hi)
5038 _mm_setzero_si128 (),
5039 __M);
5040 }
5041
5042 extern __inline __m128i
5043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044 _mm512_cvtusepi64_epi16 (__m512i __A)
5045 {
5046 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5047 (__v8hi)
5048 _mm_undefined_si128 (),
5049 (__mmask8) -1);
5050 }
5051
5052 extern __inline void
5053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5054 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5055 {
5056 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5057 }
5058
5059 extern __inline __m128i
5060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5061 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5062 {
5063 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5064 (__v8hi) __O, __M);
5065 }
5066
5067 extern __inline __m128i
5068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5069 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5070 {
5071 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5072 (__v8hi)
5073 _mm_setzero_si128 (),
5074 __M);
5075 }
5076
5077 extern __inline __m128i
5078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5079 _mm512_cvtepi64_epi8 (__m512i __A)
5080 {
5081 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5082 (__v16qi)
5083 _mm_undefined_si128 (),
5084 (__mmask8) -1);
5085 }
5086
5087 extern __inline void
5088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5089 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5090 {
5091 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5092 }
5093
5094 extern __inline __m128i
5095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5096 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5097 {
5098 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5099 (__v16qi) __O, __M);
5100 }
5101
5102 extern __inline __m128i
5103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5104 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5105 {
5106 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5107 (__v16qi)
5108 _mm_setzero_si128 (),
5109 __M);
5110 }
5111
5112 extern __inline __m128i
5113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5114 _mm512_cvtsepi64_epi8 (__m512i __A)
5115 {
5116 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5117 (__v16qi)
5118 _mm_undefined_si128 (),
5119 (__mmask8) -1);
5120 }
5121
5122 extern __inline void
5123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5124 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5125 {
5126 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5127 }
5128
5129 extern __inline __m128i
5130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5131 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5132 {
5133 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5134 (__v16qi) __O, __M);
5135 }
5136
5137 extern __inline __m128i
5138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5139 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5140 {
5141 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5142 (__v16qi)
5143 _mm_setzero_si128 (),
5144 __M);
5145 }
5146
5147 extern __inline __m128i
5148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149 _mm512_cvtusepi64_epi8 (__m512i __A)
5150 {
5151 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5152 (__v16qi)
5153 _mm_undefined_si128 (),
5154 (__mmask8) -1);
5155 }
5156
5157 extern __inline void
5158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5159 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5160 {
5161 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5162 }
5163
5164 extern __inline __m128i
5165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5166 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5167 {
5168 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5169 (__v16qi) __O,
5170 __M);
5171 }
5172
5173 extern __inline __m128i
5174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5175 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5176 {
5177 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5178 (__v16qi)
5179 _mm_setzero_si128 (),
5180 __M);
5181 }
5182
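/* Widen eight packed 32-bit integers from a 256-bit source to eight
   doubles, treating the inputs as signed (cvtepi32_pd) or unsigned
   (cvtepu32_pd).  */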
5183 extern __inline __m512d
5184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5185 _mm512_cvtepi32_pd (__m256i __A)
5186 {
5187 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5188 (__v8df)
5189 _mm512_undefined_pd (),
5190 (__mmask8) -1);
5191 }
5192
5193 extern __inline __m512d
5194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5195 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5196 {
5197 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5198 (__v8df) __W,
5199 (__mmask8) __U);
5200 }
5201
5202 extern __inline __m512d
5203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5204 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5205 {
5206 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5207 (__v8df)
5208 _mm512_setzero_pd (),
5209 (__mmask8) __U);
5210 }
5211
5212 extern __inline __m512d
5213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5214 _mm512_cvtepu32_pd (__m256i __A)
5215 {
5216 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5217 (__v8df)
5218 _mm512_undefined_pd (),
5219 (__mmask8) -1);
5220 }
5221
5222 extern __inline __m512d
5223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5224 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5225 {
5226 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5227 (__v8df) __W,
5228 (__mmask8) __U);
5229 }
5230
5231 extern __inline __m512d
5232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5233 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5234 {
5235 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5236 (__v8df)
5237 _mm512_setzero_pd (),
5238 (__mmask8) __U);
5239 }
5240
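/* The *_round_* conversions below take an explicit rounding-control
   operand __R in addition to the write mask.  Without optimization the
   inline forms cannot guarantee that __R is seen as the compile-time
   constant the builtin requires, so textual macro equivalents are used
   instead.  Illustrative call (the variable name __v is hypothetical):
     _mm512_cvt_roundepi32_ps (__v, _MM_FROUND_TO_NEAREST_INT
				    | _MM_FROUND_NO_EXC)  */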
5241 #ifdef __OPTIMIZE__
5242 extern __inline __m512
5243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5244 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5245 {
5246 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5247 (__v16sf)
5248 _mm512_undefined_ps (),
5249 (__mmask16) -1, __R);
5250 }
5251
5252 extern __inline __m512
5253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5254 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5255 const int __R)
5256 {
5257 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5258 (__v16sf) __W,
5259 (__mmask16) __U, __R);
5260 }
5261
5262 extern __inline __m512
5263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5264 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5265 {
5266 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5267 (__v16sf)
5268 _mm512_setzero_ps (),
5269 (__mmask16) __U, __R);
5270 }
5271
5272 extern __inline __m512
5273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5274 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5275 {
5276 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5277 (__v16sf)
5278 _mm512_undefined_ps (),
5279 (__mmask16) -1, __R);
5280 }
5281
5282 extern __inline __m512
5283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5284 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5285 const int __R)
5286 {
5287 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5288 (__v16sf) __W,
5289 (__mmask16) __U, __R);
5290 }
5291
5292 extern __inline __m512
5293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5294 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5295 {
5296 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5297 (__v16sf)
5298 _mm512_setzero_ps (),
5299 (__mmask16) __U, __R);
5300 }
5301
5302 #else
5303 #define _mm512_cvt_roundepi32_ps(A, B) \
5304 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5305
5306 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
5307 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)(__m512)(W), (__mmask16)(U), (B))
5308
5309 #define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
5310 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (B))
5311
5312 #define _mm512_cvt_roundepu32_ps(A, B) \
5313 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5314
5315 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
5316 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)(__m512)(W), (__mmask16)(U), (B))
5317
5318 #define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
5319 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (B))
5320 #endif
5321
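/* Extract the 256-bit or 128-bit lane selected by the immediate from a
   512-bit vector.  The immediate must be a compile-time constant; as
   above, macro forms are provided when not optimizing.  */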
5322 #ifdef __OPTIMIZE__
5323 extern __inline __m256d
5324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5325 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5326 {
5327 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5328 __imm,
5329 (__v4df)
5330 _mm256_undefined_pd (),
5331 (__mmask8) -1);
5332 }
5333
5334 extern __inline __m256d
5335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5336 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5337 const int __imm)
5338 {
5339 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5340 __imm,
5341 (__v4df) __W,
5342 (__mmask8) __U);
5343 }
5344
5345 extern __inline __m256d
5346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5347 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5348 {
5349 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5350 __imm,
5351 (__v4df)
5352 _mm256_setzero_pd (),
5353 (__mmask8) __U);
5354 }
5355
5356 extern __inline __m128
5357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5359 {
5360 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5361 __imm,
5362 (__v4sf)
5363 _mm_undefined_ps (),
5364 (__mmask8) -1);
5365 }
5366
5367 extern __inline __m128
5368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5369 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5370 const int __imm)
5371 {
5372 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5373 __imm,
5374 (__v4sf) __W,
5375 (__mmask8) __U);
5376 }
5377
5378 extern __inline __m128
5379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5380 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5381 {
5382 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5383 __imm,
5384 (__v4sf)
5385 _mm_setzero_ps (),
5386 (__mmask8) __U);
5387 }
5388
5389 extern __inline __m256i
5390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5391 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5392 {
5393 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5394 __imm,
5395 (__v4di)
5396 _mm256_undefined_si256 (),
5397 (__mmask8) -1);
5398 }
5399
5400 extern __inline __m256i
5401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5402 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5403 const int __imm)
5404 {
5405 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5406 __imm,
5407 (__v4di) __W,
5408 (__mmask8) __U);
5409 }
5410
5411 extern __inline __m256i
5412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5413 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5414 {
5415 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5416 __imm,
5417 (__v4di)
5418 _mm256_setzero_si256 (),
5419 (__mmask8) __U);
5420 }
5421
5422 extern __inline __m128i
5423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5424 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5425 {
5426 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5427 __imm,
5428 (__v4si)
5429 _mm_undefined_si128 (),
5430 (__mmask8) -1);
5431 }
5432
5433 extern __inline __m128i
5434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5435 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5436 const int __imm)
5437 {
5438 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5439 __imm,
5440 (__v4si) __W,
5441 (__mmask8) __U);
5442 }
5443
5444 extern __inline __m128i
5445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5446 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5447 {
5448 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5449 __imm,
5450 (__v4si)
5451 _mm_setzero_si128 (),
5452 (__mmask8) __U);
5453 }
5454 #else
5455
5456 #define _mm512_extractf64x4_pd(X, C) \
5457 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5458 (int) (C),\
5459 (__v4df)(__m256d)_mm256_undefined_pd(),\
5460 (__mmask8)-1))
5461
5462 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5463 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5464 (int) (C),\
5465 (__v4df)(__m256d)(W),\
5466 (__mmask8)(U)))
5467
5468 #define _mm512_maskz_extractf64x4_pd(U, X, C) \
5469 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5470 (int) (C),\
5471 (__v4df)(__m256d)_mm256_setzero_pd(),\
5472 (__mmask8)(U)))
5473
5474 #define _mm512_extractf32x4_ps(X, C) \
5475 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5476 (int) (C),\
5477 (__v4sf)(__m128)_mm_undefined_ps(),\
5478 (__mmask8)-1))
5479
5480 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
5481 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5482 (int) (C),\
5483 (__v4sf)(__m128)(W),\
5484 (__mmask8)(U)))
5485
5486 #define _mm512_maskz_extractf32x4_ps(U, X, C) \
5487 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5488 (int) (C),\
5489 (__v4sf)(__m128)_mm_setzero_ps(),\
5490 (__mmask8)(U)))
5491
5492 #define _mm512_extracti64x4_epi64(X, C) \
5493 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5494 (int) (C),\
5495 (__v4di)(__m256i)_mm256_undefined_si256 (),\
5496 (__mmask8)-1))
5497
5498 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
5499 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5500 (int) (C),\
5501 (__v4di)(__m256i)(W),\
5502 (__mmask8)(U)))
5503
5504 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
5505 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5506 (int) (C),\
5507 (__v4di)(__m256i)_mm256_setzero_si256 (),\
5508 (__mmask8)(U)))
5509
5510 #define _mm512_extracti32x4_epi32(X, C) \
5511 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5512 (int) (C),\
5513 (__v4si)(__m128i)_mm_undefined_si128 (),\
5514 (__mmask8)-1))
5515
5516 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
5517 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5518 (int) (C),\
5519 (__v4si)(__m128i)(W),\
5520 (__mmask8)(U)))
5521
5522 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
5523 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5524 (int) (C),\
5525 (__v4si)(__m128i)_mm_setzero_si128 (),\
5526 (__mmask8)(U)))
5527 #endif
5528
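/* Insert a 128-bit or 256-bit block into the lane of a 512-bit vector
   selected by the immediate, leaving the remaining lanes unchanged
   (subject to the write mask in the masked forms).  */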
5529 #ifdef __OPTIMIZE__
5530 extern __inline __m512i
5531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5532 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5533 {
5534 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5535 (__v4si) __B,
5536 __imm,
5537 (__v16si) __A, -1);
5538 }
5539
5540 extern __inline __m512
5541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5542 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5543 {
5544 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5545 (__v4sf) __B,
5546 __imm,
5547 (__v16sf) __A, -1);
5548 }
5549
5550 extern __inline __m512i
5551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5552 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5553 {
5554 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5555 (__v4di) __B,
5556 __imm,
5557 (__v8di)
5558 _mm512_undefined_si512 (),
5559 (__mmask8) -1);
5560 }
5561
5562 extern __inline __m512i
5563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5564 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5565 __m256i __B, const int __imm)
5566 {
5567 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5568 (__v4di) __B,
5569 __imm,
5570 (__v8di) __W,
5571 (__mmask8) __U);
5572 }
5573
5574 extern __inline __m512i
5575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5576 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5577 const int __imm)
5578 {
5579 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5580 (__v4di) __B,
5581 __imm,
5582 (__v8di)
5583 _mm512_setzero_si512 (),
5584 (__mmask8) __U);
5585 }
5586
5587 extern __inline __m512d
5588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5589 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5590 {
5591 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5592 (__v4df) __B,
5593 __imm,
5594 (__v8df)
5595 _mm512_undefined_pd (),
5596 (__mmask8) -1);
5597 }
5598
5599 extern __inline __m512d
5600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5601 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5602 __m256d __B, const int __imm)
5603 {
5604 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5605 (__v4df) __B,
5606 __imm,
5607 (__v8df) __W,
5608 (__mmask8) __U);
5609 }
5610
5611 extern __inline __m512d
5612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5613 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5614 const int __imm)
5615 {
5616 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5617 (__v4df) __B,
5618 __imm,
5619 (__v8df)
5620 _mm512_setzero_pd (),
5621 (__mmask8) __U);
5622 }
5623 #else
5624 #define _mm512_insertf32x4(X, Y, C) \
5625 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
5626 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
5627
5628 #define _mm512_inserti32x4(X, Y, C) \
5629 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
5630 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
5631
5632 #define _mm512_insertf64x4(X, Y, C) \
5633 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5634 (__v4df)(__m256d) (Y), (int) (C), \
5635 (__v8df)(__m512d)_mm512_undefined_pd(), \
5636 (__mmask8)-1))
5637
5638 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
5639 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5640 (__v4df)(__m256d) (Y), (int) (C), \
5641 (__v8df)(__m512d)(W), \
5642 (__mmask8)(U)))
5643
5644 #define _mm512_maskz_insertf64x4(U, X, Y, C) \
5645 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5646 (__v4df)(__m256d) (Y), (int) (C), \
5647 (__v8df)(__m512d)_mm512_setzero_pd(), \
5648 (__mmask8)(U)))
5649
5650 #define _mm512_inserti64x4(X, Y, C) \
5651 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5652 (__v4di)(__m256i) (Y), (int) (C), \
5653 (__v8di)(__m512i)_mm512_undefined_si512 (), \
5654 (__mmask8)-1))
5655
5656 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
5657 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5658 (__v4di)(__m256i) (Y), (int) (C),\
5659 (__v8di)(__m512i)(W),\
5660 (__mmask8)(U)))
5661
5662 #define _mm512_maskz_inserti64x4(U, X, Y, C) \
5663 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5664 (__v4di)(__m256i) (Y), (int) (C), \
5665 (__v8di)(__m512i)_mm512_setzero_si512 (), \
5666 (__mmask8)(U)))
5667 #endif
5668
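/* Unaligned 64-byte loads and stores.  The masked load and store forms
   access only the elements selected by the write mask.  Illustrative use
   (the names __ptr and __x are hypothetical):
     __m512d __x = _mm512_maskz_loadu_pd ((__mmask8) 0x0f, __ptr);
   which loads the low four doubles and zeroes the rest.  */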
5669 extern __inline __m512d
5670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5671 _mm512_loadu_pd (void const *__P)
5672 {
5673 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
5674 (__v8df)
5675 _mm512_undefined_pd (),
5676 (__mmask8) -1);
5677 }
5678
5679 extern __inline __m512d
5680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5681 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5682 {
5683 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
5684 (__v8df) __W,
5685 (__mmask8) __U);
5686 }
5687
5688 extern __inline __m512d
5689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5690 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5691 {
5692 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
5693 (__v8df)
5694 _mm512_setzero_pd (),
5695 (__mmask8) __U);
5696 }
5697
5698 extern __inline void
5699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5700 _mm512_storeu_pd (void *__P, __m512d __A)
5701 {
5702 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
5703 (__mmask8) -1);
5704 }
5705
5706 extern __inline void
5707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5708 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5709 {
5710 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
5711 (__mmask8) __U);
5712 }
5713
5714 extern __inline __m512
5715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5716 _mm512_loadu_ps (void const *__P)
5717 {
5718 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
5719 (__v16sf)
5720 _mm512_undefined_ps (),
5721 (__mmask16) -1);
5722 }
5723
5724 extern __inline __m512
5725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5726 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5727 {
5728 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
5729 (__v16sf) __W,
5730 (__mmask16) __U);
5731 }
5732
5733 extern __inline __m512
5734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5735 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5736 {
5737 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
5738 (__v16sf)
5739 _mm512_setzero_ps (),
5740 (__mmask16) __U);
5741 }
5742
5743 extern __inline void
5744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5745 _mm512_storeu_ps (void *__P, __m512 __A)
5746 {
5747 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
5748 (__mmask16) -1);
5749 }
5750
5751 extern __inline void
5752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5753 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5754 {
5755 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
5756 (__mmask16) __U);
5757 }
5758
5759 extern __inline __m512i
5760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5761 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5762 {
5763 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
5764 (__v8di) __W,
5765 (__mmask8) __U);
5766 }
5767
5768 extern __inline __m512i
5769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5770 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5771 {
5772 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
5773 (__v8di)
5774 _mm512_setzero_si512 (),
5775 (__mmask8) __U);
5776 }
5777
5778 extern __inline void
5779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5780 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5781 {
5782 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
5783 (__mmask8) __U);
5784 }
5785
5786 extern __inline __m512i
5787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5788 _mm512_loadu_si512 (void const *__P)
5789 {
5790 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
5791 (__v16si)
5792 _mm512_setzero_si512 (),
5793 (__mmask16) -1);
5794 }
5795
5796 extern __inline __m512i
5797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5798 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5799 {
5800 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
5801 (__v16si) __W,
5802 (__mmask16) __U);
5803 }
5804
5805 extern __inline __m512i
5806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5807 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5808 {
5809 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
5810 (__v16si)
5811 _mm512_setzero_si512 (),
5812 (__mmask16) __U);
5813 }
5814
5815 extern __inline void
5816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5817 _mm512_storeu_si512 (void *__P, __m512i __A)
5818 {
5819 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
5820 (__mmask16) -1);
5821 }
5822
5823 extern __inline void
5824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5825 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5826 {
5827 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
5828 (__mmask16) __U);
5829 }
5830
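/* Variable VPERMILPD/VPERMILPS permutes: each element is replaced by
   another element of the same 128-bit lane, selected by the low bits of
   the corresponding element of the control vector __C.  */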
5831 extern __inline __m512d
5832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5833 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5834 {
5835 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5836 (__v8di) __C,
5837 (__v8df)
5838 _mm512_undefined_pd (),
5839 (__mmask8) -1);
5840 }
5841
5842 extern __inline __m512d
5843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5844 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5845 {
5846 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5847 (__v8di) __C,
5848 (__v8df) __W,
5849 (__mmask8) __U);
5850 }
5851
5852 extern __inline __m512d
5853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5854 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5855 {
5856 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5857 (__v8di) __C,
5858 (__v8df)
5859 _mm512_setzero_pd (),
5860 (__mmask8) __U);
5861 }
5862
5863 extern __inline __m512
5864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5865 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5866 {
5867 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5868 (__v16si) __C,
5869 (__v16sf)
5870 _mm512_undefined_ps (),
5871 (__mmask16) -1);
5872 }
5873
5874 extern __inline __m512
5875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5876 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5877 {
5878 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5879 (__v16si) __C,
5880 (__v16sf) __W,
5881 (__mmask16) __U);
5882 }
5883
5884 extern __inline __m512
5885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5886 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5887 {
5888 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5889 (__v16si) __C,
5890 (__v16sf)
5891 _mm512_setzero_ps (),
5892 (__mmask16) __U);
5893 }
5894
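/* Two-source (full) permutes.  Each index element of __I selects one
   element from the concatenation of __A and __B; for the 64-bit forms,
   index values 0-7 pick from __A and 8-15 from __B.  The builtins take
   the index operand first, hence the idx annotations below.  In the
   _mask forms masked-off destination elements are taken from __A, while
   the _mask2 forms take them from the index operand __I.  */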
5895 extern __inline __m512i
5896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5897 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5898 {
5899 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5900 /* idx */ ,
5901 (__v8di) __A,
5902 (__v8di) __B,
5903 (__mmask8) -1);
5904 }
5905
5906 extern __inline __m512i
5907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5908 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5909 __m512i __B)
5910 {
5911 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5912 /* idx */ ,
5913 (__v8di) __A,
5914 (__v8di) __B,
5915 (__mmask8) __U);
5916 }
5917
5918 extern __inline __m512i
5919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5920 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5921 __mmask8 __U, __m512i __B)
5922 {
5923 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5924 (__v8di) __I
5925 /* idx */ ,
5926 (__v8di) __B,
5927 (__mmask8) __U);
5928 }
5929
5930 extern __inline __m512i
5931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5932 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5933 __m512i __I, __m512i __B)
5934 {
5935 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5936 /* idx */ ,
5937 (__v8di) __A,
5938 (__v8di) __B,
5939 (__mmask8) __U);
5940 }
5941
5942 extern __inline __m512i
5943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5944 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5945 {
5946 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5947 /* idx */ ,
5948 (__v16si) __A,
5949 (__v16si) __B,
5950 (__mmask16) -1);
5951 }
5952
5953 extern __inline __m512i
5954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5955 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5956 __m512i __I, __m512i __B)
5957 {
5958 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5959 /* idx */ ,
5960 (__v16si) __A,
5961 (__v16si) __B,
5962 (__mmask16) __U);
5963 }
5964
5965 extern __inline __m512i
5966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5967 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5968 __mmask16 __U, __m512i __B)
5969 {
5970 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5971 (__v16si) __I
5972 /* idx */ ,
5973 (__v16si) __B,
5974 (__mmask16) __U);
5975 }
5976
5977 extern __inline __m512i
5978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5979 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5980 __m512i __I, __m512i __B)
5981 {
5982 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5983 /* idx */ ,
5984 (__v16si) __A,
5985 (__v16si) __B,
5986 (__mmask16) __U);
5987 }
5988
5989 extern __inline __m512d
5990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5991 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5992 {
5993 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5994 /* idx */ ,
5995 (__v8df) __A,
5996 (__v8df) __B,
5997 (__mmask8) -1);
5998 }
5999
6000 extern __inline __m512d
6001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6002 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6003 __m512d __B)
6004 {
6005 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6006 /* idx */ ,
6007 (__v8df) __A,
6008 (__v8df) __B,
6009 (__mmask8) __U);
6010 }
6011
6012 extern __inline __m512d
6013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6014 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6015 __m512d __B)
6016 {
6017 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6018 (__v8di) __I
6019 /* idx */ ,
6020 (__v8df) __B,
6021 (__mmask8) __U);
6022 }
6023
6024 extern __inline __m512d
6025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6026 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6027 __m512d __B)
6028 {
6029 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6030 /* idx */ ,
6031 (__v8df) __A,
6032 (__v8df) __B,
6033 (__mmask8) __U);
6034 }
6035
6036 extern __inline __m512
6037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6038 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6039 {
6040 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6041 /* idx */ ,
6042 (__v16sf) __A,
6043 (__v16sf) __B,
6044 (__mmask16) -1);
6045 }
6046
6047 extern __inline __m512
6048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6049 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6050 {
6051 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6052 /* idx */ ,
6053 (__v16sf) __A,
6054 (__v16sf) __B,
6055 (__mmask16) __U);
6056 }
6057
6058 extern __inline __m512
6059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6060 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6061 __m512 __B)
6062 {
6063 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6064 (__v16si) __I
6065 /* idx */ ,
6066 (__v16sf) __B,
6067 (__mmask16) __U);
6068 }
6069
6070 extern __inline __m512
6071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6072 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6073 __m512 __B)
6074 {
6075 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6076 /* idx */ ,
6077 (__v16sf) __A,
6078 (__v16sf) __B,
6079 (__mmask16) __U);
6080 }
6081
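/* Immediate VPERMILPD/VPERMILPS permutes: like _mm512_permutevar_* above
   but with the selection encoded in an 8-bit immediate, so macro forms
   are again provided when not optimizing.  */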
6082 #ifdef __OPTIMIZE__
6083 extern __inline __m512d
6084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6085 _mm512_permute_pd (__m512d __X, const int __C)
6086 {
6087 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6088 (__v8df)
6089 _mm512_undefined_pd (),
6090 (__mmask8) -1);
6091 }
6092
6093 extern __inline __m512d
6094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6095 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6096 {
6097 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6098 (__v8df) __W,
6099 (__mmask8) __U);
6100 }
6101
6102 extern __inline __m512d
6103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6104 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6105 {
6106 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6107 (__v8df)
6108 _mm512_setzero_pd (),
6109 (__mmask8) __U);
6110 }
6111
6112 extern __inline __m512
6113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6114 _mm512_permute_ps (__m512 __X, const int __C)
6115 {
6116 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6117 (__v16sf)
6118 _mm512_undefined_ps (),
6119 (__mmask16) -1);
6120 }
6121
6122 extern __inline __m512
6123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6124 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6125 {
6126 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6127 (__v16sf) __W,
6128 (__mmask16) __U);
6129 }
6130
6131 extern __inline __m512
6132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6133 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6134 {
6135 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6136 (__v16sf)
6137 _mm512_setzero_ps (),
6138 (__mmask16) __U);
6139 }
6140 #else
6141 #define _mm512_permute_pd(X, C) \
6142 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6143 (__v8df)(__m512d)_mm512_undefined_pd(),\
6144 (__mmask8)(-1)))
6145
6146 #define _mm512_mask_permute_pd(W, U, X, C) \
6147 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6148 (__v8df)(__m512d)(W), \
6149 (__mmask8)(U)))
6150
6151 #define _mm512_maskz_permute_pd(U, X, C) \
6152 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6153 (__v8df)(__m512d)_mm512_setzero_pd(), \
6154 (__mmask8)(U)))
6155
6156 #define _mm512_permute_ps(X, C) \
6157 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6158 (__v16sf)(__m512)_mm512_undefined_ps(),\
6159 (__mmask16)(-1)))
6160
6161 #define _mm512_mask_permute_ps(W, U, X, C) \
6162 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6163 (__v16sf)(__m512)(W), \
6164 (__mmask16)(U)))
6165
6166 #define _mm512_maskz_permute_ps(U, X, C) \
6167 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6168 (__v16sf)(__m512)_mm512_setzero_ps(), \
6169 (__mmask16)(U)))
6170 #endif
6171
6172 #ifdef __OPTIMIZE__
6173 extern __inline __m512i
6174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6175 _mm512_permutex_epi64 (__m512i __X, const int __I)
6176 {
6177 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6178 (__v8di)
6179 _mm512_undefined_si512 (),
6180 (__mmask8) (-1));
6181 }
6182
6183 extern __inline __m512i
6184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6185 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6186 __m512i __X, const int __I)
6187 {
6188 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6189 (__v8di) __W,
6190 (__mmask8) __M);
6191 }
6192
6193 extern __inline __m512i
6194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6195 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6196 {
6197 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6198 (__v8di)
6199 _mm512_setzero_si512 (),
6200 (__mmask8) __M);
6201 }
6202
6203 extern __inline __m512d
6204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6205 _mm512_permutex_pd (__m512d __X, const int __M)
6206 {
6207 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6208 (__v8df)
6209 _mm512_undefined_pd (),
6210 (__mmask8) -1);
6211 }
6212
6213 extern __inline __m512d
6214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6215 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6216 {
6217 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6218 (__v8df) __W,
6219 (__mmask8) __U);
6220 }
6221
6222 extern __inline __m512d
6223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6224 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6225 {
6226 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6227 (__v8df)
6228 _mm512_setzero_pd (),
6229 (__mmask8) __U);
6230 }
6231 #else
6232 #define _mm512_permutex_pd(X, M) \
6233 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6234 (__v8df)(__m512d)_mm512_undefined_pd(),\
6235 (__mmask8)-1))
6236
6237 #define _mm512_mask_permutex_pd(W, U, X, M) \
6238 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6239 (__v8df)(__m512d)(W), (__mmask8)(U)))
6240
6241 #define _mm512_maskz_permutex_pd(U, X, M) \
6242 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6243 (__v8df)(__m512d)_mm512_setzero_pd(),\
6244 (__mmask8)(U)))
6245
6246 #define _mm512_permutex_epi64(X, I) \
6247 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6248 (int)(I), \
6249 (__v8di)(__m512i) \
6250 (_mm512_undefined_si512 ()),\
6251 (__mmask8)(-1)))
6252
6253 #define _mm512_maskz_permutex_epi64(M, X, I) \
6254 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6255 (int)(I), \
6256 (__v8di)(__m512i) \
6257 (_mm512_setzero_si512 ()),\
6258 (__mmask8)(M)))
6259
6260 #define _mm512_mask_permutex_epi64(W, M, X, I) \
6261 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6262 (int)(I), \
6263 (__v8di)(__m512i)(W), \
6264 (__mmask8)(M)))
6265 #endif
6266
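/* Full-width variable permutes: the index vector __X selects elements
   from anywhere in the 512-bit source __Y, crossing 128-bit lane
   boundaries.  Note that the builtins take the data operand first and
   the index second.  */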
6267 extern __inline __m512i
6268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6269 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6270 {
6271 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6272 (__v8di) __X,
6273 (__v8di)
6274 _mm512_setzero_si512 (),
6275 __M);
6276 }
6277
6278 extern __inline __m512i
6279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6280 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6281 {
6282 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6283 (__v8di) __X,
6284 (__v8di)
6285 _mm512_undefined_si512 (),
6286 (__mmask8) -1);
6287 }
6288
6289 extern __inline __m512i
6290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6291 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6292 __m512i __Y)
6293 {
6294 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6295 (__v8di) __X,
6296 (__v8di) __W,
6297 __M);
6298 }
6299
6300 extern __inline __m512i
6301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6302 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6303 {
6304 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6305 (__v16si) __X,
6306 (__v16si)
6307 _mm512_setzero_si512 (),
6308 __M);
6309 }
6310
6311 extern __inline __m512i
6312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6313 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6314 {
6315 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6316 (__v16si) __X,
6317 (__v16si)
6318 _mm512_undefined_si512 (),
6319 (__mmask16) -1);
6320 }
6321
6322 extern __inline __m512i
6323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6324 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6325 __m512i __Y)
6326 {
6327 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6328 (__v16si) __X,
6329 (__v16si) __W,
6330 __M);
6331 }
6332
6333 extern __inline __m512d
6334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6335 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6336 {
6337 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6338 (__v8di) __X,
6339 (__v8df)
6340 _mm512_undefined_pd (),
6341 (__mmask8) -1);
6342 }
6343
6344 extern __inline __m512d
6345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6346 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6347 {
6348 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6349 (__v8di) __X,
6350 (__v8df) __W,
6351 (__mmask8) __U);
6352 }
6353
6354 extern __inline __m512d
6355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6356 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6357 {
6358 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6359 (__v8di) __X,
6360 (__v8df)
6361 _mm512_setzero_pd (),
6362 (__mmask8) __U);
6363 }
6364
6365 extern __inline __m512
6366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6367 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6368 {
6369 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6370 (__v16si) __X,
6371 (__v16sf)
6372 _mm512_undefined_ps (),
6373 (__mmask16) -1);
6374 }
6375
6376 extern __inline __m512
6377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6378 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6379 {
6380 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6381 (__v16si) __X,
6382 (__v16sf) __W,
6383 (__mmask16) __U);
6384 }
6385
6386 extern __inline __m512
6387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6388 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6389 {
6390 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6391 (__v16si) __X,
6392 (__v16sf)
6393 _mm512_setzero_ps (),
6394 (__mmask16) __U);
6395 }
6396
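/* VSHUFPS/VSHUFPD shuffles combine elements from the two sources within
   each 128-bit lane under control of the immediate, and the fixupimm
   operations patch special values (NaN, zero, infinity, ...) in __A/__B
   according to the per-element table supplied in __C and the immediate.
   Both need compile-time immediates, so macro forms follow for the
   non-optimizing case.  */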
6397 #ifdef __OPTIMIZE__
6398 extern __inline __m512
6399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6400 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6401 {
6402 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6403 (__v16sf) __V, __imm,
6404 (__v16sf)
6405 _mm512_undefined_ps (),
6406 (__mmask16) -1);
6407 }
6408
6409 extern __inline __m512
6410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6411 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6412 __m512 __V, const int __imm)
6413 {
6414 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6415 (__v16sf) __V, __imm,
6416 (__v16sf) __W,
6417 (__mmask16) __U);
6418 }
6419
6420 extern __inline __m512
6421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6422 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6423 {
6424 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6425 (__v16sf) __V, __imm,
6426 (__v16sf)
6427 _mm512_setzero_ps (),
6428 (__mmask16) __U);
6429 }
6430
6431 extern __inline __m512d
6432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6433 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6434 {
6435 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6436 (__v8df) __V, __imm,
6437 (__v8df)
6438 _mm512_undefined_pd (),
6439 (__mmask8) -1);
6440 }
6441
6442 extern __inline __m512d
6443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6444 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6445 __m512d __V, const int __imm)
6446 {
6447 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6448 (__v8df) __V, __imm,
6449 (__v8df) __W,
6450 (__mmask8) __U);
6451 }
6452
6453 extern __inline __m512d
6454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6455 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6456 const int __imm)
6457 {
6458 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6459 (__v8df) __V, __imm,
6460 (__v8df)
6461 _mm512_setzero_pd (),
6462 (__mmask8) __U);
6463 }
6464
6465 extern __inline __m512d
6466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6467 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6468 const int __imm, const int __R)
6469 {
6470 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6471 (__v8df) __B,
6472 (__v8di) __C,
6473 __imm,
6474 (__mmask8) -1, __R);
6475 }
6476
6477 extern __inline __m512d
6478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6479 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6480 __m512i __C, const int __imm, const int __R)
6481 {
6482 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6483 (__v8df) __B,
6484 (__v8di) __C,
6485 __imm,
6486 (__mmask8) __U, __R);
6487 }
6488
6489 extern __inline __m512d
6490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6491 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6492 __m512i __C, const int __imm, const int __R)
6493 {
6494 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6495 (__v8df) __B,
6496 (__v8di) __C,
6497 __imm,
6498 (__mmask8) __U, __R);
6499 }
6500
6501 extern __inline __m512
6502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6503 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6504 const int __imm, const int __R)
6505 {
6506 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6507 (__v16sf) __B,
6508 (__v16si) __C,
6509 __imm,
6510 (__mmask16) -1, __R);
6511 }
6512
6513 extern __inline __m512
6514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6515 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6516 __m512i __C, const int __imm, const int __R)
6517 {
6518 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6519 (__v16sf) __B,
6520 (__v16si) __C,
6521 __imm,
6522 (__mmask16) __U, __R);
6523 }
6524
6525 extern __inline __m512
6526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6527 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6528 __m512i __C, const int __imm, const int __R)
6529 {
6530 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6531 (__v16sf) __B,
6532 (__v16si) __C,
6533 __imm,
6534 (__mmask16) __U, __R);
6535 }
6536
6537 extern __inline __m128d
6538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6539 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6540 const int __imm, const int __R)
6541 {
6542 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6543 (__v2df) __B,
6544 (__v2di) __C, __imm,
6545 (__mmask8) -1, __R);
6546 }
6547
6548 extern __inline __m128d
6549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6550 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6551 __m128i __C, const int __imm, const int __R)
6552 {
6553 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6554 (__v2df) __B,
6555 (__v2di) __C, __imm,
6556 (__mmask8) __U, __R);
6557 }
6558
6559 extern __inline __m128d
6560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6561 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6562 __m128i __C, const int __imm, const int __R)
6563 {
6564 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6565 (__v2df) __B,
6566 (__v2di) __C,
6567 __imm,
6568 (__mmask8) __U, __R);
6569 }
6570
6571 extern __inline __m128
6572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6573 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6574 const int __imm, const int __R)
6575 {
6576 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6577 (__v4sf) __B,
6578 (__v4si) __C, __imm,
6579 (__mmask8) -1, __R);
6580 }
6581
6582 extern __inline __m128
6583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6584 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6585 __m128i __C, const int __imm, const int __R)
6586 {
6587 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6588 (__v4sf) __B,
6589 (__v4si) __C, __imm,
6590 (__mmask8) __U, __R);
6591 }
6592
6593 extern __inline __m128
6594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6595 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6596 __m128i __C, const int __imm, const int __R)
6597 {
6598 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6599 (__v4sf) __B,
6600 (__v4si) __C, __imm,
6601 (__mmask8) __U, __R);
6602 }
6603
6604 #else
6605 #define _mm512_shuffle_pd(X, Y, C) \
6606 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6607 (__v8df)(__m512d)(Y), (int)(C),\
6608 (__v8df)(__m512d)_mm512_undefined_pd(),\
6609 (__mmask8)-1))
6610
6611 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
6612 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6613 (__v8df)(__m512d)(Y), (int)(C),\
6614 (__v8df)(__m512d)(W),\
6615 (__mmask8)(U)))
6616
6617 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
6618 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6619 (__v8df)(__m512d)(Y), (int)(C),\
6620 (__v8df)(__m512d)_mm512_setzero_pd(),\
6621 (__mmask8)(U)))
6622
6623 #define _mm512_shuffle_ps(X, Y, C) \
6624 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6625 (__v16sf)(__m512)(Y), (int)(C),\
6626 (__v16sf)(__m512)_mm512_undefined_ps(),\
6627 (__mmask16)-1))
6628
6629 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
6630 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6631 (__v16sf)(__m512)(Y), (int)(C),\
6632 (__v16sf)(__m512)(W),\
6633 (__mmask16)(U)))
6634
6635 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
6636 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6637 (__v16sf)(__m512)(Y), (int)(C),\
6638 (__v16sf)(__m512)_mm512_setzero_ps(),\
6639 (__mmask16)(U)))
6640
6641 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
6642 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6643 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6644 (__mmask8)(-1), (R)))
6645
6646 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
6647 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6648 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6649 (__mmask8)(U), (R)))
6650
6651 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
6652 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
6653 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6654 (__mmask8)(U), (R)))
6655
6656 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
6657 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6658 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6659 (__mmask16)(-1), (R)))
6660
6661 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6662 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6663 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6664 (__mmask16)(U), (R)))
6665
6666 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6667 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6668 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6669 (__mmask16)(U), (R)))
6670
6671 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
6672 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6673 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6674 (__mmask8)(-1), (R)))
6675
6676 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
6677 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6678 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6679 (__mmask8)(U), (R)))
6680
6681 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
6682 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
6683 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6684 (__mmask8)(U), (R)))
6685
6686 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
6687 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6688 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6689 (__mmask8)(-1), (R)))
6690
6691 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
6692 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6693 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6694 (__mmask8)(U), (R)))
6695
6696 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
6697 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
6698 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6699 (__mmask8)(U), (R)))
6700 #endif
6701
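/* Duplicate the odd-indexed (movehdup) or even-indexed (moveldup)
   single-precision elements into each adjacent pair.  */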
6702 extern __inline __m512
6703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6704 _mm512_movehdup_ps (__m512 __A)
6705 {
6706 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6707 (__v16sf)
6708 _mm512_undefined_ps (),
6709 (__mmask16) -1);
6710 }
6711
6712 extern __inline __m512
6713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6714 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6715 {
6716 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6717 (__v16sf) __W,
6718 (__mmask16) __U);
6719 }
6720
6721 extern __inline __m512
6722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6723 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6724 {
6725 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6726 (__v16sf)
6727 _mm512_setzero_ps (),
6728 (__mmask16) __U);
6729 }
6730
6731 extern __inline __m512
6732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6733 _mm512_moveldup_ps (__m512 __A)
6734 {
6735 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6736 (__v16sf)
6737 _mm512_undefined_ps (),
6738 (__mmask16) -1);
6739 }
6740
6741 extern __inline __m512
6742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6743 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6744 {
6745 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6746 (__v16sf) __W,
6747 (__mmask16) __U);
6748 }
6749
6750 extern __inline __m512
6751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6752 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6753 {
6754 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6755 (__v16sf)
6756 _mm512_setzero_ps (),
6757 (__mmask16) __U);
6758 }
6759
6760 extern __inline __m512i
6761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6762 _mm512_or_si512 (__m512i __A, __m512i __B)
6763 {
6764 return (__m512i) ((__v16su) __A | (__v16su) __B);
6765 }
6766
6767 extern __inline __m512i
6768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6769 _mm512_or_epi32 (__m512i __A, __m512i __B)
6770 {
6771 return (__m512i) ((__v16su) __A | (__v16su) __B);
6772 }
6773
6774 extern __inline __m512i
6775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6776 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6777 {
6778 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6779 (__v16si) __B,
6780 (__v16si) __W,
6781 (__mmask16) __U);
6782 }
6783
6784 extern __inline __m512i
6785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6786 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6787 {
6788 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6789 (__v16si) __B,
6790 (__v16si)
6791 _mm512_setzero_si512 (),
6792 (__mmask16) __U);
6793 }
6794
6795 extern __inline __m512i
6796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6797 _mm512_or_epi64 (__m512i __A, __m512i __B)
6798 {
6799 return (__m512i) ((__v8du) __A | (__v8du) __B);
6800 }
6801
6802 extern __inline __m512i
6803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6804 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6805 {
6806 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6807 (__v8di) __B,
6808 (__v8di) __W,
6809 (__mmask8) __U);
6810 }
6811
6812 extern __inline __m512i
6813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6814 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6815 {
6816 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6817 (__v8di) __B,
6818 (__v8di)
6819 _mm512_setzero_si512 (),
6820 (__mmask8) __U);
6821 }
6822
6823 extern __inline __m512i
6824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6825 _mm512_xor_si512 (__m512i __A, __m512i __B)
6826 {
6827 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6828 }
6829
6830 extern __inline __m512i
6831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6832 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6833 {
6834 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6835 }
6836
6837 extern __inline __m512i
6838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6839 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6840 {
6841 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6842 (__v16si) __B,
6843 (__v16si) __W,
6844 (__mmask16) __U);
6845 }
6846
6847 extern __inline __m512i
6848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6849 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6850 {
6851 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6852 (__v16si) __B,
6853 (__v16si)
6854 _mm512_setzero_si512 (),
6855 (__mmask16) __U);
6856 }
6857
6858 extern __inline __m512i
6859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6860 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6861 {
6862 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6863 }
6864
6865 extern __inline __m512i
6866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6868 {
6869 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6870 (__v8di) __B,
6871 (__v8di) __W,
6872 (__mmask8) __U);
6873 }
6874
6875 extern __inline __m512i
6876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6877 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6878 {
6879 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6880 (__v8di) __B,
6881 (__v8di)
6882 _mm512_setzero_si512 (),
6883 (__mmask8) __U);
6884 }
6885
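/* The rotate count must be a compile-time constant for the builtins below.
   When not optimizing, the always-inline wrappers do not fold their const
   int argument into a constant, so macro forms are provided instead.  */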
6886 #ifdef __OPTIMIZE__
6887 extern __inline __m512i
6888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889 _mm512_rol_epi32 (__m512i __A, const int __B)
6890 {
6891 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6892 (__v16si)
6893 _mm512_undefined_si512 (),
6894 (__mmask16) -1);
6895 }
6896
6897 extern __inline __m512i
6898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6899 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6900 {
6901 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6902 (__v16si) __W,
6903 (__mmask16) __U);
6904 }
6905
6906 extern __inline __m512i
6907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6908 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6909 {
6910 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6911 (__v16si)
6912 _mm512_setzero_si512 (),
6913 (__mmask16) __U);
6914 }
6915
6916 extern __inline __m512i
6917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6918 _mm512_ror_epi32 (__m512i __A, int __B)
6919 {
6920 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6921 (__v16si)
6922 _mm512_undefined_si512 (),
6923 (__mmask16) -1);
6924 }
6925
6926 extern __inline __m512i
6927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6928 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6929 {
6930 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6931 (__v16si) __W,
6932 (__mmask16) __U);
6933 }
6934
6935 extern __inline __m512i
6936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6937 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6938 {
6939 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6940 (__v16si)
6941 _mm512_setzero_si512 (),
6942 (__mmask16) __U);
6943 }
6944
6945 extern __inline __m512i
6946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6947 _mm512_rol_epi64 (__m512i __A, const int __B)
6948 {
6949 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6950 (__v8di)
6951 _mm512_undefined_si512 (),
6952 (__mmask8) -1);
6953 }
6954
6955 extern __inline __m512i
6956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6957 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6958 {
6959 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6960 (__v8di) __W,
6961 (__mmask8) __U);
6962 }
6963
6964 extern __inline __m512i
6965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6967 {
6968 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6969 (__v8di)
6970 _mm512_setzero_si512 (),
6971 (__mmask8) __U);
6972 }
6973
6974 extern __inline __m512i
6975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6976 _mm512_ror_epi64 (__m512i __A, int __B)
6977 {
6978 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6979 (__v8di)
6980 _mm512_undefined_si512 (),
6981 (__mmask8) -1);
6982 }
6983
6984 extern __inline __m512i
6985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6986 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6987 {
6988 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6989 (__v8di) __W,
6990 (__mmask8) __U);
6991 }
6992
6993 extern __inline __m512i
6994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6995 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6996 {
6997 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6998 (__v8di)
6999 _mm512_setzero_si512 (),
7000 (__mmask8) __U);
7001 }
7002
7003 #else
7004 #define _mm512_rol_epi32(A, B) \
7005 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7006 (int)(B), \
7007 (__v16si)_mm512_undefined_si512 (), \
7008 (__mmask16)(-1)))
7009 #define _mm512_mask_rol_epi32(W, U, A, B) \
7010 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7011 (int)(B), \
7012 (__v16si)(__m512i)(W), \
7013 (__mmask16)(U)))
7014 #define _mm512_maskz_rol_epi32(U, A, B) \
7015 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7016 (int)(B), \
7017 (__v16si)_mm512_setzero_si512 (), \
7018 (__mmask16)(U)))
7019 #define _mm512_ror_epi32(A, B) \
7020 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7021 (int)(B), \
7022 (__v16si)_mm512_undefined_si512 (), \
7023 (__mmask16)(-1)))
7024 #define _mm512_mask_ror_epi32(W, U, A, B) \
7025 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7026 (int)(B), \
7027 (__v16si)(__m512i)(W), \
7028 (__mmask16)(U)))
7029 #define _mm512_maskz_ror_epi32(U, A, B) \
7030 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7031 (int)(B), \
7032 (__v16si)_mm512_setzero_si512 (), \
7033 (__mmask16)(U)))
7034 #define _mm512_rol_epi64(A, B) \
7035 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7036 (int)(B), \
7037 (__v8di)_mm512_undefined_si512 (), \
7038 (__mmask8)(-1)))
7039 #define _mm512_mask_rol_epi64(W, U, A, B) \
7040 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7041 (int)(B), \
7042 (__v8di)(__m512i)(W), \
7043 (__mmask8)(U)))
7044 #define _mm512_maskz_rol_epi64(U, A, B) \
7045 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7046 (int)(B), \
7047 (__v8di)_mm512_setzero_si512 (), \
7048 (__mmask8)(U)))
7049
7050 #define _mm512_ror_epi64(A, B) \
7051 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7052 (int)(B), \
7053 (__v8di)_mm512_undefined_si512 (), \
7054 (__mmask8)(-1)))
7055 #define _mm512_mask_ror_epi64(W, U, A, B) \
7056 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7057 (int)(B), \
7058 (__v8di)(__m512i)(W), \
7059 (__mmask8)(U)))
7060 #define _mm512_maskz_ror_epi64(U, A, B) \
7061 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7062 (int)(B), \
7063 (__v8di)_mm512_setzero_si512 (), \
7064 (__mmask8)(U)))
7065 #endif
7066
7067 extern __inline __m512i
7068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7069 _mm512_and_si512 (__m512i __A, __m512i __B)
7070 {
7071 return (__m512i) ((__v16su) __A & (__v16su) __B);
7072 }
7073
7074 extern __inline __m512i
7075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076 _mm512_and_epi32 (__m512i __A, __m512i __B)
7077 {
7078 return (__m512i) ((__v16su) __A & (__v16su) __B);
7079 }
7080
7081 extern __inline __m512i
7082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7083 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7084 {
7085 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7086 (__v16si) __B,
7087 (__v16si) __W,
7088 (__mmask16) __U);
7089 }
7090
7091 extern __inline __m512i
7092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7093 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7094 {
7095 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7096 (__v16si) __B,
7097 (__v16si)
7098 _mm512_setzero_si512 (),
7099 (__mmask16) __U);
7100 }
7101
7102 extern __inline __m512i
7103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7104 _mm512_and_epi64 (__m512i __A, __m512i __B)
7105 {
7106 return (__m512i) ((__v8du) __A & (__v8du) __B);
7107 }
7108
7109 extern __inline __m512i
7110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7111 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7112 {
7113 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7114 (__v8di) __B,
7115 (__v8di) __W, __U);
7116 }
7117
7118 extern __inline __m512i
7119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7121 {
7122 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7123 (__v8di) __B,
7124 (__v8di)
7125 _mm512_setzero_si512 (),
7126 __U);
7127 }
7128
7129 extern __inline __m512i
7130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7132 {
7133 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7134 (__v16si) __B,
7135 (__v16si)
7136 _mm512_undefined_si512 (),
7137 (__mmask16) -1);
7138 }
7139
7140 extern __inline __m512i
7141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7143 {
7144 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7145 (__v16si) __B,
7146 (__v16si)
7147 _mm512_undefined_si512 (),
7148 (__mmask16) -1);
7149 }
7150
7151 extern __inline __m512i
7152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7153 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7154 {
7155 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7156 (__v16si) __B,
7157 (__v16si) __W,
7158 (__mmask16) __U);
7159 }
7160
7161 extern __inline __m512i
7162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7163 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7164 {
7165 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7166 (__v16si) __B,
7167 (__v16si)
7168 _mm512_setzero_si512 (),
7169 (__mmask16) __U);
7170 }
7171
7172 extern __inline __m512i
7173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7174 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7175 {
7176 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7177 (__v8di) __B,
7178 (__v8di)
7179 _mm512_undefined_si512 (),
7180 (__mmask8) -1);
7181 }
7182
7183 extern __inline __m512i
7184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7185 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7186 {
7187 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7188 (__v8di) __B,
7189 (__v8di) __W, __U);
7190 }
7191
7192 extern __inline __m512i
7193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7195 {
7196 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7197 (__v8di) __B,
7198 (__v8di)
7199 _mm512_setzero_si512 (),
7200 __U);
7201 }
7202
7203 extern __inline __mmask16
7204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7205 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7206 {
7207 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7208 (__v16si) __B,
7209 (__mmask16) -1);
7210 }
7211
7212 extern __inline __mmask16
7213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7214 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7215 {
7216 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7217 (__v16si) __B, __U);
7218 }
7219
7220 extern __inline __mmask8
7221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7222 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7223 {
7224 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7225 (__v8di) __B,
7226 (__mmask8) -1);
7227 }
7228
7229 extern __inline __mmask8
7230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7231 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7232 {
7233 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7234 }
7235
7236 extern __inline __mmask16
7237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7238 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7239 {
7240 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7241 (__v16si) __B,
7242 (__mmask16) -1);
7243 }
7244
7245 extern __inline __mmask16
7246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7247 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7248 {
7249 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7250 (__v16si) __B, __U);
7251 }
7252
7253 extern __inline __mmask8
7254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7255 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7256 {
7257 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7258 (__v8di) __B,
7259 (__mmask8) -1);
7260 }
7261
7262 extern __inline __mmask8
7263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7264 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7265 {
7266 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7267 (__v8di) __B, __U);
7268 }
7269
7270 extern __inline __m512i
7271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7272 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7273 {
7274 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7275 (__v16si) __B,
7276 (__v16si)
7277 _mm512_undefined_si512 (),
7278 (__mmask16) -1);
7279 }
7280
7281 extern __inline __m512i
7282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7283 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7284 __m512i __B)
7285 {
7286 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7287 (__v16si) __B,
7288 (__v16si) __W,
7289 (__mmask16) __U);
7290 }
7291
7292 extern __inline __m512i
7293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7294 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7295 {
7296 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7297 (__v16si) __B,
7298 (__v16si)
7299 _mm512_setzero_si512 (),
7300 (__mmask16) __U);
7301 }
7302
7303 extern __inline __m512i
7304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7305 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7306 {
7307 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7308 (__v8di) __B,
7309 (__v8di)
7310 _mm512_undefined_si512 (),
7311 (__mmask8) -1);
7312 }
7313
7314 extern __inline __m512i
7315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7316 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7317 {
7318 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7319 (__v8di) __B,
7320 (__v8di) __W,
7321 (__mmask8) __U);
7322 }
7323
7324 extern __inline __m512i
7325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7326 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7327 {
7328 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7329 (__v8di) __B,
7330 (__v8di)
7331 _mm512_setzero_si512 (),
7332 (__mmask8) __U);
7333 }
7334
7335 extern __inline __m512i
7336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7337 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7338 {
7339 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7340 (__v16si) __B,
7341 (__v16si)
7342 _mm512_undefined_si512 (),
7343 (__mmask16) -1);
7344 }
7345
7346 extern __inline __m512i
7347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7348 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7349 __m512i __B)
7350 {
7351 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7352 (__v16si) __B,
7353 (__v16si) __W,
7354 (__mmask16) __U);
7355 }
7356
7357 extern __inline __m512i
7358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7359 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7360 {
7361 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7362 (__v16si) __B,
7363 (__v16si)
7364 _mm512_setzero_si512 (),
7365 (__mmask16) __U);
7366 }
7367
7368 extern __inline __m512i
7369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7370 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7371 {
7372 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7373 (__v8di) __B,
7374 (__v8di)
7375 _mm512_undefined_si512 (),
7376 (__mmask8) -1);
7377 }
7378
7379 extern __inline __m512i
7380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7381 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7382 {
7383 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7384 (__v8di) __B,
7385 (__v8di) __W,
7386 (__mmask8) __U);
7387 }
7388
7389 extern __inline __m512i
7390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7391 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7392 {
7393 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7394 (__v8di) __B,
7395 (__v8di)
7396 _mm512_setzero_si512 (),
7397 (__mmask8) __U);
7398 }
7399
7400 #ifdef __x86_64__
7401 #ifdef __OPTIMIZE__
7402 extern __inline unsigned long long
7403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7404 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7405 {
7406 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7407 }
7408
7409 extern __inline long long
7410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7411 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7412 {
7413 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7414 }
7415
7416 extern __inline long long
7417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7418 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7419 {
7420 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7421 }
7422
7423 extern __inline unsigned long long
7424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7425 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7426 {
7427 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7428 }
7429
7430 extern __inline long long
7431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7432 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7433 {
7434 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7435 }
7436
7437 extern __inline long long
7438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7439 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7440 {
7441 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7442 }
7443 #else
7444 #define _mm_cvt_roundss_u64(A, B) \
7445 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
7446
7447 #define _mm_cvt_roundss_si64(A, B) \
7448 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7449
7450 #define _mm_cvt_roundss_i64(A, B) \
7451 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7452
7453 #define _mm_cvtt_roundss_u64(A, B) \
7454 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
7455
7456 #define _mm_cvtt_roundss_i64(A, B) \
7457 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7458
7459 #define _mm_cvtt_roundss_si64(A, B) \
7460 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7461 #endif
7462 #endif
7463
7464 #ifdef __OPTIMIZE__
7465 extern __inline unsigned
7466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7467 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7468 {
7469 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7470 }
7471
7472 extern __inline int
7473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7474 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7475 {
7476 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7477 }
7478
7479 extern __inline int
7480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7481 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7482 {
7483 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7484 }
7485
7486 extern __inline unsigned
7487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7488 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7489 {
7490 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7491 }
7492
7493 extern __inline int
7494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7495 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7496 {
7497 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7498 }
7499
7500 extern __inline int
7501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7502 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7503 {
7504 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7505 }
7506 #else
7507 #define _mm_cvt_roundss_u32(A, B) \
7508 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
7509
7510 #define _mm_cvt_roundss_si32(A, B) \
7511 ((int)__builtin_ia32_vcvtss2si32(A, B))
7512
7513 #define _mm_cvt_roundss_i32(A, B) \
7514 ((int)__builtin_ia32_vcvtss2si32(A, B))
7515
7516 #define _mm_cvtt_roundss_u32(A, B) \
7517 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
7518
7519 #define _mm_cvtt_roundss_si32(A, B) \
7520 ((int)__builtin_ia32_vcvttss2si32(A, B))
7521
7522 #define _mm_cvtt_roundss_i32(A, B) \
7523 ((int)__builtin_ia32_vcvttss2si32(A, B))
7524 #endif
7525
7526 #ifdef __x86_64__
7527 #ifdef __OPTIMIZE__
7528 extern __inline unsigned long long
7529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7530 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7531 {
7532 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7533 }
7534
7535 extern __inline long long
7536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7537 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7538 {
7539 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7540 }
7541
7542 extern __inline long long
7543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7544 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7545 {
7546 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7547 }
7548
7549 extern __inline unsigned long long
7550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7551 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7552 {
7553 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7554 }
7555
7556 extern __inline long long
7557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7558 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7559 {
7560 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7561 }
7562
7563 extern __inline long long
7564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7566 {
7567 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7568 }
7569 #else
7570 #define _mm_cvt_roundsd_u64(A, B) \
7571 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
7572
7573 #define _mm_cvt_roundsd_si64(A, B) \
7574 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7575
7576 #define _mm_cvt_roundsd_i64(A, B) \
7577 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7578
7579 #define _mm_cvtt_roundsd_u64(A, B) \
7580 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
7581
7582 #define _mm_cvtt_roundsd_si64(A, B) \
7583 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7584
7585 #define _mm_cvtt_roundsd_i64(A, B) \
7586 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7587 #endif
7588 #endif
7589
7590 #ifdef __OPTIMIZE__
7591 extern __inline unsigned
7592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7593 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7594 {
7595 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7596 }
7597
7598 extern __inline int
7599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7600 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7601 {
7602 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7603 }
7604
7605 extern __inline int
7606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7607 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7608 {
7609 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7610 }
7611
7612 extern __inline unsigned
7613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7614 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7615 {
7616 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7617 }
7618
7619 extern __inline int
7620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7621 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7622 {
7623 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7624 }
7625
7626 extern __inline int
7627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7628 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7629 {
7630 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7631 }
7632 #else
7633 #define _mm_cvt_roundsd_u32(A, B) \
7634 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
7635
7636 #define _mm_cvt_roundsd_si32(A, B) \
7637 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7638
7639 #define _mm_cvt_roundsd_i32(A, B) \
7640 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7641
7642 #define _mm_cvtt_roundsd_u32(A, B) \
7643 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
7644
7645 #define _mm_cvtt_roundsd_si32(A, B) \
7646 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7647
7648 #define _mm_cvtt_roundsd_i32(A, B) \
7649 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7650 #endif
7651
7652 extern __inline __m512d
7653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7654 _mm512_movedup_pd (__m512d __A)
7655 {
7656 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7657 (__v8df)
7658 _mm512_undefined_pd (),
7659 (__mmask8) -1);
7660 }
7661
7662 extern __inline __m512d
7663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7664 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7665 {
7666 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7667 (__v8df) __W,
7668 (__mmask8) __U);
7669 }
7670
7671 extern __inline __m512d
7672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7673 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7674 {
7675 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7676 (__v8df)
7677 _mm512_setzero_pd (),
7678 (__mmask8) __U);
7679 }
7680
7681 extern __inline __m512d
7682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7683 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7684 {
7685 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7686 (__v8df) __B,
7687 (__v8df)
7688 _mm512_undefined_pd (),
7689 (__mmask8) -1);
7690 }
7691
7692 extern __inline __m512d
7693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7694 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7695 {
7696 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7697 (__v8df) __B,
7698 (__v8df) __W,
7699 (__mmask8) __U);
7700 }
7701
7702 extern __inline __m512d
7703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7704 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7705 {
7706 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7707 (__v8df) __B,
7708 (__v8df)
7709 _mm512_setzero_pd (),
7710 (__mmask8) __U);
7711 }
7712
7713 extern __inline __m512d
7714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7715 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7716 {
7717 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7718 (__v8df) __B,
7719 (__v8df)
7720 _mm512_undefined_pd (),
7721 (__mmask8) -1);
7722 }
7723
7724 extern __inline __m512d
7725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7726 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7727 {
7728 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7729 (__v8df) __B,
7730 (__v8df) __W,
7731 (__mmask8) __U);
7732 }
7733
7734 extern __inline __m512d
7735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7736 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7737 {
7738 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7739 (__v8df) __B,
7740 (__v8df)
7741 _mm512_setzero_pd (),
7742 (__mmask8) __U);
7743 }
7744
7745 extern __inline __m512
7746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7747 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7748 {
7749 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7750 (__v16sf) __B,
7751 (__v16sf)
7752 _mm512_undefined_ps (),
7753 (__mmask16) -1);
7754 }
7755
7756 extern __inline __m512
7757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7758 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7759 {
7760 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7761 (__v16sf) __B,
7762 (__v16sf) __W,
7763 (__mmask16) __U);
7764 }
7765
7766 extern __inline __m512
7767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7768 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7769 {
7770 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7771 (__v16sf) __B,
7772 (__v16sf)
7773 _mm512_setzero_ps (),
7774 (__mmask16) __U);
7775 }
7776
7777 #ifdef __OPTIMIZE__
7778 extern __inline __m512d
7779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7780 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7781 {
7782 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7783 (__v8df)
7784 _mm512_undefined_pd (),
7785 (__mmask8) -1, __R);
7786 }
7787
7788 extern __inline __m512d
7789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7790 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7791 const int __R)
7792 {
7793 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7794 (__v8df) __W,
7795 (__mmask8) __U, __R);
7796 }
7797
7798 extern __inline __m512d
7799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7800 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7801 {
7802 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7803 (__v8df)
7804 _mm512_setzero_pd (),
7805 (__mmask8) __U, __R);
7806 }
7807
7808 extern __inline __m512
7809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7810 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7811 {
7812 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7813 (__v16sf)
7814 _mm512_undefined_ps (),
7815 (__mmask16) -1, __R);
7816 }
7817
7818 extern __inline __m512
7819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7820 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7821 const int __R)
7822 {
7823 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7824 (__v16sf) __W,
7825 (__mmask16) __U, __R);
7826 }
7827
7828 extern __inline __m512
7829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7830 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7831 {
7832 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7833 (__v16sf)
7834 _mm512_setzero_ps (),
7835 (__mmask16) __U, __R);
7836 }
7837
7838 extern __inline __m256i
7839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7841 {
7842 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7843 __I,
7844 (__v16hi)
7845 _mm256_undefined_si256 (),
7846 -1);
7847 }
7848
7849 extern __inline __m256i
7850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7851 _mm512_cvtps_ph (__m512 __A, const int __I)
7852 {
7853 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7854 __I,
7855 (__v16hi)
7856 _mm256_undefined_si256 (),
7857 -1);
7858 }
7859
7860 extern __inline __m256i
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7863 const int __I)
7864 {
7865 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7866 __I,
7867 (__v16hi) __U,
7868 (__mmask16) __W);
7869 }
7870
7871 extern __inline __m256i
7872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7873 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7874 {
7875 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7876 __I,
7877 (__v16hi) __U,
7878 (__mmask16) __W);
7879 }
7880
7881 extern __inline __m256i
7882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7883 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7884 {
7885 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7886 __I,
7887 (__v16hi)
7888 _mm256_setzero_si256 (),
7889 (__mmask16) __W);
7890 }
7891
7892 extern __inline __m256i
7893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7894 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7895 {
7896 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7897 __I,
7898 (__v16hi)
7899 _mm256_setzero_si256 (),
7900 (__mmask16) __W);
7901 }
7902 #else
7903 #define _mm512_cvt_roundps_pd(A, B) \
7904 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
7905
7906 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
7907 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
7908
7909 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
7910 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
7911
7912 #define _mm512_cvt_roundph_ps(A, B) \
7913 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
7914
7915 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
7916 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
7917
7918 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
7919 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
7920
7921 #define _mm512_cvt_roundps_ph(A, I) \
7922 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7923 (__v16hi)_mm256_undefined_si256 (), -1))
7924 #define _mm512_cvtps_ph(A, I) \
7925 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7926 (__v16hi)_mm256_undefined_si256 (), -1))
7927 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
7928 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7929 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7930 #define _mm512_mask_cvtps_ph(U, W, A, I) \
7931 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7932 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7933 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
7934 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7935 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7936 #define _mm512_maskz_cvtps_ph(W, A, I) \
7937 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7938 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7939 #endif
7940
7941 #ifdef __OPTIMIZE__
7942 extern __inline __m256
7943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7944 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7945 {
7946 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7947 (__v8sf)
7948 _mm256_undefined_ps (),
7949 (__mmask8) -1, __R);
7950 }
7951
7952 extern __inline __m256
7953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7954 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7955 const int __R)
7956 {
7957 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7958 (__v8sf) __W,
7959 (__mmask8) __U, __R);
7960 }
7961
7962 extern __inline __m256
7963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7964 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7965 {
7966 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7967 (__v8sf)
7968 _mm256_setzero_ps (),
7969 (__mmask8) __U, __R);
7970 }
7971
7972 extern __inline __m128
7973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7974 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7975 {
7976 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7977 (__v2df) __B,
7978 __R);
7979 }
7980
7981 extern __inline __m128d
7982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7983 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7984 {
7985 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7986 (__v4sf) __B,
7987 __R);
7988 }
7989 #else
7990 #define _mm512_cvt_roundpd_ps(A, B) \
7991 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
7992
7993 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
7994 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
7995
7996 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
7997 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
7998
7999 #define _mm_cvt_roundsd_ss(A, B, C) \
8000 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
8001
8002 #define _mm_cvt_roundss_sd(A, B, C) \
8003 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
8004 #endif
8005
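/* Non-temporal (streaming) stores and loads.  The address passed to these
   intrinsics must be 64-byte aligned.  */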
8006 extern __inline void
8007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8008 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8009 {
8010 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8011 }
8012
8013 extern __inline void
8014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8015 _mm512_stream_ps (float *__P, __m512 __A)
8016 {
8017 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8018 }
8019
8020 extern __inline void
8021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8022 _mm512_stream_pd (double *__P, __m512d __A)
8023 {
8024 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8025 }
8026
8027 extern __inline __m512i
8028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8029 _mm512_stream_load_si512 (void *__P)
8030 {
8031 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8032 }
8033
8034 /* Constants for mantissa extraction and sign control (getmant intrinsics).  */
8035 typedef enum
8036 {
8037 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8038 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8039 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8040 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8041 } _MM_MANTISSA_NORM_ENUM;
8042
8043 typedef enum
8044 {
8045 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8046 _MM_MANT_SIGN_zero, /* sign = 0 */
8047 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8048 } _MM_MANTISSA_SIGN_ENUM;
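
/* Illustrative sketch (not part of the header): the getmant intrinsics
   combine the two enums above into a 4-bit immediate as (sign << 2) | norm,
   which is the (__C << 2) | __B expression used below.  For example,

     __m512d __x = _mm512_set1_pd (6.0);
     __m512d __m = _mm512_getmant_round_pd (__x, _MM_MANT_NORM_1_2,
					     _MM_MANT_SIGN_zero,
					     _MM_FROUND_CUR_DIRECTION);

   extracts each mantissa into the interval [1, 2) with the sign cleared,
   so every element of __m is 1.5 (6.0 == 1.5 * 2^2).  */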
8049
8050 #ifdef __OPTIMIZE__
8051 extern __inline __m128
8052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8053 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8054 {
8055 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8056 (__v4sf) __B,
8057 __R);
8058 }
8059
8060 extern __inline __m128d
8061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8062 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8063 {
8064 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8065 (__v2df) __B,
8066 __R);
8067 }
8068
8069 extern __inline __m512
8070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8071 _mm512_getexp_round_ps (__m512 __A, const int __R)
8072 {
8073 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8074 (__v16sf)
8075 _mm512_undefined_ps (),
8076 (__mmask16) -1, __R);
8077 }
8078
8079 extern __inline __m512
8080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8081 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8082 const int __R)
8083 {
8084 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8085 (__v16sf) __W,
8086 (__mmask16) __U, __R);
8087 }
8088
8089 extern __inline __m512
8090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8091 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8092 {
8093 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8094 (__v16sf)
8095 _mm512_setzero_ps (),
8096 (__mmask16) __U, __R);
8097 }
8098
8099 extern __inline __m512d
8100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8101 _mm512_getexp_round_pd (__m512d __A, const int __R)
8102 {
8103 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8104 (__v8df)
8105 _mm512_undefined_pd (),
8106 (__mmask8) -1, __R);
8107 }
8108
8109 extern __inline __m512d
8110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8111 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8112 const int __R)
8113 {
8114 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8115 (__v8df) __W,
8116 (__mmask8) __U, __R);
8117 }
8118
8119 extern __inline __m512d
8120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8121 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8122 {
8123 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8124 (__v8df)
8125 _mm512_setzero_pd (),
8126 (__mmask8) __U, __R);
8127 }
8128
8129 extern __inline __m512d
8130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8131 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8132 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8133 {
8134 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8135 (__C << 2) | __B,
8136 _mm512_undefined_pd (),
8137 (__mmask8) -1, __R);
8138 }
8139
8140 extern __inline __m512d
8141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8142 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8143 _MM_MANTISSA_NORM_ENUM __B,
8144 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8145 {
8146 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8147 (__C << 2) | __B,
8148 (__v8df) __W, __U,
8149 __R);
8150 }
8151
8152 extern __inline __m512d
8153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8154 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8155 _MM_MANTISSA_NORM_ENUM __B,
8156 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8157 {
8158 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8159 (__C << 2) | __B,
8160 (__v8df)
8161 _mm512_setzero_pd (),
8162 __U, __R);
8163 }
8164
8165 extern __inline __m512
8166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8167 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8168 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8169 {
8170 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8171 (__C << 2) | __B,
8172 _mm512_undefined_ps (),
8173 (__mmask16) -1, __R);
8174 }
8175
8176 extern __inline __m512
8177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8178 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8179 _MM_MANTISSA_NORM_ENUM __B,
8180 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8181 {
8182 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8183 (__C << 2) | __B,
8184 (__v16sf) __W, __U,
8185 __R);
8186 }
8187
8188 extern __inline __m512
8189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8190 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8191 _MM_MANTISSA_NORM_ENUM __B,
8192 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8193 {
8194 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8195 (__C << 2) | __B,
8196 (__v16sf)
8197 _mm512_setzero_ps (),
8198 __U, __R);
8199 }
8200
8201 extern __inline __m128d
8202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8203 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8204 _MM_MANTISSA_NORM_ENUM __C,
8205 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8206 {
8207 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8208 (__v2df) __B,
8209 (__D << 2) | __C,
8210 __R);
8211 }
8212
8213 extern __inline __m128
8214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8215 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8216 _MM_MANTISSA_NORM_ENUM __C,
8217 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8218 {
8219 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8220 (__v4sf) __B,
8221 (__D << 2) | __C,
8222 __R);
8223 }
8224
8225 #else
8226 #define _mm512_getmant_round_pd(X, B, C, R) \
8227 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8228 (int)(((C)<<2) | (B)), \
8229 (__v8df)(__m512d)_mm512_undefined_pd(), \
8230 (__mmask8)-1,\
8231 (R)))
8232
8233 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8234 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8235 (int)(((C)<<2) | (B)), \
8236 (__v8df)(__m512d)(W), \
8237 (__mmask8)(U),\
8238 (R)))
8239
8240 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8241 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8242 (int)(((C)<<2) | (B)), \
8243 (__v8df)(__m512d)_mm512_setzero_pd(), \
8244 (__mmask8)(U),\
8245 (R)))
8246 #define _mm512_getmant_round_ps(X, B, C, R) \
8247 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8248 (int)(((C)<<2) | (B)), \
8249 (__v16sf)(__m512)_mm512_undefined_ps(), \
8250 (__mmask16)-1,\
8251 (R)))
8252
8253 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8254 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8255 (int)(((C)<<2) | (B)), \
8256 (__v16sf)(__m512)(W), \
8257 (__mmask16)(U),\
8258 (R)))
8259
8260 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8261 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8262 (int)(((C)<<2) | (B)), \
8263 (__v16sf)(__m512)_mm512_setzero_ps(), \
8264 (__mmask16)(U),\
8265 (R)))
8266 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8267 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8268 (__v2df)(__m128d)(Y), \
8269 (int)(((D)<<2) | (C)), \
8270 (R)))
8271
8272 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8273 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8274 (__v4sf)(__m128)(Y), \
8275 (int)(((D)<<2) | (C)), \
8276 (R)))
8277
8278 #define _mm_getexp_round_ss(A, B, R) \
8279 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8280
8281 #define _mm_getexp_round_sd(A, B, R) \
8282 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8283
8284 #define _mm512_getexp_round_ps(A, R) \
8285 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8286 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
8287
8288 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8289 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8290 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8291
8292 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8293 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8294 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8295
8296 #define _mm512_getexp_round_pd(A, R) \
8297 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8298 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
8299
8300 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8301 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8302 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8303
8304 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8305 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8306 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8307 #endif
8308
8309 #ifdef __OPTIMIZE__
8310 extern __inline __m512
8311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8312 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8313 {
8314 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8315 (__v16sf)
8316 _mm512_undefined_ps (),
8317 -1, __R);
8318 }
8319
8320 extern __inline __m512
8321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8322 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8323 const int __imm, const int __R)
8324 {
8325 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8326 (__v16sf) __A,
8327 (__mmask16) __B, __R);
8328 }
8329
8330 extern __inline __m512
8331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8332 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8333 const int __imm, const int __R)
8334 {
8335 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8336 __imm,
8337 (__v16sf)
8338 _mm512_setzero_ps (),
8339 (__mmask16) __A, __R);
8340 }
8341
8342 extern __inline __m512d
8343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8344 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8345 {
8346 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8347 (__v8df)
8348 _mm512_undefined_pd (),
8349 -1, __R);
8350 }
8351
8352 extern __inline __m512d
8353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8354 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8355 __m512d __C, const int __imm, const int __R)
8356 {
8357 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8358 (__v8df) __A,
8359 (__mmask8) __B, __R);
8360 }
8361
8362 extern __inline __m512d
8363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8364 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8365 const int __imm, const int __R)
8366 {
8367 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8368 __imm,
8369 (__v8df)
8370 _mm512_setzero_pd (),
8371 (__mmask8) __A, __R);
8372 }
8373
8374 extern __inline __m128
8375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8376 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8377 {
8378 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8379 (__v4sf) __B, __imm, __R);
8380 }
8381
8382 extern __inline __m128d
8383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8384 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8385 const int __R)
8386 {
8387 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8388 (__v2df) __B, __imm, __R);
8389 }
8390
8391 #else
8392 #define _mm512_roundscale_round_ps(A, B, R) \
8393 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
8394 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
8395 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
8396 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8397 (int)(D), \
8398 (__v16sf)(__m512)(A), \
8399 (__mmask16)(B), R))
8400 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
8401 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8402 (int)(C), \
8403 (__v16sf)_mm512_setzero_ps(),\
8404 (__mmask16)(A), R))
8405 #define _mm512_roundscale_round_pd(A, B, R) \
8406 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
8407 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
8408 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
8409 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8410 (int)(D), \
8411 (__v8df)(__m512d)(A), \
8412 (__mmask8)(B), R))
8413 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
8414 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8415 (int)(C), \
8416 (__v8df)_mm512_setzero_pd(),\
8417 (__mmask8)(A), R))
8418 #define _mm_roundscale_round_ss(A, B, C, R) \
8419 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
8420 (__v4sf)(__m128)(B), (int)(C), R))
8421 #define _mm_roundscale_round_sd(A, B, C, R) \
8422 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
8423 (__v2df)(__m128d)(B), (int)(C), R))
8424 #endif
8425
8426 extern __inline __m512
8427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8428 _mm512_floor_ps (__m512 __A)
8429 {
8430 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8431 _MM_FROUND_FLOOR,
8432 (__v16sf) __A, -1,
8433 _MM_FROUND_CUR_DIRECTION);
8434 }
8435
8436 extern __inline __m512d
8437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8438 _mm512_floor_pd (__m512d __A)
8439 {
8440 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8441 _MM_FROUND_FLOOR,
8442 (__v8df) __A, -1,
8443 _MM_FROUND_CUR_DIRECTION);
8444 }
8445
8446 extern __inline __m512
8447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8448 _mm512_ceil_ps (__m512 __A)
8449 {
8450 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8451 _MM_FROUND_CEIL,
8452 (__v16sf) __A, -1,
8453 _MM_FROUND_CUR_DIRECTION);
8454 }
8455
8456 extern __inline __m512d
8457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8458 _mm512_ceil_pd (__m512d __A)
8459 {
8460 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8461 _MM_FROUND_CEIL,
8462 (__v8df) __A, -1,
8463 _MM_FROUND_CUR_DIRECTION);
8464 }
8465
8466 extern __inline __m512
8467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8468 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8469 {
8470 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8471 _MM_FROUND_FLOOR,
8472 (__v16sf) __W, __U,
8473 _MM_FROUND_CUR_DIRECTION);
8474 }
8475
8476 extern __inline __m512d
8477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8478 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8479 {
8480 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8481 _MM_FROUND_FLOOR,
8482 (__v8df) __W, __U,
8483 _MM_FROUND_CUR_DIRECTION);
8484 }
8485
8486 extern __inline __m512
8487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8488 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8489 {
8490 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8491 _MM_FROUND_CEIL,
8492 (__v16sf) __W, __U,
8493 _MM_FROUND_CUR_DIRECTION);
8494 }
8495
8496 extern __inline __m512d
8497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8498 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8499 {
8500 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8501 _MM_FROUND_CEIL,
8502 (__v8df) __W, __U,
8503 _MM_FROUND_CUR_DIRECTION);
8504 }
8505
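/* VALIGND/VALIGNQ: concatenate A (high) with B (low) and shift the pair
   right by __imm 32-bit or 64-bit elements, keeping the low 512 bits.  */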
8506 #ifdef __OPTIMIZE__
8507 extern __inline __m512i
8508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8509 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8510 {
8511 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8512 (__v16si) __B, __imm,
8513 (__v16si)
8514 _mm512_undefined_si512 (),
8515 (__mmask16) -1);
8516 }
8517
8518 extern __inline __m512i
8519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8520 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8521 __m512i __B, const int __imm)
8522 {
8523 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8524 (__v16si) __B, __imm,
8525 (__v16si) __W,
8526 (__mmask16) __U);
8527 }
8528
8529 extern __inline __m512i
8530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8531 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8532 const int __imm)
8533 {
8534 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8535 (__v16si) __B, __imm,
8536 (__v16si)
8537 _mm512_setzero_si512 (),
8538 (__mmask16) __U);
8539 }
8540
8541 extern __inline __m512i
8542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8543 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8544 {
8545 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8546 (__v8di) __B, __imm,
8547 (__v8di)
8548 _mm512_undefined_si512 (),
8549 (__mmask8) -1);
8550 }
8551
8552 extern __inline __m512i
8553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8554 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8555 __m512i __B, const int __imm)
8556 {
8557 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8558 (__v8di) __B, __imm,
8559 (__v8di) __W,
8560 (__mmask8) __U);
8561 }
8562
8563 extern __inline __m512i
8564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8565 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8566 const int __imm)
8567 {
8568 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8569 (__v8di) __B, __imm,
8570 (__v8di)
8571 _mm512_setzero_si512 (),
8572 (__mmask8) __U);
8573 }
8574 #else
8575 #define _mm512_alignr_epi32(X, Y, C) \
8576 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8577 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\
8578 (__mmask16)-1))
8579
8580 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
8581 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8582 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
8583 (__mmask16)(U)))
8584
8585 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
8586 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8587 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
8588 (__mmask16)(U)))
8589
8590 #define _mm512_alignr_epi64(X, Y, C) \
8591 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8592 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (), \
8593 (__mmask8)-1))
8594
8595 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
8596 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8597 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
8598
8599 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
8600 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8601 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
8602 (__mmask8)(U)))
8603 #endif
8604
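/* Element-wise integer comparisons producing a bit mask, one bit per
   element.  The write-masked forms clear result bits whose corresponding
   bit in the supplied mask is zero.  */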
8605 extern __inline __mmask16
8606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8607 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8608 {
8609 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8610 (__v16si) __B,
8611 (__mmask16) -1);
8612 }
8613
8614 extern __inline __mmask16
8615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8616 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8617 {
8618 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8619 (__v16si) __B, __U);
8620 }
8621
8622 extern __inline __mmask8
8623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8624 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8625 {
8626 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8627 (__v8di) __B, __U);
8628 }
8629
8630 extern __inline __mmask8
8631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8632 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8633 {
8634 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8635 (__v8di) __B,
8636 (__mmask8) -1);
8637 }
8638
8639 extern __inline __mmask16
8640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8641 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8642 {
8643 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8644 (__v16si) __B,
8645 (__mmask16) -1);
8646 }
8647
8648 extern __inline __mmask16
8649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8650 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8651 {
8652 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8653 (__v16si) __B, __U);
8654 }
8655
8656 extern __inline __mmask8
8657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8658 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8659 {
8660 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8661 (__v8di) __B, __U);
8662 }
8663
8664 extern __inline __mmask8
8665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8666 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8667 {
8668 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8669 (__v8di) __B,
8670 (__mmask8) -1);
8671 }
8672
8673 extern __inline __mmask16
8674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8675 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8676 {
8677 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8678 (__v16si) __Y, 5,
8679 (__mmask16) -1);
8680 }
8681
8682 extern __inline __mmask16
8683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8684 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8685 {
8686 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8687 (__v16si) __Y, 5,
8688 (__mmask16) __M);
8689 }
8690
8691 extern __inline __mmask16
8692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8693 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8694 {
8695 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8696 (__v16si) __Y, 5,
8697 (__mmask16) __M);
8698 }
8699
8700 extern __inline __mmask16
8701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8702 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8703 {
8704 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8705 (__v16si) __Y, 5,
8706 (__mmask16) -1);
8707 }
8708
8709 extern __inline __mmask8
8710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8711 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8712 {
8713 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8714 (__v8di) __Y, 5,
8715 (__mmask8) __M);
8716 }
8717
8718 extern __inline __mmask8
8719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8720 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8721 {
8722 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8723 (__v8di) __Y, 5,
8724 (__mmask8) -1);
8725 }
8726
8727 extern __inline __mmask8
8728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8729 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8730 {
8731 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8732 (__v8di) __Y, 5,
8733 (__mmask8) __M);
8734 }
8735
8736 extern __inline __mmask8
8737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8738 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8739 {
8740 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8741 (__v8di) __Y, 5,
8742 (__mmask8) -1);
8743 }
8744
8745 extern __inline __mmask16
8746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8747 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8748 {
8749 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8750 (__v16si) __Y, 2,
8751 (__mmask16) __M);
8752 }
8753
8754 extern __inline __mmask16
8755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8756 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8757 {
8758 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8759 (__v16si) __Y, 2,
8760 (__mmask16) -1);
8761 }
8762
8763 extern __inline __mmask16
8764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8765 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8766 {
8767 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8768 (__v16si) __Y, 2,
8769 (__mmask16) __M);
8770 }
8771
8772 extern __inline __mmask16
8773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8774 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8775 {
8776 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8777 (__v16si) __Y, 2,
8778 (__mmask16) -1);
8779 }
8780
8781 extern __inline __mmask8
8782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8783 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8784 {
8785 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8786 (__v8di) __Y, 2,
8787 (__mmask8) __M);
8788 }
8789
8790 extern __inline __mmask8
8791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8792 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8793 {
8794 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8795 (__v8di) __Y, 2,
8796 (__mmask8) -1);
8797 }
8798
8799 extern __inline __mmask8
8800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8801 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8802 {
8803 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8804 (__v8di) __Y, 2,
8805 (__mmask8) __M);
8806 }
8807
8808 extern __inline __mmask8
8809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8810 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8811 {
8812 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8813 (__v8di) __Y, 2,
8814 (__mmask8) -1);
8815 }
8816
8817 extern __inline __mmask16
8818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8819 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8820 {
8821 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8822 (__v16si) __Y, 1,
8823 (__mmask16) __M);
8824 }
8825
8826 extern __inline __mmask16
8827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8828 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8829 {
8830 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8831 (__v16si) __Y, 1,
8832 (__mmask16) -1);
8833 }
8834
8835 extern __inline __mmask16
8836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8837 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8838 {
8839 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8840 (__v16si) __Y, 1,
8841 (__mmask16) __M);
8842 }
8843
8844 extern __inline __mmask16
8845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8847 {
8848 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8849 (__v16si) __Y, 1,
8850 (__mmask16) -1);
8851 }
8852
8853 extern __inline __mmask8
8854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8855 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8856 {
8857 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8858 (__v8di) __Y, 1,
8859 (__mmask8) __M);
8860 }
8861
8862 extern __inline __mmask8
8863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8864 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8865 {
8866 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8867 (__v8di) __Y, 1,
8868 (__mmask8) -1);
8869 }
8870
8871 extern __inline __mmask8
8872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8873 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8874 {
8875 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8876 (__v8di) __Y, 1,
8877 (__mmask8) __M);
8878 }
8879
8880 extern __inline __mmask8
8881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8882 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8883 {
8884 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8885 (__v8di) __Y, 1,
8886 (__mmask8) -1);
8887 }
8888
8889 extern __inline __mmask16
8890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8891 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8892 {
8893 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8894 (__v16si) __Y, 4,
8895 (__mmask16) -1);
8896 }
8897
8898 extern __inline __mmask16
8899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8900 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8901 {
8902 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8903 (__v16si) __Y, 4,
8904 (__mmask16) __M);
8905 }
8906
8907 extern __inline __mmask16
8908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8909 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8910 {
8911 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8912 (__v16si) __Y, 4,
8913 (__mmask16) __M);
8914 }
8915
8916 extern __inline __mmask16
8917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8918 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8919 {
8920 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8921 (__v16si) __Y, 4,
8922 (__mmask16) -1);
8923 }
8924
8925 extern __inline __mmask8
8926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8927 _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8928 {
8929 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8930 (__v8di) __Y, 4,
8931 (__mmask8) __M);
8932 }
8933
8934 extern __inline __mmask8
8935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8936 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8937 {
8938 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8939 (__v8di) __Y, 4,
8940 (__mmask8) -1);
8941 }
8942
8943 extern __inline __mmask8
8944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8945 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8946 {
8947 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8948 (__v8di) __Y, 4,
8949 (__mmask8) __M);
8950 }
8951
8952 extern __inline __mmask8
8953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8954 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8955 {
8956 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8957 (__v8di) __Y, 4,
8958 (__mmask8) -1);
8959 }
8960
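/* Immediate predicate encodings for the generic _mm512_[mask_]cmp_*
   intrinsics below; e.g. _mm512_cmp_epi32_mask (x, y, _MM_CMPINT_LE) is
   equivalent to _mm512_cmple_epi32_mask (x, y).  */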
8961 #define _MM_CMPINT_EQ 0x0
8962 #define _MM_CMPINT_LT 0x1
8963 #define _MM_CMPINT_LE 0x2
8964 #define _MM_CMPINT_UNUSED 0x3
8965 #define _MM_CMPINT_NE 0x4
8966 #define _MM_CMPINT_NLT 0x5
8967 #define _MM_CMPINT_GE 0x5
8968 #define _MM_CMPINT_NLE 0x6
8969 #define _MM_CMPINT_GT 0x6
8970
8971 #ifdef __OPTIMIZE__
8972 extern __inline __mmask8
8973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8974 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8975 {
8976 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8977 (__v8di) __Y, __P,
8978 (__mmask8) -1);
8979 }
8980
8981 extern __inline __mmask16
8982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8983 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8984 {
8985 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8986 (__v16si) __Y, __P,
8987 (__mmask16) -1);
8988 }
8989
8990 extern __inline __mmask8
8991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8992 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8993 {
8994 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8995 (__v8di) __Y, __P,
8996 (__mmask8) -1);
8997 }
8998
8999 extern __inline __mmask16
9000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9001 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9002 {
9003 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9004 (__v16si) __Y, __P,
9005 (__mmask16) -1);
9006 }
9007
9008 extern __inline __mmask8
9009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9010 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9011 const int __R)
9012 {
9013 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9014 (__v8df) __Y, __P,
9015 (__mmask8) -1, __R);
9016 }
9017
9018 extern __inline __mmask16
9019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9020 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9021 {
9022 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9023 (__v16sf) __Y, __P,
9024 (__mmask16) -1, __R);
9025 }
9026
9027 extern __inline __mmask8
9028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9029 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9030 const int __P)
9031 {
9032 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9033 (__v8di) __Y, __P,
9034 (__mmask8) __U);
9035 }
9036
9037 extern __inline __mmask16
9038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9039 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9040 const int __P)
9041 {
9042 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9043 (__v16si) __Y, __P,
9044 (__mmask16) __U);
9045 }
9046
9047 extern __inline __mmask8
9048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9049 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9050 const int __P)
9051 {
9052 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9053 (__v8di) __Y, __P,
9054 (__mmask8) __U);
9055 }
9056
9057 extern __inline __mmask16
9058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9059 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9060 const int __P)
9061 {
9062 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9063 (__v16si) __Y, __P,
9064 (__mmask16) __U);
9065 }
9066
9067 extern __inline __mmask8
9068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9069 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9070 const int __P, const int __R)
9071 {
9072 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9073 (__v8df) __Y, __P,
9074 (__mmask8) __U, __R);
9075 }
9076
9077 extern __inline __mmask16
9078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9079 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9080 const int __P, const int __R)
9081 {
9082 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9083 (__v16sf) __Y, __P,
9084 (__mmask16) __U, __R);
9085 }
9086
9087 extern __inline __mmask8
9088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9089 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9090 {
9091 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9092 (__v2df) __Y, __P,
9093 (__mmask8) -1, __R);
9094 }
9095
9096 extern __inline __mmask8
9097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9098 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9099 const int __P, const int __R)
9100 {
9101 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9102 (__v2df) __Y, __P,
9103 (__mmask8) __M, __R);
9104 }
9105
9106 extern __inline __mmask8
9107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9109 {
9110 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9111 (__v4sf) __Y, __P,
9112 (__mmask8) -1, __R);
9113 }
9114
9115 extern __inline __mmask8
9116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9117 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9118 const int __P, const int __R)
9119 {
9120 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9121 (__v4sf) __Y, __P,
9122 (__mmask8) __M, __R);
9123 }
9124
9125 #else
9126 #define _mm512_cmp_epi64_mask(X, Y, P) \
9127 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9128 (__v8di)(__m512i)(Y), (int)(P),\
9129 (__mmask8)-1))
9130
9131 #define _mm512_cmp_epi32_mask(X, Y, P) \
9132 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9133 (__v16si)(__m512i)(Y), (int)(P), \
9134 (__mmask16)-1))
9135
9136 #define _mm512_cmp_epu64_mask(X, Y, P) \
9137 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9138 (__v8di)(__m512i)(Y), (int)(P),\
9139 (__mmask8)-1))
9140
9141 #define _mm512_cmp_epu32_mask(X, Y, P) \
9142 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9143 (__v16si)(__m512i)(Y), (int)(P), \
9144 (__mmask16)-1))
9145
9146 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
9147 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9148 (__v8df)(__m512d)(Y), (int)(P),\
9149 (__mmask8)-1, R))
9150
9151 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
9152 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9153 (__v16sf)(__m512)(Y), (int)(P),\
9154 (__mmask16)-1, R))
9155
9156 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
9157 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9158 (__v8di)(__m512i)(Y), (int)(P),\
9159 (__mmask8)M))
9160
9161 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
9162 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9163 (__v16si)(__m512i)(Y), (int)(P), \
9164 (__mmask16)M))
9165
9166 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
9167 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9168 (__v8di)(__m512i)(Y), (int)(P),\
9169 (__mmask8)M))
9170
9171 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
9172 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9173 (__v16si)(__m512i)(Y), (int)(P), \
9174 (__mmask16)M))
9175
9176 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
9177 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9178 (__v8df)(__m512d)(Y), (int)(P),\
9179 (__mmask8)M, R))
9180
9181 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
9182 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9183 (__v16sf)(__m512)(Y), (int)(P),\
9184 (__mmask16)M, R))
9185
9186 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
9187 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9188 (__v2df)(__m128d)(Y), (int)(P),\
9189 (__mmask8)-1, R))
9190
9191 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
9192 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9193 (__v2df)(__m128d)(Y), (int)(P),\
9194 (M), R))
9195
9196 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
9197 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9198 (__v4sf)(__m128)(Y), (int)(P), \
9199 (__mmask8)-1, R))
9200
9201 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
9202 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9203 (__v4sf)(__m128)(Y), (int)(P), \
9204 (M), R))
9205 #endif
9206
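/* Gathers load elements from __addr + __index * __scale; masked-off
   elements keep the pass-through value.  Scatters store elements to
   __addr + __index * __scale under the given mask.  */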
9207 #ifdef __OPTIMIZE__
9208 extern __inline __m512
9209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9210 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9211 {
9212 __m512 v1_old = _mm512_undefined_ps ();
9213 __mmask16 mask = 0xFFFF;
9214
9215 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9216 __addr,
9217 (__v16si) __index,
9218 mask, __scale);
9219 }
9220
9221 extern __inline __m512
9222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9223 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9224 __m512i __index, float const *__addr, int __scale)
9225 {
9226 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9227 __addr,
9228 (__v16si) __index,
9229 __mask, __scale);
9230 }
9231
9232 extern __inline __m512d
9233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9234 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9235 {
9236 __m512d v1_old = _mm512_undefined_pd ();
9237 __mmask8 mask = 0xFF;
9238
9239 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9240 __addr,
9241 (__v8si) __index, mask,
9242 __scale);
9243 }
9244
9245 extern __inline __m512d
9246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9247 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9248 __m256i __index, double const *__addr, int __scale)
9249 {
9250 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9251 __addr,
9252 (__v8si) __index,
9253 __mask, __scale);
9254 }
9255
9256 extern __inline __m256
9257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9258 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9259 {
9260 __m256 v1_old = _mm256_undefined_ps ();
9261 __mmask8 mask = 0xFF;
9262
9263 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9264 __addr,
9265 (__v8di) __index, mask,
9266 __scale);
9267 }
9268
9269 extern __inline __m256
9270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9271 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9272 __m512i __index, float const *__addr, int __scale)
9273 {
9274 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9275 __addr,
9276 (__v8di) __index,
9277 __mask, __scale);
9278 }
9279
9280 extern __inline __m512d
9281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9282 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9283 {
9284 __m512d v1_old = _mm512_undefined_pd ();
9285 __mmask8 mask = 0xFF;
9286
9287 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9288 __addr,
9289 (__v8di) __index, mask,
9290 __scale);
9291 }
9292
9293 extern __inline __m512d
9294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9295 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9296 __m512i __index, double const *__addr, int __scale)
9297 {
9298 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9299 __addr,
9300 (__v8di) __index,
9301 __mask, __scale);
9302 }
9303
9304 extern __inline __m512i
9305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9306 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9307 {
9308 __m512i v1_old = _mm512_undefined_si512 ();
9309 __mmask16 mask = 0xFFFF;
9310
9311 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9312 __addr,
9313 (__v16si) __index,
9314 mask, __scale);
9315 }
9316
9317 extern __inline __m512i
9318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9319 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9320 __m512i __index, int const *__addr, int __scale)
9321 {
9322 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9323 __addr,
9324 (__v16si) __index,
9325 __mask, __scale);
9326 }
9327
9328 extern __inline __m512i
9329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9330 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9331 {
9332 __m512i v1_old = _mm512_undefined_si512 ();
9333 __mmask8 mask = 0xFF;
9334
9335 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9336 __addr,
9337 (__v8si) __index, mask,
9338 __scale);
9339 }
9340
9341 extern __inline __m512i
9342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9343 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9344 __m256i __index, long long const *__addr,
9345 int __scale)
9346 {
9347 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9348 __addr,
9349 (__v8si) __index,
9350 __mask, __scale);
9351 }
9352
9353 extern __inline __m256i
9354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9355 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9356 {
9357 __m256i v1_old = _mm256_undefined_si256 ();
9358 __mmask8 mask = 0xFF;
9359
9360 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9361 __addr,
9362 (__v8di) __index,
9363 mask, __scale);
9364 }
9365
9366 extern __inline __m256i
9367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9368 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9369 __m512i __index, int const *__addr, int __scale)
9370 {
9371 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9372 __addr,
9373 (__v8di) __index,
9374 __mask, __scale);
9375 }
9376
9377 extern __inline __m512i
9378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9379 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9380 {
9381 __m512i v1_old = _mm512_undefined_si512 ();
9382 __mmask8 mask = 0xFF;
9383
9384 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9385 __addr,
9386 (__v8di) __index, mask,
9387 __scale);
9388 }
9389
9390 extern __inline __m512i
9391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9392 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9393 __m512i __index, long long const *__addr,
9394 int __scale)
9395 {
9396 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9397 __addr,
9398 (__v8di) __index,
9399 __mask, __scale);
9400 }
9401
9402 extern __inline void
9403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9404 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9405 {
9406 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9407 (__v16si) __index, (__v16sf) __v1, __scale);
9408 }
9409
9410 extern __inline void
9411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9412 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9413 __m512i __index, __m512 __v1, int __scale)
9414 {
9415 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9416 (__v16sf) __v1, __scale);
9417 }
9418
9419 extern __inline void
9420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9421 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9422 int __scale)
9423 {
9424 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9425 (__v8si) __index, (__v8df) __v1, __scale);
9426 }
9427
9428 extern __inline void
9429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9430 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9431 __m256i __index, __m512d __v1, int __scale)
9432 {
9433 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9434 (__v8df) __v1, __scale);
9435 }
9436
9437 extern __inline void
9438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9439 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9440 {
9441 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9442 (__v8di) __index, (__v8sf) __v1, __scale);
9443 }
9444
9445 extern __inline void
9446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9447 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9448 __m512i __index, __m256 __v1, int __scale)
9449 {
9450 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9451 (__v8sf) __v1, __scale);
9452 }
9453
9454 extern __inline void
9455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9456 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9457 int __scale)
9458 {
9459 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9460 (__v8di) __index, (__v8df) __v1, __scale);
9461 }
9462
9463 extern __inline void
9464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9465 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9466 __m512i __index, __m512d __v1, int __scale)
9467 {
9468 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9469 (__v8df) __v1, __scale);
9470 }
9471
9472 extern __inline void
9473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9474 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9475 __m512i __v1, int __scale)
9476 {
9477 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9478 (__v16si) __index, (__v16si) __v1, __scale);
9479 }
9480
9481 extern __inline void
9482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9483 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9484 __m512i __index, __m512i __v1, int __scale)
9485 {
9486 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9487 (__v16si) __v1, __scale);
9488 }
9489
9490 extern __inline void
9491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9492 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9493 __m512i __v1, int __scale)
9494 {
9495 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9496 (__v8si) __index, (__v8di) __v1, __scale);
9497 }
9498
9499 extern __inline void
9500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9501 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9502 __m256i __index, __m512i __v1, int __scale)
9503 {
9504 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9505 (__v8di) __v1, __scale);
9506 }
9507
9508 extern __inline void
9509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9510 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9511 __m256i __v1, int __scale)
9512 {
9513 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9514 (__v8di) __index, (__v8si) __v1, __scale);
9515 }
9516
9517 extern __inline void
9518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9519 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9520 __m512i __index, __m256i __v1, int __scale)
9521 {
9522 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9523 (__v8si) __v1, __scale);
9524 }
9525
9526 extern __inline void
9527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9528 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9529 __m512i __v1, int __scale)
9530 {
9531 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9532 (__v8di) __index, (__v8di) __v1, __scale);
9533 }
9534
9535 extern __inline void
9536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9537 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9538 __m512i __index, __m512i __v1, int __scale)
9539 {
9540 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9541 (__v8di) __v1, __scale);
9542 }
9543 #else
9544 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
9545 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
9546 (float const *)ADDR, \
9547 (__v16si)(__m512i)INDEX, \
9548 (__mmask16)0xFFFF, (int)SCALE)
9549
9550 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9551 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \
9552 (float const *)ADDR, \
9553 (__v16si)(__m512i)INDEX, \
9554 (__mmask16)MASK, (int)SCALE)
9555
9556 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
9557 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
9558 (double const *)ADDR, \
9559 (__v8si)(__m256i)INDEX, \
9560 (__mmask8)0xFF, (int)SCALE)
9561
9562 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9563 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \
9564 (double const *)ADDR, \
9565 (__v8si)(__m256i)INDEX, \
9566 (__mmask8)MASK, (int)SCALE)
9567
9568 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
9569 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
9570 (float const *)ADDR, \
9571 (__v8di)(__m512i)INDEX, \
9572 (__mmask8)0xFF, (int)SCALE)
9573
9574 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9575 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \
9576 (float const *)ADDR, \
9577 (__v8di)(__m512i)INDEX, \
9578 (__mmask8)MASK, (int)SCALE)
9579
9580 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
9581 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
9582 (double const *)ADDR, \
9583 (__v8di)(__m512i)INDEX, \
9584 (__mmask8)0xFF, (int)SCALE)
9585
9586 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9587 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \
9588 (double const *)ADDR, \
9589 (__v8di)(__m512i)INDEX, \
9590 (__mmask8)MASK, (int)SCALE)
9591
9592 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
9593 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \
9594 (int const *)ADDR, \
9595 (__v16si)(__m512i)INDEX, \
9596 (__mmask16)0xFFFF, (int)SCALE)
9597
9598 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9599 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \
9600 (int const *)ADDR, \
9601 (__v16si)(__m512i)INDEX, \
9602 (__mmask16)MASK, (int)SCALE)
9603
9604 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
9605 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \
9606 (long long const *)ADDR, \
9607 (__v8si)(__m256i)INDEX, \
9608 (__mmask8)0xFF, (int)SCALE)
9609
9610 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9611 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \
9612 (long long const *)ADDR, \
9613 (__v8si)(__m256i)INDEX, \
9614 (__mmask8)MASK, (int)SCALE)
9615
9616 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
9617 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
9618 (int const *)ADDR, \
9619 (__v8di)(__m512i)INDEX, \
9620 (__mmask8)0xFF, (int)SCALE)
9621
9622 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9623 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \
9624 (int const *)ADDR, \
9625 (__v8di)(__m512i)INDEX, \
9626 (__mmask8)MASK, (int)SCALE)
9627
9628 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
9629 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \
9630 (long long const *)ADDR, \
9631 (__v8di)(__m512i)INDEX, \
9632 (__mmask8)0xFF, (int)SCALE)
9633
9634 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9635 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \
9636 (long long const *)ADDR, \
9637 (__v8di)(__m512i)INDEX, \
9638 (__mmask8)MASK, (int)SCALE)
9639
9640 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
9641 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF, \
9642 (__v16si)(__m512i)INDEX, \
9643 (__v16sf)(__m512)V1, (int)SCALE)
9644
9645 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9646 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK, \
9647 (__v16si)(__m512i)INDEX, \
9648 (__v16sf)(__m512)V1, (int)SCALE)
9649
9650 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
9651 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF, \
9652 (__v8si)(__m256i)INDEX, \
9653 (__v8df)(__m512d)V1, (int)SCALE)
9654
9655 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9656 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK, \
9657 (__v8si)(__m256i)INDEX, \
9658 (__v8df)(__m512d)V1, (int)SCALE)
9659
9660 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
9661 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF, \
9662 (__v8di)(__m512i)INDEX, \
9663 (__v8sf)(__m256)V1, (int)SCALE)
9664
9665 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9666 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)MASK, \
9667 (__v8di)(__m512i)INDEX, \
9668 (__v8sf)(__m256)V1, (int)SCALE)
9669
9670 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
9671 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF, \
9672 (__v8di)(__m512i)INDEX, \
9673 (__v8df)(__m512d)V1, (int)SCALE)
9674
9675 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9676 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK, \
9677 (__v8di)(__m512i)INDEX, \
9678 (__v8df)(__m512d)V1, (int)SCALE)
9679
9680 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
9681 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF, \
9682 (__v16si)(__m512i)INDEX, \
9683 (__v16si)(__m512i)V1, (int)SCALE)
9684
9685 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9686 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK, \
9687 (__v16si)(__m512i)INDEX, \
9688 (__v16si)(__m512i)V1, (int)SCALE)
9689
9690 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
9691 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9692 (__v8si)(__m256i)INDEX, \
9693 (__v8di)(__m512i)V1, (int)SCALE)
9694
9695 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9696 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)MASK, \
9697 (__v8si)(__m256i)INDEX, \
9698 (__v8di)(__m512i)V1, (int)SCALE)
9699
9700 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
9701 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF, \
9702 (__v8di)(__m512i)INDEX, \
9703 (__v8si)(__m256i)V1, (int)SCALE)
9704
9705 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9706 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK, \
9707 (__v8di)(__m512i)INDEX, \
9708 (__v8si)(__m256i)V1, (int)SCALE)
9709
9710 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
9711 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9712 (__v8di)(__m512i)INDEX, \
9713 (__v8di)(__m512i)V1, (int)SCALE)
9714
9715 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9716 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK, \
9717 (__v8di)(__m512i)INDEX, \
9718 (__v8di)(__m512i)V1, (int)SCALE)
9719 #endif
9720
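/* Compress packs the elements selected by the mask into the low part of
   the destination (or to memory for the compressstoreu forms); expand is
   the inverse, spreading consecutive source elements into the selected
   lanes.  */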
9721 extern __inline __m512d
9722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9723 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9724 {
9725 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9726 (__v8df) __W,
9727 (__mmask8) __U);
9728 }
9729
9730 extern __inline __m512d
9731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9732 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9733 {
9734 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9735 (__v8df)
9736 _mm512_setzero_pd (),
9737 (__mmask8) __U);
9738 }
9739
9740 extern __inline void
9741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9742 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9743 {
9744 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9745 (__mmask8) __U);
9746 }
9747
9748 extern __inline __m512
9749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9750 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9751 {
9752 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9753 (__v16sf) __W,
9754 (__mmask16) __U);
9755 }
9756
9757 extern __inline __m512
9758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9759 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9760 {
9761 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9762 (__v16sf)
9763 _mm512_setzero_ps (),
9764 (__mmask16) __U);
9765 }
9766
9767 extern __inline void
9768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9770 {
9771 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9772 (__mmask16) __U);
9773 }
9774
9775 extern __inline __m512i
9776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9777 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9778 {
9779 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9780 (__v8di) __W,
9781 (__mmask8) __U);
9782 }
9783
9784 extern __inline __m512i
9785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9786 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9787 {
9788 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9789 (__v8di)
9790 _mm512_setzero_si512 (),
9791 (__mmask8) __U);
9792 }
9793
9794 extern __inline void
9795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9797 {
9798 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9799 (__mmask8) __U);
9800 }
9801
9802 extern __inline __m512i
9803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9804 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9805 {
9806 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9807 (__v16si) __W,
9808 (__mmask16) __U);
9809 }
9810
9811 extern __inline __m512i
9812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9813 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9814 {
9815 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9816 (__v16si)
9817 _mm512_setzero_si512 (),
9818 (__mmask16) __U);
9819 }
9820
9821 extern __inline void
9822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9823 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9824 {
9825 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9826 (__mmask16) __U);
9827 }
9828
9829 extern __inline __m512d
9830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9831 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9832 {
9833 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9834 (__v8df) __W,
9835 (__mmask8) __U);
9836 }
9837
9838 extern __inline __m512d
9839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9840 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9841 {
9842 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9843 (__v8df)
9844 _mm512_setzero_pd (),
9845 (__mmask8) __U);
9846 }
9847
9848 extern __inline __m512d
9849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9850 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9851 {
9852 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9853 (__v8df) __W,
9854 (__mmask8) __U);
9855 }
9856
9857 extern __inline __m512d
9858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9859 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9860 {
9861 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9862 (__v8df)
9863 _mm512_setzero_pd (),
9864 (__mmask8) __U);
9865 }
9866
9867 extern __inline __m512
9868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9869 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9870 {
9871 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9872 (__v16sf) __W,
9873 (__mmask16) __U);
9874 }
9875
9876 extern __inline __m512
9877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9878 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9879 {
9880 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9881 (__v16sf)
9882 _mm512_setzero_ps (),
9883 (__mmask16) __U);
9884 }
9885
9886 extern __inline __m512
9887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9888 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9889 {
9890 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9891 (__v16sf) __W,
9892 (__mmask16) __U);
9893 }
9894
9895 extern __inline __m512
9896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9897 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9898 {
9899 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9900 (__v16sf)
9901 _mm512_setzero_ps (),
9902 (__mmask16) __U);
9903 }
9904
9905 extern __inline __m512i
9906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9907 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9908 {
9909 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9910 (__v8di) __W,
9911 (__mmask8) __U);
9912 }
9913
9914 extern __inline __m512i
9915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9916 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9917 {
9918 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9919 (__v8di)
9920 _mm512_setzero_si512 (),
9921 (__mmask8) __U);
9922 }
9923
9924 extern __inline __m512i
9925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9927 {
9928 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9929 (__v8di) __W,
9930 (__mmask8) __U);
9931 }
9932
9933 extern __inline __m512i
9934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9935 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9936 {
9937 return (__m512i)
9938 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9939 (__v8di)
9940 _mm512_setzero_si512 (),
9941 (__mmask8) __U);
9942 }
9943
9944 extern __inline __m512i
9945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9946 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9947 {
9948 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9949 (__v16si) __W,
9950 (__mmask16) __U);
9951 }
9952
9953 extern __inline __m512i
9954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9955 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9956 {
9957 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9958 (__v16si)
9959 _mm512_setzero_si512 (),
9960 (__mmask16) __U);
9961 }
9962
9963 extern __inline __m512i
9964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9965 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9966 {
9967 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9968 (__v16si) __W,
9969 (__mmask16) __U);
9970 }
9971
9972 extern __inline __m512i
9973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9974 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9975 {
9976 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9977 (__v16si)
9978 _mm512_setzero_si512
9979 (), (__mmask16) __U);
9980 }
9981
9982 /* Mask arithmetic operations */
9983 extern __inline __mmask16
9984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9985 _mm512_kand (__mmask16 __A, __mmask16 __B)
9986 {
9987 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9988 }
9989
9990 extern __inline __mmask16
9991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9992 _mm512_kandn (__mmask16 __A, __mmask16 __B)
9993 {
9994 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
9995 }
9996
9997 extern __inline __mmask16
9998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9999 _mm512_kor (__mmask16 __A, __mmask16 __B)
10000 {
10001 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10002 }
10003
10004 extern __inline int
10005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10006 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10007 {
10008 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10009 (__mmask16) __B);
10010 }
10011
10012 extern __inline int
10013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10014 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10015 {
10016 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10017 (__mmask16) __B);
10018 }
10019
10020 extern __inline __mmask16
10021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10022 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10023 {
10024 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10025 }
10026
10027 extern __inline __mmask16
10028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10029 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10030 {
10031 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10032 }
10033
10034 extern __inline __mmask16
10035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10036 _mm512_knot (__mmask16 __A)
10037 {
10038 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10039 }
10040
10041 extern __inline __mmask16
10042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10043 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10044 {
10045 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10046 }
10047
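/* Insert the 128-bit value __D into __C at the 128-bit position selected
   by __imm.  */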
10048 #ifdef __OPTIMIZE__
10049 extern __inline __m512i
10050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10051 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10052 const int __imm)
10053 {
10054 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10055 (__v4si) __D,
10056 __imm,
10057 (__v16si)
10058 _mm512_setzero_si512 (),
10059 __B);
10060 }
10061
10062 extern __inline __m512
10063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10064 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10065 const int __imm)
10066 {
10067 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10068 (__v4sf) __D,
10069 __imm,
10070 (__v16sf)
10071 _mm512_setzero_ps (), __B);
10072 }
10073
10074 extern __inline __m512i
10075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10076 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10077 __m128i __D, const int __imm)
10078 {
10079 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10080 (__v4si) __D,
10081 __imm,
10082 (__v16si) __A,
10083 __B);
10084 }
10085
10086 extern __inline __m512
10087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10088 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10089 __m128 __D, const int __imm)
10090 {
10091 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10092 (__v4sf) __D,
10093 __imm,
10094 (__v16sf) __A, __B);
10095 }
10096 #else
10097 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
10098 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10099 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
10100 (__mmask16)(A)))
10101
10102 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
10103 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10104 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
10105 (__mmask16)(A)))
10106
10107 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
10108 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10109 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
10110 (__mmask16)(B)))
10111
10112 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
10113 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10114 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
10115 (__mmask16)(B)))
10116 #endif
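
/* Illustrative usage sketch (not part of the original header): the insert
   intrinsics above replace one 128-bit lane of a 512-bit vector; the lane
   index must be a compile-time constant in the range 0..3.  With merge
   masking, elements whose mask bit is clear are taken from the first
   argument instead:

     __m512i
     replace_lane2 (__m512i dst, __mmask16 m, __m512i src, __m128i part)
     {
       return _mm512_mask_inserti32x4 (dst, m, src, part, 2);
     }
*/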
10117
10118 extern __inline __m512i
10119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10120 _mm512_max_epi64 (__m512i __A, __m512i __B)
10121 {
10122 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10123 (__v8di) __B,
10124 (__v8di)
10125 _mm512_undefined_si512 (),
10126 (__mmask8) -1);
10127 }
10128
10129 extern __inline __m512i
10130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10131 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10132 {
10133 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10134 (__v8di) __B,
10135 (__v8di)
10136 _mm512_setzero_si512 (),
10137 __M);
10138 }
10139
10140 extern __inline __m512i
10141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10142 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10143 {
10144 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10145 (__v8di) __B,
10146 (__v8di) __W, __M);
10147 }
10148
10149 extern __inline __m512i
10150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10151 _mm512_min_epi64 (__m512i __A, __m512i __B)
10152 {
10153 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10154 (__v8di) __B,
10155 (__v8di)
10156 _mm512_undefined_si512 (),
10157 (__mmask8) -1);
10158 }
10159
10160 extern __inline __m512i
10161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10162 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10163 {
10164 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10165 (__v8di) __B,
10166 (__v8di) __W, __M);
10167 }
10168
10169 extern __inline __m512i
10170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10171 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10172 {
10173 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10174 (__v8di) __B,
10175 (__v8di)
10176 _mm512_setzero_si512 (),
10177 __M);
10178 }
10179
10180 extern __inline __m512i
10181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10182 _mm512_max_epu64 (__m512i __A, __m512i __B)
10183 {
10184 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10185 (__v8di) __B,
10186 (__v8di)
10187 _mm512_undefined_si512 (),
10188 (__mmask8) -1);
10189 }
10190
10191 extern __inline __m512i
10192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10193 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10194 {
10195 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10196 (__v8di) __B,
10197 (__v8di)
10198 _mm512_setzero_si512 (),
10199 __M);
10200 }
10201
10202 extern __inline __m512i
10203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10204 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10205 {
10206 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10207 (__v8di) __B,
10208 (__v8di) __W, __M);
10209 }
10210
10211 extern __inline __m512i
10212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10213 _mm512_min_epu64 (__m512i __A, __m512i __B)
10214 {
10215 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10216 (__v8di) __B,
10217 (__v8di)
10218 _mm512_undefined_si512 (),
10219 (__mmask8) -1);
10220 }
10221
10222 extern __inline __m512i
10223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10224 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10225 {
10226 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10227 (__v8di) __B,
10228 (__v8di) __W, __M);
10229 }
10230
10231 extern __inline __m512i
10232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10233 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10234 {
10235 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10236 (__v8di) __B,
10237 (__v8di)
10238 _mm512_setzero_si512 (),
10239 __M);
10240 }
10241
10242 extern __inline __m512i
10243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10244 _mm512_max_epi32 (__m512i __A, __m512i __B)
10245 {
10246 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10247 (__v16si) __B,
10248 (__v16si)
10249 _mm512_undefined_si512 (),
10250 (__mmask16) -1);
10251 }
10252
10253 extern __inline __m512i
10254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10255 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10256 {
10257 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10258 (__v16si) __B,
10259 (__v16si)
10260 _mm512_setzero_si512 (),
10261 __M);
10262 }
10263
10264 extern __inline __m512i
10265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10266 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10267 {
10268 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10269 (__v16si) __B,
10270 (__v16si) __W, __M);
10271 }
10272
10273 extern __inline __m512i
10274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10275 _mm512_min_epi32 (__m512i __A, __m512i __B)
10276 {
10277 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10278 (__v16si) __B,
10279 (__v16si)
10280 _mm512_undefined_si512 (),
10281 (__mmask16) -1);
10282 }
10283
10284 extern __inline __m512i
10285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10286 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10287 {
10288 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10289 (__v16si) __B,
10290 (__v16si)
10291 _mm512_setzero_si512 (),
10292 __M);
10293 }
10294
10295 extern __inline __m512i
10296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10297 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10298 {
10299 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10300 (__v16si) __B,
10301 (__v16si) __W, __M);
10302 }
10303
10304 extern __inline __m512i
10305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10306 _mm512_max_epu32 (__m512i __A, __m512i __B)
10307 {
10308 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10309 (__v16si) __B,
10310 (__v16si)
10311 _mm512_undefined_si512 (),
10312 (__mmask16) -1);
10313 }
10314
10315 extern __inline __m512i
10316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10317 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10318 {
10319 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10320 (__v16si) __B,
10321 (__v16si)
10322 _mm512_setzero_si512 (),
10323 __M);
10324 }
10325
10326 extern __inline __m512i
10327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10328 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10329 {
10330 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10331 (__v16si) __B,
10332 (__v16si) __W, __M);
10333 }
10334
10335 extern __inline __m512i
10336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10337 _mm512_min_epu32 (__m512i __A, __m512i __B)
10338 {
10339 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10340 (__v16si) __B,
10341 (__v16si)
10342 _mm512_undefined_si512 (),
10343 (__mmask16) -1);
10344 }
10345
10346 extern __inline __m512i
10347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10348 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10349 {
10350 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10351 (__v16si) __B,
10352 (__v16si)
10353 _mm512_setzero_si512 (),
10354 __M);
10355 }
10356
10357 extern __inline __m512i
10358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10359 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10360 {
10361 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10362 (__v16si) __B,
10363 (__v16si) __W, __M);
10364 }
10365
10366 extern __inline __m512
10367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10368 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10369 {
10370 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10371 (__v16sf) __B,
10372 (__v16sf)
10373 _mm512_undefined_ps (),
10374 (__mmask16) -1);
10375 }
10376
10377 extern __inline __m512
10378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10379 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10380 {
10381 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10382 (__v16sf) __B,
10383 (__v16sf) __W,
10384 (__mmask16) __U);
10385 }
10386
10387 extern __inline __m512
10388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10389 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10390 {
10391 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10392 (__v16sf) __B,
10393 (__v16sf)
10394 _mm512_setzero_ps (),
10395 (__mmask16) __U);
10396 }
10397
10398 #ifdef __OPTIMIZE__
10399 extern __inline __m128d
10400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10401 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10402 {
10403 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10404 (__v2df) __B,
10405 __R);
10406 }
10407
10408 extern __inline __m128
10409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10410 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10411 {
10412 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10413 (__v4sf) __B,
10414 __R);
10415 }
10416
10417 extern __inline __m128d
10418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10419 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10420 {
10421 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10422 (__v2df) __B,
10423 __R);
10424 }
10425
10426 extern __inline __m128
10427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10428 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10429 {
10430 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10431 (__v4sf) __B,
10432 __R);
10433 }
10434
10435 #else
10436 #define _mm_max_round_sd(A, B, C) \
10437 (__m128d)__builtin_ia32_maxsd_round(A, B, C)
10438
10439 #define _mm_max_round_ss(A, B, C) \
10440 (__m128)__builtin_ia32_maxss_round(A, B, C)
10441
10442 #define _mm_min_round_sd(A, B, C) \
10443 (__m128d)__builtin_ia32_minsd_round(A, B, C)
10444
10445 #define _mm_min_round_ss(A, B, C) \
10446 (__m128)__builtin_ia32_minss_round(A, B, C)
10447 #endif
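
/* Note (not part of the original header): the rounding argument of the
   *_round_sd/ss intrinsics above must be a compile-time constant; for
   max/min it is typically _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC
   (suppress all exceptions).  A minimal sketch:

     __m128d m = _mm_max_round_sd (a, b, _MM_FROUND_CUR_DIRECTION);
*/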
10448
10449 extern __inline __m512d
10450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10451 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10452 {
10453 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10454 (__v8df) __W,
10455 (__mmask8) __U);
10456 }
10457
10458 extern __inline __m512
10459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10460 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10461 {
10462 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10463 (__v16sf) __W,
10464 (__mmask16) __U);
10465 }
10466
10467 extern __inline __m512i
10468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10469 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10470 {
10471 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10472 (__v8di) __W,
10473 (__mmask8) __U);
10474 }
10475
10476 extern __inline __m512i
10477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10478 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10479 {
10480 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10481 (__v16si) __W,
10482 (__mmask16) __U);
10483 }
10484
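/* Illustrative note (not part of the original header): the blend intrinsics
   above select, per element, the second vector operand (__W) where the mask
   bit is set and the first (__A) where it is clear.  A minimal sketch:

     __m512
     pick (__mmask16 m, __m512 a, __m512 w)
     {
       return _mm512_mask_blend_ps (m, a, w);  // per element: m ? w : a
     }
*/
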
10485 #ifdef __OPTIMIZE__
10486 extern __inline __m128d
10487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10488 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10489 {
10490 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10491 (__v2df) __A,
10492 (__v2df) __B,
10493 __R);
10494 }
10495
10496 extern __inline __m128
10497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10498 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10499 {
10500 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10501 (__v4sf) __A,
10502 (__v4sf) __B,
10503 __R);
10504 }
10505
10506 extern __inline __m128d
10507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10508 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10509 {
10510 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10511 (__v2df) __A,
10512 -(__v2df) __B,
10513 __R);
10514 }
10515
10516 extern __inline __m128
10517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10518 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10519 {
10520 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10521 (__v4sf) __A,
10522 -(__v4sf) __B,
10523 __R);
10524 }
10525
10526 extern __inline __m128d
10527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10528 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10529 {
10530 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10531 -(__v2df) __A,
10532 (__v2df) __B,
10533 __R);
10534 }
10535
10536 extern __inline __m128
10537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10538 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10539 {
10540 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10541 -(__v4sf) __A,
10542 (__v4sf) __B,
10543 __R);
10544 }
10545
10546 extern __inline __m128d
10547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10548 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10549 {
10550 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10551 -(__v2df) __A,
10552 -(__v2df) __B,
10553 __R);
10554 }
10555
10556 extern __inline __m128
10557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10558 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10559 {
10560 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10561 -(__v4sf) __A,
10562 -(__v4sf) __B,
10563 __R);
10564 }
10565 #else
10566 #define _mm_fmadd_round_sd(A, B, C, R) \
10567 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
10568
10569 #define _mm_fmadd_round_ss(A, B, C, R) \
10570 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
10571
10572 #define _mm_fmsub_round_sd(A, B, C, R) \
10573 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
10574
10575 #define _mm_fmsub_round_ss(A, B, C, R) \
10576 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
10577
10578 #define _mm_fnmadd_round_sd(A, B, C, R) \
10579 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
10580
10581 #define _mm_fnmadd_round_ss(A, B, C, R) \
10582 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
10583
10584 #define _mm_fnmsub_round_sd(A, B, C, R) \
10585 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
10586
10587 #define _mm_fnmsub_round_ss(A, B, C, R) \
10588 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
10589 #endif
10590
10591 #ifdef __OPTIMIZE__
10592 extern __inline int
10593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10594 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10595 {
10596 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10597 }
10598
10599 extern __inline int
10600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10601 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10602 {
10603 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10604 }
10605 #else
10606 #define _mm_comi_round_ss(A, B, C, D)\
10607 __builtin_ia32_vcomiss(A, B, C, D)
10608 #define _mm_comi_round_sd(A, B, C, D)\
10609 __builtin_ia32_vcomisd(A, B, C, D)
10610 #endif
10611
10612 extern __inline __m512d
10613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10614 _mm512_sqrt_pd (__m512d __A)
10615 {
10616 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10617 (__v8df)
10618 _mm512_undefined_pd (),
10619 (__mmask8) -1,
10620 _MM_FROUND_CUR_DIRECTION);
10621 }
10622
10623 extern __inline __m512d
10624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10625 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10626 {
10627 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10628 (__v8df) __W,
10629 (__mmask8) __U,
10630 _MM_FROUND_CUR_DIRECTION);
10631 }
10632
10633 extern __inline __m512d
10634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10635 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10636 {
10637 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10638 (__v8df)
10639 _mm512_setzero_pd (),
10640 (__mmask8) __U,
10641 _MM_FROUND_CUR_DIRECTION);
10642 }
10643
10644 extern __inline __m512
10645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10646 _mm512_sqrt_ps (__m512 __A)
10647 {
10648 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10649 (__v16sf)
10650 _mm512_undefined_ps (),
10651 (__mmask16) -1,
10652 _MM_FROUND_CUR_DIRECTION);
10653 }
10654
10655 extern __inline __m512
10656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10657 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10658 {
10659 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10660 (__v16sf) __W,
10661 (__mmask16) __U,
10662 _MM_FROUND_CUR_DIRECTION);
10663 }
10664
10665 extern __inline __m512
10666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10667 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10668 {
10669 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10670 (__v16sf)
10671 _mm512_setzero_ps (),
10672 (__mmask16) __U,
10673 _MM_FROUND_CUR_DIRECTION);
10674 }
10675
10676 extern __inline __m512d
10677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10678 _mm512_add_pd (__m512d __A, __m512d __B)
10679 {
10680 return (__m512d) ((__v8df)__A + (__v8df)__B);
10681 }
10682
10683 extern __inline __m512d
10684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10685 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10686 {
10687 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10688 (__v8df) __B,
10689 (__v8df) __W,
10690 (__mmask8) __U,
10691 _MM_FROUND_CUR_DIRECTION);
10692 }
10693
10694 extern __inline __m512d
10695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10696 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10697 {
10698 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10699 (__v8df) __B,
10700 (__v8df)
10701 _mm512_setzero_pd (),
10702 (__mmask8) __U,
10703 _MM_FROUND_CUR_DIRECTION);
10704 }
10705
10706 extern __inline __m512
10707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10708 _mm512_add_ps (__m512 __A, __m512 __B)
10709 {
10710 return (__m512) ((__v16sf)__A + (__v16sf)__B);
10711 }
10712
10713 extern __inline __m512
10714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10715 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10716 {
10717 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10718 (__v16sf) __B,
10719 (__v16sf) __W,
10720 (__mmask16) __U,
10721 _MM_FROUND_CUR_DIRECTION);
10722 }
10723
10724 extern __inline __m512
10725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10726 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10727 {
10728 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10729 (__v16sf) __B,
10730 (__v16sf)
10731 _mm512_setzero_ps (),
10732 (__mmask16) __U,
10733 _MM_FROUND_CUR_DIRECTION);
10734 }
10735
10736 extern __inline __m512d
10737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10738 _mm512_sub_pd (__m512d __A, __m512d __B)
10739 {
10740 return (__m512d) ((__v8df)__A - (__v8df)__B);
10741 }
10742
10743 extern __inline __m512d
10744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10745 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10746 {
10747 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10748 (__v8df) __B,
10749 (__v8df) __W,
10750 (__mmask8) __U,
10751 _MM_FROUND_CUR_DIRECTION);
10752 }
10753
10754 extern __inline __m512d
10755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10756 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10757 {
10758 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10759 (__v8df) __B,
10760 (__v8df)
10761 _mm512_setzero_pd (),
10762 (__mmask8) __U,
10763 _MM_FROUND_CUR_DIRECTION);
10764 }
10765
10766 extern __inline __m512
10767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10768 _mm512_sub_ps (__m512 __A, __m512 __B)
10769 {
10770 return (__m512) ((__v16sf)__A - (__v16sf)__B);
10771 }
10772
10773 extern __inline __m512
10774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10775 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10776 {
10777 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10778 (__v16sf) __B,
10779 (__v16sf) __W,
10780 (__mmask16) __U,
10781 _MM_FROUND_CUR_DIRECTION);
10782 }
10783
10784 extern __inline __m512
10785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10786 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10787 {
10788 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10789 (__v16sf) __B,
10790 (__v16sf)
10791 _mm512_setzero_ps (),
10792 (__mmask16) __U,
10793 _MM_FROUND_CUR_DIRECTION);
10794 }
10795
10796 extern __inline __m512d
10797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10798 _mm512_mul_pd (__m512d __A, __m512d __B)
10799 {
10800 return (__m512d) ((__v8df)__A * (__v8df)__B);
10801 }
10802
10803 extern __inline __m512d
10804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10805 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10806 {
10807 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10808 (__v8df) __B,
10809 (__v8df) __W,
10810 (__mmask8) __U,
10811 _MM_FROUND_CUR_DIRECTION);
10812 }
10813
10814 extern __inline __m512d
10815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10816 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10817 {
10818 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10819 (__v8df) __B,
10820 (__v8df)
10821 _mm512_setzero_pd (),
10822 (__mmask8) __U,
10823 _MM_FROUND_CUR_DIRECTION);
10824 }
10825
10826 extern __inline __m512
10827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10828 _mm512_mul_ps (__m512 __A, __m512 __B)
10829 {
10830 return (__m512) ((__v16sf)__A * (__v16sf)__B);
10831 }
10832
10833 extern __inline __m512
10834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10835 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10836 {
10837 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10838 (__v16sf) __B,
10839 (__v16sf) __W,
10840 (__mmask16) __U,
10841 _MM_FROUND_CUR_DIRECTION);
10842 }
10843
10844 extern __inline __m512
10845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10846 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10847 {
10848 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10849 (__v16sf) __B,
10850 (__v16sf)
10851 _mm512_setzero_ps (),
10852 (__mmask16) __U,
10853 _MM_FROUND_CUR_DIRECTION);
10854 }
10855
10856 extern __inline __m512d
10857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10858 _mm512_div_pd (__m512d __M, __m512d __V)
10859 {
10860 return (__m512d) ((__v8df)__M / (__v8df)__V);
10861 }
10862
10863 extern __inline __m512d
10864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10865 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10866 {
10867 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10868 (__v8df) __V,
10869 (__v8df) __W,
10870 (__mmask8) __U,
10871 _MM_FROUND_CUR_DIRECTION);
10872 }
10873
10874 extern __inline __m512d
10875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10876 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10877 {
10878 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10879 (__v8df) __V,
10880 (__v8df)
10881 _mm512_setzero_pd (),
10882 (__mmask8) __U,
10883 _MM_FROUND_CUR_DIRECTION);
10884 }
10885
10886 extern __inline __m512
10887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10888 _mm512_div_ps (__m512 __A, __m512 __B)
10889 {
10890 return (__m512) ((__v16sf)__A / (__v16sf)__B);
10891 }
10892
10893 extern __inline __m512
10894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10895 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10896 {
10897 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10898 (__v16sf) __B,
10899 (__v16sf) __W,
10900 (__mmask16) __U,
10901 _MM_FROUND_CUR_DIRECTION);
10902 }
10903
10904 extern __inline __m512
10905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10906 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10907 {
10908 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10909 (__v16sf) __B,
10910 (__v16sf)
10911 _mm512_setzero_ps (),
10912 (__mmask16) __U,
10913 _MM_FROUND_CUR_DIRECTION);
10914 }
10915
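/* Illustrative note (not part of the original header): the unmasked
   add/sub/mul/div forms above use plain vector operators, while the _mask
   forms merge into an existing destination and the _maskz forms zero the
   elements whose mask bit is clear.  A minimal sketch:

     __m512d merged = _mm512_mask_add_pd (old, m, a, b);  // old[i] kept where m clear
     __m512d zeroed = _mm512_maskz_add_pd (m, a, b);      // 0.0 where m clear
*/
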
10916 extern __inline __m512d
10917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10918 _mm512_max_pd (__m512d __A, __m512d __B)
10919 {
10920 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10921 (__v8df) __B,
10922 (__v8df)
10923 _mm512_undefined_pd (),
10924 (__mmask8) -1,
10925 _MM_FROUND_CUR_DIRECTION);
10926 }
10927
10928 extern __inline __m512d
10929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10930 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10931 {
10932 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10933 (__v8df) __B,
10934 (__v8df) __W,
10935 (__mmask8) __U,
10936 _MM_FROUND_CUR_DIRECTION);
10937 }
10938
10939 extern __inline __m512d
10940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10941 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10942 {
10943 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10944 (__v8df) __B,
10945 (__v8df)
10946 _mm512_setzero_pd (),
10947 (__mmask8) __U,
10948 _MM_FROUND_CUR_DIRECTION);
10949 }
10950
10951 extern __inline __m512
10952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953 _mm512_max_ps (__m512 __A, __m512 __B)
10954 {
10955 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10956 (__v16sf) __B,
10957 (__v16sf)
10958 _mm512_undefined_ps (),
10959 (__mmask16) -1,
10960 _MM_FROUND_CUR_DIRECTION);
10961 }
10962
10963 extern __inline __m512
10964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10965 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10966 {
10967 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10968 (__v16sf) __B,
10969 (__v16sf) __W,
10970 (__mmask16) __U,
10971 _MM_FROUND_CUR_DIRECTION);
10972 }
10973
10974 extern __inline __m512
10975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10976 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10977 {
10978 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10979 (__v16sf) __B,
10980 (__v16sf)
10981 _mm512_setzero_ps (),
10982 (__mmask16) __U,
10983 _MM_FROUND_CUR_DIRECTION);
10984 }
10985
10986 extern __inline __m512d
10987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10988 _mm512_min_pd (__m512d __A, __m512d __B)
10989 {
10990 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10991 (__v8df) __B,
10992 (__v8df)
10993 _mm512_undefined_pd (),
10994 (__mmask8) -1,
10995 _MM_FROUND_CUR_DIRECTION);
10996 }
10997
10998 extern __inline __m512d
10999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11000 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11001 {
11002 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11003 (__v8df) __B,
11004 (__v8df) __W,
11005 (__mmask8) __U,
11006 _MM_FROUND_CUR_DIRECTION);
11007 }
11008
11009 extern __inline __m512d
11010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11011 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11012 {
11013 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11014 (__v8df) __B,
11015 (__v8df)
11016 _mm512_setzero_pd (),
11017 (__mmask8) __U,
11018 _MM_FROUND_CUR_DIRECTION);
11019 }
11020
11021 extern __inline __m512
11022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023 _mm512_min_ps (__m512 __A, __m512 __B)
11024 {
11025 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11026 (__v16sf) __B,
11027 (__v16sf)
11028 _mm512_undefined_ps (),
11029 (__mmask16) -1,
11030 _MM_FROUND_CUR_DIRECTION);
11031 }
11032
11033 extern __inline __m512
11034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11035 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11036 {
11037 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11038 (__v16sf) __B,
11039 (__v16sf) __W,
11040 (__mmask16) __U,
11041 _MM_FROUND_CUR_DIRECTION);
11042 }
11043
11044 extern __inline __m512
11045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11046 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11047 {
11048 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11049 (__v16sf) __B,
11050 (__v16sf)
11051 _mm512_setzero_ps (),
11052 (__mmask16) __U,
11053 _MM_FROUND_CUR_DIRECTION);
11054 }
11055
11056 extern __inline __m512d
11057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11058 _mm512_scalef_pd (__m512d __A, __m512d __B)
11059 {
11060 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11061 (__v8df) __B,
11062 (__v8df)
11063 _mm512_undefined_pd (),
11064 (__mmask8) -1,
11065 _MM_FROUND_CUR_DIRECTION);
11066 }
11067
11068 extern __inline __m512d
11069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11070 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11071 {
11072 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11073 (__v8df) __B,
11074 (__v8df) __W,
11075 (__mmask8) __U,
11076 _MM_FROUND_CUR_DIRECTION);
11077 }
11078
11079 extern __inline __m512d
11080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11081 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11082 {
11083 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11084 (__v8df) __B,
11085 (__v8df)
11086 _mm512_setzero_pd (),
11087 (__mmask8) __U,
11088 _MM_FROUND_CUR_DIRECTION);
11089 }
11090
11091 extern __inline __m512
11092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11093 _mm512_scalef_ps (__m512 __A, __m512 __B)
11094 {
11095 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11096 (__v16sf) __B,
11097 (__v16sf)
11098 _mm512_undefined_ps (),
11099 (__mmask16) -1,
11100 _MM_FROUND_CUR_DIRECTION);
11101 }
11102
11103 extern __inline __m512
11104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11105 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11106 {
11107 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11108 (__v16sf) __B,
11109 (__v16sf) __W,
11110 (__mmask16) __U,
11111 _MM_FROUND_CUR_DIRECTION);
11112 }
11113
11114 extern __inline __m512
11115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11116 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11117 {
11118 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11119 (__v16sf) __B,
11120 (__v16sf)
11121 _mm512_setzero_ps (),
11122 (__mmask16) __U,
11123 _MM_FROUND_CUR_DIRECTION);
11124 }
11125
11126 extern __inline __m128d
11127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11128 _mm_scalef_sd (__m128d __A, __m128d __B)
11129 {
11130 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11131 (__v2df) __B,
11132 _MM_FROUND_CUR_DIRECTION);
11133 }
11134
11135 extern __inline __m128
11136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11137 _mm_scalef_ss (__m128 __A, __m128 __B)
11138 {
11139 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11140 (__v4sf) __B,
11141 _MM_FROUND_CUR_DIRECTION);
11142 }
11143
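/* Note (not part of the original header): the scalef intrinsics above compute
   __A * 2^floor(__B) per element, an ldexp-style scaling.  A minimal sketch,
   using _mm512_set1_pd from earlier in this header:

     __m512d
     scale_by_pow2 (__m512d x, double k)
     {
       return _mm512_scalef_pd (x, _mm512_set1_pd (k));
     }
*/
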
11144 extern __inline __m512d
11145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11146 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11147 {
11148 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11149 (__v8df) __B,
11150 (__v8df) __C,
11151 (__mmask8) -1,
11152 _MM_FROUND_CUR_DIRECTION);
11153 }
11154
11155 extern __inline __m512d
11156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11157 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11158 {
11159 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11160 (__v8df) __B,
11161 (__v8df) __C,
11162 (__mmask8) __U,
11163 _MM_FROUND_CUR_DIRECTION);
11164 }
11165
11166 extern __inline __m512d
11167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11168 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11169 {
11170 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11171 (__v8df) __B,
11172 (__v8df) __C,
11173 (__mmask8) __U,
11174 _MM_FROUND_CUR_DIRECTION);
11175 }
11176
11177 extern __inline __m512d
11178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11179 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11180 {
11181 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11182 (__v8df) __B,
11183 (__v8df) __C,
11184 (__mmask8) __U,
11185 _MM_FROUND_CUR_DIRECTION);
11186 }
11187
11188 extern __inline __m512
11189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11190 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11191 {
11192 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11193 (__v16sf) __B,
11194 (__v16sf) __C,
11195 (__mmask16) -1,
11196 _MM_FROUND_CUR_DIRECTION);
11197 }
11198
11199 extern __inline __m512
11200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11201 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11202 {
11203 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11204 (__v16sf) __B,
11205 (__v16sf) __C,
11206 (__mmask16) __U,
11207 _MM_FROUND_CUR_DIRECTION);
11208 }
11209
11210 extern __inline __m512
11211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11212 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11213 {
11214 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11215 (__v16sf) __B,
11216 (__v16sf) __C,
11217 (__mmask16) __U,
11218 _MM_FROUND_CUR_DIRECTION);
11219 }
11220
11221 extern __inline __m512
11222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11223 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11224 {
11225 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11226 (__v16sf) __B,
11227 (__v16sf) __C,
11228 (__mmask16) __U,
11229 _MM_FROUND_CUR_DIRECTION);
11230 }
11231
11232 extern __inline __m512d
11233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11234 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11235 {
11236 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11237 (__v8df) __B,
11238 -(__v8df) __C,
11239 (__mmask8) -1,
11240 _MM_FROUND_CUR_DIRECTION);
11241 }
11242
11243 extern __inline __m512d
11244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11245 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11246 {
11247 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11248 (__v8df) __B,
11249 -(__v8df) __C,
11250 (__mmask8) __U,
11251 _MM_FROUND_CUR_DIRECTION);
11252 }
11253
11254 extern __inline __m512d
11255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11256 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11257 {
11258 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11259 (__v8df) __B,
11260 (__v8df) __C,
11261 (__mmask8) __U,
11262 _MM_FROUND_CUR_DIRECTION);
11263 }
11264
11265 extern __inline __m512d
11266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11267 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11268 {
11269 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11270 (__v8df) __B,
11271 -(__v8df) __C,
11272 (__mmask8) __U,
11273 _MM_FROUND_CUR_DIRECTION);
11274 }
11275
11276 extern __inline __m512
11277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11278 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11279 {
11280 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11281 (__v16sf) __B,
11282 -(__v16sf) __C,
11283 (__mmask16) -1,
11284 _MM_FROUND_CUR_DIRECTION);
11285 }
11286
11287 extern __inline __m512
11288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11289 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11290 {
11291 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11292 (__v16sf) __B,
11293 -(__v16sf) __C,
11294 (__mmask16) __U,
11295 _MM_FROUND_CUR_DIRECTION);
11296 }
11297
11298 extern __inline __m512
11299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11300 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11301 {
11302 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11303 (__v16sf) __B,
11304 (__v16sf) __C,
11305 (__mmask16) __U,
11306 _MM_FROUND_CUR_DIRECTION);
11307 }
11308
11309 extern __inline __m512
11310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11311 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11312 {
11313 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11314 (__v16sf) __B,
11315 -(__v16sf) __C,
11316 (__mmask16) __U,
11317 _MM_FROUND_CUR_DIRECTION);
11318 }
11319
11320 extern __inline __m512d
11321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11322 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11323 {
11324 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11325 (__v8df) __B,
11326 (__v8df) __C,
11327 (__mmask8) -1,
11328 _MM_FROUND_CUR_DIRECTION);
11329 }
11330
11331 extern __inline __m512d
11332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11333 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11334 {
11335 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11336 (__v8df) __B,
11337 (__v8df) __C,
11338 (__mmask8) __U,
11339 _MM_FROUND_CUR_DIRECTION);
11340 }
11341
11342 extern __inline __m512d
11343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11344 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11345 {
11346 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11347 (__v8df) __B,
11348 (__v8df) __C,
11349 (__mmask8) __U,
11350 _MM_FROUND_CUR_DIRECTION);
11351 }
11352
11353 extern __inline __m512d
11354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11355 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11356 {
11357 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11358 (__v8df) __B,
11359 (__v8df) __C,
11360 (__mmask8) __U,
11361 _MM_FROUND_CUR_DIRECTION);
11362 }
11363
11364 extern __inline __m512
11365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11366 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11367 {
11368 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11369 (__v16sf) __B,
11370 (__v16sf) __C,
11371 (__mmask16) -1,
11372 _MM_FROUND_CUR_DIRECTION);
11373 }
11374
11375 extern __inline __m512
11376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11377 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11378 {
11379 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11380 (__v16sf) __B,
11381 (__v16sf) __C,
11382 (__mmask16) __U,
11383 _MM_FROUND_CUR_DIRECTION);
11384 }
11385
11386 extern __inline __m512
11387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11388 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11389 {
11390 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11391 (__v16sf) __B,
11392 (__v16sf) __C,
11393 (__mmask16) __U,
11394 _MM_FROUND_CUR_DIRECTION);
11395 }
11396
11397 extern __inline __m512
11398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11399 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11400 {
11401 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11402 (__v16sf) __B,
11403 (__v16sf) __C,
11404 (__mmask16) __U,
11405 _MM_FROUND_CUR_DIRECTION);
11406 }
11407
11408 extern __inline __m512d
11409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11410 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11411 {
11412 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11413 (__v8df) __B,
11414 -(__v8df) __C,
11415 (__mmask8) -1,
11416 _MM_FROUND_CUR_DIRECTION);
11417 }
11418
11419 extern __inline __m512d
11420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11421 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11422 {
11423 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11424 (__v8df) __B,
11425 -(__v8df) __C,
11426 (__mmask8) __U,
11427 _MM_FROUND_CUR_DIRECTION);
11428 }
11429
11430 extern __inline __m512d
11431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11432 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11433 {
11434 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11435 (__v8df) __B,
11436 (__v8df) __C,
11437 (__mmask8) __U,
11438 _MM_FROUND_CUR_DIRECTION);
11439 }
11440
11441 extern __inline __m512d
11442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11443 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11444 {
11445 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11446 (__v8df) __B,
11447 -(__v8df) __C,
11448 (__mmask8) __U,
11449 _MM_FROUND_CUR_DIRECTION);
11450 }
11451
11452 extern __inline __m512
11453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11454 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11455 {
11456 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11457 (__v16sf) __B,
11458 -(__v16sf) __C,
11459 (__mmask16) -1,
11460 _MM_FROUND_CUR_DIRECTION);
11461 }
11462
11463 extern __inline __m512
11464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11465 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11466 {
11467 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11468 (__v16sf) __B,
11469 -(__v16sf) __C,
11470 (__mmask16) __U,
11471 _MM_FROUND_CUR_DIRECTION);
11472 }
11473
11474 extern __inline __m512
11475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11476 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11477 {
11478 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11479 (__v16sf) __B,
11480 (__v16sf) __C,
11481 (__mmask16) __U,
11482 _MM_FROUND_CUR_DIRECTION);
11483 }
11484
11485 extern __inline __m512
11486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11487 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11488 {
11489 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11490 (__v16sf) __B,
11491 -(__v16sf) __C,
11492 (__mmask16) __U,
11493 _MM_FROUND_CUR_DIRECTION);
11494 }
11495
11496 extern __inline __m512d
11497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11498 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11499 {
11500 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11501 (__v8df) __B,
11502 (__v8df) __C,
11503 (__mmask8) -1,
11504 _MM_FROUND_CUR_DIRECTION);
11505 }
11506
11507 extern __inline __m512d
11508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11509 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11510 {
11511 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11512 (__v8df) __B,
11513 (__v8df) __C,
11514 (__mmask8) __U,
11515 _MM_FROUND_CUR_DIRECTION);
11516 }
11517
11518 extern __inline __m512d
11519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11520 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11521 {
11522 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11523 (__v8df) __B,
11524 (__v8df) __C,
11525 (__mmask8) __U,
11526 _MM_FROUND_CUR_DIRECTION);
11527 }
11528
11529 extern __inline __m512d
11530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11531 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11532 {
11533 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11534 (__v8df) __B,
11535 (__v8df) __C,
11536 (__mmask8) __U,
11537 _MM_FROUND_CUR_DIRECTION);
11538 }
11539
11540 extern __inline __m512
11541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11542 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11543 {
11544 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11545 (__v16sf) __B,
11546 (__v16sf) __C,
11547 (__mmask16) -1,
11548 _MM_FROUND_CUR_DIRECTION);
11549 }
11550
11551 extern __inline __m512
11552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11553 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11554 {
11555 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11556 (__v16sf) __B,
11557 (__v16sf) __C,
11558 (__mmask16) __U,
11559 _MM_FROUND_CUR_DIRECTION);
11560 }
11561
11562 extern __inline __m512
11563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11564 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11565 {
11566 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11567 (__v16sf) __B,
11568 (__v16sf) __C,
11569 (__mmask16) __U,
11570 _MM_FROUND_CUR_DIRECTION);
11571 }
11572
11573 extern __inline __m512
11574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11575 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11576 {
11577 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11578 (__v16sf) __B,
11579 (__v16sf) __C,
11580 (__mmask16) __U,
11581 _MM_FROUND_CUR_DIRECTION);
11582 }
11583
11584 extern __inline __m512d
11585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11586 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11587 {
11588 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11589 (__v8df) __B,
11590 -(__v8df) __C,
11591 (__mmask8) -1,
11592 _MM_FROUND_CUR_DIRECTION);
11593 }
11594
11595 extern __inline __m512d
11596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11597 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11598 {
11599 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11600 (__v8df) __B,
11601 (__v8df) __C,
11602 (__mmask8) __U,
11603 _MM_FROUND_CUR_DIRECTION);
11604 }
11605
11606 extern __inline __m512d
11607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11608 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11609 {
11610 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11611 (__v8df) __B,
11612 (__v8df) __C,
11613 (__mmask8) __U,
11614 _MM_FROUND_CUR_DIRECTION);
11615 }
11616
11617 extern __inline __m512d
11618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11619 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11620 {
11621 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11622 (__v8df) __B,
11623 -(__v8df) __C,
11624 (__mmask8) __U,
11625 _MM_FROUND_CUR_DIRECTION);
11626 }
11627
11628 extern __inline __m512
11629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11630 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11631 {
11632 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11633 (__v16sf) __B,
11634 -(__v16sf) __C,
11635 (__mmask16) -1,
11636 _MM_FROUND_CUR_DIRECTION);
11637 }
11638
11639 extern __inline __m512
11640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11641 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11642 {
11643 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11644 (__v16sf) __B,
11645 (__v16sf) __C,
11646 (__mmask16) __U,
11647 _MM_FROUND_CUR_DIRECTION);
11648 }
11649
11650 extern __inline __m512
11651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11652 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11653 {
11654 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11655 (__v16sf) __B,
11656 (__v16sf) __C,
11657 (__mmask16) __U,
11658 _MM_FROUND_CUR_DIRECTION);
11659 }
11660
11661 extern __inline __m512
11662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11663 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11664 {
11665 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11666 (__v16sf) __B,
11667 -(__v16sf) __C,
11668 (__mmask16) __U,
11669 _MM_FROUND_CUR_DIRECTION);
11670 }
11671
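/* Summary note (not part of the original header): the FMA families above
   differ only in operand signs, each computed with a single rounding:
     fmadd:    A*B + C          fmsub:    A*B - C
     fnmadd:  -(A*B) + C        fnmsub:  -(A*B) - C
     fmaddsub: even-indexed elements A*B - C, odd-indexed A*B + C
     fmsubadd: even-indexed elements A*B + C, odd-indexed A*B - C
   The _mask forms keep the first operand where the mask bit is clear, the
   _mask3 forms keep the third, and the _maskz forms zero those elements.  */
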
11672 extern __inline __m256i
11673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11674 _mm512_cvttpd_epi32 (__m512d __A)
11675 {
11676 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11677 (__v8si)
11678 _mm256_undefined_si256 (),
11679 (__mmask8) -1,
11680 _MM_FROUND_CUR_DIRECTION);
11681 }
11682
11683 extern __inline __m256i
11684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11685 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11686 {
11687 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11688 (__v8si) __W,
11689 (__mmask8) __U,
11690 _MM_FROUND_CUR_DIRECTION);
11691 }
11692
11693 extern __inline __m256i
11694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11695 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11696 {
11697 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11698 (__v8si)
11699 _mm256_setzero_si256 (),
11700 (__mmask8) __U,
11701 _MM_FROUND_CUR_DIRECTION);
11702 }
11703
11704 extern __inline __m256i
11705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11706 _mm512_cvttpd_epu32 (__m512d __A)
11707 {
11708 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11709 (__v8si)
11710 _mm256_undefined_si256 (),
11711 (__mmask8) -1,
11712 _MM_FROUND_CUR_DIRECTION);
11713 }
11714
11715 extern __inline __m256i
11716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11717 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11718 {
11719 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11720 (__v8si) __W,
11721 (__mmask8) __U,
11722 _MM_FROUND_CUR_DIRECTION);
11723 }
11724
11725 extern __inline __m256i
11726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11727 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11728 {
11729 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11730 (__v8si)
11731 _mm256_setzero_si256 (),
11732 (__mmask8) __U,
11733 _MM_FROUND_CUR_DIRECTION);
11734 }
11735
11736 extern __inline __m256i
11737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11738 _mm512_cvtpd_epi32 (__m512d __A)
11739 {
11740 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11741 (__v8si)
11742 _mm256_undefined_si256 (),
11743 (__mmask8) -1,
11744 _MM_FROUND_CUR_DIRECTION);
11745 }
11746
11747 extern __inline __m256i
11748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11749 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11750 {
11751 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11752 (__v8si) __W,
11753 (__mmask8) __U,
11754 _MM_FROUND_CUR_DIRECTION);
11755 }
11756
11757 extern __inline __m256i
11758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11759 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11760 {
11761 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11762 (__v8si)
11763 _mm256_setzero_si256 (),
11764 (__mmask8) __U,
11765 _MM_FROUND_CUR_DIRECTION);
11766 }
11767
11768 extern __inline __m256i
11769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11770 _mm512_cvtpd_epu32 (__m512d __A)
11771 {
11772 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11773 (__v8si)
11774 _mm256_undefined_si256 (),
11775 (__mmask8) -1,
11776 _MM_FROUND_CUR_DIRECTION);
11777 }
11778
11779 extern __inline __m256i
11780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11781 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11782 {
11783 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11784 (__v8si) __W,
11785 (__mmask8) __U,
11786 _MM_FROUND_CUR_DIRECTION);
11787 }
11788
11789 extern __inline __m256i
11790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11791 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11792 {
11793 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11794 (__v8si)
11795 _mm256_setzero_si256 (),
11796 (__mmask8) __U,
11797 _MM_FROUND_CUR_DIRECTION);
11798 }
11799
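/* Note (not part of the original header): the cvtt* conversions above
   truncate toward zero, while the cvt* forms round according to the current
   rounding mode; the *_epu32 variants produce unsigned 32-bit results.
   A minimal sketch:

     __m256i truncated = _mm512_cvttpd_epi32 (x);  // toward zero
     __m256i rounded   = _mm512_cvtpd_epi32 (x);   // current rounding mode
*/
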
11800 extern __inline __m512i
11801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11802 _mm512_cvttps_epi32 (__m512 __A)
11803 {
11804 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11805 (__v16si)
11806 _mm512_undefined_si512 (),
11807 (__mmask16) -1,
11808 _MM_FROUND_CUR_DIRECTION);
11809 }
11810
11811 extern __inline __m512i
11812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11813 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11814 {
11815 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11816 (__v16si) __W,
11817 (__mmask16) __U,
11818 _MM_FROUND_CUR_DIRECTION);
11819 }
11820
11821 extern __inline __m512i
11822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11823 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11824 {
11825 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11826 (__v16si)
11827 _mm512_setzero_si512 (),
11828 (__mmask16) __U,
11829 _MM_FROUND_CUR_DIRECTION);
11830 }
11831
11832 extern __inline __m512i
11833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11834 _mm512_cvttps_epu32 (__m512 __A)
11835 {
11836 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11837 (__v16si)
11838 _mm512_undefined_si512 (),
11839 (__mmask16) -1,
11840 _MM_FROUND_CUR_DIRECTION);
11841 }
11842
11843 extern __inline __m512i
11844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11845 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11846 {
11847 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11848 (__v16si) __W,
11849 (__mmask16) __U,
11850 _MM_FROUND_CUR_DIRECTION);
11851 }
11852
11853 extern __inline __m512i
11854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11855 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11856 {
11857 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11858 (__v16si)
11859 _mm512_setzero_si512 (),
11860 (__mmask16) __U,
11861 _MM_FROUND_CUR_DIRECTION);
11862 }
11863
11864 extern __inline __m512i
11865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11866 _mm512_cvtps_epi32 (__m512 __A)
11867 {
11868 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11869 (__v16si)
11870 _mm512_undefined_si512 (),
11871 (__mmask16) -1,
11872 _MM_FROUND_CUR_DIRECTION);
11873 }
11874
11875 extern __inline __m512i
11876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11877 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11878 {
11879 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11880 (__v16si) __W,
11881 (__mmask16) __U,
11882 _MM_FROUND_CUR_DIRECTION);
11883 }
11884
11885 extern __inline __m512i
11886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11887 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11888 {
11889 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11890 (__v16si)
11891 _mm512_setzero_si512 (),
11892 (__mmask16) __U,
11893 _MM_FROUND_CUR_DIRECTION);
11894 }
11895
11896 extern __inline __m512i
11897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11898 _mm512_cvtps_epu32 (__m512 __A)
11899 {
11900 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11901 (__v16si)
11902 _mm512_undefined_si512 (),
11903 (__mmask16) -1,
11904 _MM_FROUND_CUR_DIRECTION);
11905 }
11906
11907 extern __inline __m512i
11908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11909 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11910 {
11911 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11912 (__v16si) __W,
11913 (__mmask16) __U,
11914 _MM_FROUND_CUR_DIRECTION);
11915 }
11916
11917 extern __inline __m512i
11918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11919 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11920 {
11921 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11922 (__v16si)
11923 _mm512_setzero_si512 (),
11924 (__mmask16) __U,
11925 _MM_FROUND_CUR_DIRECTION);
11926 }
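/* Illustrative usage sketch, not part of the upstream header: the cvtps_epu32
   variants round sixteen floats to unsigned 32-bit integers using the rounding
   mode currently set in MXCSR (that is what _MM_FROUND_CUR_DIRECTION selects),
   whereas the cvttps forms above always truncate toward zero.  The helper name
   is hypothetical.

     static __m512i
     round_to_u32 (__m512 vals)
     {
       return _mm512_cvtps_epu32 (vals);
     }
*/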
11927
11928 #ifdef __x86_64__
11929 extern __inline __m128
11930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11931 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11932 {
11933 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11934 _MM_FROUND_CUR_DIRECTION);
11935 }
11936
11937 extern __inline __m128d
11938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11939 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11940 {
11941 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11942 _MM_FROUND_CUR_DIRECTION);
11943 }
11944 #endif
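/* Illustrative usage sketch, not part of the upstream header: on x86-64 the
   cvtu64 intrinsics convert an unsigned 64-bit integer into the low element
   of a scalar vector without the sign error a plain signed conversion would
   introduce for values above LLONG_MAX.  The helper is hypothetical.

     #ifdef __x86_64__
     static double
     u64_to_double (unsigned long long x)
     {
       return _mm_cvtsd_f64 (_mm_cvtu64_sd (_mm_set_sd (0.0), x));
     }
     #endif
*/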
11945
11946 extern __inline __m128
11947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11948 _mm_cvtu32_ss (__m128 __A, unsigned __B)
11949 {
11950 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11951 _MM_FROUND_CUR_DIRECTION);
11952 }
11953
11954 extern __inline __m512
11955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11956 _mm512_cvtepi32_ps (__m512i __A)
11957 {
11958 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11959 (__v16sf)
11960 _mm512_undefined_ps (),
11961 (__mmask16) -1,
11962 _MM_FROUND_CUR_DIRECTION);
11963 }
11964
11965 extern __inline __m512
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11968 {
11969 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11970 (__v16sf) __W,
11971 (__mmask16) __U,
11972 _MM_FROUND_CUR_DIRECTION);
11973 }
11974
11975 extern __inline __m512
11976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11977 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11978 {
11979 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11980 (__v16sf)
11981 _mm512_setzero_ps (),
11982 (__mmask16) __U,
11983 _MM_FROUND_CUR_DIRECTION);
11984 }
11985
11986 extern __inline __m512
11987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11988 _mm512_cvtepu32_ps (__m512i __A)
11989 {
11990 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11991 (__v16sf)
11992 _mm512_undefined_ps (),
11993 (__mmask16) -1,
11994 _MM_FROUND_CUR_DIRECTION);
11995 }
11996
11997 extern __inline __m512
11998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11999 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12000 {
12001 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12002 (__v16sf) __W,
12003 (__mmask16) __U,
12004 _MM_FROUND_CUR_DIRECTION);
12005 }
12006
12007 extern __inline __m512
12008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12009 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12010 {
12011 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12012 (__v16sf)
12013 _mm512_setzero_ps (),
12014 (__mmask16) __U,
12015 _MM_FROUND_CUR_DIRECTION);
12016 }
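/* Illustrative usage sketch, not part of the upstream header: cvtepu32_ps
   treats each 32-bit lane as unsigned, so values above INT_MAX convert to the
   expected large positive floats instead of the negative results the signed
   cvtepi32_ps variants above would give.  The helper is hypothetical.

     static __m512
     u32_lanes_to_float (__m512i lanes)
     {
       return _mm512_cvtepu32_ps (lanes);
     }
*/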
12017
12018 #ifdef __OPTIMIZE__
12019 extern __inline __m512d
12020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12021 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12022 {
12023 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12024 (__v8df) __B,
12025 (__v8di) __C,
12026 __imm,
12027 (__mmask8) -1,
12028 _MM_FROUND_CUR_DIRECTION);
12029 }
12030
12031 extern __inline __m512d
12032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12033 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12034 __m512i __C, const int __imm)
12035 {
12036 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12037 (__v8df) __B,
12038 (__v8di) __C,
12039 __imm,
12040 (__mmask8) __U,
12041 _MM_FROUND_CUR_DIRECTION);
12042 }
12043
12044 extern __inline __m512d
12045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12047 __m512i __C, const int __imm)
12048 {
12049 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12050 (__v8df) __B,
12051 (__v8di) __C,
12052 __imm,
12053 (__mmask8) __U,
12054 _MM_FROUND_CUR_DIRECTION);
12055 }
12056
12057 extern __inline __m512
12058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12059 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12060 {
12061 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12062 (__v16sf) __B,
12063 (__v16si) __C,
12064 __imm,
12065 (__mmask16) -1,
12066 _MM_FROUND_CUR_DIRECTION);
12067 }
12068
12069 extern __inline __m512
12070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12071 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12072 __m512i __C, const int __imm)
12073 {
12074 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12075 (__v16sf) __B,
12076 (__v16si) __C,
12077 __imm,
12078 (__mmask16) __U,
12079 _MM_FROUND_CUR_DIRECTION);
12080 }
12081
12082 extern __inline __m512
12083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12085 __m512i __C, const int __imm)
12086 {
12087 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12088 (__v16sf) __B,
12089 (__v16si) __C,
12090 __imm,
12091 (__mmask16) __U,
12092 _MM_FROUND_CUR_DIRECTION);
12093 }
12094
12095 extern __inline __m128d
12096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12097 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12098 {
12099 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12100 (__v2df) __B,
12101 (__v2di) __C, __imm,
12102 (__mmask8) -1,
12103 _MM_FROUND_CUR_DIRECTION);
12104 }
12105
12106 extern __inline __m128d
12107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12108 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12109 __m128i __C, const int __imm)
12110 {
12111 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12112 (__v2df) __B,
12113 (__v2di) __C, __imm,
12114 (__mmask8) __U,
12115 _MM_FROUND_CUR_DIRECTION);
12116 }
12117
12118 extern __inline __m128d
12119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12120 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12121 __m128i __C, const int __imm)
12122 {
12123 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12124 (__v2df) __B,
12125 (__v2di) __C,
12126 __imm,
12127 (__mmask8) __U,
12128 _MM_FROUND_CUR_DIRECTION);
12129 }
12130
12131 extern __inline __m128
12132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12133 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12134 {
12135 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12136 (__v4sf) __B,
12137 (__v4si) __C, __imm,
12138 (__mmask8) -1,
12139 _MM_FROUND_CUR_DIRECTION);
12140 }
12141
12142 extern __inline __m128
12143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12144 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12145 __m128i __C, const int __imm)
12146 {
12147 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12148 (__v4sf) __B,
12149 (__v4si) __C, __imm,
12150 (__mmask8) __U,
12151 _MM_FROUND_CUR_DIRECTION);
12152 }
12153
12154 extern __inline __m128
12155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12156 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12157 __m128i __C, const int __imm)
12158 {
12159 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12160 (__v4sf) __B,
12161 (__v4si) __C, __imm,
12162 (__mmask8) __U,
12163 _MM_FROUND_CUR_DIRECTION);
12164 }
12165 #else
12166 #define _mm512_fixupimm_pd(X, Y, Z, C) \
12167 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12168 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12169 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12170
12171 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
12172 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12173 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12174 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12175
12176 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
12177 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
12178 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12179 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12180
12181 #define _mm512_fixupimm_ps(X, Y, Z, C) \
12182 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12183 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12184 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
12185
12186 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
12187 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12188 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12189 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12190
12191 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
12192 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
12193 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12194 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12195
12196 #define _mm_fixupimm_sd(X, Y, Z, C) \
12197 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12198 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12199 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12200
12201 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
12202 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12203 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12204 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12205
12206 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
12207 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
12208 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12209 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12210
12211 #define _mm_fixupimm_ss(X, Y, Z, C) \
12212 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12213 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12214 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12215
12216 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
12217 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12218 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12219 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12220
12221 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
12222 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
12223 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12224 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12225 #endif
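/* Illustrative usage sketch, not part of the upstream header: fixupimm
   classifies each lane of its second vector operand (NaN, zero, infinity,
   and so on) and, driven by a per-lane response table packed into the integer
   operand, either keeps the corresponding lane of the first operand or
   substitutes a fixed-up value; the immediate controls which exceptions may
   be signalled.  The zero table below is only a placeholder, not a worked
   constant.

     static __m512d
     fixup_specials (__m512d dflt, __m512d src, __m512i table)
     {
       return _mm512_fixupimm_pd (dflt, src, table, 0);
     }
*/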
12226
12227 #ifdef __x86_64__
12228 extern __inline unsigned long long
12229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12230 _mm_cvtss_u64 (__m128 __A)
12231 {
12232 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12233 __A,
12234 _MM_FROUND_CUR_DIRECTION);
12235 }
12236
12237 extern __inline unsigned long long
12238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12239 _mm_cvttss_u64 (__m128 __A)
12240 {
12241 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12242 __A,
12243 _MM_FROUND_CUR_DIRECTION);
12244 }
12245
12246 extern __inline long long
12247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12248 _mm_cvttss_i64 (__m128 __A)
12249 {
12250 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12251 _MM_FROUND_CUR_DIRECTION);
12252 }
12253 #endif /* __x86_64__ */
12254
12255 extern __inline unsigned
12256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12257 _mm_cvtss_u32 (__m128 __A)
12258 {
12259 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12260 _MM_FROUND_CUR_DIRECTION);
12261 }
12262
12263 extern __inline unsigned
12264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12265 _mm_cvttss_u32 (__m128 __A)
12266 {
12267 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12268 _MM_FROUND_CUR_DIRECTION);
12269 }
12270
12271 extern __inline int
12272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12273 _mm_cvttss_i32 (__m128 __A)
12274 {
12275 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12276 _MM_FROUND_CUR_DIRECTION);
12277 }
12278
12279 #ifdef __x86_64__
12280 extern __inline unsigned long long
12281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282 _mm_cvtsd_u64 (__m128d __A)
12283 {
12284 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12285 __A,
12286 _MM_FROUND_CUR_DIRECTION);
12287 }
12288
12289 extern __inline unsigned long long
12290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12291 _mm_cvttsd_u64 (__m128d __A)
12292 {
12293 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12294 __A,
12295 _MM_FROUND_CUR_DIRECTION);
12296 }
12297
12298 extern __inline long long
12299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300 _mm_cvttsd_i64 (__m128d __A)
12301 {
12302 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12303 _MM_FROUND_CUR_DIRECTION);
12304 }
12305 #endif /* __x86_64__ */
12306
12307 extern __inline unsigned
12308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12309 _mm_cvtsd_u32 (__m128d __A)
12310 {
12311 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12312 _MM_FROUND_CUR_DIRECTION);
12313 }
12314
12315 extern __inline unsigned
12316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12317 _mm_cvttsd_u32 (__m128d __A)
12318 {
12319 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12320 _MM_FROUND_CUR_DIRECTION);
12321 }
12322
12323 extern __inline int
12324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12325 _mm_cvttsd_i32 (__m128d __A)
12326 {
12327 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12328 _MM_FROUND_CUR_DIRECTION);
12329 }
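/* Illustrative usage sketch, not part of the upstream header: the scalar
   conversions above read the low element of an __m128d; the cvtt forms
   truncate toward zero, the cvt forms round per MXCSR, and the _u32/_u64
   forms yield unsigned results.  The helper is hypothetical.

     static unsigned
     double_to_u32_trunc (double x)
     {
       return _mm_cvttsd_u32 (_mm_set_sd (x));
     }
*/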
12330
12331 extern __inline __m512d
12332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12333 _mm512_cvtps_pd (__m256 __A)
12334 {
12335 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12336 (__v8df)
12337 _mm512_undefined_pd (),
12338 (__mmask8) -1,
12339 _MM_FROUND_CUR_DIRECTION);
12340 }
12341
12342 extern __inline __m512d
12343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12344 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12345 {
12346 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12347 (__v8df) __W,
12348 (__mmask8) __U,
12349 _MM_FROUND_CUR_DIRECTION);
12350 }
12351
12352 extern __inline __m512d
12353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12354 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12355 {
12356 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12357 (__v8df)
12358 _mm512_setzero_pd (),
12359 (__mmask8) __U,
12360 _MM_FROUND_CUR_DIRECTION);
12361 }
12362
12363 extern __inline __m512
12364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12365 _mm512_cvtph_ps (__m256i __A)
12366 {
12367 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12368 (__v16sf)
12369 _mm512_undefined_ps (),
12370 (__mmask16) -1,
12371 _MM_FROUND_CUR_DIRECTION);
12372 }
12373
12374 extern __inline __m512
12375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12376 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12377 {
12378 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12379 (__v16sf) __W,
12380 (__mmask16) __U,
12381 _MM_FROUND_CUR_DIRECTION);
12382 }
12383
12384 extern __inline __m512
12385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12386 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12387 {
12388 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12389 (__v16sf)
12390 _mm512_setzero_ps (),
12391 (__mmask16) __U,
12392 _MM_FROUND_CUR_DIRECTION);
12393 }
12394
12395 extern __inline __m256
12396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12397 _mm512_cvtpd_ps (__m512d __A)
12398 {
12399 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12400 (__v8sf)
12401 _mm256_undefined_ps (),
12402 (__mmask8) -1,
12403 _MM_FROUND_CUR_DIRECTION);
12404 }
12405
12406 extern __inline __m256
12407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12408 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12409 {
12410 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12411 (__v8sf) __W,
12412 (__mmask8) __U,
12413 _MM_FROUND_CUR_DIRECTION);
12414 }
12415
12416 extern __inline __m256
12417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12418 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12419 {
12420 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12421 (__v8sf)
12422 _mm256_setzero_ps (),
12423 (__mmask8) __U,
12424 _MM_FROUND_CUR_DIRECTION);
12425 }
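/* Illustrative usage sketch, not part of the upstream header: cvtpd_ps
   narrows eight doubles to eight floats, possibly losing precision; because
   the builtin is invoked with _MM_FROUND_CUR_DIRECTION, the narrowing rounds
   according to the current MXCSR mode.  The helper is hypothetical.

     static __m256
     narrow_to_float (__m512d x)
     {
       return _mm512_cvtpd_ps (x);
     }
*/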
12426
12427 #ifdef __OPTIMIZE__
12428 extern __inline __m512
12429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12430 _mm512_getexp_ps (__m512 __A)
12431 {
12432 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12433 (__v16sf)
12434 _mm512_undefined_ps (),
12435 (__mmask16) -1,
12436 _MM_FROUND_CUR_DIRECTION);
12437 }
12438
12439 extern __inline __m512
12440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12441 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12442 {
12443 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12444 (__v16sf) __W,
12445 (__mmask16) __U,
12446 _MM_FROUND_CUR_DIRECTION);
12447 }
12448
12449 extern __inline __m512
12450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12451 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12452 {
12453 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12454 (__v16sf)
12455 _mm512_setzero_ps (),
12456 (__mmask16) __U,
12457 _MM_FROUND_CUR_DIRECTION);
12458 }
12459
12460 extern __inline __m512d
12461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12462 _mm512_getexp_pd (__m512d __A)
12463 {
12464 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12465 (__v8df)
12466 _mm512_undefined_pd (),
12467 (__mmask8) -1,
12468 _MM_FROUND_CUR_DIRECTION);
12469 }
12470
12471 extern __inline __m512d
12472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12473 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12474 {
12475 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12476 (__v8df) __W,
12477 (__mmask8) __U,
12478 _MM_FROUND_CUR_DIRECTION);
12479 }
12480
12481 extern __inline __m512d
12482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12483 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12484 {
12485 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12486 (__v8df)
12487 _mm512_setzero_pd (),
12488 (__mmask8) __U,
12489 _MM_FROUND_CUR_DIRECTION);
12490 }
12491
12492 extern __inline __m128
12493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12494 _mm_getexp_ss (__m128 __A, __m128 __B)
12495 {
12496 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12497 (__v4sf) __B,
12498 _MM_FROUND_CUR_DIRECTION);
12499 }
12500
12501 extern __inline __m128d
12502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12503 _mm_getexp_sd (__m128d __A, __m128d __B)
12504 {
12505 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12506 (__v2df) __B,
12507 _MM_FROUND_CUR_DIRECTION);
12508 }
12509
12510 extern __inline __m512d
12511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12512 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12513 _MM_MANTISSA_SIGN_ENUM __C)
12514 {
12515 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12516 (__C << 2) | __B,
12517 _mm512_undefined_pd (),
12518 (__mmask8) -1,
12519 _MM_FROUND_CUR_DIRECTION);
12520 }
12521
12522 extern __inline __m512d
12523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12524 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12525 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12526 {
12527 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12528 (__C << 2) | __B,
12529 (__v8df) __W, __U,
12530 _MM_FROUND_CUR_DIRECTION);
12531 }
12532
12533 extern __inline __m512d
12534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12535 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12536 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12537 {
12538 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12539 (__C << 2) | __B,
12540 (__v8df)
12541 _mm512_setzero_pd (),
12542 __U,
12543 _MM_FROUND_CUR_DIRECTION);
12544 }
12545
12546 extern __inline __m512
12547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12548 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12549 _MM_MANTISSA_SIGN_ENUM __C)
12550 {
12551 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12552 (__C << 2) | __B,
12553 _mm512_undefined_ps (),
12554 (__mmask16) -1,
12555 _MM_FROUND_CUR_DIRECTION);
12556 }
12557
12558 extern __inline __m512
12559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12560 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12561 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12562 {
12563 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12564 (__C << 2) | __B,
12565 (__v16sf) __W, __U,
12566 _MM_FROUND_CUR_DIRECTION);
12567 }
12568
12569 extern __inline __m512
12570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12571 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12572 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12573 {
12574 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12575 (__C << 2) | __B,
12576 (__v16sf)
12577 _mm512_setzero_ps (),
12578 __U,
12579 _MM_FROUND_CUR_DIRECTION);
12580 }
12581
12582 extern __inline __m128d
12583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12584 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12585 _MM_MANTISSA_SIGN_ENUM __D)
12586 {
12587 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12588 (__v2df) __B,
12589 (__D << 2) | __C,
12590 _MM_FROUND_CUR_DIRECTION);
12591 }
12592
12593 extern __inline __m128
12594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12595 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12596 _MM_MANTISSA_SIGN_ENUM __D)
12597 {
12598 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12599 (__v4sf) __B,
12600 (__D << 2) | __C,
12601 _MM_FROUND_CUR_DIRECTION);
12602 }
12603
12604 #else
12605 #define _mm512_getmant_pd(X, B, C) \
12606 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12607 (int)(((C)<<2) | (B)), \
12608 (__v8df)_mm512_undefined_pd(), \
12609 (__mmask8)-1,\
12610 _MM_FROUND_CUR_DIRECTION))
12611
12612 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
12613 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12614 (int)(((C)<<2) | (B)), \
12615 (__v8df)(__m512d)(W), \
12616 (__mmask8)(U),\
12617 _MM_FROUND_CUR_DIRECTION))
12618
12619 #define _mm512_maskz_getmant_pd(U, X, B, C) \
12620 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12621 (int)(((C)<<2) | (B)), \
12622 (__v8df)_mm512_setzero_pd(), \
12623 (__mmask8)(U),\
12624 _MM_FROUND_CUR_DIRECTION))
12625 #define _mm512_getmant_ps(X, B, C) \
12626 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12627 (int)(((C)<<2) | (B)), \
12628 (__v16sf)_mm512_undefined_ps(), \
12629 (__mmask16)-1,\
12630 _MM_FROUND_CUR_DIRECTION))
12631
12632 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
12633 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12634 (int)(((C)<<2) | (B)), \
12635 (__v16sf)(__m512)(W), \
12636 (__mmask16)(U),\
12637 _MM_FROUND_CUR_DIRECTION))
12638
12639 #define _mm512_maskz_getmant_ps(U, X, B, C) \
12640 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12641 (int)(((C)<<2) | (B)), \
12642 (__v16sf)_mm512_setzero_ps(), \
12643 (__mmask16)(U),\
12644 _MM_FROUND_CUR_DIRECTION))
12645 #define _mm_getmant_sd(X, Y, C, D) \
12646 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
12647 (__v2df)(__m128d)(Y), \
12648 (int)(((D)<<2) | (C)), \
12649 _MM_FROUND_CUR_DIRECTION))
12650
12651 #define _mm_getmant_ss(X, Y, C, D) \
12652 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
12653 (__v4sf)(__m128)(Y), \
12654 (int)(((D)<<2) | (C)), \
12655 _MM_FROUND_CUR_DIRECTION))
12656
12657 #define _mm_getexp_ss(A, B) \
12658 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
12659 _MM_FROUND_CUR_DIRECTION))
12660
12661 #define _mm_getexp_sd(A, B) \
12662 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
12663 _MM_FROUND_CUR_DIRECTION))
12664
12665 #define _mm512_getexp_ps(A) \
12666 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12667 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
12668
12669 #define _mm512_mask_getexp_ps(W, U, A) \
12670 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12671 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12672
12673 #define _mm512_maskz_getexp_ps(U, A) \
12674 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12675 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12676
12677 #define _mm512_getexp_pd(A) \
12678 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12679 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
12680
12681 #define _mm512_mask_getexp_pd(W, U, A) \
12682 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12683 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12684
12685 #define _mm512_maskz_getexp_pd(U, A) \
12686 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12687 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12688 #endif
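/* Illustrative usage sketch, not part of the upstream header: getexp and
   getmant together decompose a finite non-zero value x as x = m * 2^e, with
   the mantissa normalised to the interval chosen by the _MM_MANT_NORM_*
   argument.  The enumerator names are the ones these intrinsics accept; the
   helper itself is hypothetical.

     static void
     split_mantissa_exponent (__m512d x, __m512d *mant, __m512d *exp)
     {
       *exp  = _mm512_getexp_pd (x);
       *mant = _mm512_getmant_pd (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
     }
*/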
12689
12690 #ifdef __OPTIMIZE__
12691 extern __inline __m512
12692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12693 _mm512_roundscale_ps (__m512 __A, const int __imm)
12694 {
12695 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12696 (__v16sf)
12697 _mm512_undefined_ps (),
12698 -1,
12699 _MM_FROUND_CUR_DIRECTION);
12700 }
12701
12702 extern __inline __m512
12703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12704 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12705 const int __imm)
12706 {
12707 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12708 (__v16sf) __A,
12709 (__mmask16) __B,
12710 _MM_FROUND_CUR_DIRECTION);
12711 }
12712
12713 extern __inline __m512
12714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12715 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12716 {
12717 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12718 __imm,
12719 (__v16sf)
12720 _mm512_setzero_ps (),
12721 (__mmask16) __A,
12722 _MM_FROUND_CUR_DIRECTION);
12723 }
12724
12725 extern __inline __m512d
12726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12727 _mm512_roundscale_pd (__m512d __A, const int __imm)
12728 {
12729 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12730 (__v8df)
12731 _mm512_undefined_pd (),
12732 -1,
12733 _MM_FROUND_CUR_DIRECTION);
12734 }
12735
12736 extern __inline __m512d
12737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12738 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12739 const int __imm)
12740 {
12741 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12742 (__v8df) __A,
12743 (__mmask8) __B,
12744 _MM_FROUND_CUR_DIRECTION);
12745 }
12746
12747 extern __inline __m512d
12748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12749 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12750 {
12751 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12752 __imm,
12753 (__v8df)
12754 _mm512_setzero_pd (),
12755 (__mmask8) __A,
12756 _MM_FROUND_CUR_DIRECTION);
12757 }
12758
12759 extern __inline __m128
12760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12761 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12762 {
12763 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12764 (__v4sf) __B, __imm,
12765 _MM_FROUND_CUR_DIRECTION);
12766 }
12767
12768 extern __inline __m128d
12769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12770 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12771 {
12772 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12773 (__v2df) __B, __imm,
12774 _MM_FROUND_CUR_DIRECTION);
12775 }
12776
12777 #else
12778 #define _mm512_roundscale_ps(A, B) \
12779 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
12780 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
12781 #define _mm512_mask_roundscale_ps(A, B, C, D) \
12782 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
12783 (int)(D), \
12784 (__v16sf)(__m512)(A), \
12785 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
12786 #define _mm512_maskz_roundscale_ps(A, B, C) \
12787 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
12788 (int)(C), \
12789 (__v16sf)_mm512_setzero_ps(),\
12790 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
12791 #define _mm512_roundscale_pd(A, B) \
12792 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
12793 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12794 #define _mm512_mask_roundscale_pd(A, B, C, D) \
12795 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
12796 (int)(D), \
12797 (__v8df)(__m512d)(A), \
12798 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
12799 #define _mm512_maskz_roundscale_pd(A, B, C) \
12800 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
12801 (int)(C), \
12802 (__v8df)_mm512_setzero_pd(),\
12803 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
12804 #define _mm_roundscale_ss(A, B, C) \
12805 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
12806 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
12807 #define _mm_roundscale_sd(A, B, C) \
12808 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
12809 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
12810 #endif
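/* Illustrative usage sketch, not part of the upstream header: roundscale
   rounds every lane to a fixed number of binary fraction digits encoded in
   the upper bits of the immediate, with the low bits selecting the rounding
   mode.  With _MM_FROUND_TO_ZERO and no scale bits set it simply truncates
   each lane to a whole number.  The helper is hypothetical.

     static __m512
     truncate_lanes (__m512 x)
     {
       return _mm512_roundscale_ps (x, _MM_FROUND_TO_ZERO);
     }
*/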
12811
12812 #ifdef __OPTIMIZE__
12813 extern __inline __mmask8
12814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12815 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12816 {
12817 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12818 (__v8df) __Y, __P,
12819 (__mmask8) -1,
12820 _MM_FROUND_CUR_DIRECTION);
12821 }
12822
12823 extern __inline __mmask16
12824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12825 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12826 {
12827 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12828 (__v16sf) __Y, __P,
12829 (__mmask16) -1,
12830 _MM_FROUND_CUR_DIRECTION);
12831 }
12832
12833 extern __inline __mmask16
12834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12835 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12836 {
12837 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12838 (__v16sf) __Y, __P,
12839 (__mmask16) __U,
12840 _MM_FROUND_CUR_DIRECTION);
12841 }
12842
12843 extern __inline __mmask8
12844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12845 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12846 {
12847 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12848 (__v8df) __Y, __P,
12849 (__mmask8) __U,
12850 _MM_FROUND_CUR_DIRECTION);
12851 }
12852
12853 extern __inline __mmask8
12854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12855 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12856 {
12857 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12858 (__v2df) __Y, __P,
12859 (__mmask8) -1,
12860 _MM_FROUND_CUR_DIRECTION);
12861 }
12862
12863 extern __inline __mmask8
12864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12865 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12866 {
12867 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12868 (__v2df) __Y, __P,
12869 (__mmask8) __M,
12870 _MM_FROUND_CUR_DIRECTION);
12871 }
12872
12873 extern __inline __mmask8
12874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12875 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12876 {
12877 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12878 (__v4sf) __Y, __P,
12879 (__mmask8) -1,
12880 _MM_FROUND_CUR_DIRECTION);
12881 }
12882
12883 extern __inline __mmask8
12884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12885 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12886 {
12887 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12888 (__v4sf) __Y, __P,
12889 (__mmask8) __M,
12890 _MM_FROUND_CUR_DIRECTION);
12891 }
12892
12893 #else
12894 #define _mm512_cmp_pd_mask(X, Y, P) \
12895 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
12896 (__v8df)(__m512d)(Y), (int)(P),\
12897 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
12898
12899 #define _mm512_cmp_ps_mask(X, Y, P) \
12900 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
12901 (__v16sf)(__m512)(Y), (int)(P),\
12902 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
12903
12904 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
12905 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
12906 (__v8df)(__m512d)(Y), (int)(P),\
12907 (__mmask8)M, _MM_FROUND_CUR_DIRECTION))
12908
12909 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
12910 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
12911 (__v16sf)(__m512)(Y), (int)(P),\
12912 (__mmask16)M,_MM_FROUND_CUR_DIRECTION))
12913
12914 #define _mm_cmp_sd_mask(X, Y, P) \
12915 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
12916 (__v2df)(__m128d)(Y), (int)(P),\
12917 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
12918
12919 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \
12920 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
12921 (__v2df)(__m128d)(Y), (int)(P),\
12922 M,_MM_FROUND_CUR_DIRECTION))
12923
12924 #define _mm_cmp_ss_mask(X, Y, P) \
12925 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
12926 (__v4sf)(__m128)(Y), (int)(P), \
12927 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
12928
12929 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \
12930 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
12931 (__v4sf)(__m128)(Y), (int)(P), \
12932 M,_MM_FROUND_CUR_DIRECTION))
12933 #endif
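/* Illustrative usage sketch, not part of the upstream header: these
   comparisons return a bit-mask with one bit per lane rather than a vector of
   all-ones/all-zero lanes.  _CMP_LT_OS is one of the AVX comparison
   predicates that <immintrin.h> already provides; the helper is hypothetical.

     static int
     count_lanes_below (__m512 x, float limit)
     {
       __mmask16 m = _mm512_cmp_ps_mask (x, _mm512_set1_ps (limit), _CMP_LT_OS);
       return __builtin_popcount ((unsigned) m);
     }
*/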
12934
12935 extern __inline __mmask16
12936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12937 _mm512_kmov (__mmask16 __A)
12938 {
12939 return __builtin_ia32_kmov16 (__A);
12940 }
12941
12942 extern __inline __m512
12943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12944 _mm512_castpd_ps (__m512d __A)
12945 {
12946 return (__m512) (__A);
12947 }
12948
12949 extern __inline __m512i
12950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12951 _mm512_castpd_si512 (__m512d __A)
12952 {
12953 return (__m512i) (__A);
12954 }
12955
12956 extern __inline __m512d
12957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12958 _mm512_castps_pd (__m512 __A)
12959 {
12960 return (__m512d) (__A);
12961 }
12962
12963 extern __inline __m512i
12964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12965 _mm512_castps_si512 (__m512 __A)
12966 {
12967 return (__m512i) (__A);
12968 }
12969
12970 extern __inline __m512
12971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12972 _mm512_castsi512_ps (__m512i __A)
12973 {
12974 return (__m512) (__A);
12975 }
12976
12977 extern __inline __m512d
12978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12979 _mm512_castsi512_pd (__m512i __A)
12980 {
12981 return (__m512d) (__A);
12982 }
12983
12984 extern __inline __m128d
12985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12986 _mm512_castpd512_pd128 (__m512d __A)
12987 {
12988 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
12989 }
12990
12991 extern __inline __m128
12992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12993 _mm512_castps512_ps128 (__m512 __A)
12994 {
12995 return _mm512_extractf32x4_ps(__A, 0);
12996 }
12997
12998 extern __inline __m128i
12999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13000 _mm512_castsi512_si128 (__m512i __A)
13001 {
13002 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13003 }
13004
13005 extern __inline __m256d
13006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13007 _mm512_castpd512_pd256 (__m512d __A)
13008 {
13009 return _mm512_extractf64x4_pd(__A, 0);
13010 }
13011
13012 extern __inline __m256
13013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13014 _mm512_castps512_ps256 (__m512 __A)
13015 {
13016 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13017 }
13018
13019 extern __inline __m256i
13020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13021 _mm512_castsi512_si256 (__m512i __A)
13022 {
13023 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13024 }
13025
13026 extern __inline __m512d
13027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13028 _mm512_castpd128_pd512 (__m128d __A)
13029 {
13030 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13031 }
13032
13033 extern __inline __m512
13034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13035 _mm512_castps128_ps512 (__m128 __A)
13036 {
13037 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13038 }
13039
13040 extern __inline __m512i
13041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13042 _mm512_castsi128_si512 (__m128i __A)
13043 {
13044 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13045 }
13046
13047 extern __inline __m512d
13048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13049 _mm512_castpd256_pd512 (__m256d __A)
13050 {
13051 return __builtin_ia32_pd512_256pd (__A);
13052 }
13053
13054 extern __inline __m512
13055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13056 _mm512_castps256_ps512 (__m256 __A)
13057 {
13058 return __builtin_ia32_ps512_256ps (__A);
13059 }
13060
13061 extern __inline __m512i
13062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13063 _mm512_castsi256_si512 (__m256i __A)
13064 {
13065 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13066 }
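/* Illustrative usage sketch, not part of the upstream header: the cast
   intrinsics above reinterpret or extract without converting any values.
   The 512-to-128/256 casts keep the low lanes, while for the 128/256-to-512
   casts the upper lanes are left unspecified, so they should only feed
   operations that ignore or overwrite them.  The helper is hypothetical.

     static float
     first_lane (__m512 x)
     {
       return _mm_cvtss_f32 (_mm512_castps512_ps128 (x));
     }
*/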
13067
13068 extern __inline __mmask16
13069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13070 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13071 {
13072 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13073 (__v16si) __B, 0,
13074 (__mmask16) -1);
13075 }
13076
13077 extern __inline __mmask16
13078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13079 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13080 {
13081 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13082 (__v16si) __B, 0, __U);
13083 }
13084
13085 extern __inline __mmask8
13086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13087 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13088 {
13089 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13090 (__v8di) __B, 0, __U);
13091 }
13092
13093 extern __inline __mmask8
13094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13095 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13096 {
13097 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13098 (__v8di) __B, 0,
13099 (__mmask8) -1);
13100 }
13101
13102 extern __inline __mmask16
13103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13104 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13105 {
13106 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13107 (__v16si) __B, 6,
13108 (__mmask16) -1);
13109 }
13110
13111 extern __inline __mmask16
13112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13113 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13114 {
13115 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13116 (__v16si) __B, 6, __U);
13117 }
13118
13119 extern __inline __mmask8
13120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13121 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13122 {
13123 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13124 (__v8di) __B, 6, __U);
13125 }
13126
13127 extern __inline __mmask8
13128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13129 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13130 {
13131 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13132 (__v8di) __B, 6,
13133 (__mmask8) -1);
13134 }
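/* Illustrative usage sketch, not part of the upstream header: the epu
   comparisons treat lanes as unsigned, so a lane holding 0xFFFFFFFF compares
   greater than 1 here, whereas the signed epi32 comparisons elsewhere in this
   header order it as -1.  The helper is hypothetical.

     static __mmask16
     lanes_above_u32 (__m512i values, unsigned threshold)
     {
       return _mm512_cmpgt_epu32_mask (values,
                                       _mm512_set1_epi32 ((int) threshold));
     }
*/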
13135
13136 #ifdef __DISABLE_AVX512F__
13137 #undef __DISABLE_AVX512F__
13138 #pragma GCC pop_options
13139 #endif /* __DISABLE_AVX512F__ */
13140
13141 #endif /* _AVX512FINTRIN_H_INCLUDED */